Skip to content

Commit

Permalink
WIP API caching (#7)
Browse files Browse the repository at this point in the history
  • Loading branch information
woodruffw authored Aug 28, 2024
1 parent 699fcbd commit f8468b8
Show file tree
Hide file tree
Showing 10 changed files with 114 additions and 51 deletions.
2 changes: 1 addition & 1 deletion src/audit/artipacked.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ impl<'a> WorkflowAudit<'a> for Artipacked<'a> {
Ok(Self { config })
}

fn audit<'w>(&self, workflow: &'w Workflow) -> Result<Vec<Finding<'w>>> {
fn audit<'w>(&mut self, workflow: &'w Workflow) -> Result<Vec<Finding<'w>>> {
log::debug!("audit: {} evaluating {}", Self::ident(), &workflow.filename);

let mut findings = vec![];
Expand Down
2 changes: 1 addition & 1 deletion src/audit/hardcoded_container_credentials.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ impl<'a> WorkflowAudit<'a> for HardcodedContainerCredentials<'a> {
}

fn audit<'w>(
&self,
&mut self,
workflow: &'w crate::models::Workflow,
) -> anyhow::Result<Vec<crate::finding::Finding<'w>>> {
let mut findings = vec![];
Expand Down
109 changes: 80 additions & 29 deletions src/audit/impostor_commit.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,14 @@
//!
//! [`clank`]: https://github.com/chainguard-dev/clank

use std::ops::Deref;
use std::{
collections::{hash_map::Entry, HashMap},
ops::Deref,
};

use crate::{
finding::{Confidence, Finding, Severity},
github_api::{self, ComparisonStatus},
github_api::{self, Branch, ComparisonStatus, Tag},
models::{AuditConfig, Uses, Workflow},
};

Expand All @@ -23,21 +26,84 @@ pub const IMPOSTOR_ANNOTATION: &str = "uses a commit that doesn't belong to the
pub(crate) struct ImpostorCommit<'a> {
pub(crate) _config: AuditConfig<'a>,
pub(crate) client: github_api::Client,
pub(crate) ref_cache: HashMap<(String, String), (Vec<Branch>, Vec<Tag>)>,
/// A cache of `(base_ref, head_ref) => status`.
///
/// We don't bother disambiguating this cache by org/repo, since `head_ref`
/// is a commit ref and we expect those to be globally unique.
/// This is not technically true of Git SHAs due to SHAttered, but is
/// effectively true for SHAs on GitHub due to GitHub's collision detection.
pub(crate) ref_comparison_cache: HashMap<(String, String), bool>,
}

impl<'a> ImpostorCommit<'a> {
fn named_refs(&mut self, uses: Uses<'_>) -> Result<(Vec<Branch>, Vec<Tag>)> {
let entry = match self
.ref_cache
.entry((uses.owner.to_string(), uses.repo.to_string()))
{
Entry::Occupied(o) => o.into_mut(),
Entry::Vacant(v) => {
let branches = self.client.list_branches(uses.owner, uses.repo)?;
let tags = self.client.list_tags(uses.owner, uses.repo)?;

v.insert((branches, tags))
}
};

// Dumb: we shold be able to borrow immutably here.
Ok((entry.0.clone(), entry.1.clone()))
}

fn named_ref_contains_commit(
&mut self,
uses: &Uses<'_>,
base_ref: &str,
head_ref: &str,
) -> Result<bool> {
let presence = match self
.ref_comparison_cache
.entry((base_ref.to_string(), head_ref.to_string()))
{
Entry::Occupied(o) => {
log::debug!("cache hit: {base_ref}..{head_ref}");
o.into_mut()
}
Entry::Vacant(v) => {
let presence = match self
.client
.compare_commits(uses.owner, uses.repo, base_ref, head_ref)?
{
// A base ref "contains" a commit if the base is either identical
// to the head ("identical") or the target is behind the base ("behind").
Some(comp) => matches!(
comp.status,
ComparisonStatus::Behind | ComparisonStatus::Identical
),
// GitHub's API returns 404 when the refs under comparison
// are completely divergent, i.e. no contains relationship is possible.
None => false,
};

v.insert(presence)
}
};

Ok(*presence)
}

/// Returns a boolean indicating whether or not this commit is an "impostor",
/// i.e. resolves due to presence in GitHub's fork network but is not actually
/// present in any of the specified `owner/repo`'s tags or branches.
fn impostor(&self, uses: Uses<'_>) -> Result<bool> {
let branches = self.client.list_branches(uses.owner, uses.repo)?;
fn impostor(&mut self, uses: Uses<'_>) -> Result<bool> {
let (branches, tags) = self.named_refs(uses)?;

// If there's no ref or the ref is not a commit, there's nothing to impersonate.
let Some(head_ref) = uses.commit_ref() else {
return Ok(false);
};

for branch in &branches {
for branch in branches {
if self.named_ref_contains_commit(
&uses,
&format!("refs/heads/{}", &branch.name),
Expand All @@ -47,9 +113,7 @@ impl<'a> ImpostorCommit<'a> {
}
}

let tags = self.client.list_tags(uses.owner, uses.repo)?;

for tag in &tags {
for tag in tags {
if self.named_ref_contains_commit(
&uses,
&format!("refs/tags/{}", &tag.name),
Expand All @@ -61,28 +125,13 @@ impl<'a> ImpostorCommit<'a> {

// If we've made it here, the commit isn't present in any commit or tag's history,
// strongly suggesting that it's an impostor.
log::warn!(
"strong impostor candidate: {head_ref} for {org}/{repo}",
org = uses.owner,
repo = uses.repo
);
Ok(true)
}

fn named_ref_contains_commit(
&self,
uses: &Uses<'_>,
base_ref: &str,
head_ref: &str,
) -> Result<bool> {
match self
.client
.compare_commits(uses.owner, uses.repo, base_ref, head_ref)?
{
// A base ref "contains" a commit if the base is either identical
// to the head ("identical") or the target is behind the base ("behind").
Some(comparison) => Ok(matches!(
comparison.status,
ComparisonStatus::Behind | ComparisonStatus::Identical
)),
None => Ok(false),
}
}
}

impl<'a> WorkflowAudit<'a> for ImpostorCommit<'a> {
Expand All @@ -96,10 +145,12 @@ impl<'a> WorkflowAudit<'a> for ImpostorCommit<'a> {
Ok(ImpostorCommit {
_config: config,
client,
ref_cache: Default::default(),
ref_comparison_cache: Default::default(),
})
}

fn audit<'w>(&self, workflow: &'w Workflow) -> Result<Vec<Finding<'w>>> {
fn audit<'w>(&mut self, workflow: &'w Workflow) -> Result<Vec<Finding<'w>>> {
log::debug!("audit: {} evaluating {}", Self::ident(), &workflow.filename);

let mut findings = vec![];
Expand Down
2 changes: 1 addition & 1 deletion src/audit/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -28,5 +28,5 @@ pub(crate) trait WorkflowAudit<'a> {
where
Self: Sized;

fn audit<'w>(&self, workflow: &'w Workflow) -> Result<Vec<Finding<'w>>>;
fn audit<'w>(&mut self, workflow: &'w Workflow) -> Result<Vec<Finding<'w>>>;
}
2 changes: 1 addition & 1 deletion src/audit/pull_request_target.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ impl<'a> WorkflowAudit<'a> for PullRequestTarget<'a> {
Ok(Self { _config: config })
}

fn audit<'w>(&self, workflow: &'w Workflow) -> Result<Vec<Finding<'w>>> {
fn audit<'w>(&mut self, workflow: &'w Workflow) -> Result<Vec<Finding<'w>>> {
log::debug!("audit: {} evaluating {}", Self::ident(), &workflow.filename);

let trigger = &workflow.on;
Expand Down
2 changes: 1 addition & 1 deletion src/audit/ref_confusion.rs
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ impl<'a> WorkflowAudit<'a> for RefConfusion<'a> {
}

fn audit<'w>(
&self,
&mut self,
workflow: &'w crate::models::Workflow,
) -> anyhow::Result<Vec<crate::finding::Finding<'w>>> {
log::debug!("audit: {} evaluating {}", Self::ident(), &workflow.filename);
Expand Down
4 changes: 2 additions & 2 deletions src/audit/template_injection.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ impl<'a> TemplateInjection<'a> {
for expr in iter_expressions(run) {
let bare = expr.as_bare();

if bare.starts_with("secrets.") {
if bare.starts_with("secrets.") || bare == "github.token" {
// While not ideal, secret expansion is typically not exploitable.
continue;
} else if bare.starts_with("inputs.") {
Expand Down Expand Up @@ -70,7 +70,7 @@ impl<'a> WorkflowAudit<'a> for TemplateInjection<'a> {
}

fn audit<'w>(
&self,
&mut self,
workflow: &'w crate::models::Workflow,
) -> anyhow::Result<Vec<crate::finding::Finding<'w>>> {
let mut findings = vec![];
Expand Down
2 changes: 1 addition & 1 deletion src/audit/use_trusted_publishing.rs
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ impl<'a> WorkflowAudit<'a> for UseTrustedPublishing<'a> {
}

fn audit<'w>(
&self,
&mut self,
workflow: &'w crate::models::Workflow,
) -> anyhow::Result<Vec<crate::finding::Finding<'w>>> {
log::debug!("audit: {} evaluating {}", Self::ident(), &workflow.filename);
Expand Down
8 changes: 4 additions & 4 deletions src/github_api/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ impl Client {
head: &str,
) -> Result<Option<Comparison>> {
let url = format!(
"{api_base}/repos/{owner}/{repo}/compare/{base}..{head}",
"{api_base}/repos/{owner}/{repo}/compare/{base}...{head}",
api_base = self.api_base
);

Expand All @@ -96,7 +96,7 @@ impl Client {
StatusCode::OK => Ok(Some(resp.json()?)),
StatusCode::NOT_FOUND => Ok(None),
s => Err(anyhow!(
"error from GitHub API while comparing commits: {s}"
"{owner}/{repo}: error from GitHub API while comparing commits: {s}"
)),
}
}
Expand All @@ -107,15 +107,15 @@ impl Client {
/// This model is intentionally incomplete.
///
/// See <https://docs.github.com/en/rest/branches/branches?apiVersion=2022-11-28>.
#[derive(Deserialize)]
#[derive(Deserialize, Clone)]
pub(crate) struct Branch {
pub(crate) name: String,
}

/// A single tag, as returned by GitHub's tags endpoints.
///
/// This model is intentionally incomplete.
#[derive(Deserialize)]
#[derive(Deserialize, Clone)]
pub(crate) struct Tag {
pub(crate) name: String,
}
Expand Down
32 changes: 22 additions & 10 deletions src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ use std::{io::stdout, path::PathBuf};

use anyhow::{anyhow, Result};
use audit::WorkflowAudit;
use clap::Parser;
use clap::{Parser, ValueEnum};
use models::AuditConfig;

mod audit;
Expand All @@ -26,10 +26,22 @@ struct Args {
#[arg(long, env)]
gh_token: String,

/// The output format to emit. By default, plain text will be emitted
/// on an interactive terminal and JSON otherwise.
#[arg(long, value_enum)]
format: Option<OutputFormat>,

/// The workflow filename or directory to audit.
input: PathBuf,
}

#[derive(Debug, Copy, Clone, ValueEnum)]
pub(crate) enum OutputFormat {
Plain,
Json,
Sarif,
}

impl<'a> From<&'a Args> for AuditConfig<'a> {
fn from(value: &'a Args) -> Self {
Self {
Expand Down Expand Up @@ -79,18 +91,18 @@ fn main() -> Result<()> {
}

let mut results = vec![];
let audits: &[&dyn WorkflowAudit] = &[
&audit::artipacked::Artipacked::new(config)?,
&audit::pull_request_target::PullRequestTarget::new(config)?,
&audit::impostor_commit::ImpostorCommit::new(config)?,
&audit::ref_confusion::RefConfusion::new(config)?,
&audit::use_trusted_publishing::UseTrustedPublishing::new(config)?,
&audit::template_injection::TemplateInjection::new(config)?,
&audit::hardcoded_container_credentials::HardcodedContainerCredentials::new(config)?,
let audits: &mut [&mut dyn WorkflowAudit] = &mut [
&mut audit::artipacked::Artipacked::new(config)?,
&mut audit::pull_request_target::PullRequestTarget::new(config)?,
&mut audit::impostor_commit::ImpostorCommit::new(config)?,
&mut audit::ref_confusion::RefConfusion::new(config)?,
&mut audit::use_trusted_publishing::UseTrustedPublishing::new(config)?,
&mut audit::template_injection::TemplateInjection::new(config)?,
&mut audit::hardcoded_container_credentials::HardcodedContainerCredentials::new(config)?,
];
for workflow in workflows.iter() {
// TODO: Proper abstraction for multiple audits here.
for audit in audits {
for audit in audits.iter_mut() {
for finding in audit.audit(workflow)? {
results.push(finding);
}
Expand Down

0 comments on commit f8468b8

Please sign in to comment.