From 0e04aed2eca135b5930162c0d34fba66383e1d5a Mon Sep 17 00:00:00 2001 From: William Woodruff Date: Wed, 4 Sep 2024 17:10:47 -0400 Subject: [PATCH] WIP non-step job key locations (#11) --- src/audit/hardcoded_container_credentials.rs | 4 +- src/finding/locate.rs | 121 +++++++++++++++---- src/finding/mod.rs | 25 +++- src/models.rs | 8 ++ 4 files changed, 132 insertions(+), 26 deletions(-) diff --git a/src/audit/hardcoded_container_credentials.rs b/src/audit/hardcoded_container_credentials.rs index 197e547..614db73 100644 --- a/src/audit/hardcoded_container_credentials.rs +++ b/src/audit/hardcoded_container_credentials.rs @@ -62,7 +62,7 @@ impl<'a> WorkflowAudit<'a> for HardcodedContainerCredentials<'a> { .severity(Severity::High) .confidence(Confidence::High) .add_location( - job.location() + job.key_location("container") .annotated("container registry password is hard-coded"), ) .build(workflow)?, @@ -86,7 +86,7 @@ impl<'a> WorkflowAudit<'a> for HardcodedContainerCredentials<'a> { Self::finding() .severity(Severity::High) .confidence(Confidence::High) - .add_location(job.location().annotated(format!( + .add_location(job.key_location("services").annotated(format!( "service {service}: container registry password is hard-coded" ))) .build(workflow)?, diff --git a/src/finding/locate.rs b/src/finding/locate.rs index 62ea59b..09afaf1 100644 --- a/src/finding/locate.rs +++ b/src/finding/locate.rs @@ -1,4 +1,4 @@ -//! `tree-sitter` helpers for extracting and locating `Finding` features +//! `tree-sitter` helpers for extracting and locating concrete features //! in the original YAML. use anyhow::{Ok, Result}; @@ -24,6 +24,38 @@ const TOP_LEVEL_KEY: &str = r#" ) "#; +/// Captures an arbitrary job-level key. +const JOB_LEVEL_KEY: &str = r#" +( + (block_mapping_pair + key: (flow_node (plain_scalar (string_scalar) @jobs_key)) + value: (block_node + (block_mapping + (block_mapping_pair + key: (flow_node (plain_scalar (string_scalar) @job_name)) + value: (block_node + (block_mapping + (block_mapping_pair + key: (flow_node (plain_scalar (string_scalar) @job_key_name)) + value: ( + [ + (block_node (block_mapping)) + (flow_node) + ] + ) + ) @job_key_value + ) + ) + ) + ) + ) + ) + (#eq? @jobs_key "jobs") + (#eq? @job_name "__JOB_NAME__") + (#eq? @job_key_name "__JOB_KEY__") +) +"#; + /// Captures an entire workflow job, including non-step keys. const ENTIRE_JOB: &str = r#" ( @@ -111,7 +143,11 @@ impl Locator { .next() .expect("horrific, embarassing tree-sitter query failure"); - let cap = group.captures[capture_index as usize]; + let cap = group + .captures + .iter() + .find(|qc| qc.index == capture_index) + .unwrap(); let children = cap.node.children(&mut cap.node.walk()).collect::>(); let step_node = children[step.index]; @@ -121,28 +157,71 @@ impl Locator { feature: step_node.utf8_text(workflow.raw.as_bytes())?, }) } - None => { - // Job with no interior step: capture the entire job - // and emit it. - let job_query = - Query::new(&self.language, &ENTIRE_JOB.replace("__JOB_NAME__", job.id))?; + None => match job.key { + Some(key) => { + // Job with a non-step key; capture the matching key's + // span and emit it. + let job_key_query = Query::new( + &self.language, + &JOB_LEVEL_KEY + .replace("__JOB_NAME__", job.id) + .replace("__JOB_KEY__", key), + )?; - let (group, _) = cursor - .captures( - &job_query, - workflow.tree.root_node(), - workflow.raw.as_bytes(), - ) - .next() - .expect("horrific, embarassing tree-sitter query failure"); + let capture_index = job_key_query + .capture_index_for_name("job_key_value") + .unwrap(); - let cap = group.captures[0]; + let (group, _) = cursor + .captures( + &job_key_query, + workflow.tree.root_node(), + workflow.raw.as_bytes(), + ) + .next() + .expect("horrific, embarassing tree-sitter query failure"); - Ok(Feature { - location: cap.node.into(), - feature: cap.node.utf8_text(workflow.raw.as_bytes())?, - }) - } + // NOTE(ww): Empirically the captures are sometimes out + // of order here (i.e. the list and index orders don't + // match up). I'm sure there's a good reason for this, but + // it means we have to find() instead of just indexing + // via `capture_index`. + let cap = group + .captures + .iter() + .find(|qc| qc.index == capture_index) + .unwrap(); + + Ok(Feature { + location: cap.node.into(), + feature: cap.node.utf8_text(workflow.raw.as_bytes())?, + }) + } + None => { + // Job with no interior step and no explicit key: + // capture the entire job and emit it. + let job_query = Query::new( + &self.language, + &ENTIRE_JOB.replace("__JOB_NAME__", job.id), + )?; + + let (group, _) = cursor + .captures( + &job_query, + workflow.tree.root_node(), + workflow.raw.as_bytes(), + ) + .next() + .expect("horrific, embarassing tree-sitter query failure"); + + let cap = group.captures[0]; + + Ok(Feature { + location: cap.node.into(), + feature: cap.node.utf8_text(workflow.raw.as_bytes())?, + }) + } + }, }, None => match &location.key { // If we're given a top-level key to isolate, query for it. diff --git a/src/finding/mod.rs b/src/finding/mod.rs index c9d2227..297ff1c 100644 --- a/src/finding/mod.rs +++ b/src/finding/mod.rs @@ -49,7 +49,9 @@ pub(crate) struct JobLocation<'w> { /// The job's unique ID within its parent workflow. pub(crate) id: &'w str, - // TODO: key for non-step isolation, like WorkflowLocation. + /// A non-step job-level key, like [`WorkflowLocation::key`]. + pub(crate) key: Option<&'w str>, + /// The job's name, if present. pub(crate) name: Option<&'w str>, @@ -58,9 +60,25 @@ pub(crate) struct JobLocation<'w> { } impl<'w> JobLocation<'w> { + /// Creates a new `JobLocation` with the given non-step `key`. + /// + /// Clears any `step` in the process. + pub(crate) fn with_key(&self, key: &'w str) -> JobLocation<'w> { + JobLocation { + id: self.id, + key: Some(key), + name: self.name, + step: None, + } + } + + /// Creates a new `JobLocation` with the given interior step location. + /// + /// Clears any non-step `key` in the process. fn with_step(&self, step: &Step<'w>) -> JobLocation<'w> { JobLocation { id: self.id, + key: None, name: self.name, step: Some(step.into()), } @@ -73,7 +91,7 @@ pub(crate) struct WorkflowLocation<'w> { pub(crate) name: &'w str, /// A top-level workflow key to isolate, if present. - pub(crate) key: Option<&'static str>, + pub(crate) key: Option<&'w str>, /// The job location within this workflow, if present. pub(crate) job: Option>, @@ -85,7 +103,7 @@ pub(crate) struct WorkflowLocation<'w> { impl<'w> WorkflowLocation<'w> { /// Creates a new `WorkflowLocation` with the given `key`. Any inner /// job location is cleared. - pub(crate) fn with_key(&self, key: &'static str) -> WorkflowLocation<'w> { + pub(crate) fn with_key(&self, key: &'w str) -> WorkflowLocation<'w> { WorkflowLocation { name: self.name, key: Some(key), @@ -101,6 +119,7 @@ impl<'w> WorkflowLocation<'w> { key: None, job: Some(JobLocation { id: job.id, + key: None, name: job.inner.name(), step: None, }), diff --git a/src/models.rs b/src/models.rs index fcf08b0..a91d363 100644 --- a/src/models.rs +++ b/src/models.rs @@ -93,6 +93,14 @@ impl<'w> Job<'w> { self.parent.with_job(self) } + pub(crate) fn key_location(&self, key: &'w str) -> WorkflowLocation<'w> { + let mut location = self.parent.with_job(self); + // NOTE: Infallible unwrap due to job always being supplied above. + location.job = Some(location.job.unwrap().with_key(key)); + + location + } + pub(crate) fn steps(&self) -> Steps<'w> { Steps::new(self) }