From e202bd4ea287a67dce05d7c5aa23b6a9cf2bcd03 Mon Sep 17 00:00:00 2001 From: William Woodruff Date: Mon, 20 Oct 2025 21:30:23 -0400 Subject: [PATCH] feat: yamlpath: anchor support (#1266) --- crates/yamlpath/src/lib.rs | 193 ++++++++++++++---- .../tests/testcases/anchors-basic.yml | 22 ++ .../yamlpath/tests/testcases/anchors-list.yml | 70 +++++++ .../tests/testcases/anchors-nested.yml | 23 +++ .../tests/testcases/interceding-comment.yml | 15 ++ crates/zizmor/src/main.rs | 10 + crates/zizmor/src/registry.rs | 2 +- crates/zizmor/src/utils.rs | 20 ++ crates/zizmor/tests/integration/e2e.rs | 1 + .../zizmor/tests/integration/e2e/anchors.rs | 47 +++++ .../integration__e2e__gha_hazmat.snap | 5 - .../integration__e2e__issue_1065.snap | 5 - .../integration__e2e__issue_1207.snap | 5 - .../integration__e2e__issue_569.snap | 5 - .../integration__e2e__issue_726.snap | 5 - .../integration__e2e__menagerie-2.snap | 6 - .../integration__e2e__menagerie.snap | 5 - .../integration__e2e__pr_960_backstop.snap | 5 - .../integration/test-data/anchors/basic.yml | 15 ++ docs/release-notes.md | 3 + docs/usage.md | 37 +++- 21 files changed, 418 insertions(+), 81 deletions(-) create mode 100644 crates/yamlpath/tests/testcases/anchors-basic.yml create mode 100644 crates/yamlpath/tests/testcases/anchors-list.yml create mode 100644 crates/yamlpath/tests/testcases/anchors-nested.yml create mode 100644 crates/zizmor/tests/integration/e2e/anchors.rs create mode 100644 crates/zizmor/tests/integration/test-data/anchors/basic.yml diff --git a/crates/yamlpath/src/lib.rs b/crates/yamlpath/src/lib.rs index 46ec72b0..60cc54c9 100644 --- a/crates/yamlpath/src/lib.rs +++ b/crates/yamlpath/src/lib.rs @@ -315,16 +315,33 @@ impl Tree { let mut anchor_map = HashMap::new(); for anchor in TreeIter::new(tree).filter(|n| n.kind() == "anchor") { - let anchor_name = anchor.utf8_text(tree.source.as_bytes()).unwrap(); + // NOTE(ww): We could poke into the `anchor_name` child + // instead of slicing, but this is simpler. + let anchor_name = &anchor.utf8_text(tree.source.as_bytes()).unwrap()[1..]; // Only insert if the anchor name is unique. - if anchor_map.contains_key(&anchor_name[1..]) { + if anchor_map.contains_key(anchor_name) { return Err(QueryError::DuplicateAnchor(anchor_name[1..].to_string())); } - // NOTE(ww): We could poke into the `anchor_name` child - // instead of slicing, but this is simpler. - anchor_map.insert(&anchor_name[1..], anchor.parent().unwrap()); + // NOTE(ww): We insert the anchor's next non-comment + // sibling as the anchor's target. This makes things + // a bit simpler when descending later, plus it produces + // more useful spans, since neither the anchor node + // nor its parent are useful in the aliased context. + let parent = anchor.parent().ok_or_else(|| { + QueryError::UnexpectedNode("anchor node has no parent".into()) + })?; + + let mut cursor = parent.walk(); + let sibling = parent + .named_children(&mut cursor) + .find(|child| child.kind() != "anchor" && child.kind() != "comment") + .ok_or_else(|| { + QueryError::UnexpectedNode("anchor has no non-comment sibling".into()) + })?; + + anchor_map.insert(anchor_name, sibling); } Ok(anchor_map) @@ -371,6 +388,9 @@ pub struct Document { flow_pair_id: u16, block_sequence_item_id: u16, comment_id: u16, + anchor_id: u16, + alias_id: u16, + block_scalar_id: u16, } impl Document { @@ -396,9 +416,6 @@ impl Document { tree, }; - // let anchor_id = language.id_for_node_kind("anchor", true); - // let alias_id = language.id_for_node_kind("alias", true); - Ok(Self { tree: Tree::build(source_tree)?, line_index, @@ -413,6 +430,9 @@ impl Document { flow_pair_id: language.id_for_node_kind("flow_pair", true), block_sequence_item_id: language.id_for_node_kind("block_sequence_item", true), comment_id: language.id_for_node_kind("comment", true), + anchor_id: language.id_for_node_kind("anchor", true), + alias_id: language.id_for_node_kind("alias", true), + block_scalar_id: language.id_for_node_kind("block_scalar", true), }) } @@ -666,6 +686,20 @@ impl Document { } } + // Our focus node might be an alias, in which case we need to + // do one last leap to get our "real" final focus node. + // TODO(ww): What about nested aliases? + focus_node = match focus_node.child(0) { + Some(child) if child.kind_id() == self.alias_id => { + let alias_name = child.utf8_text(self.source().as_bytes()).unwrap(); + let anchor_map = self.tree.borrow_dependent(); + *anchor_map + .get(&alias_name[1..]) + .ok_or_else(|| QueryError::Other(format!("unknown alias: {}", alias_name)))? + } + _ => focus_node, + }; + focus_node = match mode { QueryMode::Pretty => { // If we're in "pretty" mode, we want to return the @@ -689,13 +723,20 @@ impl Document { // the parent block/flow pair node that contains the key, // and isolate on the key child instead. - // If we're already on block/flow pair, then we're already - // the key's parent. let parent_node = if focus_node.kind_id() == self.block_mapping_pair_id || focus_node.kind_id() == self.flow_pair_id { + // If we're already on block/flow pair, then we're already + // the key's parent. focus_node + } else if focus_node.kind_id() == self.block_scalar_id { + // We might be on the internal `block_scalar` node, if + // we got here via an alias. We need to go up two levels + // to get to the mapping pair. + focus_node.parent().unwrap().parent().unwrap() } else { + // Otherwise, we expect to be on the `block_node` + // or `flow_node`, so we go up one level. focus_node.parent().unwrap() }; @@ -738,11 +779,55 @@ impl Document { Ok(focus_node) } - fn descend<'b>(&self, node: &Node<'b>, component: &Component) -> Result, QueryError> { + fn descend<'b>( + &'b self, + node: &Node<'b>, + component: &Component, + ) -> Result, QueryError> { // The cursor is assumed to start on a block_node or flow_node, - // which has a single child containing the inner scalar/vector + // which has a child containing the inner scalar/vector/alias // type we're descending through. - let child = node.child(0).unwrap(); + // + // To get to that child, we might have to skip over any + // anchor nodes that we're not actually aliasing through + // in this descent step. + // + // For example, for a YAML snippet like: + // + // ```yaml + // foo: &foo + // bar: baz + // ``` + // + // ...the relevant part of the CST looks roughly like: + // + // ``` + // block_node <- `node` points here + // |--- anchor <- we need to skip this + // |--- block_mapping <- we want `child` to point here + // ``` + let mut child = { + let mut cursor = node.walk(); + node.named_children(&mut cursor) + .find(|n| n.kind_id() != self.anchor_id) + .ok_or_else(|| { + QueryError::Other(format!( + "node of kind {} has no non-anchor child", + node.kind() + )) + })? + }; + + // We might be on an alias node, in which case we need to + // jump to the alias's target via the anchor map. + if child.kind_id() == self.alias_id { + let alias_name = node.utf8_text(self.source().as_bytes()).unwrap(); + let anchor_map = self.tree.borrow_dependent(); + + child = *anchor_map + .get(&alias_name[1..]) + .ok_or_else(|| QueryError::Other(format!("unknown alias: {}", alias_name)))?; + } // We expect the child to be a sequence or mapping of either // flow or block type. @@ -816,33 +901,67 @@ impl Document { Err(QueryError::ExhaustedMapping(expected.into())) } - fn descend_sequence<'b>(&self, node: &Node<'b>, idx: usize) -> Result, QueryError> { + /// Given a `block_sequence` or `flow_sequence` node, return + /// a full list of child nodes after expanding any aliases present. + /// + /// The returned child nodes are the inner + /// `block_node`/`flow_node`/`flow_pair` nodes for each sequence item. + fn flatten_sequence<'b>(&'b self, node: &Node<'b>) -> Result>, QueryError> { + let mut children = vec![]; + let mut cur = node.walk(); - // TODO: Optimize; we shouldn't collect the entire child set just to extract one. - let children = node - .named_children(&mut cur) - .filter(|n| { - n.kind_id() == self.block_sequence_item_id - || n.kind_id() == self.flow_node_id - || n.kind_id() == self.flow_pair_id - }) - .collect::>(); + for child in node.named_children(&mut cur).filter(|child| { + child.kind_id() == self.block_sequence_item_id + || child.kind_id() == self.flow_node_id + || child.kind_id() == self.flow_pair_id + }) { + let mut child = child; + + // If we have a `block_sequence_item`, we need to get its + // inner `block_node`/`flow_node`, which might be interceded + // by comments. + if child.kind_id() == self.block_sequence_item_id { + let mut cur = child.walk(); + child = child + .named_children(&mut cur) + .find(|c| c.kind_id() == self.block_node_id || c.kind_id() == self.flow_node_id) + .ok_or_else(|| { + QueryError::MissingChild(child.kind().into(), "block_sequence_item".into()) + })?; + } + + // `child` is now a `block_node`, a `flow_node`, or `flow_pair`: + // + // `block_node` looks like `- a: b` + // `flow_node` looks like `- a` + // `flow_pair` looks like `[a: b]` + // + // From here, we need to peek inside each and see if it's + // an alias. If it is, we expand the alias; otherwise, we + // just keep the child as-is. + if child.named_child(0).unwrap().kind() == "alias" { + let alias_name = &child.utf8_text(self.source().as_bytes()).unwrap()[1..]; + let anchor_map = self.tree.borrow_dependent(); + let aliased_node = anchor_map + .get(alias_name) + .ok_or_else(|| QueryError::Other(format!("unknown alias: {}", alias_name)))?; + + children.extend(self.flatten_sequence(aliased_node)?); + } else { + children.push(child); + } + } + + Ok(children) + } + + fn descend_sequence<'b>(&'b self, node: &Node<'b>, idx: usize) -> Result, QueryError> { + let children = self.flatten_sequence(node)?; let Some(child) = children.get(idx) else { return Err(QueryError::ExhaustedList(idx, children.len())); }; - // If we're in a block_sequence, there's an intervening `block_sequence_item` - // getting in the way of our `block_node`/`flow_node`. - if child.kind_id() == self.block_sequence_item_id { - // NOTE: We can't just get the first named child here, since there might - // be interceding comments. - return child - .named_children(&mut cur) - .find(|c| c.kind_id() == self.block_node_id || c.kind_id() == self.flow_node_id) - .ok_or_else(|| { - QueryError::MissingChild(child.kind().into(), "block_sequence_item".into()) - }); - } else if child.kind_id() == self.flow_pair_id { + if child.kind_id() == self.flow_pair_id { // Similarly, if our index happens to be a `flow_pair`, we need to // get the `value` child to get the next `flow_node`. // The `value` might not be present (e.g. `{foo: }`), in which case @@ -1182,7 +1301,7 @@ foo: &foo-anchor let anchor_map = doc.tree.borrow_dependent(); assert_eq!(anchor_map.len(), 2); - assert_eq!(anchor_map["foo-anchor"].kind(), "block_node"); - assert_eq!(anchor_map["bar-anchor"].kind(), "block_node"); + assert_eq!(anchor_map["foo-anchor"].kind(), "block_mapping"); + assert_eq!(anchor_map["bar-anchor"].kind(), "block_mapping"); } } diff --git a/crates/yamlpath/tests/testcases/anchors-basic.yml b/crates/yamlpath/tests/testcases/anchors-basic.yml new file mode 100644 index 00000000..7848bf66 --- /dev/null +++ b/crates/yamlpath/tests/testcases/anchors-basic.yml @@ -0,0 +1,22 @@ +testcase: + jobs: + job1: + env: &env_vars # Define the anchor on first use + NODE_ENV: production + DATABASE_URL: ${{ secrets.DATABASE_URL }} + steps: + - run: echo "Using production settings" + + job2: + env: *env_vars # Reuse the environment variables + steps: + - run: echo "Same environment variables here" + +queries: + - query: [jobs, job2, env, DATABASE_URL] + mode: exact + expected: "${{ secrets.DATABASE_URL }}" + + - query: [jobs, job2, env, DATABASE_URL] + mode: key-only + expected: " DATABASE_URL" diff --git a/crates/yamlpath/tests/testcases/anchors-list.yml b/crates/yamlpath/tests/testcases/anchors-list.yml new file mode 100644 index 00000000..ca88be34 --- /dev/null +++ b/crates/yamlpath/tests/testcases/anchors-list.yml @@ -0,0 +1,70 @@ +testcase: + defaults: &defaults + - a + - b + - c + + merge-before: + - *defaults + - d + - e + + merge-after: + - d + - e + - *defaults + + merge-multi: + - *defaults + - *defaults + + nested-inner: &inner + - a + - b + + nested-outer: &outer + - *inner + - c + + nested-top: + - *outer + - d + + nested-top-flow: [*outer, d] + +queries: + - query: [merge-before, 0] + mode: exact + expected: "a" + + - query: [merge-after, 2] + mode: exact + expected: "a" + + - query: [merge-multi, 5] + mode: exact + expected: "c" + + - query: [nested-top, 0] + mode: exact + expected: "a" + + - query: [nested-top, 1] + mode: exact + expected: "b" + + - query: [nested-top, 2] + mode: exact + expected: "c" + + - query: [nested-top-flow, 0] + mode: exact + expected: "a" + + - query: [nested-top-flow, 1] + mode: exact + expected: "b" + + - query: [nested-top-flow, 2] + mode: exact + expected: "c" diff --git a/crates/yamlpath/tests/testcases/anchors-nested.yml b/crates/yamlpath/tests/testcases/anchors-nested.yml new file mode 100644 index 00000000..554de7d5 --- /dev/null +++ b/crates/yamlpath/tests/testcases/anchors-nested.yml @@ -0,0 +1,23 @@ +testcase: + qux: &qux + value: 42 + + foo: + bar: &bar + baz: *qux + + query: + me: *bar + +queries: + - query: [foo, bar, baz, value] + mode: exact + expected: "42" + + - query: [query, me, baz, value] + mode: exact + expected: "42" + + - query: [query, me, baz] + mode: exact + expected: " value: 42" diff --git a/crates/yamlpath/tests/testcases/interceding-comment.yml b/crates/yamlpath/tests/testcases/interceding-comment.yml index 00fa5175..f2af0698 100644 --- a/crates/yamlpath/tests/testcases/interceding-comment.yml +++ b/crates/yamlpath/tests/testcases/interceding-comment.yml @@ -14,6 +14,13 @@ testcase: - # bar bar: baz + after-key: # hmm + foo: bar + + after-key-nl: + # hmm + foo: bar + queries: - query: [foo, 0, bar] expected: " bar: baz" @@ -23,3 +30,11 @@ queries: - query: [many-children, 1, bar] expected: " bar: baz" + + - query: [after-key, foo] + mode: exact + expected: "bar" + + - query: [after-key-nl, foo] + mode: exact + expected: "bar" diff --git a/crates/zizmor/src/main.rs b/crates/zizmor/src/main.rs index 119fd94c..09aa1138 100644 --- a/crates/zizmor/src/main.rs +++ b/crates/zizmor/src/main.rs @@ -30,7 +30,9 @@ use tracing_subscriber::{EnvFilter, layer::SubscriberExt as _, util::SubscriberI use crate::{ config::{Config, ConfigError, ConfigErrorInner}, github_api::Client, + models::AsDocument, registry::input::CollectionError, + utils::once::warn_once, }; mod audit; @@ -732,6 +734,14 @@ fn run(app: &mut App) -> Result { for (input_key, input) in registry.iter_inputs() { Span::current().pb_set_message(input.key().filename()); + + if input.as_document().has_anchors() { + warn_once!( + "one or more inputs contains YAML anchors; you may encounter crashes or unpredictable behavior" + ); + warn_once!("for more information, see: https://docs.zizmor.sh/usage/#yaml-anchors"); + } + let config = registry.get_config(input_key.group()); for (ident, audit) in audit_registry.iter_audits() { tracing::debug!("running {ident} on {input}", input = input.key()); diff --git a/crates/zizmor/src/registry.rs b/crates/zizmor/src/registry.rs index 1c7ea329..e6bc49f1 100644 --- a/crates/zizmor/src/registry.rs +++ b/crates/zizmor/src/registry.rs @@ -39,7 +39,7 @@ impl AuditRegistry { match base::new(&audit_state) { Ok(audit) => registry.register_audit(base::ident(), Box::new(audit)), Err(AuditLoadError::Skip(e)) => { - tracing::info!("skipping {audit}: {e}", audit = base::ident()) + tracing::debug!("skipping {audit}: {e}", audit = base::ident()) } Err(AuditLoadError::Fail(e)) => { return Err(anyhow::anyhow!(tips( diff --git a/crates/zizmor/src/utils.rs b/crates/zizmor/src/utils.rs index b1bae77d..53615fed 100644 --- a/crates/zizmor/src/utils.rs +++ b/crates/zizmor/src/utils.rs @@ -641,6 +641,26 @@ impl SpannedQuery { } } +pub(crate) mod once { + macro_rules! once { + ($expression:expr) => {{ + static ONCE: std::sync::Once = std::sync::Once::new(); + ONCE.call_once(|| { + $expression; + }); + }}; + } + + macro_rules! warn_once { + ($($arg:tt)+) => ({ + crate::utils::once::once!(tracing::warn!($($arg)+)) + }); + } + + pub(crate) use once; + pub(crate) use warn_once; +} + #[cfg(test)] mod tests { use anyhow::Result; diff --git a/crates/zizmor/tests/integration/e2e.rs b/crates/zizmor/tests/integration/e2e.rs index b8ab8f25..b79ff271 100644 --- a/crates/zizmor/tests/integration/e2e.rs +++ b/crates/zizmor/tests/integration/e2e.rs @@ -4,6 +4,7 @@ use anyhow::Result; use crate::common::{OutputMode, input_under_test, zizmor}; +mod anchors; mod collect; mod json_v1; diff --git a/crates/zizmor/tests/integration/e2e/anchors.rs b/crates/zizmor/tests/integration/e2e/anchors.rs new file mode 100644 index 00000000..854d4440 --- /dev/null +++ b/crates/zizmor/tests/integration/e2e/anchors.rs @@ -0,0 +1,47 @@ +//! End-to-end tests for YAML anchor handling in zizmor. + +use anyhow::Result; + +use crate::common::{input_under_test, zizmor}; + +/// Basic sanity test for anchor handling. +/// +/// This test reveals duplicate findings, since zizmor doesn't +/// (yet) de-duplicate findings that arise from YAML anchors. +#[test] +fn test_basic() -> Result<()> { + insta::assert_snapshot!( + zizmor() + .input(input_under_test( + "anchors/basic.yml" + )) + .run()?, + @r#" + error[template-injection]: code injection via template expansion + --> @@INPUT@@:13:31 + | + 12 | - run: &run | + | --- this run block + 13 | "doing a thing: ${{ github.event.issue.title }}" + | ^^^^^^^^^^^^^^^^^^^^^^^^ may expand into attacker-controllable code + | + = note: audit confidence → High + = note: this finding has an auto-fix + + error[template-injection]: code injection via template expansion + --> @@INPUT@@:13:31 + | + 12 | - run: &run | + | --- this run block + 13 | "doing a thing: ${{ github.event.issue.title }}" + | ^^^^^^^^^^^^^^^^^^^^^^^^ may expand into attacker-controllable code + | + = note: audit confidence → High + = note: this finding has an auto-fix + + 5 findings (3 suppressed, 2 fixable): 0 informational, 0 low, 0 medium, 2 high + "# + ); + + Ok(()) +} diff --git a/crates/zizmor/tests/integration/snapshots/integration__e2e__gha_hazmat.snap b/crates/zizmor/tests/integration/snapshots/integration__e2e__gha_hazmat.snap index f8a44634..3ecfe162 100644 --- a/crates/zizmor/tests/integration/snapshots/integration__e2e__gha_hazmat.snap +++ b/crates/zizmor/tests/integration/snapshots/integration__e2e__gha_hazmat.snap @@ -4,11 +4,6 @@ expression: "zizmor().offline(false).output(OutputMode::Both).args([\"--no-onlin --- 🌈 zizmor v@@VERSION@@ INFO collect_inputs: zizmor::registry::input: collected 22 inputs from woodruffw/gha-hazmat - INFO zizmor::registry: skipping impostor-commit: offline audits only requested - INFO zizmor::registry: skipping ref-confusion: offline audits only requested - INFO zizmor::registry: skipping known-vulnerable-actions: offline audits only requested - INFO zizmor::registry: skipping stale-action-refs: offline audits only requested - INFO zizmor::registry: skipping ref-version-mismatch: offline audits only requested INFO audit: zizmor: 🌈 completed .github/workflows/artipacked.yml INFO audit: zizmor: 🌈 completed .github/workflows/bot-conditions.yml INFO audit: zizmor: 🌈 completed .github/workflows/cache-poisoning.yml diff --git a/crates/zizmor/tests/integration/snapshots/integration__e2e__issue_1065.snap b/crates/zizmor/tests/integration/snapshots/integration__e2e__issue_1065.snap index 467c0cd9..85657302 100644 --- a/crates/zizmor/tests/integration/snapshots/integration__e2e__issue_1065.snap +++ b/crates/zizmor/tests/integration/snapshots/integration__e2e__issue_1065.snap @@ -3,11 +3,6 @@ source: crates/zizmor/tests/integration/e2e.rs expression: "zizmor().output(OutputMode::Both).input(input_under_test(\"issue-1065.yml\")).run()?" --- 🌈 zizmor v@@VERSION@@ - INFO zizmor::registry: skipping impostor-commit: can't run without a GitHub API token - INFO zizmor::registry: skipping ref-confusion: can't run without a GitHub API token - INFO zizmor::registry: skipping known-vulnerable-actions: can't run without a GitHub API token - INFO zizmor::registry: skipping stale-action-refs: can't run without a GitHub API token - INFO zizmor::registry: skipping ref-version-mismatch: can't run without a GitHub API token INFO audit: zizmor: 🌈 completed @@INPUT@@ warning[excessive-permissions]: overly broad permissions --> @@INPUT@@:12:3 diff --git a/crates/zizmor/tests/integration/snapshots/integration__e2e__issue_1207.snap b/crates/zizmor/tests/integration/snapshots/integration__e2e__issue_1207.snap index 00fc3e3d..f5455862 100644 --- a/crates/zizmor/tests/integration/snapshots/integration__e2e__issue_1207.snap +++ b/crates/zizmor/tests/integration/snapshots/integration__e2e__issue_1207.snap @@ -3,10 +3,5 @@ source: crates/zizmor/tests/integration/e2e.rs expression: "zizmor().expects_failure(false).output(OutputMode::Both).working_dir(input_under_test(\"e2e-menagerie/dummy-action-1\")).input(\"action.yaml\").run()?" --- 🌈 zizmor v@@VERSION@@ - INFO zizmor::registry: skipping impostor-commit: can't run without a GitHub API token - INFO zizmor::registry: skipping ref-confusion: can't run without a GitHub API token - INFO zizmor::registry: skipping known-vulnerable-actions: can't run without a GitHub API token - INFO zizmor::registry: skipping stale-action-refs: can't run without a GitHub API token - INFO zizmor::registry: skipping ref-version-mismatch: can't run without a GitHub API token INFO audit: zizmor: 🌈 completed @@INPUT@@ No findings to report. Good job! diff --git a/crates/zizmor/tests/integration/snapshots/integration__e2e__issue_569.snap b/crates/zizmor/tests/integration/snapshots/integration__e2e__issue_569.snap index a0ca6b78..d60372eb 100644 --- a/crates/zizmor/tests/integration/snapshots/integration__e2e__issue_569.snap +++ b/crates/zizmor/tests/integration/snapshots/integration__e2e__issue_569.snap @@ -3,11 +3,6 @@ source: crates/zizmor/tests/integration/e2e.rs expression: "zizmor().offline(false).output(OutputMode::Both).args([\"--no-online-audits\",\n\"--collect=workflows\"]).input(\"python/cpython@f963239ff1f986742d4c6bab2ab7b73f5a4047f6\").run()?" --- 🌈 zizmor v@@VERSION@@ - INFO zizmor::registry: skipping impostor-commit: offline audits only requested - INFO zizmor::registry: skipping ref-confusion: offline audits only requested - INFO zizmor::registry: skipping known-vulnerable-actions: offline audits only requested - INFO zizmor::registry: skipping stale-action-refs: offline audits only requested - INFO zizmor::registry: skipping ref-version-mismatch: offline audits only requested INFO audit: zizmor: 🌈 completed .github/workflows/add-issue-header.yml INFO audit: zizmor: 🌈 completed .github/workflows/build.yml INFO audit: zizmor: 🌈 completed .github/workflows/documentation-links.yml diff --git a/crates/zizmor/tests/integration/snapshots/integration__e2e__issue_726.snap b/crates/zizmor/tests/integration/snapshots/integration__e2e__issue_726.snap index beb1b98f..6d5bf1f9 100644 --- a/crates/zizmor/tests/integration/snapshots/integration__e2e__issue_726.snap +++ b/crates/zizmor/tests/integration/snapshots/integration__e2e__issue_726.snap @@ -4,11 +4,6 @@ expression: "zizmor().offline(false).output(OutputMode::Both).args([\"--no-onlin --- 🌈 zizmor v@@VERSION@@ INFO collect_inputs: zizmor::registry::input: collected 6 inputs from woodruffw-experiments/zizmor-bug-726 - INFO zizmor::registry: skipping impostor-commit: offline audits only requested - INFO zizmor::registry: skipping ref-confusion: offline audits only requested - INFO zizmor::registry: skipping known-vulnerable-actions: offline audits only requested - INFO zizmor::registry: skipping stale-action-refs: offline audits only requested - INFO zizmor::registry: skipping ref-version-mismatch: offline audits only requested INFO audit: zizmor: 🌈 completed .github/actions/custom-action/action.yml INFO audit: zizmor: 🌈 completed .github/workflows/actions/custom-action/action.yml INFO audit: zizmor: 🌈 completed .github/workflows/custom-action/action.yml diff --git a/crates/zizmor/tests/integration/snapshots/integration__e2e__menagerie-2.snap b/crates/zizmor/tests/integration/snapshots/integration__e2e__menagerie-2.snap index 0dfebb09..6085adea 100644 --- a/crates/zizmor/tests/integration/snapshots/integration__e2e__menagerie-2.snap +++ b/crates/zizmor/tests/integration/snapshots/integration__e2e__menagerie-2.snap @@ -1,14 +1,8 @@ --- source: crates/zizmor/tests/integration/e2e.rs -assertion_line: 71 expression: "zizmor().output(OutputMode::Both).args([\"--collect=all\"]).input(input_under_test(\"e2e-menagerie\")).run()?" --- 🌈 zizmor v@@VERSION@@ - INFO zizmor::registry: skipping impostor-commit: can't run without a GitHub API token - INFO zizmor::registry: skipping ref-confusion: can't run without a GitHub API token - INFO zizmor::registry: skipping known-vulnerable-actions: can't run without a GitHub API token - INFO zizmor::registry: skipping stale-action-refs: can't run without a GitHub API token - INFO zizmor::registry: skipping ref-version-mismatch: can't run without a GitHub API token INFO audit: zizmor: 🌈 completed @@INPUT@@/.github/dummy-action-2/action.yml INFO audit: zizmor: 🌈 completed @@INPUT@@/.github/workflows/another-dummy.yml INFO audit: zizmor: 🌈 completed @@INPUT@@/.github/workflows/dummy.yml diff --git a/crates/zizmor/tests/integration/snapshots/integration__e2e__menagerie.snap b/crates/zizmor/tests/integration/snapshots/integration__e2e__menagerie.snap index 2bf2b073..4f9abf8a 100644 --- a/crates/zizmor/tests/integration/snapshots/integration__e2e__menagerie.snap +++ b/crates/zizmor/tests/integration/snapshots/integration__e2e__menagerie.snap @@ -3,11 +3,6 @@ source: crates/zizmor/tests/integration/e2e.rs expression: "zizmor().output(OutputMode::Both).input(input_under_test(\"e2e-menagerie\")).run()?" --- 🌈 zizmor v@@VERSION@@ - INFO zizmor::registry: skipping impostor-commit: can't run without a GitHub API token - INFO zizmor::registry: skipping ref-confusion: can't run without a GitHub API token - INFO zizmor::registry: skipping known-vulnerable-actions: can't run without a GitHub API token - INFO zizmor::registry: skipping stale-action-refs: can't run without a GitHub API token - INFO zizmor::registry: skipping ref-version-mismatch: can't run without a GitHub API token INFO audit: zizmor: 🌈 completed @@INPUT@@/.github/dummy-action-2/action.yml INFO audit: zizmor: 🌈 completed @@INPUT@@/.github/workflows/another-dummy.yml INFO audit: zizmor: 🌈 completed @@INPUT@@/.github/workflows/dummy.yml diff --git a/crates/zizmor/tests/integration/snapshots/integration__e2e__pr_960_backstop.snap b/crates/zizmor/tests/integration/snapshots/integration__e2e__pr_960_backstop.snap index 5c0a0b85..9cf7fa98 100644 --- a/crates/zizmor/tests/integration/snapshots/integration__e2e__pr_960_backstop.snap +++ b/crates/zizmor/tests/integration/snapshots/integration__e2e__pr_960_backstop.snap @@ -3,10 +3,5 @@ source: crates/zizmor/tests/integration/e2e.rs expression: "zizmor().output(OutputMode::Both).input(input_under_test(\"pr-960-backstop\")).run()?" --- 🌈 zizmor v@@VERSION@@ - INFO zizmor::registry: skipping impostor-commit: can't run without a GitHub API token - INFO zizmor::registry: skipping ref-confusion: can't run without a GitHub API token - INFO zizmor::registry: skipping known-vulnerable-actions: can't run without a GitHub API token - INFO zizmor::registry: skipping stale-action-refs: can't run without a GitHub API token - INFO zizmor::registry: skipping ref-version-mismatch: can't run without a GitHub API token INFO audit: zizmor: 🌈 completed @@INPUT@@/action.yml No findings to report. Good job! diff --git a/crates/zizmor/tests/integration/test-data/anchors/basic.yml b/crates/zizmor/tests/integration/test-data/anchors/basic.yml new file mode 100644 index 00000000..ffa91815 --- /dev/null +++ b/crates/zizmor/tests/integration/test-data/anchors/basic.yml @@ -0,0 +1,15 @@ +name: basic + +on: [pull_request] + +permissions: {} + +jobs: + basic: + name: basic + runs-on: ubuntu-latest + steps: + - run: &run | + "doing a thing: ${{ github.event.issue.title }}" + + - run: *run diff --git a/docs/release-notes.md b/docs/release-notes.md index db4f4b58..b4f5ce97 100644 --- a/docs/release-notes.md +++ b/docs/release-notes.md @@ -42,6 +42,9 @@ of `zizmor`. Many thanks to @mostafa for implementing this improvement! +* `zizmor` now has **limited, experimental** support for handling + inputs that contain YAML anchors (#1266) + ## 1.15.2 ### Bug Fixes 🐛 diff --git a/docs/usage.md b/docs/usage.md index 057fcd5e..7a8fe92b 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -805,7 +805,7 @@ However, like all tools, `zizmor` is **not a panacea**, and has fundamental limitations that must be kept in mind. This page documents some of those limitations. -### `zizmor` is a _static_ analysis tool +### `zizmor` is a _static_ analysis tool { #static-analysis } `zizmor` is a _static_ analysis tool. It never executes any code, nor does it have access to any runtime state. @@ -849,7 +849,7 @@ the [template-injection](./audits.md#template-injection) audit will flag `${{ matrix.something }}` as a potential code injection risk, since it can't infer anything about what `matrix.something` might expand to. -### `zizmor` audits workflow and action _definitions_ only +### `zizmor` audits workflow and action _definitions_ only { #definitions-only } `zizmor` audits workflow and action _definitions_ only. That means the contents of `foo.yml` (for your workflow definitions) or `action.yml` (for your @@ -881,3 +881,36 @@ More generally, `zizmor` cannot analyze files indirectly referenced within workflow/action definitions, as they may not actually exist until runtime. For example, `some-script.sh` above may have been generated or downloaded outside of any repository-tracked state. + +### YAML anchors stymie analysis { #yaml-anchors } + +!!! tip "TL;DR" + + `zizmor`'s support for YAML anchors is provided on a **best effort** + basis. Users of `zizmor` are **strongly encouraged** to avoid anchors + if they care about accurate static analysis results. + +`zizmor` tries very hard to present *useful* source spans in its audit +results. + +To do this, `zizmor` needs to know a lot of about the inner workings +of the YAML serialization format that GitHub Actions workflows, action +definitions, and Dependabot files are expressed in. + +YAML is a complicated serialization format, but GitHub *mostly* uses +a tractable subset of it. One conspicuous exception to this is +[YAML anchors](https://yaml.org/spec/1.2.2/#3222-anchors-and-aliases), +which GitHub has +[decided to support](https://github.blog/changelog/2025-09-18-actions-yaml-anchors-and-non-public-workflow-templates/) +in workflow and action definitions as of September 2025. + +Anchors make `zizmor`'s analysis job *much* harder, as they introduce a +layer of (arbitrarily deep) indirection and misalignment between the +deserialized object model (which is what `zizmor` analyzes) and the source +representation (which `zizmor` spans back to). + +As a result, `zizmor`'s support for YAML anchors is **best effort only**. +Users are **strongly encouraged** to avoid anchors in their workflows +and actions. Bug reports for issues in inputs containing anchors are +appreciated, but will be given a lower priority relative to bugs that +aren't observed with YAML anchors.