From d4c5a62cfb6804beadc02fb0fb504719b43054f3 Mon Sep 17 00:00:00 2001 From: William Woodruff Date: Sun, 12 Oct 2025 00:41:32 -0400 Subject: [PATCH] feat: refactor `--collect` (#1228) --- crates/zizmor/src/main.rs | 137 ++++++++++++++---- crates/zizmor/src/registry/input.rs | 12 +- crates/zizmor/tests/integration/common.rs | 6 +- crates/zizmor/tests/integration/e2e.rs | 3 +- .../zizmor/tests/integration/e2e/collect.rs | 92 ++++++++++++ docs/quickstart.md | 13 +- docs/release-notes.md | 12 +- docs/snippets/help.txt | 8 +- docs/usage.md | 27 +++- 9 files changed, 257 insertions(+), 53 deletions(-) create mode 100644 crates/zizmor/tests/integration/e2e/collect.rs diff --git a/crates/zizmor/src/main.rs b/crates/zizmor/src/main.rs index 4db92176..d4d7384c 100644 --- a/crates/zizmor/src/main.rs +++ b/crates/zizmor/src/main.rs @@ -1,6 +1,7 @@ #![warn(clippy::all, clippy::dbg_macro)] use std::{ + collections::HashSet, io::{Write, stdout}, process::ExitCode, }; @@ -22,7 +23,7 @@ use registry::{AuditRegistry, FindingRegistry}; use state::AuditState; use terminal_link::Link; use thiserror::Error; -use tracing::{Span, info_span, instrument}; +use tracing::{Span, info_span, instrument, warn}; use tracing_indicatif::{IndicatifLayer, span_ext::IndicatifSpanExt}; use tracing_subscriber::{EnvFilter, layer::SubscriberExt as _, util::SubscriberInitExt as _}; @@ -153,8 +154,8 @@ struct App { /// /// By default, all workflows and composite actions are collected, /// while honoring `.gitignore` files. - #[arg(long, value_enum, default_value_t)] - collect: CollectionMode, + #[arg(long, default_values = ["default"], num_args=1.., value_delimiter=',')] + collect: Vec, /// Fail instead of warning on syntax and schema errors /// in collected inputs. @@ -215,6 +216,7 @@ impl App { // is fully removed. #[derive(Debug, Copy, Clone, ValueEnum)] enum CliSeverity { + #[value(hide = true)] Unknown, Informational, Low, @@ -226,6 +228,7 @@ enum CliSeverity { // is fully removed. #[derive(Debug, Copy, Clone, ValueEnum)] enum CliConfidence { + #[value(hide = true)] Unknown, Low, Medium, @@ -353,48 +356,128 @@ impl From for anstream::ColorChoice { } /// How `zizmor` collects inputs from local and remote repository sources. -#[derive(Copy, Clone, Debug, Default, ValueEnum)] -pub(crate) enum CollectionMode { +#[derive(Copy, Clone, Debug, Default, ValueEnum, Eq, PartialEq, Hash)] +pub(crate) enum CliCollectionMode { /// Collect all possible inputs, ignoring `.gitignore` files. All, /// Collect all possible inputs, respecting `.gitignore` files. #[default] Default, /// Collect only workflow definitions. + /// + /// Deprecated; use `--collect=workflows` + #[value(hide = true)] WorkflowsOnly, /// Collect only action definitions (i.e. `action.yml`). + /// + /// Deprecated; use `--collect=actions` + #[value(hide = true)] ActionsOnly, - /// Collect only Dependabot configuration files (i.e. `dependabot.yml`). - DependabotOnly, + /// Collect workflows. + Workflows, + /// Collect action definitions (i.e. `action.yml`). + Actions, + /// Collect Dependabot configuration files (i.e. `dependabot.yml`). + Dependabot, } -impl CollectionMode { +#[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)] +pub(crate) enum CollectionMode { + All, + Default, + Workflows, + Actions, + Dependabot, +} + +pub(crate) struct CollectionModeSet(HashSet); + +impl From<&[CliCollectionMode]> for CollectionModeSet { + fn from(modes: &[CliCollectionMode]) -> Self { + if modes.len() > 1 + && modes.iter().any(|mode| { + matches!( + mode, + CliCollectionMode::WorkflowsOnly | CliCollectionMode::ActionsOnly + ) + }) + { + let mut cmd = App::command(); + + cmd.error( + clap::error::ErrorKind::ArgumentConflict, + "`workflows-only` and `actions-only` cannot be combined with other collection modes", + ) + .exit(); + } + + Self( + modes + .iter() + .map(|mode| match mode { + CliCollectionMode::All => CollectionMode::All, + CliCollectionMode::Default => CollectionMode::Default, + CliCollectionMode::WorkflowsOnly => { + warn!("--collect=workflows-only is deprecated; use --collect=workflows instead"); + warn!("future versions of zizmor will reject this mode"); + + CollectionMode::Workflows + } + CliCollectionMode::ActionsOnly => { + warn!("--collect=actions-only is deprecated; use --collect=actions instead"); + warn!("future versions of zizmor will reject this mode"); + + CollectionMode::Actions + } + CliCollectionMode::Workflows => CollectionMode::Workflows, + CliCollectionMode::Actions => CollectionMode::Actions, + CliCollectionMode::Dependabot => CollectionMode::Dependabot, + }) + .collect(), + ) + } +} + +impl CollectionModeSet { + /// Does our collection mode respect `.gitignore` files? pub(crate) fn respects_gitignore(&self) -> bool { - matches!( - self, - CollectionMode::Default | CollectionMode::WorkflowsOnly | CollectionMode::ActionsOnly - ) + // All modes except 'all' respect .gitignore files. + !self.0.contains(&CollectionMode::All) } + /// Should we collect workflows? pub(crate) fn workflows(&self) -> bool { - matches!( - self, - CollectionMode::All | CollectionMode::Default | CollectionMode::WorkflowsOnly - ) + self.0.iter().any(|mode| { + matches!( + mode, + CollectionMode::All | CollectionMode::Default | CollectionMode::Workflows + ) + }) } + /// Should we collect *only* workflows? + pub(crate) fn workflows_only(&self) -> bool { + self.0.len() == 1 && self.0.contains(&CollectionMode::Workflows) + } + + /// Should we collect actions? pub(crate) fn actions(&self) -> bool { - matches!( - self, - CollectionMode::All | CollectionMode::Default | CollectionMode::ActionsOnly - ) + self.0.iter().any(|mode| { + matches!( + mode, + CollectionMode::All | CollectionMode::Default | CollectionMode::Actions + ) + }) } + /// Should we collect Dependabot configuration files? pub(crate) fn dependabot(&self) -> bool { - matches!( - self, - CollectionMode::All | CollectionMode::Default | CollectionMode::DependabotOnly - ) + self.0.iter().any(|mode| { + matches!( + mode, + CollectionMode::All | CollectionMode::Default | CollectionMode::Dependabot + ) + }) } } @@ -422,7 +505,7 @@ pub(crate) fn tips(err: impl AsRef, tips: &[impl AsRef]) -> String { /// State used when collecting input groups. pub(crate) struct CollectionOptions { - pub(crate) mode: CollectionMode, + pub(crate) mode_set: CollectionModeSet, pub(crate) strict: bool, pub(crate) no_config: bool, /// Global configuration, if any. @@ -584,6 +667,8 @@ fn run(app: &mut App) -> Result { eprintln!("🌈 zizmor v{version}", version = env!("CARGO_PKG_VERSION")); + let collection_mode_set = CollectionModeSet::from(app.collect.as_slice()); + let min_severity = match app.min_severity { Some(CliSeverity::Unknown) => { tracing::warn!("`unknown` is a deprecated minimum severity that has no effect"); @@ -618,7 +703,7 @@ fn run(app: &mut App) -> Result { .transpose()?; let collection_options = CollectionOptions { - mode: app.collect, + mode_set: collection_mode_set, strict: app.strict_collection, no_config: app.no_config, global_config, diff --git a/crates/zizmor/src/registry/input.rs b/crates/zizmor/src/registry/input.rs index ca14c074..76d8fbf9 100644 --- a/crates/zizmor/src/registry/input.rs +++ b/crates/zizmor/src/registry/input.rs @@ -11,7 +11,7 @@ use serde::Serialize; use thiserror::Error; use crate::{ - CollectionMode, CollectionOptions, + CollectionOptions, audit::AuditInput, config::{Config, ConfigError}, github_api::{Client, ClientError}, @@ -430,7 +430,7 @@ impl InputGroup { // zizmor integrators. // // See: https://github.com/zizmorcore/zizmor/issues/596 - if options.mode.respects_gitignore() { + if options.mode_set.respects_gitignore() { walker .require_git(false) .git_ignore(true) @@ -443,7 +443,7 @@ impl InputGroup { let entry = <&Utf8Path>::try_from(entry.path()) .map_err(|e| CollectionError::InvalidPath(e, entry.path().into()))?; - if options.mode.workflows() + if options.mode_set.workflows() && entry.is_file() && matches!(entry.extension(), Some("yml" | "yaml")) && entry @@ -462,7 +462,7 @@ impl InputGroup { group.register(InputKind::Workflow, contents, key, options.strict)?; } - if options.mode.actions() + if options.mode_set.actions() && entry.is_file() && matches!(entry.file_name(), Some("action.yml" | "action.yaml")) { @@ -478,7 +478,7 @@ impl InputGroup { group.register(InputKind::Action, contents, key, options.strict)?; } - if options.mode.dependabot() + if options.mode_set.dependabot() && entry.is_file() && matches!( entry.file_name(), @@ -511,7 +511,7 @@ impl InputGroup { let config = Config::discover(options, || Config::discover_remote(client, &slug))?; let mut group = Self::new(config); - if matches!(options.mode, CollectionMode::WorkflowsOnly) { + if options.mode_set.workflows_only() { // Performance: if we're *only* collecting workflows, then we // can save ourselves a full repo download and only fetch the // repo's workflow files. diff --git a/crates/zizmor/tests/integration/common.rs b/crates/zizmor/tests/integration/common.rs index 461a18a4..f006bbbe 100644 --- a/crates/zizmor/tests/integration/common.rs +++ b/crates/zizmor/tests/integration/common.rs @@ -188,10 +188,10 @@ impl Zizmor { if let Some(exit_code) = output.status.code() { // There are other nonzero exit codes that don't indicate failure; - // 1 is our only failure code. - let is_failure = exit_code == 1; + // 1 and 2 do. + let is_failure = matches!(exit_code, 1 | 2); if is_failure != self.expects_failure { - anyhow::bail!("zizmor exited with unexpected code {exit_code}"); + anyhow::bail!("zizmor exited with unexpected code {exit_code}: {raw}"); } } diff --git a/crates/zizmor/tests/integration/e2e.rs b/crates/zizmor/tests/integration/e2e.rs index 20e8e38d..b8ab8f25 100644 --- a/crates/zizmor/tests/integration/e2e.rs +++ b/crates/zizmor/tests/integration/e2e.rs @@ -4,6 +4,7 @@ use anyhow::Result; use crate::common::{OutputMode, input_under_test, zizmor}; +mod collect; mod json_v1; #[cfg_attr(not(feature = "gh-token-tests"), ignore)] @@ -33,7 +34,7 @@ fn issue_569() -> Result<()> { zizmor() .offline(false) .output(OutputMode::Both) - .args(["--no-online-audits", "--collect=workflows-only"]) + .args(["--no-online-audits", "--collect=workflows"]) .input("python/cpython@f963239ff1f986742d4c6bab2ab7b73f5a4047f6") .run()? ); diff --git a/crates/zizmor/tests/integration/e2e/collect.rs b/crates/zizmor/tests/integration/e2e/collect.rs new file mode 100644 index 00000000..92dcb080 --- /dev/null +++ b/crates/zizmor/tests/integration/e2e/collect.rs @@ -0,0 +1,92 @@ +//! End-to-end integration tests for `--collect=`. + +use anyhow::Result; +use insta::assert_snapshot; + +use crate::common::{input_under_test, zizmor}; + +#[test] +fn test_fails_incompatible_modes() -> Result<()> { + assert_snapshot!( + zizmor() + .expects_failure(true) + .args(["--collect=workflows,actions-only"]) + .input(input_under_test("neutral.yml")) + .run()?, + @r" + 🌈 zizmor v@@VERSION@@ + error: `workflows-only` and `actions-only` cannot be combined with other collection modes + + Usage: zizmor [OPTIONS] ... + + For more information, try '--help'. + " + ); + + assert_snapshot!( + zizmor() + .expects_failure(true) + .args(["--collect=actions,workflows-only"]) + .input(input_under_test("neutral.yml")) + .run()?, + @r" + 🌈 zizmor v@@VERSION@@ + error: `workflows-only` and `actions-only` cannot be combined with other collection modes + + Usage: zizmor [OPTIONS] ... + + For more information, try '--help'. + " + ); + + assert_snapshot!( + zizmor() + .expects_failure(true) + .args(["--collect=actions-only,workflows-only"]) + .input(input_under_test("neutral.yml")) + .run()?, + @r" + 🌈 zizmor v@@VERSION@@ + error: `workflows-only` and `actions-only` cannot be combined with other collection modes + + Usage: zizmor [OPTIONS] ... + + For more information, try '--help'. + " + ); + + Ok(()) +} + +#[test] +fn test_warn_deprecated_modes() -> Result<()> { + assert_snapshot!( + zizmor() + .args(["--collect=workflows-only"]) + .output(crate::common::OutputMode::Both) + .input(input_under_test("neutral.yml")) + .setenv("RUST_LOG", "warn") + .run()?, + @r" + 🌈 zizmor v@@VERSION@@ + WARN zizmor: --collect=workflows-only is deprecated; use --collect=workflows instead + WARN zizmor: future versions of zizmor will reject this mode + No findings to report. Good job! + "); + + assert_snapshot!( + zizmor() + .args(["--collect=actions-only"]) + .output(crate::common::OutputMode::Both) + .input(input_under_test("neutral.yml")) + .setenv("RUST_LOG", "warn") + .run()?, + @r" + 🌈 zizmor v@@VERSION@@ + WARN zizmor: --collect=actions-only is deprecated; use --collect=actions instead + WARN zizmor: future versions of zizmor will reject this mode + No findings to report. Good job! + "); + + Ok(()) +} diff --git a/docs/quickstart.md b/docs/quickstart.md index 18ed78b2..fd5bff4d 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -39,13 +39,15 @@ Here are some different ways you can run `zizmor` locally: !!! tip - Pass `--collect=workflows-only` to disable collecting composite actions. + Pass `--collect=workflows` to avoid collecting anything except + workflow definitions. When given one or more local directories, `zizmor` will treat each as a GitHub repository and attempt to discover workflows defined under the `.github/workflows` subdirectory for each. `zizmor` will also walk each directory to find composite action definitions (`action.yml` in any - subdirectory). + subdirectory) and Dependabot configuration files + (`.github/dependabot.yml`). ```bash # repo-a/ contains .github/workflows/{ci,tests}.yml @@ -55,8 +57,8 @@ Here are some different ways you can run `zizmor` locally: # or with multiple directories zizmor repo-a/ ../../repo-b/ - # collect only workflows, not composite actions - zizmor --collect=workflows-only + # collect only workflows, not composite actions or Dependabot configs + zizmor --collect=workflows ``` === "On one or more remote repositories" @@ -68,7 +70,8 @@ Here are some different ways you can run `zizmor` locally: !!! tip - Pass `--collect=workflows-only` to disable collecting composite actions. + Pass `--collect=workflows` to disable collecting anything except + workflow definitions. `zizmor` can also fetch workflows and actions directly from GitHub, if given a GitHub API token via `GH_TOKEN` or `--gh-token`: diff --git a/docs/release-notes.md b/docs/release-notes.md index 32bce976..bc7905d5 100644 --- a/docs/release-notes.md +++ b/docs/release-notes.md @@ -51,7 +51,7 @@ To complement this new functionality, this release comes with two new audits: * `zizmor` is now more resilient to sporadic request failures when performing GitHub API requests (#1219) -* `--collect=dependabot-only` is now supported as a collection option, +* `--collect=dependabot` is now supported as a collection option, allowing users to audit only Dependabot configuration files (#1215) ### Bug Fixes 🐛 @@ -60,6 +60,16 @@ To complement this new functionality, this release comes with two new audits: inputs that lacked an explicit parent path component, e.g. `zizmor foo.yml` instead of `zizmor ./foo.yml` (#1212) +### Deprecations ⚠️ + +* The `workflows-only` and `actions-only` values for `--collect` are now + deprecated. These values have been replaced with `workflows` and + `actions`, respectively, which have the same behavior but + can be composed together with other collection modes. The deprecated + modes will be removed in a future release (#1228) + + Until removal, using these values will emit a warning. + ## 1.14.2 ### Bug Fixes 🐛 diff --git a/docs/snippets/help.txt b/docs/snippets/help.txt index be73a170..5c21ca53 100644 --- a/docs/snippets/help.txt +++ b/docs/snippets/help.txt @@ -37,13 +37,13 @@ Options: --no-exit-codes Disable all error codes besides success and tool failure --min-severity - Filter all results below this severity [possible values: unknown, informational, low, medium, high] + Filter all results below this severity [possible values: informational, low, medium, high] --min-confidence - Filter all results below this confidence [possible values: unknown, low, medium, high] + Filter all results below this confidence [possible values: low, medium, high] --cache-dir The directory to use for HTTP caching. By default, a host-appropriate user-caching directory will be used - --collect - Control which kinds of inputs are collected for auditing [default: default] [possible values: all, default, workflows-only, actions-only, dependabot-only] + --collect ... + Control which kinds of inputs are collected for auditing [default: default] [possible values: all, default, workflows, actions, dependabot] --strict-collection Fail instead of warning on syntax and schema errors in collected inputs --completions diff --git a/docs/usage.md b/docs/usage.md index 5e18b8e4..0143c078 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -44,13 +44,13 @@ There are three input sources that `zizmor` knows about: sources can be mixed and matched: ```bash -# audit a single local workflow, an entire local repository, and +# audit a single local workflow, an entire local directory, and # a remote repository all in the same run zizmor ../example.yml ../other-repo/ example/example ``` -When auditing local and/or remote repositories, `zizmor` will collect -all known input kinds by default. To configure collection behavior, +When auditing local directories and/or remote repositories, `zizmor` will +collect all known input kinds by default. To configure collection behavior, you can use the `--collect=...` option. ```bash @@ -61,15 +61,27 @@ zizmor --collect=all example/example zizmor --collect=default example/example # collect only workflows -zizmor --collect=workflows-only example/example +zizmor --collect=workflows example/example # collect only actions -zizmor --collect=actions-only example/example +zizmor --collect=actions example/example # collect only Dependabot configs -zizmor --collect=dependabot-only example/example +zizmor --collect=dependabot example/example + +# collect only workflows and actions (not Dependabot configs) +zizmor --collect=workflows,actions example/example ``` +!!! warning "Deprecation" + + `--collect=workflows-only` and `--collect=actions-only` are + deprecated aliases for `--collect=workflows` and + `--collect=actions`, respectively, as of `v1.15.0`. + + Users should switch to the non-deprecated forms, as the deprecated + forms will be removed in a future release. + !!! tip `--collect=all` can be significantly slower than `--collect=default`, @@ -79,7 +91,7 @@ zizmor --collect=dependabot-only example/example !!! tip `--collect=...` only controls input collection from repository input - sources. In other words, `zizmor --collect=actions-only workflow.yml` + sources. In other words, `zizmor --collect=actions workflow.yml` *will* audit `workflow.yml`, since it was passed explicitly and not collected indirectly. @@ -398,6 +410,7 @@ annotations. | ---- | ------- | | 0 | Successful audit; no findings to report (or SARIF mode enabled). | | 1 | Error during audit; consult output. | +| 2 | Argument parsing failure; consult output. | | 11 | One or more findings found; highest finding is "informational" level. | | 12 | One or more findings found; highest finding is "low" level. | | 13 | One or more findings found; highest finding is "medium" level. |