refactor: simplify use of WorkflowAudit trait (#185)

This commit is contained in:
William Woodruff 2024-11-22 18:37:39 -05:00 committed by GitHub
parent 37a87c888e
commit f0627d32c7
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 247 additions and 200 deletions

View file

@ -127,12 +127,12 @@ Some things that can be useful to discuss beforehand:
- Which criticality should we assign for this new finding?
- Which confidence should we assign for this new finding?
- Should this new audit be pedantic at all?
- Does this new audit require using the Github API, or is it entirely off-line?
- Does this new audit require using the Github API, or is it entirely offline?
When developing a new `zizmor` audit, there are a couple of implementation details to be aware of:
- All existing audits live in a Rust modules grouped under `src/audit` folder
- The expected behaviour for all audits is defined by the `WorkflowAudit` trait at `src/audit/mod.rs`
- The expected behavior for all audits is defined by the `WorkflowAudit` trait at `src/audit/mod.rs`
- The expected outcome of an executed audit is defined by the `Finding` struct at `src/finding/mod.rs`
- Any `WorkflowAudit` implementation can have access to an `AuditState` instance, as per `src/state.rs`
- If an audit requires data from the GitHub API, there is a `Client` implementation at `src/github_api.rs`
@ -148,21 +148,32 @@ cargo test
### Adding a new audit
!!! tip
`WorkflowAudit` has various default implementations that are useful if your
audit only needs to look at individual jobs, steps, etc.
For example, you may want to implement `WorkflowAudit::audit_step` to
audit each step individually rather than having to iterate from the workflow
downwards with `WorkflowAudit::audit`.
!!! tip
When in doubt, refer to pre-existing audits for inspiration!
The general procedure for adding a new audit can be described as:
- Define a new file at `src/audit/my_new_audit.rs`
- Define a struct like `MyNewAudit` and implement the `WorkflowAudit` trait for it
- You may want to use both the `AuditState` and `github_api::Client` to get the job done
- Assign the proper YML `location` when creating a `Finding`, grabbing it from the proper `Workflow`, `Job` or `Step` instance
- You may want to use both the `AuditState` and `github_api::Client` to get the job done
- Assign the proper `location` when creating a `Finding`, grabbing it from the
proper `Workflow`, `Job` or `Step` instance
- Register `MyNewAudit` in the known audits at `src/main.rs`
- Add proper integration tests covering some scenarios at `tests/acceptance.rs`
- Add proper docs for this new audit at `docs/audits`. Please add related public information about the underlying vulnerability
- Add proper docs for this new audit at `docs/audits`. Please add related public
information about the underlying vulnerability
- Open your Pull Request!
!!! tip
When in doubt, you can always refer to existing audit implementations as well!
### Changing an existing audit
The general procedure for changing an existing audit is:
@ -171,7 +182,7 @@ The general procedure for changing an existing audit is:
- Change the behaviour to match new requirements there (e.g. consuming a new CLI info exposed through `AuditState`)
- Ensure that tests and samples at `tests/` reflect changed behaviour accordingly (e.g. the confidence for finding has changed)
- Ensure that `docs/audits` reflect changed behaviour accordingly (e.g. an audit that is no longer pedantic)
- Open your Pull Request
- Open your Pull Request!
## Changing `zizmor`'s CLI

View file

@ -6,7 +6,6 @@
//! See: <https://docs.github.com/en/rest/security-advisories/global-advisories?apiVersion=2022-11-28>
use anyhow::{anyhow, Context, Result};
use github_actions_models::workflow::Job;
use crate::{
finding::{Confidence, Severity},
@ -145,37 +144,26 @@ impl WorkflowAudit for KnownVulnerableActions {
Ok(Self { client })
}
fn audit<'w>(
&self,
workflow: &'w crate::models::Workflow,
) -> anyhow::Result<Vec<crate::finding::Finding<'w>>> {
fn audit_step<'w>(&self, step: &super::Step<'w>) -> Result<Vec<super::Finding<'w>>> {
let mut findings = vec![];
for job in workflow.jobs() {
let Job::NormalJob(_) = *job else {
continue;
};
let Some(Uses::Repository(uses)) = step.uses() else {
return Ok(findings);
};
for step in job.steps() {
let Some(Uses::Repository(uses)) = step.uses() else {
continue;
};
for (severity, id) in self.action_known_vulnerabilities(&uses)? {
findings.push(
Self::finding()
.confidence(Confidence::High)
.severity(severity)
.add_location(
step.location()
.with_keys(&["uses".into()])
.annotated(&id)
.with_url(format!("https://github.com/advisories/{id}")),
)
.build(workflow)?,
);
}
}
for (severity, id) in self.action_known_vulnerabilities(&uses)? {
findings.push(
Self::finding()
.confidence(Confidence::High)
.severity(severity)
.add_location(
step.location()
.with_keys(&["uses".into()])
.annotated(&id)
.with_url(format!("https://github.com/advisories/{id}")),
)
.build(step.workflow())?,
);
}
Ok(findings)

View file

@ -4,7 +4,7 @@ use anyhow::Result;
use crate::{
finding::{Finding, FindingBuilder},
models::Workflow,
models::{Job, Step, Workflow},
state::AuditState,
};
@ -34,7 +34,38 @@ pub(crate) trait WorkflowAudit {
where
Self: Sized;
fn audit<'w>(&self, workflow: &'w Workflow) -> Result<Vec<Finding<'w>>>;
fn audit_step<'w>(&self, _step: &Step<'w>) -> Result<Vec<Finding<'w>>> {
Ok(vec![])
}
fn audit_normal_job<'w>(&self, job: &Job<'w>) -> Result<Vec<Finding<'w>>> {
let mut results = vec![];
for step in job.steps() {
results.extend(self.audit_step(&step)?);
}
Ok(results)
}
fn audit_reusable_job<'w>(&self, _job: &Job<'w>) -> Result<Vec<Finding<'w>>> {
Ok(vec![])
}
fn audit<'w>(&self, workflow: &'w Workflow) -> Result<Vec<Finding<'w>>> {
let mut results = vec![];
for job in workflow.jobs() {
match *job {
github_actions_models::workflow::Job::NormalJob(_) => {
results.extend(self.audit_normal_job(&job)?);
}
github_actions_models::workflow::Job::ReusableWorkflowCallJob(_) => {
results.extend(self.audit_reusable_job(&job)?);
}
}
}
Ok(results)
}
fn finding<'w>() -> FindingBuilder<'w>
where

View file

@ -14,10 +14,7 @@ use std::ops::Deref;
use github_actions_models::{
common::expr::LoE,
workflow::{
job::{Matrix, NormalJob, StepBody, Strategy},
Job,
},
workflow::job::{Matrix, NormalJob, StepBody, Strategy},
};
use super::WorkflowAudit;
@ -257,50 +254,40 @@ impl WorkflowAudit for TemplateInjection {
Ok(Self { state })
}
fn audit<'w>(
&self,
workflow: &'w crate::models::Workflow,
) -> anyhow::Result<Vec<crate::finding::Finding<'w>>> {
fn audit_step<'w>(&self, step: &super::Step<'w>) -> anyhow::Result<Vec<super::Finding<'w>>> {
let mut findings = vec![];
for job in workflow.jobs() {
let Job::NormalJob(normal) = job.deref() else {
continue;
};
for step in job.steps() {
let (script, script_loc) = match &step.deref().body {
StepBody::Uses { uses, with } => {
if uses.starts_with("actions/github-script") {
match with.get("script") {
Some(script) => (
&script.to_string(),
step.location().with_keys(&["with".into(), "script".into()]),
),
None => continue,
}
} else {
continue;
}
let (script, script_loc) = match &step.deref().body {
StepBody::Uses { uses, with } => {
if uses.starts_with("actions/github-script") {
match with.get("script") {
Some(script) => (
&script.to_string(),
step.location().with_keys(&["with".into(), "script".into()]),
),
None => return Ok(findings),
}
StepBody::Run { run, .. } => (run, step.location().with_keys(&["run".into()])),
};
for (expr, severity, confidence) in
self.injectable_template_expressions(script, normal)
{
findings.push(
Self::finding()
.severity(severity)
.confidence(confidence)
.add_location(step.location_with_name())
.add_location(script_loc.clone().annotated(format!(
"{expr} may expand into attacker-controllable code"
)))
.build(workflow)?,
)
} else {
return Ok(findings);
}
}
StepBody::Run { run, .. } => (run, step.location().with_keys(&["run".into()])),
};
for (expr, severity, confidence) in self.injectable_template_expressions(script, step.job())
{
findings.push(
Self::finding()
.severity(severity)
.confidence(confidence)
.add_location(step.location_with_name())
.add_location(
script_loc.clone().annotated(format!(
"{expr} may expand into attacker-controllable code"
)),
)
.build(step.workflow())?,
)
}
Ok(findings)

View file

@ -1,8 +1,6 @@
use github_actions_models::workflow::Job;
use crate::finding::{Confidence, Severity};
use super::{AuditState, Finding, Workflow, WorkflowAudit};
use super::{AuditState, Finding, Step, WorkflowAudit};
pub(crate) struct UnpinnedUses {}
@ -28,35 +26,25 @@ impl WorkflowAudit for UnpinnedUses {
Ok(Self {})
}
fn audit<'w>(&self, workflow: &'w Workflow) -> anyhow::Result<Vec<Finding<'w>>> {
fn audit_step<'w>(&self, step: &Step<'w>) -> anyhow::Result<Vec<Finding<'w>>> {
let mut findings = vec![];
for job in workflow.jobs() {
// No point in checking reusable workflows, since they
// require a ref pin when used outside of the local repo.
let Job::NormalJob(_) = *job else {
continue;
};
let Some(uses) = step.uses() else {
return Ok(vec![]);
};
for step in job.steps() {
let Some(uses) = step.uses() else {
continue;
};
if uses.unpinned() {
findings.push(
Self::finding()
.confidence(Confidence::High)
.severity(Severity::Informational)
.add_location(
step.location().with_keys(&["uses".into()]).annotated(
"action is not pinned to a tag, branch, or hash ref",
),
)
.build(workflow)?,
);
}
}
if uses.unpinned() {
findings.push(
Self::finding()
.confidence(Confidence::High)
.severity(Severity::Informational)
.add_location(
step.location()
.with_keys(&["uses".into()])
.annotated("action is not pinned to a tag, branch, or hash ref"),
)
.build(step.workflow())?,
);
}
Ok(findings)

View file

@ -1,9 +1,7 @@
use std::{collections::HashMap, ops::Deref};
use github_actions_models::{
common::EnvValue,
workflow::{job::StepBody, Job},
};
use anyhow::Ok;
use github_actions_models::{common::EnvValue, workflow::job::StepBody};
use super::WorkflowAudit;
use crate::{
@ -76,73 +74,62 @@ impl WorkflowAudit for UseTrustedPublishing {
Ok(Self { _state: state })
}
fn audit<'w>(
&self,
workflow: &'w crate::models::Workflow,
) -> anyhow::Result<Vec<crate::finding::Finding<'w>>> {
fn audit_step<'w>(&self, step: &super::Step<'w>) -> anyhow::Result<Vec<super::Finding<'w>>> {
let mut findings = vec![];
for job in workflow.jobs() {
if !matches!(job.deref(), Job::NormalJob(_)) {
continue;
}
let StepBody::Uses { uses, with } = &step.deref().body else {
return Ok(findings);
};
for step in job.steps() {
let StepBody::Uses { uses, with } = &step.deref().body else {
continue;
};
if uses.starts_with("pypa/gh-action-pypi-publish") {
if self.pypi_publish_uses_manual_credentials(with) {
findings.push(
Self::finding()
.severity(Severity::Informational)
.confidence(Confidence::High)
.add_location(
step.location()
.with_keys(&["uses".into()])
.annotated("this step"),
)
.add_location(
step.location()
.with_keys(&["with".into(), "password".into()])
.annotated(USES_MANUAL_CREDENTIAL),
)
.build(workflow)?,
);
}
} else if uses.starts_with("rubygems/release-gem") {
if self.release_gem_uses_manual_credentials(with) {
findings.push(
Self::finding()
.severity(Severity::Informational)
.confidence(Confidence::High)
.add_location(
step.location()
.with_keys(&["uses".into()])
.annotated("this step"),
)
.add_location(step.location().annotated(USES_MANUAL_CREDENTIAL))
.build(workflow)?,
);
}
} else if uses.starts_with("rubygems/configure-rubygems-credential")
&& self.rubygems_credential_uses_manual_credentials(with)
{
findings.push(
Self::finding()
.severity(Severity::Informational)
.confidence(Confidence::High)
.add_location(
step.location()
.with_keys(&["uses".into()])
.annotated("this step"),
)
.add_location(step.location().annotated(USES_MANUAL_CREDENTIAL))
.build(workflow)?,
);
}
if uses.starts_with("pypa/gh-action-pypi-publish") {
if self.pypi_publish_uses_manual_credentials(with) {
findings.push(
Self::finding()
.severity(Severity::Informational)
.confidence(Confidence::High)
.add_location(
step.location()
.with_keys(&["uses".into()])
.annotated("this step"),
)
.add_location(
step.location()
.with_keys(&["with".into(), "password".into()])
.annotated(USES_MANUAL_CREDENTIAL),
)
.build(step.workflow())?,
);
}
} else if uses.starts_with("rubygems/release-gem") {
if self.release_gem_uses_manual_credentials(with) {
findings.push(
Self::finding()
.severity(Severity::Informational)
.confidence(Confidence::High)
.add_location(
step.location()
.with_keys(&["uses".into()])
.annotated("this step"),
)
.add_location(step.location().annotated(USES_MANUAL_CREDENTIAL))
.build(step.workflow())?,
);
}
} else if uses.starts_with("rubygems/configure-rubygems-credential")
&& self.rubygems_credential_uses_manual_credentials(with)
{
findings.push(
Self::finding()
.severity(Severity::Informational)
.confidence(Confidence::High)
.add_location(
step.location()
.with_keys(&["uses".into()])
.annotated("this step"),
)
.add_location(step.location().annotated(USES_MANUAL_CREDENTIAL))
.build(step.workflow())?,
);
}
Ok(findings)

View file

@ -4,10 +4,17 @@
use std::{collections::hash_map, iter::Enumerate, ops::Deref, path::Path};
use anyhow::{anyhow, Context, Result};
use github_actions_models::workflow::{self, job::StepBody};
use github_actions_models::workflow::{
self,
job::{NormalJob, StepBody},
};
use crate::finding::{Route, SymbolicLocation};
/// Represents an entire GitHub Actions workflow.
///
/// This type implements [`Deref`] for [`workflow::Workflow`],
/// providing access to the underlying data model.
pub(crate) struct Workflow {
pub(crate) path: String,
pub(crate) document: yamlpath::Document,
@ -23,6 +30,7 @@ impl Deref for Workflow {
}
impl Workflow {
/// Load a workflow from the given file on disk.
pub(crate) fn from_file<P: AsRef<Path>>(p: P) -> Result<Self> {
let raw = std::fs::read_to_string(p.as_ref())?;
@ -42,12 +50,17 @@ impl Workflow {
})
}
/// Returns the filename (i.e. base component) of the loaded workflow.
///
/// For example, if the workflow was loaded from `/foo/bar/baz.yml`,
/// [`Self::filename()`] returns `baz.yml`.
pub(crate) fn filename(&self) -> &str {
// NOTE: Unwraps are safe here since we enforce UTF-8 paths
// and require a filename as an invariant.
Path::new(&self.path).file_name().unwrap().to_str().unwrap()
}
/// This workflow's [`SymbolicLocation`].
pub(crate) fn location(&self) -> SymbolicLocation {
SymbolicLocation {
name: self.filename(),
@ -57,15 +70,24 @@ impl Workflow {
}
}
/// A [`Jobs`] iterator over this workflow's constituent [`Job`]s.
pub(crate) fn jobs(&self) -> Jobs<'_> {
Jobs::new(self)
}
}
/// Represents a single GitHub Actions job.
///
/// This type implements [`Deref`] for [`workflow::Job`], providing
/// access to the underlying data model.
#[derive(Clone)]
pub(crate) struct Job<'w> {
/// The job's unique ID (i.e., its key in the workflow's `jobs:` block).
pub(crate) id: &'w str,
/// The underlying job.
inner: &'w workflow::Job,
parent: SymbolicLocation<'w>,
/// The job's parent [`Workflow`].
parent: &'w Workflow,
}
impl<'w> Deref for Job<'w> {
@ -77,29 +99,37 @@ impl<'w> Deref for Job<'w> {
}
impl<'w> Job<'w> {
pub(crate) fn new(id: &'w str, inner: &'w workflow::Job, parent: SymbolicLocation<'w>) -> Self {
fn new(id: &'w str, inner: &'w workflow::Job, parent: &'w Workflow) -> Self {
Self { id, inner, parent }
}
pub(crate) fn location(&self) -> SymbolicLocation<'w> {
self.parent.with_job(self)
/// This job's parent [`Workflow`]
pub(crate) fn parent(&self) -> &'w Workflow {
self.parent
}
/// This job's [`SymbolicLocation`].
pub(crate) fn location(&self) -> SymbolicLocation<'w> {
self.parent().location().with_job(self)
}
/// An iterator of this job's constituent [`Step`]s.
pub(crate) fn steps(&self) -> Steps<'w> {
Steps::new(self)
}
}
/// An iterable container for jobs within a [`Workflow`].
pub(crate) struct Jobs<'w> {
parent: &'w Workflow,
inner: hash_map::Iter<'w, String, workflow::Job>,
location: SymbolicLocation<'w>,
}
impl<'w> Jobs<'w> {
pub(crate) fn new(workflow: &'w Workflow) -> Self {
fn new(workflow: &'w Workflow) -> Self {
Self {
parent: workflow,
inner: workflow.jobs.iter(),
location: workflow.location(),
}
}
}
@ -111,17 +141,24 @@ impl<'w> Iterator for Jobs<'w> {
let item = self.inner.next();
match item {
Some((id, job)) => Some(Job::new(id, job, self.location.clone())),
Some((id, job)) => Some(Job::new(id, job, self.parent)),
None => None,
}
}
}
/// Represents a single step in a normal workflow job.
///
/// This type implements [`Deref`] for [`workflow::job::Step`], which
/// provides access to the step's actual fields.
#[derive(Clone)]
pub(crate) struct Step<'w> {
/// The step's index within its parent job.
pub(crate) index: usize,
/// The inner step model.
inner: &'w workflow::job::Step,
parent: SymbolicLocation<'w>,
/// The parent [`Job`].
parent: Job<'w>,
}
impl<'w> Deref for Step<'w> {
@ -133,11 +170,7 @@ impl<'w> Deref for Step<'w> {
}
impl<'w> Step<'w> {
pub(crate) fn new(
index: usize,
inner: &'w workflow::job::Step,
parent: SymbolicLocation<'w>,
) -> Self {
fn new(index: usize, inner: &'w workflow::job::Step, parent: Job<'w>) -> Self {
Self {
index,
inner,
@ -145,6 +178,22 @@ impl<'w> Step<'w> {
}
}
/// Returns this step's parent [`NormalJob`].
///
/// Note that this returns the [`NormalJob`], not the wrapper [`Job`].
pub(crate) fn job(&self) -> &'w NormalJob {
match *self.parent {
workflow::Job::NormalJob(job) => job,
// NOTE(ww): Unreachable because steps are always parented by normal jobs.
workflow::Job::ReusableWorkflowCallJob(_) => unreachable!(),
}
}
/// Returns this step's (grand)parent [`Workflow`].
pub(crate) fn workflow(&self) -> &'w Workflow {
self.parent.parent()
}
/// Returns a [`Uses`] for this [`Step`], if it has one.
pub(crate) fn uses(&self) -> Option<Uses<'w>> {
let StepBody::Uses { uses, .. } = &self.inner.body else {
@ -156,7 +205,7 @@ impl<'w> Step<'w> {
/// Returns a symbolic location for this [`Step`].
pub(crate) fn location(&self) -> SymbolicLocation<'w> {
self.parent.with_step(self)
self.parent.location().with_step(self)
}
/// Like [`Step::location`], except with the step's `name`
@ -170,13 +219,17 @@ impl<'w> Step<'w> {
}
}
/// An iterable container for steps within a [`Job`].
pub(crate) struct Steps<'w> {
inner: Enumerate<std::slice::Iter<'w, github_actions_models::workflow::job::Step>>,
location: SymbolicLocation<'w>,
parent: Job<'w>,
}
impl<'w> Steps<'w> {
pub(crate) fn new(job: &Job<'w>) -> Self {
/// Create a new [`Steps`].
///
/// Panics if the given [`Job`] is a reusable job, rather than a "normal" job.
fn new(job: &Job<'w>) -> Self {
// TODO: do something less silly here.
match &job.inner {
workflow::Job::ReusableWorkflowCallJob(_) => {
@ -184,7 +237,7 @@ impl<'w> Steps<'w> {
}
workflow::Job::NormalJob(ref n) => Self {
inner: n.steps.iter().enumerate(),
location: job.location(),
parent: job.clone(),
},
}
}
@ -197,12 +250,13 @@ impl<'w> Iterator for Steps<'w> {
let item = self.inner.next();
match item {
Some((idx, step)) => Some(Step::new(idx, step, self.location.clone())),
Some((idx, step)) => Some(Step::new(idx, step, self.parent.clone())),
None => None,
}
}
}
/// The contents of a `uses: docker://` step stanza.
#[derive(Copy, Clone, Debug, PartialEq)]
pub(crate) struct DockerUses<'a> {
pub(crate) registry: Option<&'a str>,
@ -211,6 +265,7 @@ pub(crate) struct DockerUses<'a> {
pub(crate) hash: Option<&'a str>,
}
/// The contents of a `uses: some/repo` step stanza.
#[derive(Copy, Clone, Debug, PartialEq)]
pub(crate) struct RepositoryUses<'a> {
pub(crate) owner: &'a str,