diff --git a/Cargo.lock b/Cargo.lock index 4d777ee2..5bd0b696 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -315,8 +315,6 @@ version = "1.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "04da6a0d40b948dfc4fa8f5bbf402b0fc1a64a28dbf7d12ffd683550f2c1b63a" dependencies = [ - "jobserver", - "libc", "shlex", ] @@ -877,21 +875,6 @@ version = "0.31.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" -[[package]] -name = "git2" -version = "0.20.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2deb07a133b1520dc1a5690e9bd08950108873d7ed5de38dcc74d3b5ebffa110" -dependencies = [ - "bitflags 2.9.0", - "libc", - "libgit2-sys", - "log", - "openssl-probe", - "openssl-sys", - "url", -] - [[package]] name = "github-actions-expressions" version = "0.0.10" @@ -1399,16 +1382,6 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" -[[package]] -name = "jobserver" -version = "0.1.34" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" -dependencies = [ - "getrandom 0.3.2", - "libc", -] - [[package]] name = "js-sys" version = "0.3.77" @@ -1458,20 +1431,6 @@ version = "0.2.172" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" -[[package]] -name = "libgit2-sys" -version = "0.18.2+1.9.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1c42fe03df2bd3c53a3a9c7317ad91d80c81cd1fb0caec8d7cc4cd2bfa10c222" -dependencies = [ - "cc", - "libc", - "libssh2-sys", - "libz-sys", - "openssl-sys", - "pkg-config", -] - [[package]] name = "libredox" version = "0.1.3" @@ -1483,32 +1442,6 @@ dependencies = [ "redox_syscall", ] -[[package]] -name = "libssh2-sys" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "220e4f05ad4a218192533b300327f5150e809b54c4ec83b5a1d91833601811b9" -dependencies = [ - "cc", - "libc", - "libz-sys", - "openssl-sys", - "pkg-config", - "vcpkg", -] - -[[package]] -name = "libz-sys" -version = "1.1.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b70e7a7df205e92a1a4cd9aaae7898dac0aa555503cc0a649494d0d60e7651d" -dependencies = [ - "cc", - "libc", - "pkg-config", - "vcpkg", -] - [[package]] name = "line-index" version = "0.1.2" @@ -1780,24 +1713,6 @@ version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" -[[package]] -name = "openssl-probe" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d05e27ee213611ffe7d6348b942e8f942b37114c00cc03cec254295a4a17852e" - -[[package]] -name = "openssl-sys" -version = "0.9.110" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0a9f0075ba3c21b09f8e8b2026584b1d18d49388648f2fbbf3c97ea8deced8e2" -dependencies = [ - "cc", - "libc", - "pkg-config", - "vcpkg", -] - [[package]] name = "os_info" version = "3.10.0" @@ -1911,12 +1826,6 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" -[[package]] -name = "pkg-config" -version = "0.3.32" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" - [[package]] name = "portable-atomic" version = "1.11.0" @@ -3345,12 +3254,6 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" -[[package]] -name = "vcpkg" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426" - [[package]] name = "version_check" version = "0.9.5" @@ -4017,7 +3920,6 @@ dependencies = [ "etcetera", "flate2", "fst", - "git2", "github-actions-expressions", "github-actions-models", "http", diff --git a/Cargo.toml b/Cargo.toml index 004fe6a0..41fc7233 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -39,7 +39,6 @@ csv = "1.3.1" etcetera = "0.10.0" flate2 = "1.1.4" fst = "0.4.7" -git2 = "0.20.2" http = "1.3.1" http-cache-reqwest = { version = "1.0.0-alpha.2", features = ["manager-moka"] } human-panic = "2.0.3" diff --git a/crates/zizmor/Cargo.toml b/crates/zizmor/Cargo.toml index 40f77257..67710555 100644 --- a/crates/zizmor/Cargo.toml +++ b/crates/zizmor/Cargo.toml @@ -39,7 +39,6 @@ clap_complete_nushell.workspace = true etcetera.workspace = true flate2.workspace = true fst.workspace = true -git2.workspace = true github-actions-expressions.workspace = true github-actions-models.workspace = true http.workspace = true diff --git a/crates/zizmor/src/audit/impostor_commit.rs b/crates/zizmor/src/audit/impostor_commit.rs index f5e77787..6358d6a9 100644 --- a/crates/zizmor/src/audit/impostor_commit.rs +++ b/crates/zizmor/src/audit/impostor_commit.rs @@ -15,7 +15,7 @@ use crate::{ Confidence, Finding, Fix, FixDisposition, Severity, location::{Locatable as _, Routable}, }, - github_api::{self, ComparisonStatus}, + github::{self, ComparisonStatus}, models::{ StepCommon, uses::RepositoryUsesExt as _, @@ -31,7 +31,7 @@ use yamlpatch::{Op, Patch}; pub const IMPOSTOR_ANNOTATION: &str = "uses a commit that doesn't belong to the specified org/repo"; pub(crate) struct ImpostorCommit { - pub(crate) client: github_api::Client, + pub(crate) client: github::Client, } audit_meta!( @@ -335,10 +335,9 @@ jobs: let state = crate::state::AuditState { no_online_audits: false, gh_client: Some( - crate::github_api::Client::new( - &crate::github_api::GitHubHost::Standard("github.com".to_string()), - &crate::github_api::GitHubToken::new(&std::env::var("GH_TOKEN").unwrap()) - .unwrap(), + crate::github::Client::new( + &crate::github::GitHubHost::Standard("github.com".to_string()), + &crate::github::GitHubToken::new(&std::env::var("GH_TOKEN").unwrap()).unwrap(), "/tmp".into(), ) .unwrap(), @@ -395,10 +394,9 @@ jobs: let state = crate::state::AuditState { no_online_audits: false, gh_client: Some( - crate::github_api::Client::new( - &crate::github_api::GitHubHost::Standard("github.com".to_string()), - &crate::github_api::GitHubToken::new(&std::env::var("GH_TOKEN").unwrap()) - .unwrap(), + crate::github::Client::new( + &crate::github::GitHubHost::Standard("github.com".to_string()), + &crate::github::GitHubToken::new(&std::env::var("GH_TOKEN").unwrap()).unwrap(), "/tmp".into(), ) .unwrap(), diff --git a/crates/zizmor/src/audit/known_vulnerable_actions.rs b/crates/zizmor/src/audit/known_vulnerable_actions.rs index e6882816..8df2ed17 100644 --- a/crates/zizmor/src/audit/known_vulnerable_actions.rs +++ b/crates/zizmor/src/audit/known_vulnerable_actions.rs @@ -12,14 +12,14 @@ use super::{Audit, AuditLoadError, audit_meta}; use crate::{ config::Config, finding::{Confidence, Finding, Fix, Severity, location::Routable as _}, - github_api, + github, models::{StepCommon, action::CompositeStep, uses::RepositoryUsesExt as _, workflow::Step}, state::AuditState, }; use yamlpatch::{Op, Patch}; pub(crate) struct KnownVulnerableActions { - client: github_api::Client, + client: github::Client, } audit_meta!( @@ -303,9 +303,9 @@ mod tests { let state = crate::state::AuditState::new( false, Some( - github_api::Client::new( - &github_api::GitHubHost::default(), - &github_api::GitHubToken::new("fake").unwrap(), + github::Client::new( + &github::GitHubHost::default(), + &github::GitHubToken::new("fake").unwrap(), "/tmp".into(), ) .unwrap(), @@ -748,9 +748,9 @@ jobs: let state = crate::state::AuditState::new( false, Some( - github_api::Client::new( - &github_api::GitHubHost::default(), - &github_api::GitHubToken::new(&std::env::var("GH_TOKEN").unwrap()).unwrap(), + github::Client::new( + &github::GitHubHost::default(), + &github::GitHubToken::new(&std::env::var("GH_TOKEN").unwrap()).unwrap(), "/tmp".into(), ) .unwrap(), @@ -803,9 +803,9 @@ jobs: let state = crate::state::AuditState::new( false, Some( - github_api::Client::new( - &github_api::GitHubHost::default(), - &github_api::GitHubToken::new(&std::env::var("GH_TOKEN").unwrap()).unwrap(), + github::Client::new( + &github::GitHubHost::default(), + &github::GitHubToken::new(&std::env::var("GH_TOKEN").unwrap()).unwrap(), "/tmp".into(), ) .unwrap(), diff --git a/crates/zizmor/src/audit/ref_confusion.rs b/crates/zizmor/src/audit/ref_confusion.rs index e569a4ae..0e6295af 100644 --- a/crates/zizmor/src/audit/ref_confusion.rs +++ b/crates/zizmor/src/audit/ref_confusion.rs @@ -15,7 +15,7 @@ use crate::finding::location::Locatable as _; use crate::models::{StepCommon, action::CompositeStep}; use crate::{ finding::{Confidence, Severity}, - github_api, + github, models::uses::RepositoryUsesExt as _, state::AuditState, }; @@ -24,7 +24,7 @@ const REF_CONFUSION_ANNOTATION: &str = "uses a ref that's provided by both the branch and tag namespaces"; pub(crate) struct RefConfusion { - client: github_api::Client, + client: github::Client, } audit_meta!( diff --git a/crates/zizmor/src/audit/ref_version_mismatch.rs b/crates/zizmor/src/audit/ref_version_mismatch.rs index 67b16cfd..284131c9 100644 --- a/crates/zizmor/src/audit/ref_version_mismatch.rs +++ b/crates/zizmor/src/audit/ref_version_mismatch.rs @@ -13,12 +13,12 @@ use crate::{ Confidence, Finding, Fix, Severity, location::{Comment, Feature, Location, Routable}, }, - github_api, + github, models::{StepCommon, action::CompositeStep, uses::RepositoryUsesExt, workflow::Step}, }; pub(crate) struct RefVersionMismatch { - client: github_api::Client, + client: github::Client, } audit_meta!( @@ -246,9 +246,9 @@ jobs: let state = crate::state::AuditState::new( false, Some( - github_api::Client::new( - &github_api::GitHubHost::default(), - &github_api::GitHubToken::new(&std::env::var("GH_TOKEN").unwrap()).unwrap(), + github::Client::new( + &github::GitHubHost::default(), + &github::GitHubToken::new(&std::env::var("GH_TOKEN").unwrap()).unwrap(), "/tmp".into(), ) .unwrap(), @@ -318,9 +318,9 @@ jobs: let state = crate::state::AuditState::new( false, Some( - github_api::Client::new( - &github_api::GitHubHost::default(), - &github_api::GitHubToken::new(&std::env::var("GH_TOKEN").unwrap()).unwrap(), + github::Client::new( + &github::GitHubHost::default(), + &github::GitHubToken::new(&std::env::var("GH_TOKEN").unwrap()).unwrap(), "/tmp".into(), ) .unwrap(), diff --git a/crates/zizmor/src/audit/stale_action_refs.rs b/crates/zizmor/src/audit/stale_action_refs.rs index 14ced804..9b926bfe 100644 --- a/crates/zizmor/src/audit/stale_action_refs.rs +++ b/crates/zizmor/src/audit/stale_action_refs.rs @@ -8,13 +8,13 @@ use crate::{ Persona, config::Config, finding::{Confidence, Finding, Severity}, - github_api, + github, models::{StepCommon, action::CompositeStep, uses::RepositoryUsesExt as _, workflow::Step}, state::AuditState, }; pub(crate) struct StaleActionRefs { - client: github_api::Client, + client: github::Client, } audit_meta!( diff --git a/crates/zizmor/src/config.rs b/crates/zizmor/src/config.rs index c35dcaf6..cb56296a 100644 --- a/crates/zizmor/src/config.rs +++ b/crates/zizmor/src/config.rs @@ -13,7 +13,7 @@ use crate::{ App, CollectionOptions, audit::{AuditCore, forbidden_uses::ForbiddenUses, unpinned_uses::UnpinnedUses}, finding::Finding, - github_api::{Client, ClientError}, + github::{Client, ClientError}, models::uses::RepositoryUsesPattern, registry::input::RepoSlug, }; diff --git a/crates/zizmor/src/github_api.rs b/crates/zizmor/src/github.rs similarity index 82% rename from crates/zizmor/src/github_api.rs rename to crates/zizmor/src/github.rs index 0c516a18..121b6811 100644 --- a/crates/zizmor/src/github_api.rs +++ b/crates/zizmor/src/github.rs @@ -3,18 +3,17 @@ //! The [`Client`] type uses a mixture of GitHub's REST API and //! direct Git access, depending on the operation being performed. -use std::{collections::HashSet, fmt::Display, io::Read, ops::Deref, str::FromStr}; +use std::{collections::HashSet, fmt::Display, io::Read, ops::Deref, str::FromStr, sync::Arc}; use camino::Utf8Path; use flate2::read::GzDecoder; -use git2::{Cred, Remote, RemoteCallbacks}; use http_cache_reqwest::{ CACacheManager, Cache, CacheManager, CacheMode, CacheOptions, HttpCache, HttpCacheOptions, MokaCache, MokaManager, }; use reqwest::{ Response, StatusCode, - header::{ACCEPT, AUTHORIZATION, HeaderMap, HeaderValue, InvalidHeaderValue, USER_AGENT}, + header::{ACCEPT, AUTHORIZATION, HeaderMap, HeaderValue, InvalidHeaderValue}, retry, }; use reqwest_middleware::{ClientBuilder, ClientWithMiddleware}; @@ -26,9 +25,12 @@ use tracing::instrument; use crate::{ CollectionOptions, registry::input::{CollectionError, InputGroup, InputKey, InputKind, RepoSlug}, - utils::PipeSelf, + utils::{PipeSelf, ZIZMOR_AGENT}, }; +mod lineref; +mod pktline; + /// Represents different types of GitHub hosts. #[derive(Clone, Debug, PartialEq)] pub(crate) enum GitHubHost { @@ -105,10 +107,6 @@ impl GitHubToken { Ok(Self(token.to_owned())) } - fn to_cred(&self) -> Result { - Cred::userpass_plaintext("x-access-token", &self.0) - } - fn to_header_value(&self) -> Result { HeaderValue::from_str(&format!("Bearer {}", self.0)) } @@ -126,9 +124,10 @@ pub(crate) enum ClientError { /// We couldn't turn the user's token into a valid header value. #[error("invalid token header")] InvalidTokenHeader(#[from] InvalidHeaderValue), - /// An error originating from the libgit client - #[error("request error while accessing Git")] - Git(#[from] git2::Error), + /// An error originating from listing refs through direct + /// Git access. + #[error("error while listing Git references")] + ListRefs(#[from] lineref::LineRefError), /// We couldn't list branches because of an underlying error. #[error("couldn't list branches for {owner}/{repo}")] ListBranches { @@ -149,6 +148,9 @@ pub(crate) enum ClientError { /// between listing and fetching it. #[error("couldn't fetch file {file} from {slug}: is the branch/tag being modified?")] FileTOCTOU { file: String, slug: String }, + /// Any of the errors above, wrapped from concurrent contexts. + #[error(transparent)] + Inner(#[from] Arc), } #[derive(Clone, Copy, Debug)] @@ -216,15 +218,16 @@ impl reqwest_middleware::Middleware for ChainedCache { #[derive(Clone)] struct RemoteHead { name: String, - oid: git2::Oid, + oid: String, } #[derive(Clone)] pub(crate) struct Client { api_base: String, _host: GitHubHost, - http: ClientWithMiddleware, token: GitHubToken, + base_client: ClientWithMiddleware, + api_client: ClientWithMiddleware, ref_cache: MokaCache>, } @@ -234,27 +237,25 @@ impl Client { token: &GitHubToken, cache_dir: &Utf8Path, ) -> Result { - let mut headers = HeaderMap::new(); - headers.insert(USER_AGENT, "zizmor".parse().unwrap()); - headers.insert(AUTHORIZATION, token.to_header_value()?); - headers.insert("X-GitHub-Api-Version", "2022-11-28".parse().unwrap()); - headers.insert(ACCEPT, "application/vnd.github+json".parse().unwrap()); + // Base HTTP client for non-API requests, e.g. direct Git access. + // This client currently has no middleware. + let base_client = reqwest::Client::builder() + .user_agent(ZIZMOR_AGENT) + .build() + // TODO: Add retries here too? + .expect("couldn't build base HTTP client"); - let http_cache_options = HttpCacheOptions { - cache_options: Some(CacheOptions { - // GitHub API requests made with an API token seem to - // always have `Cache-Control: private`, so we need to - // explicitly tell http-cache that our cache is not shared - // in order for things to cache correctly. - shared: false, - ..Default::default() - }), - ..Default::default() - }; + // GitHub REST API client. + let mut api_client_headers = HeaderMap::new(); + api_client_headers.insert(AUTHORIZATION, token.to_header_value()?); + api_client_headers.insert("X-GitHub-Api-Version", "2022-11-28".parse().unwrap()); + api_client_headers.insert(ACCEPT, "application/vnd.github+json".parse().unwrap()); - let http = ClientBuilder::new( + let api_client = Self::default_middleware( + cache_dir, reqwest::Client::builder() - .default_headers(headers) + .user_agent(ZIZMOR_AGENT) + .default_headers(api_client_headers) .retry( retry::for_host(host.to_api_host()) .max_retries_per_request(3) @@ -279,75 +280,126 @@ impl Client { }), ) .build() - .expect("couldn't build GitHub client?"), - ) - .with(CacheLoggingMiddleware) - .with(ChainedCache( - Cache(HttpCache { - mode: CacheMode::Default, - manager: CACacheManager { - path: cache_dir.into(), - remove_opts: Default::default(), - }, - options: http_cache_options.clone(), - }), - CacheType::File, - )) - .with(ChainedCache( - Cache(HttpCache { - mode: CacheMode::ForceCache, - manager: MokaManager::new(MokaCache::new(1000)), - options: http_cache_options, - }), - CacheType::Memory, - )) - .build(); + .expect("couldn't build GitHub client"), + ); Ok(Self { api_base: host.to_api_url(), _host: host.clone(), - http, token: token.clone(), + base_client: base_client.into(), + api_client, ref_cache: MokaCache::new(100), }) } - async fn list_refs(&self, owner: &str, repo: &str) -> Result, git2::Error> { - let url = format!("https://github.com/{owner}/{repo}.git"); - let key = url.clone(); + fn default_middleware(cache_dir: &Utf8Path, client: reqwest::Client) -> ClientWithMiddleware { + let http_cache_options = HttpCacheOptions { + cache_options: Some(CacheOptions { + // GitHub API requests made with an API token seem to + // always have `Cache-Control: private`, so we need to + // explicitly tell http-cache that our cache is not shared + // in order for things to cache correctly. + shared: false, + ..Default::default() + }), + ..Default::default() + }; - if let Some(cached_entry) = self.ref_cache.get(&key).await { - return Ok(cached_entry); + ClientBuilder::new(client) + .with(CacheLoggingMiddleware) + .with(ChainedCache( + Cache(HttpCache { + mode: CacheMode::Default, + manager: CACacheManager { + path: cache_dir.into(), + remove_opts: Default::default(), + }, + options: http_cache_options.clone(), + }), + CacheType::File, + )) + .with(ChainedCache( + Cache(HttpCache { + mode: CacheMode::ForceCache, + manager: MokaManager::new(MokaCache::new(1000)), + options: http_cache_options, + }), + CacheType::Memory, + )) + .build() + } + + async fn list_refs(&self, owner: &str, repo: &str) -> Result, ClientError> { + let url = format!("https://github.com/{owner}/{repo}.git/git-upload-pack"); + + let entry = self + .ref_cache + .entry(url.clone()) + .or_try_insert_with(async { + // Build our `ls-refs` request. + // This effectively mimics what `git ls-remote` does under the hood. + // We additionally use the ref-prefix arguments to (hopefully) limit + // the server's response to only branches and tags. + let mut req = vec![]; + pktline::Packet::data("command=ls-refs\n".as_bytes()) + .unwrap() + .encode(&mut req) + .unwrap(); + pktline::Packet::data(format!("agent={}\n", ZIZMOR_AGENT).as_bytes()) + .unwrap() + .encode(&mut req) + .unwrap(); + pktline::Packet::Delim.encode(&mut req).unwrap(); + pktline::Packet::data("peel\n".as_bytes()) + .unwrap() + .encode(&mut req) + .unwrap(); + pktline::Packet::data("ref-prefix refs/heads/\n".as_bytes()) + .unwrap() + .encode(&mut req) + .unwrap(); + pktline::Packet::data("ref-prefix refs/tags/\n".as_bytes()) + .unwrap() + .encode(&mut req) + .unwrap(); + pktline::Packet::Flush.encode(&mut req).unwrap(); + + let resp = self + .base_client + .post(&url) + .header("Git-Protocol", "version=2") + .body(req) + .basic_auth("x-access-token", Some(&self.token.0)) + .send() + .await? + .error_for_status()?; + + let mut remote_refs = vec![]; + let content = resp.bytes().await?; + + for line_ref in lineref::LineRefIterator::new(content.as_ref()) { + let line_ref = line_ref?; + + // We prefer the peeled object ID if present, since that + // gives us the commit ID for annotated tags. + remote_refs.push(RemoteHead { + name: line_ref.ref_name.to_string(), + oid: line_ref + .peeled_obj_id + .unwrap_or(line_ref.obj_id) + .to_string(), + }); + } + + Ok::, ClientError>(remote_refs) + }) + .await; + + match entry { + Ok(heads) => Ok(heads.into_value()), + Err(e) => Err(e.into()), } - - tracing::debug!("Fetching refs for {url}"); - - let token = self.token.clone(); - let remote_refs: Vec<_> = tokio::task::spawn_blocking(move || { - let mut remote = Remote::create_detached(url)?; - - let mut callbacks = RemoteCallbacks::new(); - callbacks.credentials(|_url, _username_from_url, _allowed_types| token.to_cred()); - - let connection = remote.connect_auth(git2::Direction::Fetch, Some(callbacks), None)?; - - Ok::, git2::Error>( - connection - .list()? - .iter() - .map(|head| RemoteHead { - name: head.name().to_string(), - oid: head.oid(), - }) - .collect(), - ) - }) - .await - .unwrap()?; - - self.ref_cache.insert(key, remote_refs.clone()).await; - - Ok(remote_refs) } async fn list_branches_internal( @@ -370,7 +422,7 @@ impl Client { .collect() }) .map_err(|e| ClientError::ListBranches { - source: ClientError::from(e).into(), + source: e.into(), owner: owner.to_string(), repo: repo.to_string(), }) @@ -421,7 +473,7 @@ impl Client { tags }) .map_err(|e| ClientError::ListTags { - source: ClientError::from(e).into(), + source: e.into(), owner: owner.to_string(), repo: repo.to_string(), }) @@ -529,7 +581,7 @@ impl Client { api_base = self.api_base ); - let resp = self.http.get(url).send().await?; + let resp = self.api_client.get(url).send().await?; match resp.status() { StatusCode::OK => { @@ -552,7 +604,7 @@ impl Client { // TODO: Paginate this as well. let url = format!("{api_base}/advisories", api_base = self.api_base); - self.http + self.api_client .get(url) .query(&[ ("ecosystem", "actions"), @@ -596,7 +648,7 @@ impl Client { ); let resp = self - .http + .api_client .get(&url) .header(ACCEPT, "application/vnd.github.raw+json") .pipe(|req| match slug.git_ref.as_ref() { @@ -645,7 +697,7 @@ impl Client { api_base = self.api_base ); let resp: Vec = self - .http + .api_client .get(&url) .pipe(|req| match git_ref { Some(g) => req.query(&[("ref", g)]), @@ -708,7 +760,7 @@ impl Client { // streaming asynchronously into the decompression, // probably with the async-compression crate. let resp = self - .http + .api_client .get(&url) .send() .await @@ -838,7 +890,7 @@ pub(crate) struct File { #[cfg(test)] mod tests { - use crate::github_api::{GitHubHost, GitHubToken}; + use crate::github::{GitHubHost, GitHubToken}; #[test] fn test_github_host() { diff --git a/crates/zizmor/src/github/lineref.rs b/crates/zizmor/src/github/lineref.rs new file mode 100644 index 00000000..ee141543 --- /dev/null +++ b/crates/zizmor/src/github/lineref.rs @@ -0,0 +1,203 @@ +//! Direct "line ref" parsing for Git references. +//! +//! This builds on top of the pkt-line protocol implementation in +//! [`pktline`](crate::github::pktline) to parse Git references +//! directly from a Git server's reference list advertisement +//! (over the "smart" HTTP protocol + Git v2 protocol). + +use std::sync::LazyLock; + +use regex::Regex; +use thiserror::Error; + +use crate::github::pktline; + +/// A regex pattern for parsing Git line refs. +/// +/// This only matches the subset of line refs that we expect to see +/// in practice: those with an object ID, a ref name, and optionally +/// a peeled object ID. +static LINE_REF_PATTERN: LazyLock = LazyLock::new(|| { + Regex::new( + r#"(?x) # verbose mode + ^ # start of string + (?P[0-9a-f]{40} ) # object ID + (?-x: ) # single space (temporarily disable verbose) + (?P\S+) # ref name + ( # start optional peeled group + (?-x: ) # space + peeled: # 'peeled:' label + (?P + [0-9a-f]{40} # peeled object ID + ) + )? # end optional peeled group + $ # end of string + "#, + ) + .unwrap() +}); + +#[derive(Debug, Error)] +pub(crate) enum LineRefError { + /// Packet decoding error. + #[error("Git pkt-line decoding error")] + Packet(#[from] pktline::PktLineError), + /// Invalid reference encoding. + /// This means we received data that was not valid UTF-8. + #[error("invalid reference: not valid UTF-8")] + BadRefEncoding(#[from] std::str::Utf8Error), + /// Malformed line ref. + #[error("malformed line ref: {line}")] + BadLine { line: String }, +} + +#[derive(Debug, PartialEq, Eq)] +pub(crate) struct LineRef<'a> { + /// The object ID (SHA-1 or SHA-256) that the ref points to. + pub(crate) obj_id: &'a str, + /// The full ref name, e.g. `refs/heads/main` or `refs/tags/v1.0.0`. + pub(crate) ref_name: &'a str, + /// The peeled object ID, if the ref has a `peeled` attribute. + pub(crate) peeled_obj_id: Option<&'a str>, +} + +impl<'a> LineRef<'a> { + /// Turn a decoded pkt-line data packet into a `LineRef`. + pub(crate) fn parse(data: pktline::Data<'a>) -> Result { + // From Git's protocol-v2: + // + // output = *ref + // flush-pkt + // obj-id-or-unborn = (obj-id | "unborn") + // ref = PKT-LINE(obj-id-or-unborn SP refname *(SP ref-attribute) LF) + // ref-attribute = (symref | peeled) + // symref = "symref-target:" symref-target + // peeled = "peeled:" obj-id + // + // Where obj-id and refname are defined in protocol-common as: + // + // NUL = %x00 + // zero-id = 40*"0" + // obj-id = 40*(HEXDIGIT) + // refname = "HEAD" + // refname /= "refs/" + // + // We send `peel` as an argument, so we expect the `peeled:obj-id` + // attribute to be present for annotated tags. + + // These packets should be UTF-8 encoded strings. + let mut line = str::from_utf8(data.as_ref()).map_err(LineRefError::BadRefEncoding)?; + + // We expect a LF, but protocol-common says we shouldn't + // complain if it's missing. + if line.ends_with("\n") { + line = &line[..line.len() - 1]; + } + + let captures = LINE_REF_PATTERN + .captures(line) + .ok_or_else(|| LineRefError::BadLine { + line: line.to_string(), + })?; + + Ok(Self { + obj_id: captures.name("obj_id").unwrap().as_str(), + ref_name: captures.name("ref_name").unwrap().as_str(), + peeled_obj_id: captures.name("peeled_obj_id").map(|m| m.as_str()), + }) + } +} + +/// An iterator over Git line refs from a pkt-line data stream. +/// +/// This wraps a [`pktline::PacketIterator`] and yields parsed [`LineRef`]s. +pub(crate) struct LineRefIterator<'a> { + inner: pktline::PacketIterator<'a>, +} + +impl<'a> LineRefIterator<'a> { + pub(crate) fn new(data: &'a [u8]) -> Self { + Self { + inner: pktline::PacketIterator::new(data), + } + } +} + +impl<'a> Iterator for LineRefIterator<'a> { + type Item = Result, LineRefError>; + + fn next(&mut self) -> Option { + match self.inner.next()? { + Ok(pktline::Packet::Data(data)) => Some(LineRef::parse(data)), + Ok(pktline::Packet::Flush) => None, + // We don't expect any non-flush control packets in the + // reference listing response. + Ok(pktline::Packet::Delim) => Some(Err(LineRefError::Packet( + pktline::PktLineError::UnexpectedControl { control: 1 }, + ))), + Err(e) => Some(Err(LineRefError::Packet(e))), + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + + /// From: + /// ``` + /// curl 'https://github.com/woodruffw-experiments/zizmor-recursive-tags.git/git-upload-pack' \ + /// -d $'0014command=ls-refs\n00010009peel\n0000' \ + /// -H 'Git-Protocol: version=2' + /// ``` + #[test] + fn test_iterator() { + let resp = r#"0032ac7cfa9fb7b5d6c417847e49e375aae20819a06f HEAD +003dac7cfa9fb7b5d6c417847e49e375aae20819a06f refs/heads/main +003e3e793ac5aba04cf8157e52e796de2d808f800039 refs/pull/1/head +006a1accca34bff60347d96faaf713d328ca1250d37b refs/tags/v1 peeled:3fdd4fca8fc76b254cefefca92381c41b28d1f0d +006cbcb36f3d551340e11b88c376e74e8ae77fc6cf0b refs/tags/v1.0 peeled:3fdd4fca8fc76b254cefefca92381c41b28d1f0d +006e06f9d47abf340b709b412900a7b3ce33557d32b5 refs/tags/v1.0.0 peeled:3fdd4fca8fc76b254cefefca92381c41b28d1f0d +0000 +"#; + + let refs: Result, _> = LineRefIterator::new(resp.as_bytes()).collect(); + let refs = refs.unwrap(); + + assert_eq!( + refs, + &[ + LineRef { + obj_id: "ac7cfa9fb7b5d6c417847e49e375aae20819a06f", + ref_name: "HEAD", + peeled_obj_id: None, + }, + LineRef { + obj_id: "ac7cfa9fb7b5d6c417847e49e375aae20819a06f", + ref_name: "refs/heads/main", + peeled_obj_id: None, + }, + LineRef { + obj_id: "3e793ac5aba04cf8157e52e796de2d808f800039", + ref_name: "refs/pull/1/head", + peeled_obj_id: None, + }, + LineRef { + obj_id: "1accca34bff60347d96faaf713d328ca1250d37b", + ref_name: "refs/tags/v1", + peeled_obj_id: Some("3fdd4fca8fc76b254cefefca92381c41b28d1f0d"), + }, + LineRef { + obj_id: "bcb36f3d551340e11b88c376e74e8ae77fc6cf0b", + ref_name: "refs/tags/v1.0", + peeled_obj_id: Some("3fdd4fca8fc76b254cefefca92381c41b28d1f0d"), + }, + LineRef { + obj_id: "06f9d47abf340b709b412900a7b3ce33557d32b5", + ref_name: "refs/tags/v1.0.0", + peeled_obj_id: Some("3fdd4fca8fc76b254cefefca92381c41b28d1f0d"), + }, + ] + ) + } +} diff --git a/crates/zizmor/src/github/pktline.rs b/crates/zizmor/src/github/pktline.rs new file mode 100644 index 00000000..99ea6618 --- /dev/null +++ b/crates/zizmor/src/github/pktline.rs @@ -0,0 +1,331 @@ +//! A very minimal Git packet line ("pkt-line") implementation. +//! +//! This provides the bare minimum functionality needed to communicate +//! over Git's "smart" HTTP protocol, e.g. for efficiently listing remote +//! refs without cloning or using GitHub's REST API endpoints. +//! +//! Modules like [`lineref`](crate::github::lineref) build on top of this +//! to provide higher-level handling of specific responses. +//! +//! More precisely, this module only implements (a subset of) the "v2" Git protocol. +//! +//! See: https://git-scm.com/docs/pack-protocol +//! See: https://git-scm.com/docs/protocol-common +//! See: https://git-scm.com/docs/protocol-v2 + +use thiserror::Error; + +const LENGTH_PREFIX_LEN: usize = 4; +const MAX_DATA_LEN: usize = 65516; + +/// Errors that can occur while encoding or decoding pkt-lines. +#[derive(Debug, Error)] +pub(crate) enum PktLineError { + /// Packet line frame is too short. + /// This means we received less than 4 bytes when trying to read the length prefix. + #[error( + "packet line frame is too short: expected at least {LENGTH_PREFIX_LEN} bytes, got {actual} bytes" + )] + FrameTooShort { actual: usize }, + /// Invalid packet line length prefix. + /// This means the first 4 bytes of the packet line were not valid hexadecimal digits. + #[error("invalid packet line length: expected hex digits, got '{length:?}'")] + BadLength { length: [u8; 4] }, + /// Packet line data is shorter than indicated by length prefix. + /// This means the length prefix indicated more bytes than were actually present. + #[error( + "packet line data is too short: expected at least {expected} bytes, got {actual} bytes" + )] + DataTooShort { expected: usize, actual: usize }, + /// Invalid non-data packet. + /// This means we received a control code that we don't recognize, + /// i.e. something other than flush (`0000`) or delim (`0001`). + #[error("invalid packet line: unexpected control code {control:04x}")] + BadControl { control: usize }, + /// Empty packet line. + /// This means we received a `0004` packet line, which the server should not send. + #[error("invalid packet line: empty")] + Empty, + /// Packet line data is too long. + /// This means the data to be encoded/decoded exceeds the maximum allowed length. + #[error("packet line data is too long: maximum is {MAX_DATA_LEN} bytes, got {actual} bytes")] + DataTooLong { actual: usize }, + /// In-band error. + /// This happens when the server sends an us an `ERR` packet line, + /// including a malformed one. + #[error("in-band error: {message}")] + InBandError { message: String }, + /// Unexpected control code. + /// This means we received a control code that we didn't contextually expect. + #[error("unexpected control code: {control}")] + UnexpectedControl { control: usize }, +} + +/// Represents the data portion of a pkt-line data packet. +/// +/// Invariant: the length of the data is at most [`MAX_DATA_LEN`]. +#[derive(Copy, Clone)] +pub(crate) struct Data<'a> { + inner: &'a [u8], +} + +impl<'a> Data<'a> { + pub(crate) fn len(&self) -> usize { + self.inner.len() + } + + pub(crate) fn as_ref(&self) -> &'a [u8] { + self.inner + } +} + +impl<'a> TryFrom<&'a [u8]> for Data<'a> { + type Error = PktLineError; + + fn try_from(value: &'a [u8]) -> Result { + if value.len() > MAX_DATA_LEN { + Err(PktLineError::DataTooLong { + actual: value.len(), + }) + } else { + Ok(Data { inner: value }) + } + } +} + +/// Valid packets +pub(crate) enum Packet<'a> { + Data(Data<'a>), + Flush, + Delim, +} + +impl<'a> Packet<'a> { + pub(crate) fn data(data: &'a [u8]) -> Result { + Data::try_from(data).map(Self::Data) + } + + pub(crate) fn encode(&self, dest: &mut Vec) -> Result<(), PktLineError> { + match self { + Packet::Data(data) => { + let len = data.len() + 4; + dest.extend_from_slice(&format!("{:04x}", len).into_bytes()); + dest.extend_from_slice(data.as_ref()); + Ok(()) + } + Packet::Flush => { + dest.extend_from_slice(b"0000"); + Ok(()) + } + Packet::Delim => { + dest.extend_from_slice(b"0001"); + Ok(()) + } + } + } + + /// Decode a single pkt-line packet from the start of the given byte slice. + /// + /// Returns the decoded packet, or an error if decoding failed. + pub(crate) fn decode(packet: &'a [u8]) -> Result { + if packet.len() < LENGTH_PREFIX_LEN { + return Err(PktLineError::FrameTooShort { + actual: packet.len(), + }); + } + + // Split the length and data apart. + // We expect exactly 4 hex digits for the length prefix. + let (length_bytes, data) = packet.split_at(4); + let Ok(length_str) = str::from_utf8(length_bytes) else { + return Err(PktLineError::BadLength { + length: length_bytes.try_into().unwrap(), + }); + }; + + let Ok(length) = usize::from_str_radix(length_str, 16) else { + return Err(PktLineError::BadLength { + length: length_bytes.try_into().unwrap(), + }); + }; + + match length { + 0 => Ok(Packet::Flush), + 1 => Ok(Packet::Delim), + 2 | 3 => Err(PktLineError::BadControl { control: length }), + 4 => Err(PktLineError::Empty), + _ => { + let data_len = length - LENGTH_PREFIX_LEN; + if data_len > MAX_DATA_LEN { + Err(PktLineError::DataTooLong { actual: data_len }) + } else if data.len() < data_len { + Err(PktLineError::DataTooShort { + expected: data_len, + actual: data.len(), + }) + } else { + let data = Data::try_from(&data[..data_len])?; + + if data.as_ref().starts_with(b"ERR ") { + let message = String::from_utf8_lossy(&data.as_ref()[4..]).to_string(); + Err(PktLineError::InBandError { message }) + } else { + Ok(Packet::Data(data)) + } + } + } + } + } + + pub(crate) fn length(&self) -> usize { + match self { + Packet::Data(data) => data.len() + LENGTH_PREFIX_LEN, + Packet::Flush => LENGTH_PREFIX_LEN, + Packet::Delim => LENGTH_PREFIX_LEN, + } + } +} + +/// An iterator over pkt-line packets in a byte slice. +/// This will yield packets until the end of the slice is reached. +/// The user is responsible for assigning meaning to the sequence of packets, +/// including flush and delim packets. +pub(crate) struct PacketIterator<'a> { + data: &'a [u8], + position: usize, +} + +impl<'a> PacketIterator<'a> { + pub(crate) fn new(data: &'a [u8]) -> Self { + Self { data, position: 0 } + } +} + +impl<'a> Iterator for PacketIterator<'a> { + type Item = Result, PktLineError>; + + fn next(&mut self) -> Option { + if self.position >= self.data.len() { + return None; + } + + let remaining = &self.data[self.position..]; + match Packet::decode(remaining) { + Ok(pkt) => { + self.position += pkt.length(); + Some(Ok(pkt)) + } + Err(err) => Some(Err(err)), + } + } +} + +#[cfg(test)] +mod tests { + use core::panic; + + use crate::github::pktline::{Data, MAX_DATA_LEN, Packet}; + + #[test] + fn test_data_size_invariant() { + let data = vec![0u8; MAX_DATA_LEN + 1]; + let Err(err) = Data::try_from(data.as_slice()) else { + panic!("expected error for data exceeding max length"); + }; + assert!(matches!(err, super::PktLineError::DataTooLong { .. })); + } + + #[test] + fn test_flush_packet() { + let mut encoded = vec![]; + let pkt = Packet::Flush; + pkt.encode(&mut encoded).unwrap(); + assert_eq!(encoded, b"0000"); + + let decoded = Packet::decode(&encoded).unwrap(); + assert!(matches!(decoded, Packet::Flush)); + } + + #[test] + fn test_delim_packet() { + let mut encoded = vec![]; + let pkt = Packet::Delim; + pkt.encode(&mut encoded).unwrap(); + assert_eq!(encoded, b"0001"); + + let decoded = Packet::decode(&encoded).unwrap(); + assert!(matches!(decoded, Packet::Delim)); + } + + #[test] + fn test_data_packet() { + let mut encoded = vec![]; + let data = Data::try_from(b"hello, world!".as_slice()).unwrap(); + let pkt = Packet::Data(data); + pkt.encode(&mut encoded).unwrap(); + assert_eq!(encoded, b"0011hello, world!"); + + let Packet::Data(decoded) = Packet::decode(&encoded).unwrap() else { + panic!("expected data packet"); + }; + assert_eq!(decoded.as_ref(), data.as_ref()); + } + + #[test] + fn test_error_packet() { + let mut encoded = vec![]; + let error_message = Data::try_from(b"ERR something went wrong".as_slice()).unwrap(); + let pkt = Packet::Data(error_message); + pkt.encode(&mut encoded).unwrap(); + assert_eq!(encoded, b"001cERR something went wrong"); + + let Err(err) = Packet::decode(&encoded) else { + panic!("expected error packet"); + }; + assert!(matches!(err, super::PktLineError::InBandError { .. })); + } + + #[test] + fn test_decode_invalid_cases() { + // Invalid framings. + for case in &[ + b"".as_slice(), + b"0".as_slice(), + b"00".as_slice(), + b"000".as_slice(), + ] { + let Err(err) = Packet::decode(case) else { + panic!("expected error for case: {:?}", case); + }; + assert!(matches!(err, super::PktLineError::FrameTooShort { .. })); + } + + // Invalid length prefixes (not hex/invalid UTF8). + for case in &[b"zzzz", b"\x00\x00\x00\x00", b"\xf0\x28\x8c\xbc"] { + let Err(err) = Packet::decode(*case) else { + panic!("expected error for case: {:?}", case); + }; + assert!(matches!(err, super::PktLineError::BadLength { .. })); + } + + // Bad packets (unknown control codes). + for case in &[b"0002", b"0003"] { + let Err(err) = Packet::decode(*case) else { + panic!("expected error for case: {:?}", case); + }; + assert!(matches!(err, super::PktLineError::BadControl { .. })); + } + + // Too long (length field exceeds max). + let Err(err) = Packet::decode(b"ffffhello") else { + panic!("expected error for too long case"); + }; + assert!(matches!(err, super::PktLineError::DataTooLong { .. })); + + // Too short (data shorter than length field). + let Err(err) = Packet::decode(b"0008hi") else { + panic!("expected error for too short case"); + }; + assert!(matches!(err, super::PktLineError::DataTooShort { .. })); + } +} diff --git a/crates/zizmor/src/main.rs b/crates/zizmor/src/main.rs index 09aa1138..a1b6af1f 100644 --- a/crates/zizmor/src/main.rs +++ b/crates/zizmor/src/main.rs @@ -15,7 +15,7 @@ use clap_complete::Generator; use clap_verbosity_flag::InfoLevel; use etcetera::AppStrategy as _; use finding::{Confidence, Persona, Severity}; -use github_api::{GitHubHost, GitHubToken}; +use github::{GitHubHost, GitHubToken}; use indicatif::ProgressStyle; use owo_colors::OwoColorize; use registry::input::{InputKey, InputRegistry}; @@ -29,7 +29,7 @@ use tracing_subscriber::{EnvFilter, layer::SubscriberExt as _, util::SubscriberI use crate::{ config::{Config, ConfigError, ConfigErrorInner}, - github_api::Client, + github::Client, models::AsDocument, registry::input::CollectionError, utils::once::warn_once, @@ -38,7 +38,7 @@ use crate::{ mod audit; mod config; mod finding; -mod github_api; +mod github; #[cfg(feature = "lsp")] mod lsp; mod models; @@ -556,7 +556,7 @@ enum Error { Lsp(#[from] lsp::Error), /// An error from the GitHub API client. #[error(transparent)] - Client(#[from] github_api::ClientError), + Client(#[from] github::ClientError), /// An error while loading audit rules. #[error("failed to load audit rules")] AuditLoad(#[source] anyhow::Error), diff --git a/crates/zizmor/src/registry/input.rs b/crates/zizmor/src/registry/input.rs index 76d8fbf9..0d04a4e1 100644 --- a/crates/zizmor/src/registry/input.rs +++ b/crates/zizmor/src/registry/input.rs @@ -14,7 +14,7 @@ use crate::{ CollectionOptions, audit::AuditInput, config::{Config, ConfigError}, - github_api::{Client, ClientError}, + github::{Client, ClientError}, models::{action::Action, dependabot::Dependabot, workflow::Workflow}, }; diff --git a/crates/zizmor/src/state.rs b/crates/zizmor/src/state.rs index eaea362b..557e6d71 100644 --- a/crates/zizmor/src/state.rs +++ b/crates/zizmor/src/state.rs @@ -1,6 +1,6 @@ //! zizmor's runtime state, including application-level caching. -use crate::github_api::Client; +use crate::github::Client; pub(crate) struct AuditState { /// Whether online audits should be skipped. diff --git a/crates/zizmor/src/utils.rs b/crates/zizmor/src/utils.rs index 53615fed..a2ce33d3 100644 --- a/crates/zizmor/src/utils.rs +++ b/crates/zizmor/src/utils.rs @@ -18,6 +18,8 @@ use std::{fmt::Write, sync::LazyLock}; use crate::{audit::AuditInput, models::AsDocument, registry::input::CollectionError}; +pub(crate) static ZIZMOR_AGENT: &str = concat!("zizmor/", env!("CARGO_PKG_VERSION")); + pub(crate) static WORKFLOW_VALIDATOR: LazyLock = LazyLock::new(|| { validator_for(&serde_json::from_str(include_str!("./data/github-workflow.json")).unwrap()) .unwrap() diff --git a/docs/release-notes.md b/docs/release-notes.md index b4f5ce97..5787a520 100644 --- a/docs/release-notes.md +++ b/docs/release-notes.md @@ -18,7 +18,7 @@ of `zizmor`. ### Performance Improvements 🚄 -* `zizmor`'s online mode is now significantly (40% to 90%) faster on +* `zizmor`'s online mode is now significantly (40% to over 95%) faster on common workloads, thanks to a combination of caching improvements and conversion of GitHub API requests into Git remote lookups (#1257)