feat: CLI benchmarking harness (#1038)

William Woodruff 2025-07-24 20:13:35 -04:00 committed by GitHub
parent 14961ac826
commit 349cbcdd26
6 changed files with 380 additions and 0 deletions

62
.github/workflows/benchmark-base.yml vendored Normal file

@@ -0,0 +1,62 @@
# benchmark-base.yml: submit benchmarks to Bencher.
#
# This workflow provides baseline results, via the main branch.

name: Benchmark baseline

on:
  push:
    branches: [main]

permissions: {}

jobs:
  benchmark_base_branch:
    name: Continuous Benchmarking with Bencher
    runs-on: ubuntu-latest
    permissions:
      checks: write
    environment:
      name: bencher
      url: https://bencher.dev/console/projects/zizmor
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          persist-credentials: false

      - name: Set up Bencher
        uses: bencherdev/bencher@f89d454e74a32a81b2eab29fe0afdb2316617342 # v0.5.3

      - name: Install hyperfine
        run: |
          sudo apt-get remove --purge man-db
          sudo apt install -y hyperfine

      - uses: Swatinem/rust-cache@98c8021b550208e191a6a3145459bfc9fb29c4c0 # v2.8.0

      - uses: astral-sh/setup-uv@7edac99f961f18b581bbd960d59d049f04c0002f # v6.4.1

      - name: Run benchmarks
        run: make bench

      - name: Upload benchmark results
        # Take each result file in bench/results/*.json and use
        # `bencher run` to upload it.
        run: |
          for file in bench/results/*.json; do
            bencher run \
              --project zizmor \
              --token "${BENCHER_API_TOKEN}" \
              --branch main \
              --testbed ubuntu-latest \
              --err \
              --adapter shell_hyperfine \
              --github-actions "${GITHUB_TOKEN}" \
              --file "${file}"
          done
        env:
          BENCHER_API_TOKEN: ${{ secrets.BENCHER_API_TOKEN }}
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
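
The upload step above is just a per-result-file loop; as a rough Python sketch of the same `bencher run` invocation (flags and paths are copied from the workflow, everything else is illustrative):

# Illustrative sketch only: the workflow's shell upload loop, expressed in Python.
import glob
import os
import subprocess

for result in glob.glob("bench/results/*.json"):
    subprocess.run(
        [
            "bencher", "run",
            "--project", "zizmor",
            "--token", os.environ["BENCHER_API_TOKEN"],
            "--branch", "main",
            "--testbed", "ubuntu-latest",
            "--err",
            "--adapter", "shell_hyperfine",
            "--github-actions", os.environ["GITHUB_TOKEN"],
            "--file", result,
        ],
        check=True,
    )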

69
.github/workflows/benchmark-pr-1p.yml vendored Normal file

@@ -0,0 +1,69 @@
# benchmark-pr-1p.yml: submit benchmarks to Bencher.
#
# This workflow covers "first party" pull requests specifically,
# i.e. those created from branches within the same repository.

name: Benchmark PRs (first-party)

on:
  pull_request:
    types: [opened, reopened, edited, synchronize]

permissions: {}

jobs:
  benchmark-pr-1p:
    name: Continuous Benchmarking PRs with Bencher
    runs-on: ubuntu-latest
    if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
    permissions:
      pull-requests: write
    environment:
      name: bencher
      url: https://bencher.dev/console/projects/zizmor
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          persist-credentials: false

      - name: Set up Bencher
        uses: bencherdev/bencher@f89d454e74a32a81b2eab29fe0afdb2316617342 # v0.5.3

      - name: Install hyperfine
        run: |
          sudo apt-get remove --purge man-db
          sudo apt install -y hyperfine

      - uses: Swatinem/rust-cache@98c8021b550208e191a6a3145459bfc9fb29c4c0 # v2.8.0

      - uses: astral-sh/setup-uv@7edac99f961f18b581bbd960d59d049f04c0002f # v6.4.1

      - name: Run benchmarks
        run: make bench

      - name: Upload benchmark results
        # Take each result file in bench/results/*.json and use
        # `bencher run` to upload it.
        run: |
          for file in bench/results/*.json; do
            bencher run \
              --project zizmor \
              --token "${BENCHER_API_TOKEN}" \
              --branch "${GITHUB_HEAD_REF}" \
              --start-point "${GITHUB_BASE_REF}" \
              --start-point-hash "${PULL_REQUEST_BASE_SHA}" \
              --start-point-clone-thresholds \
              --start-point-reset \
              --testbed ubuntu-latest \
              --err \
              --adapter shell_hyperfine \
              --github-actions "${GITHUB_TOKEN}" \
              --file "${file}"
          done
        env:
          BENCHER_API_TOKEN: ${{ secrets.BENCHER_API_TOKEN }}
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          PULL_REQUEST_BASE_SHA: ${{ github.event.pull_request.base.sha }}

5
Makefile

@@ -44,3 +44,8 @@ crates/zizmor/data/codeql-injection-sinks.json: support/codeql-injection-sinks.p
.PHONY: pinact
pinact:
	pinact run --update --verify

.PHONY: bench
bench:
	uv run bench/benchmark.py

1
bench/.gitignore vendored Normal file

@@ -0,0 +1 @@
results/

227
bench/benchmark.py Normal file

@@ -0,0 +1,227 @@
# /// script
# requires-python = ">=3.12"
# ///

import argparse
import hashlib
import json
import shlex
import shutil
import subprocess
import sys
import tempfile
from contextlib import contextmanager
from pathlib import Path
from typing import Iterator, NoReturn, TypedDict

_DEPS = ["hyperfine", "curl", "unzip"]

_HERE = Path(__file__).parent
_PROJECT_ROOT = _HERE.parent
_ZIZMOR = _PROJECT_ROOT / "target" / "release" / "zizmor"
assert (_PROJECT_ROOT / "Cargo.toml").is_file(), "Missing project root?"

_BENCHMARKS = _HERE / "benchmarks.json"
_RESULTS = _HERE / "results"
assert _BENCHMARKS.is_file(), f"Benchmarks file not found: {_BENCHMARKS}"
_RESULTS.mkdir(exist_ok=True)

_CACHE_DIR = Path(tempfile.gettempdir()) / "zizmor-benchmark-cache"
_CACHE_DIR.mkdir(exist_ok=True)


class Log:
    def __init__(self, scope: str | None) -> None:
        self.scopes = [scope] if scope else []

    def info(self, message: str) -> None:
        scopes = " ".join(f"[{s}]" for s in self.scopes)
        print(f"[+] {scopes} {message}", file=sys.stderr)

    def warn(self, message: str) -> None:
        scopes = " ".join(f"[{s}]" for s in self.scopes)
        print(f"[!] {scopes} {message}", file=sys.stderr)

    def error(self, message: str) -> NoReturn:
        self.warn(message)
        sys.exit(1)

    @contextmanager
    def scope(self, new_scope: str) -> Iterator[None]:
        """Create a new logging scope."""
        self.scopes.append(new_scope)
        try:
            yield None
        finally:
            self.scopes.pop()


LOG = Log("benchmarks")


def _curl(url: str, expected_sha256: str) -> Path:
    """Download a URL and cache it using content addressing with SHA256."""
    cached_file = _CACHE_DIR / expected_sha256
    if cached_file.exists():
        LOG.info("Using cached file")
        return cached_file

    result = subprocess.run(
        ["curl", "-fsSL", url],
        capture_output=True,
        check=True,
    )
    content = result.stdout

    content_hash = hashlib.sha256(content).hexdigest()
    if content_hash != expected_sha256:
        LOG.error(f"Hash mismatch: {expected_sha256} != {content_hash}")

    cached_file.write_bytes(content)
    return cached_file


def _unzip(archive_path: Path, extract_name: str) -> Path:
    """Extract an archive to a directory in the cache."""
    extract_dir = _CACHE_DIR / extract_name
    if extract_dir.exists():
        LOG.info("Using cached extraction")
        return extract_dir

    extract_dir.mkdir(exist_ok=True)
    subprocess.run(
        ["unzip", "-q", str(archive_path), "-d", str(extract_dir)],
        check=True,
    )

    LOG.info(f"Extracted {archive_path.name} to {extract_dir}")
    return extract_dir


class Benchmark(TypedDict):
    name: str
    source_type: str
    source: str
    source_sha256: str
    stencil: str


Plan = list[str]


class Bench:
    def __init__(self, benchmark: Benchmark) -> None:
        self.benchmark = benchmark

    def plan(self) -> Plan:
        match self.benchmark["source_type"]:
            case "archive-url":
                url = self.benchmark["source"]
                sha256 = self.benchmark["source_sha256"]
                archive = _curl(url, sha256)
                inputs = [str(_unzip(archive, self.benchmark["name"]))]
            case _:
                LOG.error(f"Unknown source type: {self.benchmark['source_type']}")

        stencil = self.benchmark["stencil"]
        command = stencil.replace("$ZIZMOR", str(_ZIZMOR)).replace(
            "$INPUTS", " ".join(inputs)
        )
        return shlex.split(command)

    def run(self, plan: Plan, *, dry_run: bool) -> None:
        command = shlex.join(plan)
        result_file = _RESULTS / f"{self.benchmark['name']}.json"
        if result_file.exists() and not dry_run:
            LOG.warn("clobbering existing result file")

        hyperfine_command = [
            "hyperfine",
            "--warmup",
            "3",
            # NOTE: not needed because we use --no-exit-codes in the stencil
            # "--ignore-failure",
            "--export-json",
            str(result_file),
            command,
        ]

        if dry_run:
            LOG.warn(f"would have run: {shlex.join(hyperfine_command)}")
            return

        try:
            subprocess.run(
                hyperfine_command,
                check=True,
            )
        except subprocess.CalledProcessError:
            LOG.error("run failed, see above for details")

        # Stupid hack: fixup each result file's results[0].command
        # to be a more useful benchmark identifier, since bencher
        # apparently keys on these.
        result_json = json.loads(result_file.read_bytes())
        result_json["results"][0]["command"] = f"zizmor::{self.benchmark['name']}"
        result_file.write_text(json.dumps(result_json))

        LOG.info(f"run written to {result_file}")


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--dry-run", action="store_true", help="Show plans without running them"
    )
    args = parser.parse_args()

    missing = []
    for dep in _DEPS:
        if not shutil.which(dep):
            missing.append(dep)
    if missing:
        LOG.error(
            f"Missing dependencies: {', '.join(missing)}. "
            "Please install them before running benchmarks."
        )

    LOG.info("ensuring we have a benchable zizmor build")
    subprocess.run(
        ["cargo", "build", "--release", "-p", "zizmor"],
        check=True,
        cwd=_PROJECT_ROOT,
    )
    if not _ZIZMOR.is_file():
        LOG.error("zizmor build presumably failed, see above for details")

    LOG.info(f"using cache dir: {_CACHE_DIR}")

    benchmarks: list[Benchmark] = json.loads(_BENCHMARKS.read_text(encoding="utf-8"))
    LOG.info(f"found {len(benchmarks)} benchmarks in {_BENCHMARKS.name}")

    benches = [Bench(benchmark) for benchmark in benchmarks]

    plans = []
    with LOG.scope("plan"):
        for bench in benches:
            with LOG.scope(bench.benchmark["name"]):
                LOG.info("beginning plan")
                plans.append(bench.plan())

    with LOG.scope("run"):
        for bench, plan in zip(benches, plans):
            with LOG.scope(bench.benchmark["name"]):
                bench.run(plan, dry_run=args.dry_run)


if __name__ == "__main__":
    main()
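
The `command` fixup at the end of `Bench.run` exists because Bencher's `shell_hyperfine` adapter apparently keys results on hyperfine's recorded `command` string; a minimal sketch of that rewrite against one hypothetical result file:

# Illustrative only: rewrite results[0].command in a hyperfine --export-json file
# so Bencher sees a stable "zizmor::<benchmark name>" identifier instead of the full CLI invocation.
import json
from pathlib import Path

result_file = Path("bench/results/grafana-9f212d11d0ac.json")  # hypothetical result file
result = json.loads(result_file.read_text())
result["results"][0]["command"] = "zizmor::grafana-9f212d11d0ac"
result_file.write_text(json.dumps(result))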

16
bench/benchmarks.json Normal file

@@ -0,0 +1,16 @@
[
  {
    "name": "grafana-9f212d11d0ac",
    "source_type": "archive-url",
    "source": "https://github.com/grafana/grafana/archive/9f212d11d0ac9c38ada62a7db830844bb9b02905.zip",
    "source_sha256": "c6d42b52c8d912db2698d8b06f227de46f0c2d04cc757841792ed6567f0c56c7",
    "stencil": "$ZIZMOR --offline --format=plain --no-exit-codes --no-config $INPUTS"
  },
  {
    "name": "cpython-48f88310044c",
    "source_type": "archive-url",
    "source": "https://github.com/python/cpython/archive/48f88310044c6ef877f3b0761cf7afece2f8fb3a.zip",
    "source_sha256": "a52a67f1dd9cfa67c7d1305d5b9639629abe247b2c32f01b77f790ddf8b49503",
    "stencil": "$ZIZMOR --offline --format=plain --no-exit-codes --no-config $INPUTS"
  }
]
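
Each `stencil` above is expanded by `Bench.plan`, which substitutes `$ZIZMOR` with the release binary and `$INPUTS` with the cached, unzipped archive; a minimal sketch of that expansion for the grafana entry (paths illustrative):

# Sketch of the stencil expansion performed by Bench.plan (paths are illustrative).
import shlex

stencil = "$ZIZMOR --offline --format=plain --no-exit-codes --no-config $INPUTS"
zizmor = "target/release/zizmor"  # release build produced by `cargo build --release -p zizmor`
inputs = "/tmp/zizmor-benchmark-cache/grafana-9f212d11d0ac"  # cached, unzipped archive

command = stencil.replace("$ZIZMOR", zizmor).replace("$INPUTS", inputs)
print(shlex.split(command))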