feat: CLI benchmarking harness (#1038)

William Woodruff 2025-07-24 20:13:35 -04:00 committed by GitHub
parent 14961ac826
commit 349cbcdd26
6 changed files with 380 additions and 0 deletions

62
.github/workflows/benchmark-base.yml vendored Normal file

@@ -0,0 +1,62 @@
# benchmark-base.yml: submit benchmarks to Bencher.
#
# This workflow provides baseline results, via the main branch.

name: Benchmark baseline

on:
  push:
    branches: [main]

permissions: {}

jobs:
  benchmark_base_branch:
    name: Continuous Benchmarking with Bencher
    runs-on: ubuntu-latest
    permissions:
      checks: write
    environment:
      name: bencher
      url: https://bencher.dev/console/projects/zizmor
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          persist-credentials: false

      - name: Set up Bencher
        uses: bencherdev/bencher@f89d454e74a32a81b2eab29fe0afdb2316617342 # v0.5.3

      - name: Install hyperfine
        run: |
          sudo apt-get remove --purge man-db
          sudo apt install -y hyperfine

      - uses: Swatinem/rust-cache@98c8021b550208e191a6a3145459bfc9fb29c4c0 # v2.8.0

      - uses: astral-sh/setup-uv@7edac99f961f18b581bbd960d59d049f04c0002f # v6.4.1

      - name: Run benchmarks
        run: make bench

      - name: Upload benchmark results
        # Take each result file in bench/results/*.json and use
        # `bencher run` to upload it.
        run: |
          for file in bench/results/*.json; do
            bencher run \
              --project zizmor \
              --token "${BENCHER_API_TOKEN}" \
              --branch main \
              --testbed ubuntu-latest \
              --err \
              --adapter shell_hyperfine \
              --github-actions "${GITHUB_TOKEN}" \
              --file "${file}"
          done
        env:
          BENCHER_API_TOKEN: ${{ secrets.BENCHER_API_TOKEN }}
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
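
The upload step above is just a per-result-file loop; as a rough Python sketch of the same `bencher run` invocation (flags and paths are copied from the workflow, everything else is illustrative):

# Illustrative sketch only: the workflow's shell upload loop, expressed in Python.
import glob
import os
import subprocess

for result in glob.glob("bench/results/*.json"):
    subprocess.run(
        [
            "bencher", "run",
            "--project", "zizmor",
            "--token", os.environ["BENCHER_API_TOKEN"],
            "--branch", "main",
            "--testbed", "ubuntu-latest",
            "--err",
            "--adapter", "shell_hyperfine",
            "--github-actions", os.environ["GITHUB_TOKEN"],
            "--file", result,
        ],
        check=True,
    )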

69
.github/workflows/benchmark-pr-1p.yml vendored Normal file

@@ -0,0 +1,69 @@
# benchmark-pr-1p.yml: submit benchmarks to Bencher.
#
# This workflow covers "first party" pull requests specifically,
# i.e. those created from branches within the same repository.

name: Benchmark PRs (first-party)

on:
  pull_request:
    types: [opened, reopened, edited, synchronize]

permissions: {}

jobs:
  benchmark-pr-1p:
    name: Continuous Benchmarking PRs with Bencher
    runs-on: ubuntu-latest
    if: github.event_name == 'pull_request' && github.event.pull_request.head.repo.full_name == github.repository
    permissions:
      pull-requests: write
    environment:
      name: bencher
      url: https://bencher.dev/console/projects/zizmor
    steps:
      - uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2
        with:
          persist-credentials: false

      - name: Set up Bencher
        uses: bencherdev/bencher@f89d454e74a32a81b2eab29fe0afdb2316617342 # v0.5.3

      - name: Install hyperfine
        run: |
          sudo apt-get remove --purge man-db
          sudo apt install -y hyperfine

      - uses: Swatinem/rust-cache@98c8021b550208e191a6a3145459bfc9fb29c4c0 # v2.8.0

      - uses: astral-sh/setup-uv@7edac99f961f18b581bbd960d59d049f04c0002f # v6.4.1

      - name: Run benchmarks
        run: make bench

      - name: Upload benchmark results
        # Take each result file in bench/results/*.json and use
        # `bencher run` to upload it.
        run: |
          for file in bench/results/*.json; do
            bencher run \
              --project zizmor \
              --token "${BENCHER_API_TOKEN}" \
              --branch "${GITHUB_HEAD_REF}" \
              --start-point "${GITHUB_BASE_REF}" \
              --start-point-hash "${PULL_REQUEST_BASE_SHA}" \
              --start-point-clone-thresholds \
              --start-point-reset \
              --testbed ubuntu-latest \
              --err \
              --adapter shell_hyperfine \
              --github-actions "${GITHUB_TOKEN}" \
              --file "${file}"
          done
        env:
          BENCHER_API_TOKEN: ${{ secrets.BENCHER_API_TOKEN }}
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
          PULL_REQUEST_BASE_SHA: ${{ github.event.pull_request.base.sha }}

5
Makefile

@@ -44,3 +44,8 @@ crates/zizmor/data/codeql-injection-sinks.json: support/codeql-injection-sinks.p
.PHONY: pinact
pinact:
	pinact run --update --verify

.PHONY: bench
bench:
	uv run bench/benchmark.py

1
bench/.gitignore vendored Normal file

@@ -0,0 +1 @@
results/

227
bench/benchmark.py Normal file

@@ -0,0 +1,227 @@
# /// script
# requires-python = ">=3.12"
# ///

import argparse
import hashlib
import json
import shlex
import shutil
import subprocess
import sys
import tempfile
from contextlib import contextmanager
from pathlib import Path
from typing import Iterator, NoReturn, TypedDict

_DEPS = ["hyperfine", "curl", "unzip"]

_HERE = Path(__file__).parent
_PROJECT_ROOT = _HERE.parent
_ZIZMOR = _PROJECT_ROOT / "target" / "release" / "zizmor"
assert (_PROJECT_ROOT / "Cargo.toml").is_file(), "Missing project root?"

_BENCHMARKS = _HERE / "benchmarks.json"
_RESULTS = _HERE / "results"
assert _BENCHMARKS.is_file(), f"Benchmarks file not found: {_BENCHMARKS}"
_RESULTS.mkdir(exist_ok=True)

_CACHE_DIR = Path(tempfile.gettempdir()) / "zizmor-benchmark-cache"
_CACHE_DIR.mkdir(exist_ok=True)


class Log:
    def __init__(self, scope: str | None) -> None:
        self.scopes = [scope] if scope else []

    def info(self, message: str) -> None:
        scopes = " ".join(f"[{s}]" for s in self.scopes)
        print(f"[+] {scopes} {message}", file=sys.stderr)

    def warn(self, message: str) -> None:
        scopes = " ".join(f"[{s}]" for s in self.scopes)
        print(f"[!] {scopes} {message}", file=sys.stderr)

    def error(self, message: str) -> NoReturn:
        self.warn(message)
        sys.exit(1)

    @contextmanager
    def scope(self, new_scope: str) -> Iterator[None]:
        """Create a new logging scope."""
        self.scopes.append(new_scope)
        try:
            yield None
        finally:
            self.scopes.pop()


LOG = Log("benchmarks")


def _curl(url: str, expected_sha256: str) -> Path:
    """Download a URL and cache it using content addressing with SHA256."""
    cached_file = _CACHE_DIR / expected_sha256
    if cached_file.exists():
        LOG.info("Using cached file")
        return cached_file

    result = subprocess.run(
        ["curl", "-fsSL", url],
        capture_output=True,
        check=True,
    )
    content = result.stdout

    content_hash = hashlib.sha256(content).hexdigest()
    if content_hash != expected_sha256:
        LOG.error(f"Hash mismatch: {expected_sha256} != {content_hash}")

    cached_file.write_bytes(content)
    return cached_file


def _unzip(archive_path: Path, extract_name: str) -> Path:
    """Extract an archive to a directory in the cache."""
    extract_dir = _CACHE_DIR / extract_name
    if extract_dir.exists():
        LOG.info("Using cached extraction")
        return extract_dir

    extract_dir.mkdir(exist_ok=True)
    subprocess.run(
        ["unzip", "-q", str(archive_path), "-d", str(extract_dir)],
        check=True,
    )

    LOG.info(f"Extracted {archive_path.name} to {extract_dir}")
    return extract_dir


class Benchmark(TypedDict):
    name: str
    source_type: str
    source: str
    source_sha256: str
    stencil: str


Plan = list[str]


class Bench:
    def __init__(self, benchmark: Benchmark) -> None:
        self.benchmark = benchmark

    def plan(self) -> Plan:
        match self.benchmark["source_type"]:
            case "archive-url":
                url = self.benchmark["source"]
                sha256 = self.benchmark["source_sha256"]
                archive = _curl(url, sha256)
                inputs = [str(_unzip(archive, self.benchmark["name"]))]
            case _:
                LOG.error(f"Unknown source type: {self.benchmark['source_type']}")

        stencil = self.benchmark["stencil"]
        command = stencil.replace("$ZIZMOR", str(_ZIZMOR)).replace(
            "$INPUTS", " ".join(inputs)
        )
        return shlex.split(command)

    def run(self, plan: Plan, *, dry_run: bool) -> None:
        command = shlex.join(plan)
        result_file = _RESULTS / f"{self.benchmark['name']}.json"
        if result_file.exists() and not dry_run:
            LOG.warn("clobbering existing result file")

        hyperfine_command = [
            "hyperfine",
            "--warmup",
            "3",
            # NOTE: not needed because we use --no-exit-codes in the stencil
            # "--ignore-failure",
            "--export-json",
            str(result_file),
            command,
        ]

        if dry_run:
            LOG.warn(f"would have run: {shlex.join(hyperfine_command)}")
            return

        try:
            subprocess.run(
                hyperfine_command,
                check=True,
            )
        except subprocess.CalledProcessError:
            LOG.error("run failed, see above for details")

        # Stupid hack: fixup each result file's results[0].command
        # to be a more useful benchmark identifier, since bencher
        # apparently keys on these.
        result_json = json.loads(result_file.read_bytes())
        result_json["results"][0]["command"] = f"zizmor::{self.benchmark['name']}"
        result_file.write_text(json.dumps(result_json))

        LOG.info(f"run written to {result_file}")


def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--dry-run", action="store_true", help="Show plans without running them"
    )
    args = parser.parse_args()

    missing = []
    for dep in _DEPS:
        if not shutil.which(dep):
            missing.append(dep)
    if missing:
        LOG.error(
            f"Missing dependencies: {', '.join(missing)}. "
            "Please install them before running benchmarks."
        )

    LOG.info("ensuring we have a benchable zizmor build")
    subprocess.run(
        ["cargo", "build", "--release", "-p", "zizmor"],
        check=True,
        cwd=_PROJECT_ROOT,
    )
    if not _ZIZMOR.is_file():
        LOG.error("zizmor build presumably failed, see above for details")

    LOG.info(f"using cache dir: {_CACHE_DIR}")

    benchmarks: list[Benchmark] = json.loads(_BENCHMARKS.read_text(encoding="utf-8"))
    LOG.info(f"found {len(benchmarks)} benchmarks in {_BENCHMARKS.name}")

    benches = [Bench(benchmark) for benchmark in benchmarks]

    plans = []
    with LOG.scope("plan"):
        for bench in benches:
            with LOG.scope(bench.benchmark["name"]):
                LOG.info("beginning plan")
                plans.append(bench.plan())

    with LOG.scope("run"):
        for bench, plan in zip(benches, plans):
            with LOG.scope(bench.benchmark["name"]):
                bench.run(plan, dry_run=args.dry_run)


if __name__ == "__main__":
    main()
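
The `command` fixup at the end of `Bench.run` exists because Bencher's `shell_hyperfine` adapter apparently keys results on hyperfine's recorded `command` string; a minimal sketch of that rewrite against one hypothetical result file:

# Illustrative only: rewrite results[0].command in a hyperfine --export-json file
# so Bencher sees a stable "zizmor::<benchmark name>" identifier instead of the full CLI invocation.
import json
from pathlib import Path

result_file = Path("bench/results/grafana-9f212d11d0ac.json")  # hypothetical result file
result = json.loads(result_file.read_text())
result["results"][0]["command"] = "zizmor::grafana-9f212d11d0ac"
result_file.write_text(json.dumps(result))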

16
bench/benchmarks.json Normal file

@@ -0,0 +1,16 @@
[
  {
    "name": "grafana-9f212d11d0ac",
    "source_type": "archive-url",
    "source": "https://github.com/grafana/grafana/archive/9f212d11d0ac9c38ada62a7db830844bb9b02905.zip",
    "source_sha256": "c6d42b52c8d912db2698d8b06f227de46f0c2d04cc757841792ed6567f0c56c7",
    "stencil": "$ZIZMOR --offline --format=plain --no-exit-codes --no-config $INPUTS"
  },
  {
    "name": "cpython-48f88310044c",
    "source_type": "archive-url",
    "source": "https://github.com/python/cpython/archive/48f88310044c6ef877f3b0761cf7afece2f8fb3a.zip",
    "source_sha256": "a52a67f1dd9cfa67c7d1305d5b9639629abe247b2c32f01b77f790ddf8b49503",
    "stencil": "$ZIZMOR --offline --format=plain --no-exit-codes --no-config $INPUTS"
  }
]
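
Each `stencil` above is expanded by `Bench.plan`, which substitutes `$ZIZMOR` with the release binary and `$INPUTS` with the cached, unzipped archive; a minimal sketch of that expansion for the grafana entry (paths illustrative):

# Sketch of the stencil expansion performed by Bench.plan (paths are illustrative).
import shlex

stencil = "$ZIZMOR --offline --format=plain --no-exit-codes --no-config $INPUTS"
zizmor = "target/release/zizmor"  # release build produced by `cargo build --release -p zizmor`
inputs = "/tmp/zizmor-benchmark-cache/grafana-9f212d11d0ac"  # cached, unzipped archive

command = stencil.replace("$ZIZMOR", zizmor).replace("$INPUTS", inputs)
print(shlex.split(command))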