From 063dd00542664a803e329505ef8fc5f9150cf60c Mon Sep 17 00:00:00 2001
From: Charlie Marsh <charlie.r.marsh@gmail.com>
Date: Fri, 5 Jan 2024 22:23:19 -0500
Subject: [PATCH] Enable self-benchmarking for Puffin branches in bench.py
 (#804)

## Summary

This PR enables the use of the `bench.py` script to benchmark Puffin
itself. This is something I often do by via a process like:

- Checkout the `main` branch (or any other baseline branch)
- Run: `cargo build --release`
- Run: `mv ./target/release/puffin ./target/release/baseline`
- Checkout a development branch
- Run: `cargo build --release`
- (New) Run: `python bench.py --tool puffin --path
./target/release/puffin --tool puffin --path ./target/release/baseline
requirements.in`
---
 requirements.in           |   1 +
 scripts/bench/__main__.py | 139 +++++++++++++++++++++++++-------------
 2 files changed, 93 insertions(+), 47 deletions(-)
 create mode 100644 requirements.in

diff --git a/requirements.in b/requirements.in
new file mode 100644
index 000000000..7e66a17d4
--- /dev/null
+++ b/requirements.in
@@ -0,0 +1 @@
+black
diff --git a/scripts/bench/__main__.py b/scripts/bench/__main__.py
index 3dd48b82e..4296ae953 100644
--- a/scripts/bench/__main__.py
+++ b/scripts/bench/__main__.py
@@ -15,6 +15,14 @@ To set up the required environment, run:
 Example usage:
 
     python -m scripts.bench -t puffin -t pip-tools requirements.in
+
+Tools can be repeated and accompanied by a binary to benchmark multiple versions of the
+same tool, as in:
+
+    python -m scripts.bench \
+        -t puffin -p ./target/release/puffin \
+        -t puffin -p ./target/release/baseline \
+        requirements.in
 """
 import abc
 import argparse
@@ -24,6 +32,7 @@ import os.path
 import shlex
 import subprocess
 import tempfile
+from itertools import zip_longest
 
 import tomli
 import tomli_w
@@ -37,8 +46,13 @@ class Tool(enum.Enum):
     """Enumeration of the tools to benchmark."""
 
     PIP_TOOLS = "pip-tools"
-    PUFFIN = "puffin"
+    """`pip-sync` and `pip-compile`, from the `pip-tools` package."""
+
     POETRY = "poetry"
+    """The `poetry` package manager."""
+
+    PUFFIN = "puffin"
+    """A Puffin release build, assumed to be located at `./target/release/puffin`."""
 
 
 class Benchmark(enum.Enum):
@@ -105,6 +119,9 @@ class Suite(abc.ABC):
 
 
 class PipTools(Suite):
+    def __init__(self, path: str | None = None) -> None:
+        self.path = path
+
     def resolve_cold(self, requirements_file: str, *, verbose: bool) -> None:
         with tempfile.TemporaryDirectory() as temp_dir:
             cache_dir = os.path.join(temp_dir, ".cache")
@@ -115,7 +132,7 @@ class PipTools(Suite):
                     "hyperfine",
                     *(["--show-output"] if verbose else []),
                     "--command-name",
-                    f"{Tool.PIP_TOOLS.value} ({Benchmark.RESOLVE_COLD.value})",
+                    f"{self.path or Tool.PIP_TOOLS.value} ({Benchmark.RESOLVE_COLD.value})",
                     "--warmup",
                     str(WARMUP),
                     "--min-runs",
@@ -124,7 +141,7 @@ class PipTools(Suite):
                     f"rm -rf {temp_dir} && rm -f {output_file}",
                     shlex.join(
                         [
-                            "pip-compile",
+                            self.path or "pip-compile",
                             os.path.abspath(requirements_file),
                             "--cache-dir",
                             cache_dir,
@@ -146,7 +163,7 @@ class PipTools(Suite):
                     "hyperfine",
                     *(["--show-output"] if verbose else []),
                     "--command-name",
-                    f"{Tool.PIP_TOOLS.value} ({Benchmark.RESOLVE_WARM.value})",
+                    f"{self.path or Tool.PIP_TOOLS.value} ({Benchmark.RESOLVE_WARM.value})",
                     "--warmup",
                     str(WARMUP),
                     "--min-runs",
@@ -155,7 +172,7 @@ class PipTools(Suite):
                     f"rm -f {output_file}",
                     shlex.join(
                         [
-                            "pip-compile",
+                            self.path or "pip-compile",
                             os.path.abspath(requirements_file),
                             "--cache-dir",
                             cache_dir,
@@ -176,7 +193,7 @@ class PipTools(Suite):
                     "hyperfine",
                     *(["--show-output"] if verbose else []),
                     "--command-name",
-                    f"{Tool.PIP_TOOLS.value} ({Benchmark.INSTALL_COLD.value})",
+                    f"{self.path or Tool.PIP_TOOLS.value} ({Benchmark.INSTALL_COLD.value})",
                     "--warmup",
                     str(WARMUP),
                     "--min-runs",
@@ -185,7 +202,7 @@ class PipTools(Suite):
                     f"rm -rf {cache_dir} && virtualenv --clear -p 3.10 {venv_dir}",
                     shlex.join(
                         [
-                            "pip-sync",
+                            self.path or "pip-sync",
                             os.path.abspath(requirements_file),
                             "--pip-args",
                             f"--cache-dir {cache_dir}",
@@ -206,7 +223,7 @@ class PipTools(Suite):
                     "hyperfine",
                     *(["--show-output"] if verbose else []),
                     "--command-name",
-                    f"{Tool.PIP_TOOLS.value} ({Benchmark.INSTALL_WARM.value})",
+                    f"{self.path or Tool.PIP_TOOLS.value} ({Benchmark.INSTALL_WARM.value})",
                     "--warmup",
                     str(WARMUP),
                     "--min-runs",
@@ -215,7 +232,7 @@ class PipTools(Suite):
                     f"virtualenv --clear -p 3.10 {venv_dir}",
                     shlex.join(
                         [
-                            "pip-sync",
+                            self.path or "pip-sync",
                             os.path.abspath(requirements_file),
                             "--pip-args",
                             f"--cache-dir {cache_dir}",
@@ -228,6 +245,10 @@ class PipTools(Suite):
 
 
 class Puffin(Suite):
+    def __init__(self, *, path: str | None = None) -> None:
+        """Initialize a Puffin benchmark."""
+        self.path = path
+
     def resolve_cold(self, requirements_file: str, *, verbose: bool) -> None:
         with tempfile.TemporaryDirectory() as temp_dir:
             cache_dir = os.path.join(temp_dir, ".cache")
@@ -238,7 +259,7 @@ class Puffin(Suite):
                     "hyperfine",
                     *(["--show-output"] if verbose else []),
                     "--command-name",
-                    f"{Tool.PUFFIN.value} ({Benchmark.RESOLVE_COLD.value})",
+                    f"{self.path or Tool.PUFFIN.value} ({Benchmark.RESOLVE_COLD.value})",
                     "--warmup",
                     str(WARMUP),
                     "--min-runs",
@@ -247,9 +268,12 @@ class Puffin(Suite):
                     f"rm -rf {temp_dir} && rm -f {output_file}",
                     shlex.join(
                         [
-                            os.path.join(
+                            self.path
+                            or os.path.join(
                                 os.path.dirname(
-                                    os.path.dirname(os.path.abspath(__file__))
+                                    os.path.dirname(
+                                        os.path.dirname(os.path.abspath(__file__))
+                                    )
                                 ),
                                 "target",
                                 "release",
@@ -276,7 +300,7 @@ class Puffin(Suite):
                     "hyperfine",
                     *(["--show-output"] if verbose else []),
                     "--command-name",
-                    f"{Tool.PUFFIN.value} ({Benchmark.RESOLVE_WARM.value})",
+                    f"{self.path or Tool.PIP_TOOLS.value} ({Benchmark.RESOLVE_WARM.value})",
                     "--warmup",
                     str(WARMUP),
                     "--min-runs",
@@ -285,9 +309,12 @@ class Puffin(Suite):
                     f"rm -f {output_file}",
                     shlex.join(
                         [
-                            os.path.join(
+                            self.path
+                            or os.path.join(
                                 os.path.dirname(
-                                    os.path.dirname(os.path.abspath(__file__))
+                                    os.path.dirname(
+                                        os.path.dirname(os.path.abspath(__file__))
+                                    )
                                 ),
                                 "target",
                                 "release",
@@ -314,7 +341,7 @@ class Puffin(Suite):
                     "hyperfine",
                     *(["--show-output"] if verbose else []),
                     "--command-name",
-                    f"{Tool.PUFFIN.value} ({Benchmark.INSTALL_COLD.value})",
+                    f"{self.path or Tool.PIP_TOOLS.value} ({Benchmark.INSTALL_COLD.value})",
                     "--warmup",
                     str(WARMUP),
                     "--min-runs",
@@ -324,9 +351,12 @@ class Puffin(Suite):
                     shlex.join(
                         [
                             f"VIRTUAL_ENV={venv_dir}",
-                            os.path.join(
+                            self.path
+                            or os.path.join(
                                 os.path.dirname(
-                                    os.path.dirname(os.path.abspath(__file__))
+                                    os.path.dirname(
+                                        os.path.dirname(os.path.abspath(__file__))
+                                    )
                                 ),
                                 "target",
                                 "release",
@@ -351,7 +381,7 @@ class Puffin(Suite):
                     "hyperfine",
                     *(["--show-output"] if verbose else []),
                     "--command-name",
-                    f"{Tool.PUFFIN.value} ({Benchmark.INSTALL_WARM.value})",
+                    f"{self.path or Tool.PIP_TOOLS.value} ({Benchmark.INSTALL_WARM.value})",
                     "--warmup",
                     str(WARMUP),
                     "--min-runs",
@@ -361,9 +391,12 @@ class Puffin(Suite):
                     shlex.join(
                         [
                             f"VIRTUAL_ENV={venv_dir}",
-                            os.path.join(
+                            self.path
+                            or os.path.join(
                                 os.path.dirname(
-                                    os.path.dirname(os.path.abspath(__file__))
+                                    os.path.dirname(
+                                        os.path.dirname(os.path.abspath(__file__))
+                                    )
                                 ),
                                 "target",
                                 "release",
@@ -380,6 +413,9 @@ class Puffin(Suite):
 
 
 class Poetry(Suite):
+    def __init__(self, path: str | None = None) -> None:
+        self.path = path
+
     def init(self, requirements_file: str, *, working_dir: str) -> None:
         """Initialize a Poetry project from a requirements file."""
         # Parse all dependencies from the requirements file.
@@ -391,7 +427,7 @@ class Poetry(Suite):
         # Create a Poetry project.
         subprocess.check_call(
             [
-                "poetry",
+                self.path or "poetry",
                 "init",
                 "--name",
                 "bench",
@@ -433,7 +469,7 @@ class Poetry(Suite):
                     "hyperfine",
                     *(["--show-output"] if verbose else []),
                     "--command-name",
-                    f"{Tool.POETRY.value} ({Benchmark.RESOLVE_COLD.value})",
+                    f"{self.path or Tool.POETRY.value} ({Benchmark.RESOLVE_COLD.value})",
                     "--warmup",
                     str(WARMUP),
                     "--min-runs",
@@ -450,7 +486,7 @@ class Poetry(Suite):
                             f"POETRY_CONFIG_DIR={config_dir}",
                             f"POETRY_CACHE_DIR={cache_dir}",
                             f"POETRY_DATA_DIR={data_dir}",
-                            "poetry",
+                            self.path or "poetry",
                             "lock",
                         ]
                     ),
@@ -472,7 +508,7 @@ class Poetry(Suite):
                     "hyperfine",
                     *(["--show-output"] if verbose else []),
                     "--command-name",
-                    f"{Tool.POETRY.value} ({Benchmark.RESOLVE_WARM.value})",
+                    f"{self.path or Tool.POETRY.value} ({Benchmark.RESOLVE_WARM.value})",
                     "--warmup",
                     str(WARMUP),
                     "--min-runs",
@@ -484,7 +520,7 @@ class Poetry(Suite):
                             f"POETRY_CONFIG_DIR={config_dir}",
                             f"POETRY_CACHE_DIR={cache_dir}",
                             f"POETRY_DATA_DIR={data_dir}",
-                            "poetry",
+                            self.path or "poetry",
                             "lock",
                         ]
                     ),
@@ -503,7 +539,7 @@ class Poetry(Suite):
 
             # Run a resolution, to ensure that the lock file exists.
             subprocess.check_call(
-                ["poetry", "lock"],
+                [self.path or "poetry", "lock"],
                 cwd=temp_dir,
                 stdout=subprocess.DEVNULL,
                 stderr=subprocess.DEVNULL,
@@ -522,7 +558,7 @@ class Poetry(Suite):
                     "hyperfine",
                     *(["--show-output"] if verbose else []),
                     "--command-name",
-                    f"{Tool.POETRY.value} ({Benchmark.INSTALL_COLD.value})",
+                    f"{self.path or Tool.POETRY.value} ({Benchmark.INSTALL_COLD.value})",
                     "--warmup",
                     str(WARMUP),
                     "--min-runs",
@@ -540,7 +576,7 @@ class Poetry(Suite):
                             f"POETRY_CACHE_DIR={cache_dir}",
                             f"POETRY_DATA_DIR={data_dir}",
                             f"VIRTUAL_ENV={venv_dir}",
-                            "poetry",
+                            self.path or "poetry",
                             "install",
                             "--no-root",
                             "--sync",
@@ -561,7 +597,7 @@ class Poetry(Suite):
 
             # Run a resolution, to ensure that the lock file exists.
             subprocess.check_call(
-                ["poetry", "lock"],
+                [self.path or "poetry", "lock"],
                 cwd=temp_dir,
                 stdout=subprocess.DEVNULL,
                 stderr=subprocess.DEVNULL,
@@ -580,7 +616,7 @@ class Poetry(Suite):
                     "hyperfine",
                     *(["--show-output"] if verbose else []),
                     "--command-name",
-                    f"{Tool.POETRY.value} ({Benchmark.INSTALL_WARM.value})",
+                    f"{self.path or Tool.POETRY.value} ({Benchmark.INSTALL_WARM.value})",
                     "--warmup",
                     str(WARMUP),
                     "--min-runs",
@@ -593,7 +629,7 @@ class Poetry(Suite):
                             f"POETRY_CACHE_DIR={cache_dir}",
                             f"POETRY_DATA_DIR={data_dir}",
                             f"VIRTUAL_ENV={venv_dir}",
-                            "poetry",
+                            self.path or "poetry",
                             "install",
                             "--no-root",
                             "--sync",
@@ -630,10 +666,17 @@ def main():
         "--tool",
         "-t",
         type=str,
-        help="The tool(s) to benchmark.",
+        help="The tool(s) to benchmark (typically, `puffin`, `pip-tools` or `poetry`).",
         choices=[tool.value for tool in Tool],
         action="append",
     )
+    parser.add_argument(
+        "--path",
+        "-p",
+        type=str,
+        help="Optionally, the path to the path, for each tool provided with `--tool`.",
+        action="append",
+    )
     parser.add_argument(
         "--benchmark",
         "-b",
@@ -645,9 +688,14 @@ def main():
 
     args = parser.parse_args()
 
-    requirements_file = os.path.abspath(args.file)
     verbose = args.verbose
-    tools = [Tool(tool) for tool in args.tool] if args.tool is not None else list(Tool)
+
+    requirements_file = os.path.abspath(args.file)
+    if not os.path.exists(requirements_file):
+        raise ValueError(f"File not found: {requirements_file}")
+
+    # Determine the benchmarks to run, based on user input. If no benchmarks were
+    # specified, infer an appropriate set based on the file extension.
     benchmarks = (
         [Benchmark(benchmark) for benchmark in args.benchmark]
         if args.benchmark is not None
@@ -658,15 +706,12 @@ def main():
         else list(Benchmark)
     )
 
-    if not os.path.exists(requirements_file):
-        raise ValueError(f"File not found: {requirements_file}")
+    # Determine the tools to benchmark, based on user input. If no tools were specified,
+    # default to the most common choices.
+    tools = [Tool(tool) for tool in args.tool] if args.tool is not None else list(Tool)
 
-    logging.info(
-        "Benchmarks: {}".format(
-            ", ".join([benchmark.value for benchmark in benchmarks])
-        )
-    )
-    logging.info("Tools: {}".format(", ".join([tool.value for tool in tools])))
+    # If paths were specified, apply them to the tools.
+    paths = args.path or []
 
     logging.info("Reading requirements from: {}".format(requirements_file))
     logging.info("```")
@@ -676,14 +721,14 @@ def main():
     logging.info("```")
 
     for benchmark in benchmarks:
-        for tool in tools:
+        for tool, path in zip_longest(tools, paths):
             match tool:
                 case Tool.PIP_TOOLS:
-                    suite = PipTools()
+                    suite = PipTools(path=path)
                 case Tool.PUFFIN:
-                    suite = Puffin()
+                    suite = Puffin(path=path)
                 case Tool.POETRY:
-                    suite = Poetry()
+                    suite = Poetry(path=path)
                 case _:
                     raise ValueError(f"Invalid tool: {tool}")