mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-19 01:50:38 +00:00
Improvements to the fuzz-parser
script (#11071)
## Summary - Properly fix the race condition identified in https://github.com/astral-sh/ruff/pull/11039. Instead of running the version of Ruff we're testing by invoking `cargo run --release` on each generated source file, we either (1) accept a path to an executable on the command line or (2) if that's not specified, we run `cargo build --release` once at the start and then invoke the executable found in `target/release/ruff` directly. - Now that the race condition is properly fixed, remove the workaround for the race condition added in https://github.com/astral-sh/ruff/pull/11039. - Also allow users to pass in an executable to compare against for the `--only-new-bugs` argument (previously it was hardcoded to always compare against the version of Ruff installed into the Python environment). - Use `argparse.RawDescriptionHelpFormatter` as the formatter class rather than `argparse.RawTextHelpFormatter`. This means that long help texts for the individual arguments will be wrapped to a sensible width. - On completion of the script, indicate success or failure of the script overall by raising `SystemExit` with the appropriate exit code. - Add myself as a codeowner for the script.
This commit is contained in:
parent
0b92f450ca
commit
5dcb1d9e8c
2 changed files with 126 additions and 51 deletions
3
.github/CODEOWNERS
vendored
3
.github/CODEOWNERS
vendored
|
@ -12,3 +12,6 @@
|
||||||
|
|
||||||
# flake8-pyi
|
# flake8-pyi
|
||||||
/crates/ruff_linter/src/rules/flake8_pyi/ @AlexWaygood
|
/crates/ruff_linter/src/rules/flake8_pyi/ @AlexWaygood
|
||||||
|
|
||||||
|
# Script for fuzzing the parser
|
||||||
|
/scripts/fuzz-parser/ @AlexWaygood
|
||||||
|
|
|
@ -13,18 +13,16 @@ Example invocations of the script:
|
||||||
- Run the fuzzer concurrently on 10,000 different Python source-code files,
|
- Run the fuzzer concurrently on 10,000 different Python source-code files,
|
||||||
and only print a summary at the end:
|
and only print a summary at the end:
|
||||||
`python scripts/fuzz-parser/fuzz.py 1-10000 --quiet
|
`python scripts/fuzz-parser/fuzz.py 1-10000 --quiet
|
||||||
|
|
||||||
N.B. The script takes a few seconds to get started, as the script needs to compile
|
|
||||||
your checked out version of ruff with `--release` as a first step before it
|
|
||||||
can actually start fuzzing.
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
from __future__ import annotations
|
from __future__ import annotations
|
||||||
|
|
||||||
import argparse
|
import argparse
|
||||||
import concurrent.futures
|
import concurrent.futures
|
||||||
|
import os.path
|
||||||
import subprocess
|
import subprocess
|
||||||
from dataclasses import KW_ONLY, dataclass
|
from dataclasses import KW_ONLY, dataclass
|
||||||
|
from functools import partial
|
||||||
from typing import NewType
|
from typing import NewType
|
||||||
|
|
||||||
from pysource_codegen import generate as generate_random_code
|
from pysource_codegen import generate as generate_random_code
|
||||||
|
@ -33,32 +31,31 @@ from termcolor import colored
|
||||||
|
|
||||||
MinimizedSourceCode = NewType("MinimizedSourceCode", str)
|
MinimizedSourceCode = NewType("MinimizedSourceCode", str)
|
||||||
Seed = NewType("Seed", int)
|
Seed = NewType("Seed", int)
|
||||||
|
ExitCode = NewType("ExitCode", int)
|
||||||
|
|
||||||
|
|
||||||
def run_ruff(executable_args: list[str], code: str) -> subprocess.CompletedProcess[str]:
|
def contains_bug(code: str, *, ruff_executable: str) -> bool:
|
||||||
return subprocess.run(
|
"""Return `True` if the code triggers a parser error."""
|
||||||
[*executable_args, "check", "--select=E999", "--no-cache", "-"],
|
completed_process = subprocess.run(
|
||||||
|
[ruff_executable, "check", "--select=E999", "--no-cache", "-"],
|
||||||
capture_output=True,
|
capture_output=True,
|
||||||
text=True,
|
text=True,
|
||||||
input=code,
|
input=code,
|
||||||
)
|
)
|
||||||
|
return completed_process.returncode != 0
|
||||||
|
|
||||||
|
|
||||||
def contains_bug(code: str, *, only_new_bugs: bool = False) -> bool:
|
def contains_new_bug(
|
||||||
"""Return True if the code triggers a parser error and False otherwise.
|
code: str, *, test_executable: str, baseline_executable: str
|
||||||
|
) -> bool:
|
||||||
|
"""Return `True` if the code triggers a *new* parser error.
|
||||||
|
|
||||||
If `only_new_bugs` is set to `True`,
|
A "new" parser error is one that exists with `test_executable`,
|
||||||
the function also runs an installed version of Ruff on the same source code,
|
but did not exist with `baseline_executable`.
|
||||||
and only returns `True` if the bug appears on the branch you have currently
|
|
||||||
checked out but *not* in the latest release.
|
|
||||||
"""
|
"""
|
||||||
new_result = run_ruff(["cargo", "run", "--release", "--"], code)
|
return contains_bug(code, ruff_executable=test_executable) and not contains_bug(
|
||||||
if not only_new_bugs:
|
code, ruff_executable=baseline_executable
|
||||||
return new_result.returncode != 0
|
)
|
||||||
if new_result.returncode == 0:
|
|
||||||
return False
|
|
||||||
old_result = run_ruff(["ruff"], code)
|
|
||||||
return old_result.returncode == 0
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass(slots=True)
|
@dataclass(slots=True)
|
||||||
|
@ -82,21 +79,28 @@ class FuzzResult:
|
||||||
print(colored(f"Ran fuzzer successfully on seed {self.seed}", "green"))
|
print(colored(f"Ran fuzzer successfully on seed {self.seed}", "green"))
|
||||||
|
|
||||||
|
|
||||||
def fuzz_code(seed: Seed, only_new_bugs: bool) -> FuzzResult:
|
def fuzz_code(
|
||||||
|
seed: Seed,
|
||||||
|
*,
|
||||||
|
test_executable: str,
|
||||||
|
baseline_executable: str,
|
||||||
|
only_new_bugs: bool,
|
||||||
|
) -> FuzzResult:
|
||||||
"""Return a `FuzzResult` instance describing the fuzzing result from this seed."""
|
"""Return a `FuzzResult` instance describing the fuzzing result from this seed."""
|
||||||
code = generate_random_code(seed)
|
code = generate_random_code(seed)
|
||||||
if contains_bug(code, only_new_bugs=only_new_bugs):
|
has_bug = (
|
||||||
try:
|
contains_new_bug(
|
||||||
new_code = minimize_repro(code, contains_bug)
|
code,
|
||||||
except ValueError:
|
test_executable=test_executable,
|
||||||
# `pysource_minimize.minimize()` failed to reproduce the bug.
|
baseline_executable=baseline_executable,
|
||||||
# This could indicate that `contains_bug()` failed due to a race condition
|
)
|
||||||
# from running `cargo build` concurrently, so double-check that the
|
if only_new_bugs
|
||||||
# original snippet does actually reproduce the bug. If so, just go with the
|
else contains_bug(code, ruff_executable=test_executable)
|
||||||
# original snippet; if not, report the fuzzing as successful:
|
)
|
||||||
maybe_bug = MinimizedSourceCode(code) if contains_bug(code) else None
|
if has_bug:
|
||||||
else:
|
maybe_bug = MinimizedSourceCode(
|
||||||
maybe_bug = MinimizedSourceCode(new_code)
|
minimize_repro(code, partial(contains_bug, ruff_executable=test_executable))
|
||||||
|
)
|
||||||
else:
|
else:
|
||||||
maybe_bug = None
|
maybe_bug = None
|
||||||
return FuzzResult(seed, maybe_bug)
|
return FuzzResult(seed, maybe_bug)
|
||||||
|
@ -110,7 +114,14 @@ def run_fuzzer_concurrently(args: ResolvedCliArgs) -> list[FuzzResult]:
|
||||||
bugs: list[FuzzResult] = []
|
bugs: list[FuzzResult] = []
|
||||||
with concurrent.futures.ProcessPoolExecutor() as executor:
|
with concurrent.futures.ProcessPoolExecutor() as executor:
|
||||||
fuzz_result_futures = [
|
fuzz_result_futures = [
|
||||||
executor.submit(fuzz_code, seed, args.only_new_bugs) for seed in args.seeds
|
executor.submit(
|
||||||
|
fuzz_code,
|
||||||
|
seed,
|
||||||
|
test_executable=args.test_executable,
|
||||||
|
baseline_executable=args.baseline_executable,
|
||||||
|
only_new_bugs=args.only_new_bugs,
|
||||||
|
)
|
||||||
|
for seed in args.seeds
|
||||||
]
|
]
|
||||||
try:
|
try:
|
||||||
for future in concurrent.futures.as_completed(fuzz_result_futures):
|
for future in concurrent.futures.as_completed(fuzz_result_futures):
|
||||||
|
@ -134,7 +145,12 @@ def run_fuzzer_sequentially(args: ResolvedCliArgs) -> list[FuzzResult]:
|
||||||
)
|
)
|
||||||
bugs: list[FuzzResult] = []
|
bugs: list[FuzzResult] = []
|
||||||
for seed in args.seeds:
|
for seed in args.seeds:
|
||||||
fuzz_result = fuzz_code(seed, only_new_bugs=args.only_new_bugs)
|
fuzz_result = fuzz_code(
|
||||||
|
seed,
|
||||||
|
test_executable=args.test_executable,
|
||||||
|
baseline_executable=args.baseline_executable,
|
||||||
|
only_new_bugs=args.only_new_bugs,
|
||||||
|
)
|
||||||
if not args.quiet:
|
if not args.quiet:
|
||||||
fuzz_result.print_description()
|
fuzz_result.print_description()
|
||||||
if fuzz_result.maybe_bug:
|
if fuzz_result.maybe_bug:
|
||||||
|
@ -142,20 +158,7 @@ def run_fuzzer_sequentially(args: ResolvedCliArgs) -> list[FuzzResult]:
|
||||||
return bugs
|
return bugs
|
||||||
|
|
||||||
|
|
||||||
def main(args: ResolvedCliArgs) -> None:
|
def main(args: ResolvedCliArgs) -> ExitCode:
|
||||||
if args.only_new_bugs:
|
|
||||||
ruff_version = (
|
|
||||||
subprocess.run(
|
|
||||||
["ruff", "--version"], text=True, capture_output=True, check=True
|
|
||||||
)
|
|
||||||
.stdout.strip()
|
|
||||||
.split(" ")[1]
|
|
||||||
)
|
|
||||||
print(
|
|
||||||
f"As you have selected `--only-new-bugs`, "
|
|
||||||
f"bugs will only be reported if they appear on your current branch "
|
|
||||||
f"but do *not* appear in `ruff=={ruff_version}`"
|
|
||||||
)
|
|
||||||
if len(args.seeds) <= 5:
|
if len(args.seeds) <= 5:
|
||||||
bugs = run_fuzzer_sequentially(args)
|
bugs = run_fuzzer_sequentially(args)
|
||||||
else:
|
else:
|
||||||
|
@ -164,8 +167,10 @@ def main(args: ResolvedCliArgs) -> None:
|
||||||
if bugs:
|
if bugs:
|
||||||
print(colored(f"{noun_phrase} found in the following seeds:", "red"))
|
print(colored(f"{noun_phrase} found in the following seeds:", "red"))
|
||||||
print(*sorted(bug.seed for bug in bugs))
|
print(*sorted(bug.seed for bug in bugs))
|
||||||
|
return ExitCode(1)
|
||||||
else:
|
else:
|
||||||
print(colored(f"No {noun_phrase.lower()} found!", "green"))
|
print(colored(f"No {noun_phrase.lower()} found!", "green"))
|
||||||
|
return ExitCode(0)
|
||||||
|
|
||||||
|
|
||||||
def parse_seed_argument(arg: str) -> int | range:
|
def parse_seed_argument(arg: str) -> int | range:
|
||||||
|
@ -195,6 +200,8 @@ def parse_seed_argument(arg: str) -> int | range:
|
||||||
class ResolvedCliArgs:
|
class ResolvedCliArgs:
|
||||||
seeds: list[Seed]
|
seeds: list[Seed]
|
||||||
_: KW_ONLY
|
_: KW_ONLY
|
||||||
|
test_executable: str
|
||||||
|
baseline_executable: str
|
||||||
only_new_bugs: bool
|
only_new_bugs: bool
|
||||||
quiet: bool
|
quiet: bool
|
||||||
|
|
||||||
|
@ -202,7 +209,7 @@ class ResolvedCliArgs:
|
||||||
def parse_args() -> ResolvedCliArgs:
|
def parse_args() -> ResolvedCliArgs:
|
||||||
"""Parse command-line arguments"""
|
"""Parse command-line arguments"""
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
description=__doc__, formatter_class=argparse.RawTextHelpFormatter
|
description=__doc__, formatter_class=argparse.RawDescriptionHelpFormatter
|
||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"seeds",
|
"seeds",
|
||||||
|
@ -224,7 +231,69 @@ def parse_args() -> ResolvedCliArgs:
|
||||||
action="store_true",
|
action="store_true",
|
||||||
help="Print fewer things to the terminal while running the fuzzer",
|
help="Print fewer things to the terminal while running the fuzzer",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--test-executable",
|
||||||
|
help=(
|
||||||
|
"`ruff` executable to test. "
|
||||||
|
"Defaults to a fresh build of the currently checked-out branch."
|
||||||
|
),
|
||||||
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--baseline-executable",
|
||||||
|
help=(
|
||||||
|
"`ruff` executable to compare results against. "
|
||||||
|
"Defaults to whatever `ruff` version is installed "
|
||||||
|
"in the Python environment."
|
||||||
|
),
|
||||||
|
)
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
|
if args.baseline_executable:
|
||||||
|
if not args.only_new_bugs:
|
||||||
|
parser.error(
|
||||||
|
"Specifying `--baseline-executable` has no effect "
|
||||||
|
"unless `--only-new-bugs` is also specified"
|
||||||
|
)
|
||||||
|
try:
|
||||||
|
subprocess.run(
|
||||||
|
[args.baseline_executable, "--version"], check=True, capture_output=True
|
||||||
|
)
|
||||||
|
except FileNotFoundError:
|
||||||
|
parser.error(
|
||||||
|
f"Bad argument passed to `--baseline-executable`: "
|
||||||
|
f"no such file or executable {args.baseline_executable!r}"
|
||||||
|
)
|
||||||
|
elif args.only_new_bugs:
|
||||||
|
try:
|
||||||
|
ruff_version_proc = subprocess.run(
|
||||||
|
["ruff", "--version"], text=True, capture_output=True, check=True
|
||||||
|
)
|
||||||
|
except FileNotFoundError:
|
||||||
|
parser.error(
|
||||||
|
"`--only-new-bugs` was specified without specifying a baseline "
|
||||||
|
"executable, and no released version of Ruff appears to be installed "
|
||||||
|
"in your Python environment"
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
if not args.quiet:
|
||||||
|
ruff_version = ruff_version_proc.stdout.strip().split(" ")[1]
|
||||||
|
print(
|
||||||
|
f"`--only-new-bugs` was specified without specifying a baseline "
|
||||||
|
f"executable; falling back to using `ruff=={ruff_version}` as the "
|
||||||
|
f"baseline (the version of Ruff installed in your current Python "
|
||||||
|
f"environment)"
|
||||||
|
)
|
||||||
|
args.baseline_executable = "ruff"
|
||||||
|
|
||||||
|
if not args.test_executable:
|
||||||
|
print(
|
||||||
|
"Running `cargo build --release` since no test executable was specified..."
|
||||||
|
)
|
||||||
|
subprocess.run(["cargo", "build", "--release"], check=True, capture_output=True)
|
||||||
|
args.test_executable = os.path.join("target", "release", "ruff")
|
||||||
|
assert os.path.exists(args.test_executable)
|
||||||
|
|
||||||
seed_arguments: list[range | int] = args.seeds
|
seed_arguments: list[range | int] = args.seeds
|
||||||
seen_seeds: set[int] = set()
|
seen_seeds: set[int] = set()
|
||||||
for arg in seed_arguments:
|
for arg in seed_arguments:
|
||||||
|
@ -232,13 +301,16 @@ def parse_args() -> ResolvedCliArgs:
|
||||||
seen_seeds.add(arg)
|
seen_seeds.add(arg)
|
||||||
else:
|
else:
|
||||||
seen_seeds.update(arg)
|
seen_seeds.update(arg)
|
||||||
|
|
||||||
return ResolvedCliArgs(
|
return ResolvedCliArgs(
|
||||||
sorted(map(Seed, seen_seeds)),
|
sorted(map(Seed, seen_seeds)),
|
||||||
only_new_bugs=args.only_new_bugs,
|
only_new_bugs=args.only_new_bugs,
|
||||||
quiet=args.quiet,
|
quiet=args.quiet,
|
||||||
|
test_executable=args.test_executable,
|
||||||
|
baseline_executable=args.baseline_executable,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
args = parse_args()
|
args = parse_args()
|
||||||
main(args)
|
raise SystemExit(main(args))
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue