diff --git a/fuzz/.gitignore b/fuzz/.gitignore index 0aae9a3e34..41cc193fba 100644 --- a/fuzz/.gitignore +++ b/fuzz/.gitignore @@ -1,3 +1,3 @@ artifacts/ -corpus/ruff_fix_validity +corpus/ Cargo.lock diff --git a/fuzz/README.md b/fuzz/README.md index 14f7692a89..8d249780d0 100644 --- a/fuzz/README.md +++ b/fuzz/README.md @@ -12,8 +12,11 @@ To use the fuzzers provided in this directory, start by invoking: This will install [`cargo-fuzz`](https://github.com/rust-fuzz/cargo-fuzz) and optionally download a [dataset](https://zenodo.org/record/3628784) which improves the efficacy of the testing. -**This step is necessary for initialising the corpus directory, as all fuzzers share a common -corpus.** + +> [!NOTE] +> +> This step is necessary for initialising the corpus directory, as all fuzzers share a common corpus. + The dataset may take several hours to download and clean, so if you're just looking to try out the fuzzers, skip the dataset download, though be warned that some features simply cannot be tested without it (very unlikely for the fuzzer to generate valid python code from "thin air"). @@ -24,13 +27,23 @@ Once you have initialised the fuzzers, you can then execute any fuzzer with: cargo fuzz run -s none name_of_fuzzer -- -timeout=1 ``` -**Users using Apple M1 devices must use a nightly compiler and omit the `-s none` portion of this -command, as this architecture does not support fuzzing without a sanitizer.** +> [!NOTE] +> +> Users using Apple M1 devices must use a nightly compiler and omit the `-s none` portion of this +> command, as this architecture does not support fuzzing without a sanitizer. +> +> ```shell +> cargo +nightly fuzz run name_of_fuzzer -- -timeout=1 +> ``` + You can view the names of the available fuzzers with `cargo fuzz list`. For specific details about how each fuzzer works, please read this document in its entirety. 
-**IMPORTANT: You should run `./reinit-fuzzer.sh` after adding more file-based testcases.** This will -allow the testing of new features that you've added unit tests for. +> [!NOTE] +> +> Re-run `./init-fuzzer.sh` (say no to the dataset download) after adding more file-based test cases +> to the repository. This will make sure that the corpus is up to date with any new Python code +> added to the repository. Delete the existing `corpus/` directory first: the script only rebuilds the corpus when `corpus/common` does not exist, so re-running it on an already initialised checkout is otherwise a no-op. ### Debugging a crash diff --git a/fuzz/corpus/red_knot_check_invalid_syntax b/fuzz/corpus/red_knot_check_invalid_syntax deleted file mode 120000 index 38dc5bc1ea..0000000000 --- a/fuzz/corpus/red_knot_check_invalid_syntax +++ /dev/null @@ -1 +0,0 @@ -ruff_fix_validity \ No newline at end of file diff --git a/fuzz/corpus/ruff_formatter_idempotency b/fuzz/corpus/ruff_formatter_idempotency deleted file mode 120000 index 38dc5bc1ea..0000000000 --- a/fuzz/corpus/ruff_formatter_idempotency +++ /dev/null @@ -1 +0,0 @@ -ruff_fix_validity \ No newline at end of file diff --git a/fuzz/corpus/ruff_formatter_validity b/fuzz/corpus/ruff_formatter_validity deleted file mode 120000 index 38dc5bc1ea..0000000000 --- a/fuzz/corpus/ruff_formatter_validity +++ /dev/null @@ -1 +0,0 @@ -ruff_fix_validity \ No newline at end of file diff --git a/fuzz/corpus/ruff_new_parser_equiv b/fuzz/corpus/ruff_new_parser_equiv deleted file mode 120000 index 38dc5bc1ea..0000000000 --- a/fuzz/corpus/ruff_new_parser_equiv +++ /dev/null @@ -1 +0,0 @@ -ruff_fix_validity \ No newline at end of file diff --git a/fuzz/corpus/ruff_parse_idempotency b/fuzz/corpus/ruff_parse_idempotency deleted file mode 120000 index 61e7ad4b4c..0000000000 --- a/fuzz/corpus/ruff_parse_idempotency +++ /dev/null @@ -1 +0,0 @@ -ruff_parse_simple \ No newline at end of file diff --git a/fuzz/corpus/ruff_parse_simple b/fuzz/corpus/ruff_parse_simple deleted file mode 120000 index 018c02efec..0000000000 --- a/fuzz/corpus/ruff_parse_simple +++ /dev/null @@ -1 +0,0 @@ -ruff_fix_validity/ \ No newline at end of file diff --git 
a/fuzz/init-fuzzer.sh b/fuzz/init-fuzzer.sh index 7de7087dc5..2419ae2466 100755 --- a/fuzz/init-fuzzer.sh +++ b/fuzz/init-fuzzer.sh @@ -6,22 +6,31 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) cd "$SCRIPT_DIR" if ! cargo fuzz --help >&/dev/null; then + echo "Installing cargo-fuzz..." cargo install --git https://github.com/rust-fuzz/cargo-fuzz.git fi -if [ ! -d corpus/ruff_fix_validity ]; then - mkdir -p corpus/ruff_fix_validity +if [ ! -d corpus/common ]; then + mkdir -p corpus/common + + echo "Creating symlinks for fuzz targets to the common corpus directory..." + for target in fuzz_targets/*; do + corpus_dir="$(basename "$target" .rs)" + ln -vs "./common" "corpus/$corpus_dir" + done ( - cd corpus/ruff_fix_validity + cd corpus/common read -p "Would you like to build a corpus from a python source code dataset? (this will take a long time!) [Y/n] " -n 1 -r echo if [[ $REPLY =~ ^[Yy]$ ]]; then + echo "Downloading the Python source code dataset..." curl -L 'https://zenodo.org/record/3628784/files/python-corpus.tar.gz?download=1' | tar xz fi # Build a smaller corpus in addition to the (optional) larger corpus + echo "Building a smaller corpus dataset..." curl -L 'https://github.com/python/cpython/archive/refs/tags/v3.13.0.tar.gz' | tar xz cp -r "../../../crates/ty_project/resources/test/corpus" "ty_project" cp -r "../../../crates/ruff_linter/resources/test/fixtures" "ruff_linter" @@ -32,11 +41,12 @@ if [ ! -d corpus/ruff_fix_validity ]; then find . -type f -not -name "*.py" -delete ) + echo "Minifying the corpus dataset..." if [[ "$OSTYPE" == "darwin"* ]]; then - cargo +nightly fuzz cmin ruff_fix_validity -- -timeout=5 + cargo +nightly fuzz cmin ruff_fix_validity corpus/common -- -timeout=5 else - cargo fuzz cmin -s none ruff_fix_validity -- -timeout=5 + cargo fuzz cmin -s none ruff_fix_validity corpus/common -- -timeout=5 fi fi -echo "Done! You are ready to fuzz." +echo "Done! 
You are ready to fuzz" diff --git a/fuzz/reinit-fuzzer.sh b/fuzz/reinit-fuzzer.sh deleted file mode 100755 index f1e2a698fb..0000000000 --- a/fuzz/reinit-fuzzer.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash - -# https://stackoverflow.com/a/246128/3549270 -SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) - -cd "$SCRIPT_DIR" - -cd corpus/ruff_fix_validity -curl -L 'https://github.com/python/cpython/archive/refs/tags/v3.12.0b2.tar.gz' | tar xz -cp -r "../../../crates/ruff_linter/resources/test" . -cd - -cargo fuzz cmin -s none ruff_fix_validity -- -timeout=5 - -echo "Done! You are ready to fuzz."