Remove symlinks from the fuzz directory (#18095)

## Summary

This PR does the following:

1. Remove the symlinks from the `fuzz/` directory
2. Update the `init-fuzzer.sh` script to create those symlinks
3. Update `fuzz/.gitignore` to ignore those corpus directories

## Test Plan

Initialize the fuzzer:

```sh
./fuzz/init-fuzzer.sh
```

Then run a fuzz target:

```sh
cargo +nightly fuzz run ruff_parse_simple -- -timeout=1 -only_ascii=1
```
2025-07-07 13:15:06 +00:00 · 2025-05-14 21:05:52 +05:30 · 2025-05-14 21:05:52 +05:30 · cf70c7863c
commit cf70c7863c
parent faf54c0181
10 changed files with 36 additions and 33 deletions
--- a/fuzz/.gitignore
+++ b/fuzz/.gitignore
@@ -1,3 +1,3 @@
 artifacts/
-corpus/ruff_fix_validity
+corpus/
 Cargo.lock
--- a/fuzz/README.md
+++ b/fuzz/README.md
@@ -12,8 +12,11 @@ To use the fuzzers provided in this directory, start by invoking:

 This will install [`cargo-fuzz`](https://github.com/rust-fuzz/cargo-fuzz) and optionally download a
 [dataset](https://zenodo.org/record/3628784) which improves the efficacy of the testing.
-**This step is necessary for initialising the corpus directory, as all fuzzers share a common
-corpus.**
+
+> [!NOTE]
+>
+> This step is necessary for initialising the corpus directory, as all fuzzers share a common corpus.
+
 The dataset may take several hours to download and clean, so if you're just looking to try out the
 fuzzers, skip the dataset download, though be warned that some features simply cannot be tested
 without it (very unlikely for the fuzzer to generate valid python code from "thin air").
@@ -24,13 +27,23 @@ Once you have initialised the fuzzers, you can then execute any fuzzer with:
 cargo fuzz run -s none name_of_fuzzer -- -timeout=1
 ```

-**Users using Apple M1 devices must use a nightly compiler and omit the `-s none` portion of this
-command, as this architecture does not support fuzzing without a sanitizer.**
+> [!NOTE]
+>
+> Users using Apple M1 devices must use a nightly compiler and omit the `-s none` portion of this
+> command, as this architecture does not support fuzzing without a sanitizer.
+>
+> ```shell
+> cargo +nightly fuzz run name_of_fuzzer -- -timeout=1
+> ```
+
 You can view the names of the available fuzzers with `cargo fuzz list`.
 For specific details about how each fuzzer works, please read this document in its entirety.

-**IMPORTANT: You should run `./reinit-fuzzer.sh` after adding more file-based testcases.** This will
-allow the testing of new features that you've added unit tests for.
+> [!NOTE]
+>
+> Re-run `./init-fuzzer.sh` (say no to the dataset download) after adding more file-based test cases
+> to the repository. This will make sure that the corpus is up to date with any new Python code
+> added to the repository.

 ### Debugging a crash

--- a/fuzz/corpus/red_knot_check_invalid_syntax
+++ b/fuzz/corpus/red_knot_check_invalid_syntax
@@ -1 +0,0 @@
-ruff_fix_validity
--- a/fuzz/corpus/ruff_formatter_idempotency
+++ b/fuzz/corpus/ruff_formatter_idempotency
@@ -1 +0,0 @@
-ruff_fix_validity
--- a/fuzz/corpus/ruff_formatter_validity
+++ b/fuzz/corpus/ruff_formatter_validity
@@ -1 +0,0 @@
-ruff_fix_validity
--- a/fuzz/corpus/ruff_new_parser_equiv
+++ b/fuzz/corpus/ruff_new_parser_equiv
@@ -1 +0,0 @@
-ruff_fix_validity
--- a/fuzz/corpus/ruff_parse_idempotency
+++ b/fuzz/corpus/ruff_parse_idempotency
@@ -1 +0,0 @@
-ruff_parse_simple
--- a/fuzz/corpus/ruff_parse_simple
+++ b/fuzz/corpus/ruff_parse_simple
@@ -1 +0,0 @@
-ruff_fix_validity/
--- a/fuzz/init-fuzzer.sh
+++ b/fuzz/init-fuzzer.sh
@@ -6,22 +6,31 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
 cd "$SCRIPT_DIR"

 if ! cargo fuzz --help >&/dev/null; then
+  echo "Installing cargo-fuzz..."
  cargo install --git https://github.com/rust-fuzz/cargo-fuzz.git
 fi

-if [ ! -d corpus/ruff_fix_validity ]; then
-  mkdir -p corpus/ruff_fix_validity
+if [ ! -d corpus/common ]; then
+  mkdir -p corpus/common
+
+  echo "Creating symlinks for fuzz targets to the common corpus directory..."
+  for target in fuzz_targets/*; do
+    corpus_dir="$(basename "$target" .rs)"
+    ln -vs "./common" "corpus/$corpus_dir"
+  done

  (
-    cd corpus/ruff_fix_validity
+    cd corpus/common

    read -p "Would you like to build a corpus from a python source code dataset? (this will take a long time!) [Y/n] " -n 1 -r
    echo
    if [[ $REPLY =~ ^[Yy]$ ]]; then
+      echo "Downloading the Python source code dataset..."
      curl -L 'https://zenodo.org/record/3628784/files/python-corpus.tar.gz?download=1' | tar xz
    fi

    # Build a smaller corpus in addition to the (optional) larger corpus
+    echo "Building a smaller corpus dataset..."
    curl -L 'https://github.com/python/cpython/archive/refs/tags/v3.13.0.tar.gz' | tar xz
    cp -r "../../../crates/ty_project/resources/test/corpus" "ty_project"
    cp -r "../../../crates/ruff_linter/resources/test/fixtures" "ruff_linter"
@@ -32,11 +41,12 @@ if [ ! -d corpus/ruff_fix_validity ]; then
    find . -type f -not -name "*.py" -delete
  )

+  echo "Minifying the corpus dataset..."
  if [[ "$OSTYPE" == "darwin"* ]]; then
-    cargo +nightly fuzz cmin ruff_fix_validity -- -timeout=5
+    cargo +nightly fuzz cmin ruff_fix_validity corpus/common -- -timeout=5
  else
-    cargo fuzz cmin -s none ruff_fix_validity -- -timeout=5
+    cargo fuzz cmin -s none ruff_fix_validity corpus/common -- -timeout=5
  fi
 fi

-echo "Done! You are ready to fuzz."
+echo "Done! You are ready to fuzz"
--- a/fuzz/reinit-fuzzer.sh
+++ b/fuzz/reinit-fuzzer.sh
@@ -1,14 +0,0 @@
-#!/bin/bash
-
-# https://stackoverflow.com/a/246128/3549270
-SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
-
-cd "$SCRIPT_DIR"
-
-cd corpus/ruff_fix_validity
-curl -L 'https://github.com/python/cpython/archive/refs/tags/v3.12.0b2.tar.gz' | tar xz
-cp -r "../../../crates/ruff_linter/resources/test" .
-cd -
-cargo fuzz cmin -s none ruff_fix_validity -- -timeout=5
-
-echo "Done! You are ready to fuzz."