mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-04 10:49:50 +00:00
Create fuzzers for testing correctness of parsing, linting and fixing (#4822)
Co-authored-by: Micha Reiser <micha@reiser.io>
This commit is contained in:
parent
6ab3fc60f4
commit
2f125f4019
14 changed files with 2328 additions and 9 deletions
13
.github/workflows/ci.yaml
vendored
13
.github/workflows/ci.yaml
vendored
|
@ -87,6 +87,19 @@ jobs:
|
|||
name: ruff
|
||||
path: target/debug/ruff
|
||||
|
||||
cargo-fuzz:
|
||||
runs-on: ubuntu-latest
|
||||
name: "cargo fuzz"
|
||||
steps:
|
||||
- uses: actions/checkout@v3
|
||||
- name: "Install Rust toolchain"
|
||||
run: rustup show
|
||||
- uses: Swatinem/rust-cache@v2
|
||||
- name: "Install cargo-binstall"
|
||||
uses: taiki-e/install-action@cargo-binstall
|
||||
- run: cargo binstall cargo-fuzz -y
|
||||
- run: cargo fuzz build -s none
|
||||
|
||||
cargo-test-wasm:
|
||||
runs-on: ubuntu-latest
|
||||
name: "cargo test (wasm)"
|
||||
|
|
|
@ -35,5 +35,5 @@ mod rule_selector;
|
|||
pub mod rules;
|
||||
pub mod settings;
|
||||
|
||||
#[cfg(test)]
|
||||
mod test;
|
||||
#[cfg(any(test, fuzzing))]
|
||||
pub mod test;
|
||||
|
|
|
@ -1,8 +1,9 @@
|
|||
#![cfg(test)]
|
||||
#![cfg(any(test, fuzzing))]
|
||||
//! Helper functions for the tests of rule implementations.
|
||||
|
||||
use std::path::Path;
|
||||
|
||||
#[cfg(not(fuzzing))]
|
||||
use anyhow::Result;
|
||||
use itertools::Itertools;
|
||||
use ruff_textwrap::dedent;
|
||||
|
@ -21,11 +22,13 @@ use crate::registry::AsRule;
|
|||
use crate::rules::pycodestyle::rules::syntax_error;
|
||||
use crate::settings::{flags, Settings};
|
||||
|
||||
#[cfg(not(fuzzing))]
|
||||
pub(crate) fn test_resource_path(path: impl AsRef<Path>) -> std::path::PathBuf {
|
||||
Path::new("./resources/test/").join(path)
|
||||
}
|
||||
|
||||
/// Run [`check_path`] on a file in the `resources/test/fixtures` directory.
|
||||
#[cfg(not(fuzzing))]
|
||||
pub(crate) fn test_path(path: impl AsRef<Path>, settings: &Settings) -> Result<Vec<Message>> {
|
||||
let path = test_resource_path("fixtures").join(path);
|
||||
let contents = std::fs::read_to_string(&path)?;
|
||||
|
@ -33,17 +36,27 @@ pub(crate) fn test_path(path: impl AsRef<Path>, settings: &Settings) -> Result<V
|
|||
}
|
||||
|
||||
/// Run [`check_path`] on a snippet of Python code.
|
||||
pub(crate) fn test_snippet(contents: &str, settings: &Settings) -> Vec<Message> {
|
||||
pub fn test_snippet(contents: &str, settings: &Settings) -> Vec<Message> {
|
||||
let path = Path::new("<filename>");
|
||||
let contents = dedent(contents);
|
||||
test_contents(&contents, path, settings)
|
||||
}
|
||||
|
||||
thread_local! {
    /// Per-thread cap on the number of fix passes `test_contents` performs before
    /// it panics with a "Failed to converge" message. Starts at 20.
    static MAX_ITERATIONS: std::cell::Cell<usize> = std::cell::Cell::new(20);
}

/// Replaces the fix-iteration cap for the calling thread.
///
/// The value is stored in a thread-local, so other threads keep their own cap.
pub fn set_max_iterations(max: usize) {
    MAX_ITERATIONS.with(|limit| limit.set(max));
}

/// Returns the fix-iteration cap currently in effect on the calling thread.
pub(crate) fn max_iterations() -> usize {
    MAX_ITERATIONS.with(|limit| limit.get())
}
|
||||
|
||||
/// A convenient wrapper around [`check_path`], that additionally
|
||||
/// asserts that autofixes converge after a fixed number of iterations.
|
||||
fn test_contents(contents: &str, path: &Path, settings: &Settings) -> Vec<Message> {
|
||||
static MAX_ITERATIONS: usize = 20;
|
||||
|
||||
let tokens: Vec<LexResult> = ruff_rustpython::tokenize(contents);
|
||||
let locator = Locator::new(contents);
|
||||
let stylist = Stylist::from_tokens(&tokens, &locator);
|
||||
|
@ -83,14 +96,16 @@ fn test_contents(contents: &str, path: &Path, settings: &Settings) -> Vec<Messag
|
|||
let mut contents = contents.to_string();
|
||||
|
||||
while let Some((fixed_contents, _)) = fix_file(&diagnostics, &Locator::new(&contents)) {
|
||||
if iterations < MAX_ITERATIONS {
|
||||
if iterations < max_iterations() {
|
||||
iterations += 1;
|
||||
} else {
|
||||
let output = print_diagnostics(diagnostics, path, &contents);
|
||||
|
||||
panic!(
|
||||
"Failed to converge after {MAX_ITERATIONS} iterations. This likely \
|
||||
indicates a bug in the implementation of the fix. Last diagnostics:\n{output}"
|
||||
"Failed to converge after {} iterations. This likely \
|
||||
indicates a bug in the implementation of the fix. Last diagnostics:\n{}",
|
||||
max_iterations(),
|
||||
output
|
||||
);
|
||||
}
|
||||
|
||||
|
|
2
fuzz/.gitignore
vendored
Normal file
2
fuzz/.gitignore
vendored
Normal file
|
@ -0,0 +1,2 @@
|
|||
artifacts/
|
||||
corpus/ruff_fix_validity
|
1976
fuzz/Cargo.lock
generated
Normal file
1976
fuzz/Cargo.lock
generated
Normal file
File diff suppressed because it is too large
Load diff
56
fuzz/Cargo.toml
Normal file
56
fuzz/Cargo.toml
Normal file
|
@ -0,0 +1,56 @@
|
|||
[package]
|
||||
name = "ruff-fuzz"
|
||||
version = "0.0.0"
|
||||
authors = [
|
||||
"Charlie Marsh <charlie.r.marsh@gmail.com>",
|
||||
"Addison Crump <research@addisoncrump.info>",
|
||||
]
|
||||
publish = false
|
||||
edition = "2021"
|
||||
|
||||
[features]
|
||||
default = ["libfuzzer"]
|
||||
full-idempotency = []
|
||||
libafl = ["libafl_libfuzzer"]
|
||||
libafl_merge = ["libafl", "libafl_libfuzzer/merge"]
|
||||
libfuzzer = ["libfuzzer-sys/link_libfuzzer"]
|
||||
|
||||
[package.metadata]
|
||||
cargo-fuzz = true
|
||||
|
||||
[dependencies]
|
||||
arbitrary = { version = "1.3.0", features = ["derive"] }
|
||||
libafl_libfuzzer = { git = "https://github.com/AFLplusplus/LibAFL.git", branch = "libfuzzer", optional = true }
|
||||
libfuzzer-sys = { git = "https://github.com/rust-fuzz/libfuzzer", default-features = false }
|
||||
ruff = { path = "../crates/ruff" }
|
||||
ruff_python_ast = { path = "../crates/ruff_python_ast" }
|
||||
ruff_python_formatter = { path = "../crates/ruff_python_formatter" }
|
||||
similar = { version = "2.2.1" }
|
||||
|
||||
# Prevent this from interfering with workspaces
|
||||
[workspace]
|
||||
members = ["."]
|
||||
|
||||
[[bin]]
|
||||
name = "ruff_parse_simple"
|
||||
path = "fuzz_targets/ruff_parse_simple.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "ruff_fix_validity"
|
||||
path = "fuzz_targets/ruff_fix_validity.rs"
|
||||
|
||||
[[bin]]
|
||||
name = "ruff_parse_idempotency"
|
||||
path = "fuzz_targets/ruff_parse_idempotency.rs"
|
||||
|
||||
[profile.release]
|
||||
opt-level = 3
|
||||
debug = true
|
||||
|
||||
[profile.dev]
|
||||
opt-level = 3
|
||||
debug = true
|
||||
|
||||
[profile.test]
|
||||
opt-level = 3
|
||||
debug = true
|
105
fuzz/README.md
Normal file
105
fuzz/README.md
Normal file
|
@ -0,0 +1,105 @@
|
|||
# ruff-fuzz
|
||||
|
||||
Fuzzers and associated utilities for automatic testing of Ruff.
|
||||
|
||||
## Usage
|
||||
|
||||
To use the fuzzers provided in this directory, start by invoking:
|
||||
|
||||
```bash
|
||||
./fuzz/init-fuzzer.sh
|
||||
```
|
||||
|
||||
This will install [`cargo-fuzz`](https://github.com/rust-fuzz/cargo-fuzz) and optionally download a
|
||||
[dataset](https://zenodo.org/record/3628784) which improves the efficacy of the testing.
|
||||
The dataset may take several hours to download and clean, so if you're just looking to try out the
|
||||
fuzzers, skip the dataset download, though be warned that some features simply cannot be tested
|
||||
without it (the fuzzer is very unlikely to generate valid Python code from "thin air").
|
||||
|
||||
Once you have initialised the fuzzers, you can then execute any fuzzer with:
|
||||
|
||||
```bash
|
||||
cargo fuzz run -s none name_of_fuzzer -- -timeout=1
|
||||
```
|
||||
|
||||
You can view the names of the available fuzzers with `cargo fuzz list`.
|
||||
For specific details about how each fuzzer works, please read this document in its entirety.
|
||||
|
||||
**IMPORTANT: You should run `./fuzz/reinit-fuzzer.sh` after adding more file-based testcases.** This will
|
||||
allow the testing of new features that you've added unit tests for.
|
||||
|
||||
### Debugging a crash
|
||||
|
||||
Once you've found a crash, you'll need to debug it.
|
||||
The easiest first step in this process is to minimise the input such that the crash is still
|
||||
triggered with a smaller input.
|
||||
`cargo-fuzz` supports this out of the box with:
|
||||
|
||||
```bash
|
||||
cargo fuzz tmin -s none name_of_fuzzer artifacts/name_of_fuzzer/crash-...
|
||||
```
|
||||
|
||||
From here, you will need to analyse the input and potentially the behaviour of the program.
|
||||
The debugging process from here is unfortunately less well-defined, so you will need to apply some
|
||||
expertise here.
|
||||
Happy hunting!
|
||||
|
||||
## A brief introduction to fuzzers
|
||||
|
||||
Fuzzing, or fuzz testing, is the process of providing generated data to a program under test.
|
||||
The most common variety of fuzzers are mutational fuzzers; given a set of existing inputs (a
|
||||
"corpus"), it will attempt to slightly change (or "mutate") these inputs into new inputs that cover
|
||||
parts of the code that haven't yet been observed.
|
||||
Using this strategy, we can quite efficiently generate testcases which cover significant portions of
|
||||
the program, both with expected and unexpected data.
|
||||
[This is really quite effective for finding bugs.](https://github.com/rust-fuzz/trophy-case)
|
||||
|
||||
The fuzzers here use [`cargo-fuzz`](https://github.com/rust-fuzz/cargo-fuzz), a utility which allows
|
||||
Rust to integrate with [libFuzzer](https://llvm.org/docs/LibFuzzer.html), the fuzzer library built
|
||||
into LLVM.
|
||||
Each source file present in [`fuzz_targets`](fuzz_targets) is a harness, which is, in effect, a unit
|
||||
test which can handle different inputs.
|
||||
When an input is provided to a harness, the harness processes this data and libFuzzer observes the
|
||||
code coverage and any special values used in comparisons over the course of the run.
|
||||
Special values are preserved for future mutations and inputs which cover new regions of code are
|
||||
added to the corpus.
|
||||
|
||||
## Each fuzzer harness in detail
|
||||
|
||||
Each fuzzer harness in [`fuzz_targets`](fuzz_targets) targets a different aspect of Ruff and tests
|
||||
it in a different way. While there is implementation-specific documentation in the source code
|
||||
itself, each harness is briefly described below.
|
||||
|
||||
### `ruff_parse_simple`
|
||||
|
||||
This fuzz harness does not perform any "smart" testing of Ruff; it merely checks that the parsing
|
||||
and unparsing of a particular input (what would normally be a source code file) does not crash.
|
||||
While this is unlikely to find any issues on its own, it executes very quickly and covers a large
|
||||
and diverse code region that may speed up the generation of inputs and therefore make a more
|
||||
valuable corpus quickly.
|
||||
It is particularly useful if you skip the dataset generation.
|
||||
|
||||
### `ruff_parse_idempotency`
|
||||
|
||||
This fuzz harness checks that Ruff's parser is idempotent in order to check that it is not
|
||||
incorrectly parsing or unparsing an input.
|
||||
It can be built in two modes: default (where it is only checked that the parser does not enter an
|
||||
unstable state) or full idempotency (the parser is checked to ensure that it will _always_ produce
|
||||
the same output after the first unparsing).
|
||||
Full idempotency mode can be used by enabling the `full-idempotency` feature when running the
|
||||
fuzzer, but this may be too strict a restriction for initial testing.
|
||||
|
||||
### `ruff_fix_validity`
|
||||
|
||||
This fuzz harness checks that fixes applied by Ruff do not introduce new errors using the existing
|
||||
[`ruff::test::test_snippet`](../crates/ruff/src/test.rs) testing utility.
|
||||
It currently is only configured to use default settings, but may be extended in future versions to
|
||||
test non-default linter settings.
|
||||
|
||||
## Experimental settings
|
||||
|
||||
You can optionally use `--no-default-features --features libafl` to use the libafl fuzzer instead of
|
||||
libfuzzer.
|
||||
This fuzzer has experimental support, but can vastly improve fuzzer performance.
|
||||
If you are not already familiar with [LibAFL](https://github.com/AFLplusplus/LibAFL), this mode is
|
||||
not currently recommended.
|
1
fuzz/corpus/ruff_parse_idempotency
Symbolic link
1
fuzz/corpus/ruff_parse_idempotency
Symbolic link
|
@ -0,0 +1 @@
|
|||
ruff_parse_simple
|
1
fuzz/corpus/ruff_parse_simple
Symbolic link
1
fuzz/corpus/ruff_parse_simple
Symbolic link
|
@ -0,0 +1 @@
|
|||
ruff_fix_validity/
|
30
fuzz/fuzz_targets/ruff_fix_validity.rs
Normal file
30
fuzz/fuzz_targets/ruff_fix_validity.rs
Normal file
|
@ -0,0 +1,30 @@
|
|||
//! Fuzzer harness which actively tries to find testcases that cause Ruff to introduce errors into
|
||||
//! the resulting file.
|
||||
|
||||
#![no_main]
|
||||
|
||||
#[cfg(feature = "libafl")]
|
||||
extern crate libafl_libfuzzer;
|
||||
|
||||
use libfuzzer_sys::{fuzz_target, Corpus};
|
||||
use ruff::settings::Settings;
|
||||
use std::sync::OnceLock;
|
||||
|
||||
static SETTINGS: OnceLock<Settings> = OnceLock::new();
|
||||
|
||||
fn do_fuzz(case: &[u8]) -> Corpus {
|
||||
// throw away inputs which aren't utf-8
|
||||
let Ok(code) = std::str::from_utf8(case) else { return Corpus::Reject; };
|
||||
|
||||
// the settings are immutable to test_snippet, so we avoid re-initialising here
|
||||
let settings = SETTINGS.get_or_init(Settings::default);
|
||||
ruff::test::set_max_iterations(usize::MAX);
|
||||
|
||||
// unlike in the test framework, where the number of iterations is well-defined, we are only
|
||||
// looking for situations where a fix is bad; thus, we set the iterations to "infinite"
|
||||
let _ = ruff::test::test_snippet(code, settings);
|
||||
|
||||
Corpus::Keep
|
||||
}
|
||||
|
||||
fuzz_target!(|case: &[u8]| -> Corpus { do_fuzz(case) });
|
58
fuzz/fuzz_targets/ruff_parse_idempotency.rs
Normal file
58
fuzz/fuzz_targets/ruff_parse_idempotency.rs
Normal file
|
@ -0,0 +1,58 @@
|
|||
//! Fuzzer harness which searches for situations where the parser does not parse or unparse a
|
||||
//! particular source snippet consistently.
|
||||
|
||||
#![no_main]
|
||||
|
||||
#[cfg(feature = "libafl")]
|
||||
extern crate libafl_libfuzzer;
|
||||
|
||||
use libfuzzer_sys::{fuzz_target, Corpus};
|
||||
use ruff_python_ast::source_code::round_trip;
|
||||
use similar::TextDiff;
|
||||
|
||||
fn do_fuzz(case: &[u8]) -> Corpus {
|
||||
let Ok(code) = std::str::from_utf8(case) else { return Corpus::Reject; };
|
||||
|
||||
// round trip it once to get a formatted version
|
||||
if let Ok(first) = round_trip(code, "fuzzed-source.py") {
|
||||
// round trip it a second time to get a case to compare against
|
||||
if let Ok(second) = round_trip(&first, "fuzzed-source.py") {
|
||||
if cfg!(feature = "full-idempotency") {
|
||||
// potentially, we don't want to test for full idempotency, but just for unsteady states
|
||||
// enable the "full-idempotency" feature when fuzzing for full idempotency
|
||||
let diff = TextDiff::from_lines(&first, &second)
|
||||
.unified_diff()
|
||||
.header("Parsed once", "Parsed twice")
|
||||
.to_string();
|
||||
assert_eq!(
|
||||
first, second,
|
||||
"\nIdempotency violation (orig => first => second); original: {:?}\ndiff:\n{}",
|
||||
code, diff
|
||||
);
|
||||
} else if first != second {
|
||||
// by the third time we've round-tripped it, we shouldn't be introducing any more
|
||||
// changes; if we do, then it's likely that we're in an unsteady parsing state
|
||||
let third = round_trip(&second, "fuzzed-source.py")
|
||||
.expect("Couldn't round-trip the processed source.");
|
||||
let diff = TextDiff::from_lines(&second, &third)
|
||||
.unified_diff()
|
||||
.header("Parsed twice", "Parsed three times")
|
||||
.to_string();
|
||||
assert_eq!(
|
||||
second, third,
|
||||
"\nPotential unsteady state (orig => first => second => third); original: {:?}\ndiff:\n{}",
|
||||
code, diff
|
||||
);
|
||||
}
|
||||
} else {
|
||||
panic!(
|
||||
"Unable to perform the second round trip!\nbefore: {:?}\nfirst: {:?}",
|
||||
code, first
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
Corpus::Keep
|
||||
}
|
||||
|
||||
fuzz_target!(|case: &[u8]| -> Corpus { do_fuzz(case) });
|
21
fuzz/fuzz_targets/ruff_parse_simple.rs
Normal file
21
fuzz/fuzz_targets/ruff_parse_simple.rs
Normal file
|
@ -0,0 +1,21 @@
|
|||
//! Fuzzer harness which merely explores the parse/unparse coverage space and tries to make it
|
||||
//! crash. On its own, this fuzzer is (hopefully) not going to find a crash.
|
||||
|
||||
#![no_main]
|
||||
|
||||
#[cfg(feature = "libafl")]
|
||||
extern crate libafl_libfuzzer;
|
||||
|
||||
use libfuzzer_sys::{fuzz_target, Corpus};
|
||||
use ruff_python_ast::source_code::round_trip;
|
||||
|
||||
fn do_fuzz(case: &[u8]) -> Corpus {
|
||||
let Ok(code) = std::str::from_utf8(case) else { return Corpus::Reject; };
|
||||
|
||||
// just round-trip it once to trigger both parse and unparse
|
||||
let _ = round_trip(code, "fuzzed-source.py");
|
||||
|
||||
Corpus::Keep
|
||||
}
|
||||
|
||||
fuzz_target!(|case: &[u8]| -> Corpus { do_fuzz(case) });
|
25
fuzz/init-fuzzer.sh
Executable file
25
fuzz/init-fuzzer.sh
Executable file
|
@ -0,0 +1,25 @@
|
|||
#!/bin/bash

# Installs cargo-fuzz if it is missing and, on first run, seeds the
# `ruff_fix_validity` corpus from Ruff's test resources and (optionally) a
# downloaded Python source dataset, then minimises the corpus.

# Resolve the directory containing this script so it can be run from anywhere.
# https://stackoverflow.com/a/246128/3549270
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )

# Abort rather than continue in the wrong directory if the cd fails.
cd "$SCRIPT_DIR" || exit 1

if ! cargo fuzz --help >&/dev/null; then
  cargo install --git https://github.com/rust-fuzz/cargo-fuzz.git
fi

# Only build the corpus when it does not exist yet.
if [ ! -d corpus/ruff_fix_validity ]; then
  mkdir -p corpus/ruff_fix_validity
  # Only an explicit "y"/"Y" triggers the download, so "no" is shown as the default.
  read -p "Would you like to build a corpus from a python source code dataset? (this will take a long time!) [y/N] " -n 1 -r
  echo
  cd corpus/ruff_fix_validity || exit 1
  if [[ $REPLY =~ ^[Yy]$ ]]; then
    curl -L 'https://zenodo.org/record/3628784/files/python-corpus.tar.gz?download=1' | tar xz
  fi
  cp -r "../../../crates/ruff/resources/test" .
  cd - || exit 1
  cargo fuzz cmin -s none ruff_fix_validity
fi

echo "Done! You are ready to fuzz."
|
16
fuzz/reinit-fuzzer.sh
Executable file
16
fuzz/reinit-fuzzer.sh
Executable file
|
@ -0,0 +1,16 @@
|
|||
#!/bin/bash

# Refreshes an existing `ruff_fix_validity` corpus: re-copies Ruff's test
# resources into it (optionally re-downloading the Python source dataset) and
# minimises the corpus again.

# Resolve the directory containing this script so it can be run from anywhere.
# https://stackoverflow.com/a/246128/3549270
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )

# Abort rather than continue in the wrong directory if the cd fails.
cd "$SCRIPT_DIR" || exit 1

# The corpus directory is created by init-fuzzer.sh; without it there is nothing
# to refresh, and the relative `cp` below would resolve against the wrong path.
cd corpus/ruff_fix_validity || exit 1

# Nothing in this script used to set REPLY, so the dataset branch below could
# only run if the caller exported it. Ask when running interactively; a REPLY
# supplied through the environment is still honoured, and non-interactive runs
# keep skipping the download.
if [[ -z ${REPLY+set} && -t 0 ]]; then
  read -p "Would you like to re-download the python source code dataset? (this will take a long time!) [y/N] " -n 1 -r
  echo
fi

if [[ ${REPLY:-} =~ ^[Yy]$ ]]; then
  curl -L 'https://zenodo.org/record/3628784/files/python-corpus.tar.gz?download=1' | tar xz
fi
cp -r "../../../crates/ruff/resources/test" .
cd - || exit 1
cargo fuzz cmin -s none ruff_fix_validity

echo "Done! You are ready to fuzz."
|
Loading…
Add table
Add a link
Reference in a new issue