Add an idempotent fuzz_target for ruff_python_formatter (#9448)

Co-authored-by: Addison Crump <addison.crump@cispa.de>
Co-authored-by: Addison Crump <me@addisoncrump.info>
This commit is contained in:
manunio 2024-01-11 13:25:59 +05:30 committed by GitHub
parent 4a3bb67b5f
commit 14d3fe6bfa
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 170 additions and 0 deletions

View file

@ -18,9 +18,11 @@ cargo-fuzz = true
[dependencies]
ruff_linter = { path = "../crates/ruff_linter" }
ruff_python_ast = { path = "../crates/ruff_python_ast" }
ruff_python_codegen = { path = "../crates/ruff_python_codegen" }
ruff_python_parser = { path = "../crates/ruff_python_parser" }
ruff_source_file = { path = "../crates/ruff_source_file" }
ruff_python_formatter = { path = "../crates/ruff_python_formatter"}
arbitrary = { version = "1.3.0", features = ["derive"] }
libfuzzer-sys = { git = "https://github.com/rust-fuzz/libfuzzer", default-features = false }
@ -38,10 +40,18 @@ path = "fuzz_targets/ruff_parse_simple.rs"
name = "ruff_fix_validity"
path = "fuzz_targets/ruff_fix_validity.rs"
[[bin]]
name = "ruff_formatter_validity"
path = "fuzz_targets/ruff_formatter_validity.rs"
[[bin]]
name = "ruff_parse_idempotency"
path = "fuzz_targets/ruff_parse_idempotency.rs"
[[bin]]
name = "ruff_formatter_idempotency"
path = "fuzz_targets/ruff_formatter_idempotency.rs"
[profile.release]
opt-level = 3
debug = true

View file

@ -101,3 +101,16 @@ This fuzz harness checks that fixes applied by Ruff do not introduce new errors
[`ruff_linter::test::test_snippet`](../crates/ruff_linter/src/test.rs) testing utility.
It currently is only configured to use default settings, but may be extended in future versions to
test non-default linter settings.
### `ruff_formatter_idempotency`
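This fuzz harness checks that the formatter is [idempotent](https://en.wikipedia.org/wiki/Idempotence):
it formats the input, formats the result a second time, and fails if the second pass changes the
output. This detects possible unsteady states of Ruff's formatter.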
### `ruff_formatter_validity`
This fuzz harness checks that Ruff's formatter does not introduce new linter errors/warnings by
linting once, counting the number of each error type, then formatting, then linting again and
ensuring that the number of each error type does not increase across formats. This has the
beneficial side effect of discovering cases where the linter does not discover a lint error when
it should have due to a formatting inconsistency.

View file

@ -0,0 +1 @@
ruff_fix_validity

View file

@ -0,0 +1 @@
ruff_fix_validity

View file

@ -0,0 +1,47 @@
//! Fuzzer harness which formats the input twice and assesses the idempotency of Ruff's formatter,
//! i.e. whether a second formatting pass changes the output (an unsteady state).
#![no_main]
use libfuzzer_sys::{fuzz_target, Corpus};
use similar::TextDiff;
use ruff_python_formatter::{format_module_source, PyFormatOptions};
/// Formats `case` twice and panics if the second pass disagrees with the first.
///
/// Returns `Corpus::Reject` when `case` is not valid UTF-8. Returns `Corpus::Keep` when the
/// first formatting pass returns an error, or when both passes produce identical output.
///
/// # Panics
///
/// Panics if the once-formatted output cannot be formatted again, or if formatting it again
/// yields different text. The panic message carries the original input and either the
/// formatter's error or a unified diff between the two passes.
fn do_fuzz(case: &[u8]) -> Corpus {
    // Throw away inputs which aren't utf-8
    let Ok(code) = std::str::from_utf8(case) else {
        return Corpus::Reject;
    };

    let options = PyFormatOptions::default();

    // format the code once; input the formatter refuses is kept but not examined further
    let Ok(first_pass) = format_module_source(code, options.clone()) else {
        return Corpus::Keep;
    };
    let formatted = first_pass.as_code();

    // reformat the code second time; the formatter's own output must be accepted again,
    // so a failure here is reported together with the error that caused it
    let second_pass = match format_module_source(formatted, options) {
        Ok(printed) => printed,
        Err(err) => panic!(
            "Unable to format the code second time:\nInput:{:?}\nformatted:\n{:?}\nerror: {}",
            code, formatted, err
        ),
    };
    let reformatted = second_pass.as_code();

    if formatted != reformatted {
        let diff = TextDiff::from_lines(formatted, reformatted)
            .unified_diff()
            .header("Formatted Once", "Formatted Twice")
            .to_string();
        panic!(
            "\nReformatting the code a second time resulted in formatting changes.\nInput: {:?}\ndiff:\n{}",
            code, diff
        );
    }

    Corpus::Keep
}
// Register the harness: every raw input is passed to `do_fuzz`, whose `Corpus` value is returned.
fuzz_target!(|case: &[u8]| -> Corpus { do_fuzz(case) });

View file

@ -0,0 +1,98 @@
//! Fuzzer harness which actively tries to find testcases that cause Ruff's formatter to introduce
//! new linter errors or warnings into the resulting file.
#![no_main]
use std::collections::HashMap;
use std::sync::OnceLock;
use libfuzzer_sys::{fuzz_target, Corpus};
use ruff_linter::linter::ParseSource;
use ruff_linter::settings::flags::Noqa;
use ruff_linter::settings::LinterSettings;
use ruff_linter::source_kind::SourceKind;
use ruff_python_ast::PySourceType;
use ruff_python_formatter::{format_module_source, PyFormatOptions};
use similar::TextDiff;
// Linter settings shared across fuzz iterations; filled in lazily by `do_fuzz` on first use.
static SETTINGS: OnceLock<LinterSettings> = OnceLock::new();
/// Lints `case`, formats it, lints the formatted output, and panics if formatting added
/// diagnostics.
///
/// Returns `Corpus::Reject` when `case` is not valid UTF-8. Returns `Corpus::Keep` when the
/// initial lint reports an error, when the formatter returns an error, or when every diagnostic
/// name occurs in the formatted output no more often than it did in the input.
///
/// # Panics
///
/// Panics if linting the formatted output reports an error, or if a diagnostic name occurs more
/// often after formatting than before (including names that did not occur before at all).
fn do_fuzz(case: &[u8]) -> Corpus {
    // throw away inputs which aren't utf-8
    let Ok(code) = std::str::from_utf8(case) else {
        return Corpus::Reject;
    };

    // the default settings are built on the first call only and reused afterwards
    let linter_settings = SETTINGS.get_or_init(LinterSettings::default);

    // both lint passes use identical arguments; only the source text differs
    let lint = |source: String| {
        ruff_linter::linter::lint_only(
            "fuzzed-source.py".as_ref(),
            None,
            linter_settings,
            Noqa::Enabled,
            &SourceKind::Python(source),
            PySourceType::Python,
            ParseSource::None,
        )
    };

    // unified diff from the fuzzed input to the formatter output, used in failure reports
    let diff_against = |formatted: &str| {
        TextDiff::from_lines(code, formatted)
            .unified_diff()
            .header("Unformatted", "Formatted")
            .to_string()
    };

    let before = lint(code.to_string());
    if before.error.is_some() {
        return Corpus::Keep; // keep, but don't continue
    }

    // number of occurrences of each diagnostic name in the unformatted input
    let mut budget: HashMap<_, usize> = HashMap::new();
    for msg in before.data.0 {
        *budget.entry(msg.kind.name).or_default() += 1;
    }

    // format the code once
    let Ok(printed) = format_module_source(code, PyFormatOptions::default()) else {
        return Corpus::Keep;
    };
    let formatted = printed.as_code();

    let after = lint(formatted.to_string());
    assert!(
        after.error.is_none(),
        "formatter introduced a parse error"
    );

    // every diagnostic on the formatted output consumes one occurrence from the budget
    for msg in after.data.0 {
        let Some(remaining) = budget.get_mut(&msg.kind.name) else {
            panic!(
                "formatter introduced new linter warning that was not previously present: {msg:?}\ndiff: {}",
                diff_against(formatted)
            );
        };
        if *remaining == 0 {
            panic!(
                "formatter introduced additional linter warning: {msg:?}\ndiff: {}",
                diff_against(formatted)
            );
        }
        *remaining -= 1;
    }

    Corpus::Keep
}
// Register the harness: every raw input is passed to `do_fuzz`, whose `Corpus` value is returned.
fuzz_target!(|case: &[u8]| -> Corpus { do_fuzz(case) });