ruff/crates/ruff_python_formatter/tests/fixtures.rs
Brent Westbrook 1ed9b215b9
Update Black tests (#20794)
Summary
--

```shell
git clone git@github.com:psf/black.git ../other/black
crates/ruff_python_formatter/resources/test/fixtures/import_black_tests.py ../other/black
```

Then ran our tests and accepted the snapshots

I had to make a small fix to our tuple normalization logic for `del`
statements
in the second commit, otherwise the tests were panicking at a changed
AST. I
think the new implementation is closer to the intention described in the
nearby
comment anyway, though.

The first commit adds the new Python, settings, and `.expect` files, the
next three commits make some small
fixes to help get the tests running, and then the fifth commit accepts
all but one of the new snapshots. The last commit includes the new
unsupported syntax error for one f-string example, tracked in #20774.

Test Plan
--

Newly imported tests. I went through all of the new snapshots and added
review comments below. I think they're all expected, except a few cases
I wasn't 100% sure about.
2025-10-14 10:14:59 -04:00

699 lines
25 KiB
Rust

use crate::normalizer::Normalizer;
use ruff_db::diagnostic::{
Annotation, Diagnostic, DiagnosticFormat, DiagnosticId, DisplayDiagnosticConfig,
DisplayDiagnostics, DummyFileResolver, Severity, Span, SubDiagnostic, SubDiagnosticSeverity,
};
use ruff_formatter::FormatOptions;
use ruff_python_ast::Mod;
use ruff_python_ast::comparable::ComparableMod;
use ruff_python_ast::visitor::source_order::SourceOrderVisitor;
use ruff_python_formatter::{PreviewMode, PyFormatOptions, format_module_source, format_range};
use ruff_python_parser::{ParseOptions, Parsed, UnsupportedSyntaxError, parse};
use ruff_source_file::{LineIndex, OneIndexed, SourceFileBuilder};
use ruff_text_size::{Ranged, TextRange, TextSize};
use rustc_hash::FxHashMap;
use similar::TextDiff;
use std::borrow::Cow;
use std::collections::hash_map::Entry;
use std::fmt::{Formatter, Write};
use std::hash::{DefaultHasher, Hash, Hasher};
use std::io::BufReader;
use std::ops::Range;
use std::path::Path;
use std::{fmt, fs};
mod normalizer;
#[test]
fn black_compatibility() {
let test_file = |input_path: &Path| {
let content = fs::read_to_string(input_path).unwrap();
let options_path = input_path.with_extension("options.json");
let options: PyFormatOptions = if let Ok(options_file) = fs::File::open(&options_path) {
let reader = BufReader::new(options_file);
serde_json::from_reader(reader).unwrap_or_else(|_| {
panic!("Expected option file {options_path:?} to be a valid Json file")
})
} else {
PyFormatOptions::from_extension(input_path)
};
let first_line = content.lines().next().unwrap_or_default();
let formatted_code = if first_line.starts_with("# flags:")
&& first_line.contains("--line-ranges=")
{
let line_index = LineIndex::from_source_text(&content);
let ranges = first_line
.split_ascii_whitespace()
.filter_map(|chunk| {
let (_, lines) = chunk.split_once("--line-ranges=")?;
let (lower, upper) = lines.split_once('-')?;
let lower = lower
.parse::<OneIndexed>()
.expect("Expected a valid line number");
let upper = upper
.parse::<OneIndexed>()
.expect("Expected a valid line number");
let range_start = line_index.line_start(lower, &content);
let range_end = line_index.line_end(upper, &content);
Some(TextRange::new(range_start, range_end))
})
.rev();
let mut formatted_code = content.clone();
for range in ranges {
let formatted =
format_range(&content, range, options.clone()).unwrap_or_else(|err| {
panic!(
"Range-formatting of {} to succeed but encountered error {err}",
input_path.display()
)
});
let range = formatted.source_range();
formatted_code.replace_range(Range::<usize>::from(range), formatted.as_code());
}
// We can't do stability checks for range formatting because we don't know the updated rangs.
formatted_code
} else {
let printed = format_module_source(&content, options.clone()).unwrap_or_else(|err| {
panic!(
"Formatting of {} to succeed but encountered error {err}",
input_path.display()
)
});
let formatted_code = printed.into_code();
ensure_stability_when_formatting_twice(&formatted_code, &options, input_path);
formatted_code
};
let extension = input_path
.extension()
.expect("Test file to have py or pyi extension")
.to_string_lossy();
let expected_path = input_path.with_extension(format!("{extension}.expect"));
let expected_output = fs::read_to_string(&expected_path)
.unwrap_or_else(|_| panic!("Expected Black output file '{expected_path:?}' to exist"));
let unsupported_syntax_errors =
ensure_unchanged_ast(&content, &formatted_code, &options, input_path);
if formatted_code == expected_output {
// Black and Ruff formatting matches. Delete any existing snapshot files because the Black output
// already perfectly captures the expected output.
// The following code mimics insta's logic generating the snapshot name for a test.
let workspace_path = std::env::var("CARGO_MANIFEST_DIR").unwrap();
let mut components = input_path.components().rev();
let file_name = components.next().unwrap();
let test_suite = components.next().unwrap();
let snapshot_name = format!(
"black_compatibility@{}__{}.snap",
test_suite.as_os_str().to_string_lossy(),
file_name.as_os_str().to_string_lossy()
);
let snapshot_path = Path::new(&workspace_path)
.join("tests/snapshots")
.join(snapshot_name);
if snapshot_path.exists() && snapshot_path.is_file() {
// SAFETY: This is a convenience feature. That's why we don't want to abort
// when deleting a no longer needed snapshot fails.
fs::remove_file(&snapshot_path).ok();
}
let new_snapshot_path = snapshot_path.with_extension("snap.new");
if new_snapshot_path.exists() && new_snapshot_path.is_file() {
// SAFETY: This is a convenience feature. That's why we don't want to abort
// when deleting a no longer needed snapshot fails.
fs::remove_file(&new_snapshot_path).ok();
}
} else {
// Black and Ruff have different formatting. Write out a snapshot that covers the differences
// today.
let mut snapshot = String::new();
write!(snapshot, "{}", Header::new("Input")).unwrap();
write!(snapshot, "{}", CodeFrame::new("python", &content)).unwrap();
write!(snapshot, "{}", Header::new("Black Differences")).unwrap();
let diff = TextDiff::from_lines(expected_output.as_str(), &formatted_code)
.unified_diff()
.header("Black", "Ruff")
.to_string();
write!(snapshot, "{}", CodeFrame::new("diff", &diff)).unwrap();
write!(snapshot, "{}", Header::new("Ruff Output")).unwrap();
write!(snapshot, "{}", CodeFrame::new("python", &formatted_code)).unwrap();
write!(snapshot, "{}", Header::new("Black Output")).unwrap();
write!(snapshot, "{}", CodeFrame::new("python", &expected_output)).unwrap();
if !unsupported_syntax_errors.is_empty() {
write!(snapshot, "{}", Header::new("New Unsupported Syntax Errors")).unwrap();
writeln!(
snapshot,
"{}",
DisplayDiagnostics::new(
&DummyFileResolver,
&DisplayDiagnosticConfig::default().format(DiagnosticFormat::Full),
&unsupported_syntax_errors
)
)
.unwrap();
}
insta::with_settings!({
omit_expression => true,
input_file => input_path,
prepend_module_to_snapshot => false,
}, {
insta::assert_snapshot!(snapshot);
});
}
};
insta::glob!(
"../resources",
"test/fixtures/black/**/*.{py,pyi}",
test_file
);
}
#[test]
fn format() {
let test_file = |input_path: &Path| {
let content = fs::read_to_string(input_path).unwrap();
let mut snapshot = format!("## Input\n{}", CodeFrame::new("python", &content));
let options_path = input_path.with_extension("options.json");
if let Ok(options_file) = fs::File::open(&options_path) {
let reader = BufReader::new(options_file);
let options: Vec<PyFormatOptions> =
serde_json::from_reader(reader).unwrap_or_else(|_| {
panic!("Expected option file {options_path:?} to be a valid Json file")
});
writeln!(snapshot, "## Outputs").unwrap();
for (i, options) in options.into_iter().enumerate() {
let (formatted_code, unsupported_syntax_errors) =
format_file(&content, &options, input_path);
writeln!(
snapshot,
"### Output {}\n{}{}",
i + 1,
CodeFrame::new("", &DisplayPyOptions(&options)),
CodeFrame::new("python", &formatted_code)
)
.unwrap();
if options.preview().is_enabled() {
continue;
}
// We want to capture the differences in the preview style in our fixtures
let options_preview = options.with_preview(PreviewMode::Enabled);
let (formatted_preview, _) = format_file(&content, &options_preview, input_path);
if formatted_code != formatted_preview {
// Having both snapshots makes it hard to see the difference, so we're keeping only
// diff.
writeln!(
snapshot,
"#### Preview changes\n{}",
CodeFrame::new(
"diff",
TextDiff::from_lines(&formatted_code, &formatted_preview)
.unified_diff()
.header("Stable", "Preview")
)
)
.unwrap();
}
if !unsupported_syntax_errors.is_empty() {
writeln!(
snapshot,
"### Unsupported Syntax Errors\n{}",
DisplayDiagnostics::new(
&DummyFileResolver,
&DisplayDiagnosticConfig::default().format(DiagnosticFormat::Full),
&unsupported_syntax_errors
)
)
.unwrap();
}
}
} else {
// We want to capture the differences in the preview style in our fixtures
let options = PyFormatOptions::from_extension(input_path);
let (formatted_code, unsupported_syntax_errors) =
format_file(&content, &options, input_path);
let options_preview = options.with_preview(PreviewMode::Enabled);
let (formatted_preview, _) = format_file(&content, &options_preview, input_path);
if formatted_code == formatted_preview {
writeln!(
snapshot,
"## Output\n{}",
CodeFrame::new("python", &formatted_code)
)
.unwrap();
} else {
// Having both snapshots makes it hard to see the difference, so we're keeping only
// diff.
writeln!(
snapshot,
"## Output\n{}\n## Preview changes\n{}",
CodeFrame::new("python", &formatted_code),
CodeFrame::new(
"diff",
TextDiff::from_lines(&formatted_code, &formatted_preview)
.unified_diff()
.header("Stable", "Preview")
)
)
.unwrap();
}
if !unsupported_syntax_errors.is_empty() {
writeln!(
snapshot,
"## Unsupported Syntax Errors\n{}",
DisplayDiagnostics::new(
&DummyFileResolver,
&DisplayDiagnosticConfig::default().format(DiagnosticFormat::Full),
&unsupported_syntax_errors
)
)
.unwrap();
}
}
insta::with_settings!({
omit_expression => true,
input_file => input_path,
prepend_module_to_snapshot => false,
}, {
insta::assert_snapshot!(snapshot);
});
};
insta::glob!(
"../resources",
"test/fixtures/ruff/**/*.{py,pyi}",
test_file
);
}
fn format_file(
source: &str,
options: &PyFormatOptions,
input_path: &Path,
) -> (String, Vec<Diagnostic>) {
let (unformatted, formatted_code) = if source.contains("<RANGE_START>") {
let mut content = source.to_string();
let without_markers = content
.replace("<RANGE_START>", "")
.replace("<RANGE_END>", "");
while let Some(range_start_marker) = content.find("<RANGE_START>") {
// Remove the start marker
content.replace_range(
range_start_marker..range_start_marker + "<RANGE_START>".len(),
"",
);
let range_end_marker = content[range_start_marker..]
.find("<RANGE_END>")
.expect("Matching <RANGE_END> marker for <RANGE_START> to exist")
+ range_start_marker;
content.replace_range(range_end_marker..range_end_marker + "<RANGE_END>".len(), "");
// Replace all other markers to get a valid Python input
let format_input = content
.replace("<RANGE_START>", "")
.replace("<RANGE_END>", "");
let range = TextRange::new(
TextSize::try_from(range_start_marker).unwrap(),
TextSize::try_from(range_end_marker).unwrap(),
);
let formatted =
format_range(&format_input, range, options.clone()).unwrap_or_else(|err| {
panic!(
"Range-formatting of {} to succeed but encountered error {err}",
input_path.display()
)
});
content.replace_range(
Range::<usize>::from(formatted.source_range()),
formatted.as_code(),
);
}
(Cow::Owned(without_markers), content)
} else {
let printed = format_module_source(source, options.clone()).unwrap_or_else(|err| {
panic!(
"Formatting `{input_path} was expected to succeed but it failed: {err}",
input_path = input_path.display()
)
});
let formatted_code = printed.into_code();
ensure_stability_when_formatting_twice(&formatted_code, options, input_path);
(Cow::Borrowed(source), formatted_code)
};
let unsupported_syntax_errors =
ensure_unchanged_ast(&unformatted, &formatted_code, options, input_path);
(formatted_code, unsupported_syntax_errors)
}
/// Format another time and make sure that there are no changes anymore
fn ensure_stability_when_formatting_twice(
formatted_code: &str,
options: &PyFormatOptions,
input_path: &Path,
) {
let reformatted = match format_module_source(formatted_code, options.clone()) {
Ok(reformatted) => reformatted,
Err(err) => {
panic!(
"Expected formatted code of {} to be valid syntax: {err}:\
\n---\n{formatted_code}---\n",
input_path.display()
);
}
};
if reformatted.as_code() != formatted_code {
let diff = TextDiff::from_lines(formatted_code, reformatted.as_code())
.unified_diff()
.header("Formatted once", "Formatted twice")
.to_string();
panic!(
r#"Reformatting the formatted code of {input_path} a second time resulted in formatting changes.
Options:
{options}
---
{diff}---
Formatted once:
---
{formatted_code}---
Formatted twice:
---
{reformatted}---"#,
input_path = input_path.display(),
options = &DisplayPyOptions(options),
reformatted = reformatted.as_code(),
);
}
}
/// Ensure that formatting doesn't change the AST and doesn't introduce any new unsupported syntax errors.
///
/// Like Black, there are a few exceptions to this "invariant" which are encoded in
/// [`NormalizedMod`] and related structs. Namely, formatting can change indentation within strings,
/// and can also flatten tuples within `del` statements.
///
/// Returns any new [`UnsupportedSyntaxError`]s in the formatted code as [`Diagnostic`]s for
/// snapshotting.
///
/// As noted in the sub-diagnostic message, new syntax errors should only be accepted when they are
/// the result of an existing syntax error in the input. For example, the formatter knows that
/// escapes in f-strings are only allowed after Python 3.12, so it can replace escaped quotes with
/// reused outer quote characters, which are also valid after 3.12, even if the configured Python
/// version is lower. Such cases disrupt the fingerprint filter because the syntax error, and thus
/// its fingerprint, is different from the input syntax error. More typical cases like using a
/// t-string before 3.14 will be filtered out and not included in snapshots.
fn ensure_unchanged_ast(
unformatted_code: &str,
formatted_code: &str,
options: &PyFormatOptions,
input_path: &Path,
) -> Vec<Diagnostic> {
let source_type = options.source_type();
// Parse the unformatted code.
let unformatted_parsed = parse(
unformatted_code,
ParseOptions::from(source_type).with_target_version(options.target_version()),
)
.expect("Unformatted code to be valid syntax");
let unformatted_unsupported_syntax_errors =
collect_unsupported_syntax_errors(&unformatted_parsed);
let mut unformatted_ast = unformatted_parsed.into_syntax();
Normalizer.visit_module(&mut unformatted_ast);
let unformatted_ast = ComparableMod::from(&unformatted_ast);
// Parse the formatted code.
let formatted_parsed = parse(
formatted_code,
ParseOptions::from(source_type).with_target_version(options.target_version()),
)
.expect("Formatted code to be valid syntax");
// Assert that there are no new unsupported syntax errors
let mut formatted_unsupported_syntax_errors =
collect_unsupported_syntax_errors(&formatted_parsed);
formatted_unsupported_syntax_errors
.retain(|fingerprint, _| !unformatted_unsupported_syntax_errors.contains_key(fingerprint));
let file = SourceFileBuilder::new(
input_path.file_name().unwrap().to_string_lossy(),
formatted_code,
)
.finish();
let diagnostics = formatted_unsupported_syntax_errors
.values()
.map(|error| {
let mut diag = Diagnostic::new(DiagnosticId::InvalidSyntax, Severity::Error, error);
let span = Span::from(file.clone()).with_range(error.range());
diag.annotate(Annotation::primary(span));
let sub = SubDiagnostic::new(
SubDiagnosticSeverity::Warning,
"Only accept new syntax errors if they are also present in the input. \
The formatter should not introduce syntax errors.",
);
diag.sub(sub);
diag
})
.collect::<Vec<_>>();
let mut formatted_ast = formatted_parsed.into_syntax();
Normalizer.visit_module(&mut formatted_ast);
let formatted_ast = ComparableMod::from(&formatted_ast);
if formatted_ast != unformatted_ast {
let diff = TextDiff::from_lines(
&format!("{unformatted_ast:#?}"),
&format!("{formatted_ast:#?}"),
)
.unified_diff()
.header("Unformatted", "Formatted")
.to_string();
panic!(
r#"Reformatting the unformatted code of {} resulted in AST changes.
---
{diff}
"#,
input_path.display(),
);
}
diagnostics
}
struct Header<'a> {
title: &'a str,
}
impl<'a> Header<'a> {
fn new(title: &'a str) -> Self {
Self { title }
}
}
impl std::fmt::Display for Header<'_> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
writeln!(f, "## {}", self.title)?;
writeln!(f)
}
}
struct CodeFrame<'a> {
language: &'a str,
code: &'a dyn std::fmt::Display,
}
impl<'a> CodeFrame<'a> {
fn new(language: &'a str, code: &'a dyn std::fmt::Display) -> Self {
Self { language, code }
}
}
impl std::fmt::Display for CodeFrame<'_> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
writeln!(f, "```{}", self.language)?;
write!(f, "{}", self.code)?;
writeln!(f, "```")?;
writeln!(f)
}
}
struct DisplayPyOptions<'a>(&'a PyFormatOptions);
impl fmt::Display for DisplayPyOptions<'_> {
fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
writeln!(
f,
r#"indent-style = {indent_style}
line-width = {line_width}
indent-width = {indent_width}
quote-style = {quote_style:?}
line-ending = {line_ending:?}
magic-trailing-comma = {magic_trailing_comma:?}
docstring-code = {docstring_code:?}
docstring-code-line-width = {docstring_code_line_width:?}
preview = {preview:?}
target_version = {target_version}
source_type = {source_type:?}"#,
indent_style = self.0.indent_style(),
indent_width = self.0.indent_width().value(),
line_width = self.0.line_width().value(),
quote_style = self.0.quote_style(),
line_ending = self.0.line_ending(),
magic_trailing_comma = self.0.magic_trailing_comma(),
docstring_code = self.0.docstring_code(),
docstring_code_line_width = self.0.docstring_code_line_width(),
preview = self.0.preview(),
target_version = self.0.target_version(),
source_type = self.0.source_type()
)
}
}
/// A visitor to collect a sequence of node IDs for fingerprinting [`UnsupportedSyntaxError`]s.
///
/// It visits each statement in the AST in source order and saves its range. The index of the node
/// enclosing a syntax error's range can then be retrieved with the `node_id` method. This `node_id`
/// should be stable across formatting runs since the formatter won't add or remove statements.
struct StmtVisitor {
nodes: Vec<TextRange>,
}
impl StmtVisitor {
fn new(parsed: &Parsed<Mod>) -> Self {
let mut visitor = Self { nodes: Vec::new() };
visitor.visit_mod(parsed.syntax());
visitor
}
/// Return the index of the statement node that contains `range`.
fn node_id(&self, range: TextRange) -> usize {
self.nodes
.iter()
.enumerate()
.filter(|(_, node)| node.contains_range(range))
.min_by_key(|(_, node)| node.len())
.expect("Expected an enclosing node in the AST")
.0
}
}
impl<'a> SourceOrderVisitor<'a> for StmtVisitor {
fn visit_stmt(&mut self, stmt: &'a ruff_python_ast::Stmt) {
self.nodes.push(stmt.range());
ruff_python_ast::visitor::source_order::walk_stmt(self, stmt);
}
}
/// Collects the unsupported syntax errors and assigns a unique hash to each error.
fn collect_unsupported_syntax_errors(
parsed: &Parsed<Mod>,
) -> FxHashMap<u64, UnsupportedSyntaxError> {
let mut collected = FxHashMap::default();
if parsed.unsupported_syntax_errors().is_empty() {
return collected;
}
let visitor = StmtVisitor::new(parsed);
for error in parsed.unsupported_syntax_errors() {
let node_id = visitor.node_id(error.range);
let mut error_fingerprint = fingerprint_unsupported_syntax_error(error, node_id, 0);
// Make sure that we do not get a fingerprint that is already in use
// by adding in the previously generated one.
loop {
match collected.entry(error_fingerprint) {
Entry::Occupied(_) => {
error_fingerprint =
fingerprint_unsupported_syntax_error(error, node_id, error_fingerprint);
}
Entry::Vacant(entry) => {
entry.insert(error.clone());
break;
}
}
}
}
collected
}
fn fingerprint_unsupported_syntax_error(
error: &UnsupportedSyntaxError,
node_id: usize,
salt: u64,
) -> u64 {
let mut hasher = DefaultHasher::new();
let UnsupportedSyntaxError {
kind,
target_version,
// Don't hash the range because the location between the formatted and unformatted code
// is likely to be different
range: _,
} = error;
salt.hash(&mut hasher);
kind.hash(&mut hasher);
target_version.hash(&mut hasher);
node_id.hash(&mut hasher);
hasher.finish()
}