ruff/crates/ruff_python_formatter/tests/fixtures.rs
2025-05-16 13:25:28 +02:00

579 lines
20 KiB
Rust

use crate::normalizer::Normalizer;
use itertools::Itertools;
use ruff_formatter::FormatOptions;
use ruff_python_ast::comparable::ComparableMod;
use ruff_python_formatter::{PreviewMode, PyFormatOptions, format_module_source, format_range};
use ruff_python_parser::{ParseOptions, UnsupportedSyntaxError, parse};
use ruff_source_file::{LineIndex, OneIndexed};
use ruff_text_size::{Ranged, TextRange, TextSize};
use rustc_hash::FxHashMap;
use similar::TextDiff;
use std::borrow::Cow;
use std::collections::hash_map::Entry;
use std::fmt::{Formatter, Write};
use std::hash::{DefaultHasher, Hash, Hasher};
use std::io::BufReader;
use std::ops::Range;
use std::path::Path;
use std::{fmt, fs};
mod normalizer;
/// Formats every Black fixture and compares the result with Black's expected output.
///
/// For each `*.py`/`*.pyi` file matched below `test/fixtures/black`:
///
/// * The formatter options are read from a sibling `<name>.options.json` file if it can be
///   opened; otherwise they are derived from the file extension.
/// * If the first line is a `# flags:` comment containing `--line-ranges=<lower>-<upper>`
///   chunks, only those (one-indexed) line ranges are formatted. Otherwise the whole module is
///   formatted and checked for stability when formatted a second time.
/// * The formatted code must not change the AST (see `ensure_unchanged_ast`).
/// * If the result equals the content of `<name>.<ext>.expect`, any leftover insta snapshot for
///   the fixture is deleted. Otherwise a snapshot documenting the differences is asserted.
///
/// # Panics
///
/// Panics if a fixture or its `.expect` file can't be read, if an options file can't be parsed,
/// if a line range is not a valid number, or if formatting fails.
#[test]
fn black_compatibility() {
    let test_file = |input_path: &Path| {
        let content = fs::read_to_string(input_path).unwrap();

        let options_path = input_path.with_extension("options.json");

        let options: PyFormatOptions = if let Ok(options_file) = fs::File::open(&options_path) {
            let reader = BufReader::new(options_file);
            serde_json::from_reader(reader).unwrap_or_else(|_| {
                panic!("Expected option file {options_path:?} to be a valid Json file")
            })
        } else {
            PyFormatOptions::from_extension(input_path)
        };

        let first_line = content.lines().next().unwrap_or_default();
        let formatted_code = if first_line.starts_with("# flags:")
            && first_line.contains("--line-ranges=")
        {
            let line_index = LineIndex::from_source_text(&content);

            // Every `--line-ranges=<lower>-<upper>` chunk becomes one text range spanning from
            // the start of line `lower` to the end of line `upper`. Chunks that don't have this
            // shape are skipped.
            let ranges = first_line
                .split_ascii_whitespace()
                .filter_map(|chunk| {
                    let (_, lines) = chunk.split_once("--line-ranges=")?;
                    let (lower, upper) = lines.split_once('-')?;

                    let lower = lower
                        .parse::<OneIndexed>()
                        .expect("Expected a valid line number");
                    let upper = upper
                        .parse::<OneIndexed>()
                        .expect("Expected a valid line number");

                    let range_start = line_index.line_start(lower, &content);
                    let range_end = line_index.line_end(upper, &content);

                    Some(TextRange::new(range_start, range_end))
                })
                // Apply the ranges in the reverse order of their appearance. Presumably the
                // flags list the ranges in ascending order, in which case replacing a later range
                // first keeps the offsets of the earlier ranges valid.
                .rev();

            let mut formatted_code = content.clone();

            for range in ranges {
                // Each range is formatted against the original, unmodified source.
                let formatted =
                    format_range(&content, range, options.clone()).unwrap_or_else(|err| {
                        panic!(
                            "Expected range-formatting of {} to succeed but encountered error {err}",
                            input_path.display()
                        )
                    });

                let range = formatted.source_range();

                formatted_code.replace_range(Range::<usize>::from(range), formatted.as_code());
            }

            // We can't do stability checks for range formatting because we don't know the
            // updated ranges.

            formatted_code
        } else {
            let printed = format_module_source(&content, options.clone()).unwrap_or_else(|err| {
                panic!(
                    "Expected formatting of {} to succeed but encountered error {err}",
                    input_path.display()
                )
            });

            let formatted_code = printed.into_code();

            ensure_stability_when_formatting_twice(&formatted_code, &options, input_path);

            formatted_code
        };

        let extension = input_path
            .extension()
            .expect("Test file to have py or pyi extension")
            .to_string_lossy();
        let expected_path = input_path.with_extension(format!("{extension}.expect"));
        let expected_output = fs::read_to_string(&expected_path).unwrap_or_else(|_| {
            panic!(
                "Expected Black output file '{}' to exist",
                expected_path.display()
            )
        });

        ensure_unchanged_ast(&content, &formatted_code, &options, input_path);

        if formatted_code == expected_output {
            // Black and Ruff formatting matches. Delete any existing snapshot files because the
            // Black output already perfectly captures the expected output.
            // The following code mimics insta's logic generating the snapshot name for a test.
            let workspace_path = std::env::var("CARGO_MANIFEST_DIR").unwrap();

            let mut components = input_path.components().rev();
            let file_name = components.next().unwrap();
            let test_suite = components.next().unwrap();

            let snapshot_name = format!(
                "black_compatibility@{}__{}.snap",
                test_suite.as_os_str().to_string_lossy(),
                file_name.as_os_str().to_string_lossy()
            );

            let snapshot_path = Path::new(&workspace_path)
                .join("tests/snapshots")
                .join(snapshot_name);
            let new_snapshot_path = snapshot_path.with_extension("snap.new");

            for stale_path in [&snapshot_path, &new_snapshot_path] {
                // `is_file` is already `false` for paths that don't exist.
                if stale_path.is_file() {
                    // Deleting is a convenience feature. That's why we don't want to abort
                    // when removing a no longer needed snapshot fails.
                    fs::remove_file(stale_path).ok();
                }
            }
        } else {
            // Black and Ruff have different formatting. Write out a snapshot that covers the
            // differences today.
            let mut snapshot = String::new();
            write!(snapshot, "{}", Header::new("Input")).unwrap();
            write!(snapshot, "{}", CodeFrame::new("python", &content)).unwrap();

            write!(snapshot, "{}", Header::new("Black Differences")).unwrap();

            let diff = TextDiff::from_lines(expected_output.as_str(), &formatted_code)
                .unified_diff()
                .header("Black", "Ruff")
                .to_string();

            write!(snapshot, "{}", CodeFrame::new("diff", &diff)).unwrap();

            write!(snapshot, "{}", Header::new("Ruff Output")).unwrap();
            write!(snapshot, "{}", CodeFrame::new("python", &formatted_code)).unwrap();

            write!(snapshot, "{}", Header::new("Black Output")).unwrap();
            write!(snapshot, "{}", CodeFrame::new("python", &expected_output)).unwrap();

            insta::with_settings!({
                omit_expression => true,
                input_file => input_path,
                prepend_module_to_snapshot => false,
            }, {
                insta::assert_snapshot!(snapshot);
            });
        }
    };

    insta::glob!(
        "../resources",
        "test/fixtures/black/**/*.{py,pyi}",
        test_file
    );
}
/// Snapshot-tests the formatter against every `*.py`/`*.pyi` file below `test/fixtures/ruff`.
///
/// Each snapshot starts with the fixture's input. If a sibling `<name>.options.json` file can be
/// opened, it is parsed as a list of options and one output section is written per entry.
/// Otherwise a single output is produced with the options derived from the file extension.
/// Whenever the options don't enable preview already, the fixture is formatted a second time
/// with preview enabled and a diff is appended if the two results differ.
#[test]
fn format() {
    /// Renders the unified diff between the stable and the preview formatting.
    fn preview_diff(stable: &str, preview: &str) -> String {
        TextDiff::from_lines(stable, preview)
            .unified_diff()
            .header("Stable", "Preview")
            .to_string()
    }

    let test_file = |input_path: &Path| {
        let content = fs::read_to_string(input_path).unwrap();

        let mut snapshot = format!("## Input\n{}", CodeFrame::new("python", &content));
        let options_path = input_path.with_extension("options.json");

        match fs::File::open(&options_path) {
            Ok(options_file) => {
                let all_options: Vec<PyFormatOptions> =
                    serde_json::from_reader(BufReader::new(options_file)).unwrap_or_else(|_| {
                        panic!("Expected option file {options_path:?} to be a valid Json file")
                    });

                writeln!(snapshot, "## Outputs").unwrap();

                for (index, options) in all_options.into_iter().enumerate() {
                    let stable = format_file(&content, &options, input_path);

                    writeln!(
                        snapshot,
                        "### Output {}\n{}{}",
                        index + 1,
                        CodeFrame::new("", &DisplayPyOptions(&options)),
                        CodeFrame::new("python", &stable)
                    )
                    .unwrap();

                    // Options that opt into preview already have nothing to compare against.
                    if !options.preview().is_enabled() {
                        // We want to capture the differences in the preview style in our
                        // fixtures.
                        let preview_options = options.with_preview(PreviewMode::Enabled);
                        let preview = format_file(&content, &preview_options, input_path);

                        if stable != preview {
                            // Having both snapshots makes it hard to see the difference, so
                            // we're keeping only the diff.
                            let diff = preview_diff(&stable, &preview);
                            writeln!(
                                snapshot,
                                "#### Preview changes\n{}",
                                CodeFrame::new("diff", &diff)
                            )
                            .unwrap();
                        }
                    }
                }
            }
            Err(_) => {
                // We want to capture the differences in the preview style in our fixtures.
                let options = PyFormatOptions::from_extension(input_path);
                let stable = format_file(&content, &options, input_path);
                let preview_options = options.with_preview(PreviewMode::Enabled);
                let preview = format_file(&content, &preview_options, input_path);

                writeln!(
                    snapshot,
                    "## Output\n{}",
                    CodeFrame::new("python", &stable)
                )
                .unwrap();

                if stable != preview {
                    // Having both snapshots makes it hard to see the difference, so we're
                    // keeping only the diff.
                    let diff = preview_diff(&stable, &preview);
                    writeln!(
                        snapshot,
                        "## Preview changes\n{}",
                        CodeFrame::new("diff", &diff)
                    )
                    .unwrap();
                }
            }
        }

        insta::with_settings!({
            omit_expression => true,
            input_file => input_path,
            prepend_module_to_snapshot => false,
        }, {
            insta::assert_snapshot!(snapshot);
        });
    };

    insta::glob!(
        "../resources",
        "test/fixtures/ruff/**/*.{py,pyi}",
        test_file
    );
}
/// Formats `source` with the given `options` and returns the formatted code.
///
/// If `source` contains `<RANGE_START>` markers, each `<RANGE_START>`…`<RANGE_END>` pair is
/// range-formatted in turn, from the first pair to the last, and the markers are removed from the
/// result. Otherwise the whole module is formatted and the result is checked to be stable when
/// formatted a second time.
///
/// In both cases the formatted code is verified with `ensure_unchanged_ast` against the source
/// (with all markers removed).
///
/// `input_path` is only used for error messages and is passed on to the verification helpers.
///
/// # Panics
///
/// Panics if a `<RANGE_START>` marker has no `<RANGE_END>` marker after it, if formatting fails,
/// or if one of the verification helpers detects a problem.
fn format_file(source: &str, options: &PyFormatOptions, input_path: &Path) -> String {
    const RANGE_START: &str = "<RANGE_START>";
    const RANGE_END: &str = "<RANGE_END>";

    // Removes every range marker to get a valid Python input.
    let strip_markers = |text: &str| text.replace(RANGE_START, "").replace(RANGE_END, "");

    let (unformatted, formatted_code) = if source.contains(RANGE_START) {
        let mut content = source.to_string();
        let without_markers = strip_markers(&content);

        while let Some(range_start) = content.find(RANGE_START) {
            // Remove the start marker
            content.replace_range(range_start..range_start + RANGE_START.len(), "");

            // The search starts where the start marker used to be, so the offset it yields has
            // to be shifted back into `content` coordinates.
            let range_end = content[range_start..]
                .find(RANGE_END)
                .unwrap_or_else(|| {
                    panic!(
                        "Expected a matching <RANGE_END> marker for <RANGE_START> to exist in {}",
                        input_path.display()
                    )
                })
                + range_start;

            content.replace_range(range_end..range_end + RANGE_END.len(), "");

            // Replace all other markers to get a valid Python input. The remaining markers are
            // all located after `range_end`, so the range's offsets are the same in `content`
            // and `format_input`.
            let format_input = strip_markers(&content);

            let range = TextRange::new(
                TextSize::try_from(range_start).unwrap(),
                TextSize::try_from(range_end).unwrap(),
            );

            let formatted =
                format_range(&format_input, range, options.clone()).unwrap_or_else(|err| {
                    panic!(
                        "Expected range-formatting of {} to succeed but encountered error {err}",
                        input_path.display()
                    )
                });

            content.replace_range(
                Range::<usize>::from(formatted.source_range()),
                formatted.as_code(),
            );
        }

        (Cow::Owned(without_markers), content)
    } else {
        let printed = format_module_source(source, options.clone()).unwrap_or_else(|err| {
            panic!(
                "Expected formatting of {} to succeed but encountered error {err}",
                input_path.display()
            )
        });
        let formatted_code = printed.into_code();

        ensure_stability_when_formatting_twice(&formatted_code, options, input_path);

        (Cow::Borrowed(source), formatted_code)
    };

    ensure_unchanged_ast(&unformatted, &formatted_code, options, input_path);

    formatted_code
}
/// Formats the already formatted code once more and verifies that the output stays the same.
///
/// # Panics
///
/// Panics if `formatted_code` can't be formatted, or if the second formatting pass produces a
/// different result than the first one. In the latter case the panic message contains the
/// options, a diff, and both versions of the code.
fn ensure_stability_when_formatting_twice(
    formatted_code: &str,
    options: &PyFormatOptions,
    input_path: &Path,
) {
    let second_pass =
        format_module_source(formatted_code, options.clone()).unwrap_or_else(|err| {
            panic!(
                "Expected formatted code of {} to be valid syntax: {err}:\
                \n---\n{formatted_code}---\n",
                input_path.display()
            )
        });

    let formatted_twice = second_pass.as_code();

    // Nothing to report if the formatter reached a fixed point.
    if formatted_twice == formatted_code {
        return;
    }

    let diff = TextDiff::from_lines(formatted_code, formatted_twice)
        .unified_diff()
        .header("Formatted once", "Formatted twice")
        .to_string();

    panic!(
        r#"Reformatting the formatted code of {input_path} a second time resulted in formatting changes.
Options:
{options}
---
{diff}---
Formatted once:
---
{formatted_code}---
Formatted twice:
---
{reformatted}---"#,
        input_path = input_path.display(),
        options = &DisplayPyOptions(options),
        reformatted = formatted_twice,
    );
}
/// Verifies that formatting neither changed the AST nor introduced new unsupported syntax errors.
///
/// Both the unformatted and the formatted code are parsed with the source type and target version
/// taken from `options`. Like Black, there are a few exceptions to the "same AST" invariant:
/// formatting can change the indentation within strings and can flatten tuples within `del`
/// statements. Both ASTs are passed through [`Normalizer`] before they are compared, which is
/// where such differences are expected to be evened out.
///
/// # Panics
///
/// Panics if either input fails to parse, if the formatted code contains an unsupported syntax
/// error whose fingerprint isn't present for the unformatted code, or if the normalized ASTs
/// differ.
fn ensure_unchanged_ast(
    unformatted_code: &str,
    formatted_code: &str,
    options: &PyFormatOptions,
    input_path: &Path,
) {
    let source_type = options.source_type();
    let parse_options =
        || ParseOptions::from(source_type).with_target_version(options.target_version());

    // Parse the unformatted code.
    let parsed_before =
        parse(unformatted_code, parse_options()).expect("Unformatted code to be valid syntax");
    let errors_before = collect_unsupported_syntax_errors(parsed_before.unsupported_syntax_errors());
    let mut module_before = parsed_before.into_syntax();
    Normalizer.visit_module(&mut module_before);
    let comparable_before = ComparableMod::from(&module_before);

    // Parse the formatted code.
    let parsed_after =
        parse(formatted_code, parse_options()).expect("Formatted code to be valid syntax");

    // Assert that there are no new unsupported syntax errors: only keep the errors whose
    // fingerprint wasn't already reported for the unformatted code.
    let mut new_errors = collect_unsupported_syntax_errors(parsed_after.unsupported_syntax_errors());
    new_errors.retain(|fingerprint, _| !errors_before.contains_key(fingerprint));

    if !new_errors.is_empty() {
        let index = LineIndex::from_source_text(formatted_code);
        let report = new_errors
            .into_values()
            .map(|error| {
                let location = index.line_column(error.start(), formatted_code);
                format!(
                    "{row}:{col} {error}",
                    row = location.line,
                    col = location.column
                )
            })
            .join("\n");

        panic!(
            "Formatted code `{}` introduced new unsupported syntax errors:\n---\n{}\n---",
            input_path.display(),
            report
        );
    }

    let mut module_after = parsed_after.into_syntax();
    Normalizer.visit_module(&mut module_after);
    let comparable_after = ComparableMod::from(&module_after);

    if comparable_after == comparable_before {
        return;
    }

    // Diff the pretty-printed debug representations of both ASTs.
    let debug_before = format!("{comparable_before:#?}");
    let debug_after = format!("{comparable_after:#?}");
    let diff = TextDiff::from_lines(&debug_before, &debug_after)
        .unified_diff()
        .header("Unformatted", "Formatted")
        .to_string();

    panic!(
        r#"Reformatting the unformatted code of {} resulted in AST changes.
---
{diff}
"#,
        input_path.display(),
    );
}
/// A second-level Markdown heading used to separate the sections of a snapshot.
///
/// Displays as `## <title>` followed by an empty line.
struct Header<'a> {
    /// The text shown after the `## ` prefix.
    title: &'a str,
}

impl<'a> Header<'a> {
    /// Creates a heading with the given `title`.
    fn new(title: &'a str) -> Self {
        Header { title }
    }
}

impl fmt::Display for Header<'_> {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        // The embedded `\n` plus the one appended by `writeln!` terminate the heading line and
        // add the empty line after it.
        writeln!(f, "## {title}\n", title = self.title)
    }
}
/// A fenced Markdown code block.
///
/// Displays as an opening fence tagged with `language`, the displayed `code`, a closing fence
/// and an empty line. The code is written as is: no newline is inserted between the code and the
/// closing fence.
struct CodeFrame<'a> {
    /// The language tag written directly after the opening fence (may be empty).
    language: &'a str,
    /// The content of the code block.
    code: &'a dyn fmt::Display,
}

impl<'a> CodeFrame<'a> {
    /// Creates a code block for `code` that is tagged with `language`.
    fn new(language: &'a str, code: &'a dyn fmt::Display) -> Self {
        CodeFrame { language, code }
    }
}

impl fmt::Display for CodeFrame<'_> {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let CodeFrame { language, code } = self;

        // Opening fence, code, closing fence, and the empty line that `writeln!` completes.
        writeln!(f, "```{language}\n{code}```\n")
    }
}
/// Display adapter that renders the wrapped [`PyFormatOptions`] as `key = value` lines, one
/// option per line.
struct DisplayPyOptions<'a>(&'a PyFormatOptions);

impl fmt::Display for DisplayPyOptions<'_> {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let options = self.0;

        // Read every option up front so that the template below can capture them by name.
        let indent_style = options.indent_style();
        let line_width = options.line_width().value();
        let indent_width = options.indent_width().value();
        let quote_style = options.quote_style();
        let line_ending = options.line_ending();
        let magic_trailing_comma = options.magic_trailing_comma();
        let docstring_code = options.docstring_code();
        let docstring_code_line_width = options.docstring_code_line_width();
        let preview = options.preview();
        let target_version = options.target_version();
        let source_type = options.source_type();

        writeln!(
            f,
            r#"indent-style = {indent_style}
line-width = {line_width}
indent-width = {indent_width}
quote-style = {quote_style:?}
line-ending = {line_ending:?}
magic-trailing-comma = {magic_trailing_comma:?}
docstring-code = {docstring_code:?}
docstring-code-line-width = {docstring_code_line_width:?}
preview = {preview:?}
target_version = {target_version}
source_type = {source_type:?}"#
        )
    }
}
/// Collects the unsupported syntax errors and assigns a unique hash to each error.
fn collect_unsupported_syntax_errors(
errors: &[UnsupportedSyntaxError],
) -> FxHashMap<u64, UnsupportedSyntaxError> {
let mut collected = FxHashMap::default();
for error in errors {
let mut error_fingerprint = fingerprint_unsupported_syntax_error(error, 0);
// Make sure that we do not get a fingerprint that is already in use
// by adding in the previously generated one.
loop {
match collected.entry(error_fingerprint) {
Entry::Occupied(_) => {
error_fingerprint =
fingerprint_unsupported_syntax_error(error, error_fingerprint);
}
Entry::Vacant(entry) => {
entry.insert(error.clone());
break;
}
}
}
}
collected
}
/// Computes a hash over `salt` and the `kind` and `target_version` of `error`.
///
/// The error's range is intentionally left out of the hash.
fn fingerprint_unsupported_syntax_error(error: &UnsupportedSyntaxError, salt: u64) -> u64 {
    // Destructure all fields so that each one is either hashed or explicitly ignored.
    let UnsupportedSyntaxError {
        kind,
        target_version,
        // Don't hash the range because the location between the formatted and unformatted code
        // is likely to be different
        range: _,
    } = error;

    let mut state = DefaultHasher::new();

    salt.hash(&mut state);
    kind.hash(&mut state);
    target_version.hash(&mut state);

    state.finish()
}