use crate::normalizer::Normalizer; use itertools::Itertools; use ruff_formatter::FormatOptions; use ruff_python_ast::comparable::ComparableMod; use ruff_python_formatter::{PreviewMode, PyFormatOptions, format_module_source, format_range}; use ruff_python_parser::{ParseOptions, UnsupportedSyntaxError, parse}; use ruff_source_file::{LineIndex, OneIndexed}; use ruff_text_size::{Ranged, TextRange, TextSize}; use rustc_hash::FxHashMap; use similar::TextDiff; use std::borrow::Cow; use std::collections::hash_map::Entry; use std::fmt::{Formatter, Write}; use std::hash::{DefaultHasher, Hash, Hasher}; use std::io::BufReader; use std::ops::Range; use std::path::Path; use std::{fmt, fs}; mod normalizer; #[test] fn black_compatibility() { let test_file = |input_path: &Path| { let content = fs::read_to_string(input_path).unwrap(); let options_path = input_path.with_extension("options.json"); let options: PyFormatOptions = if let Ok(options_file) = fs::File::open(&options_path) { let reader = BufReader::new(options_file); serde_json::from_reader(reader).unwrap_or_else(|_| { panic!("Expected option file {options_path:?} to be a valid Json file") }) } else { PyFormatOptions::from_extension(input_path) }; let first_line = content.lines().next().unwrap_or_default(); let formatted_code = if first_line.starts_with("# flags:") && first_line.contains("--line-ranges=") { let line_index = LineIndex::from_source_text(&content); let ranges = first_line .split_ascii_whitespace() .filter_map(|chunk| { let (_, lines) = chunk.split_once("--line-ranges=")?; let (lower, upper) = lines.split_once('-')?; let lower = lower .parse::() .expect("Expected a valid line number"); let upper = upper .parse::() .expect("Expected a valid line number"); let range_start = line_index.line_start(lower, &content); let range_end = line_index.line_end(upper, &content); Some(TextRange::new(range_start, range_end)) }) .rev(); let mut formatted_code = content.clone(); for range in ranges { let formatted = format_range(&content, range, options.clone()).unwrap_or_else(|err| { panic!( "Range-formatting of {} to succeed but encountered error {err}", input_path.display() ) }); let range = formatted.source_range(); formatted_code.replace_range(Range::::from(range), formatted.as_code()); } // We can't do stability checks for range formatting because we don't know the updated rangs. formatted_code } else { let printed = format_module_source(&content, options.clone()).unwrap_or_else(|err| { panic!( "Formatting of {} to succeed but encountered error {err}", input_path.display() ) }); let formatted_code = printed.into_code(); ensure_stability_when_formatting_twice(&formatted_code, &options, input_path); formatted_code }; let extension = input_path .extension() .expect("Test file to have py or pyi extension") .to_string_lossy(); let expected_path = input_path.with_extension(format!("{extension}.expect")); let expected_output = fs::read_to_string(&expected_path) .unwrap_or_else(|_| panic!("Expected Black output file '{expected_path:?}' to exist")); ensure_unchanged_ast(&content, &formatted_code, &options, input_path); if formatted_code == expected_output { // Black and Ruff formatting matches. Delete any existing snapshot files because the Black output // already perfectly captures the expected output. // The following code mimics insta's logic generating the snapshot name for a test. let workspace_path = std::env::var("CARGO_MANIFEST_DIR").unwrap(); let mut components = input_path.components().rev(); let file_name = components.next().unwrap(); let test_suite = components.next().unwrap(); let snapshot_name = format!( "black_compatibility@{}__{}.snap", test_suite.as_os_str().to_string_lossy(), file_name.as_os_str().to_string_lossy() ); let snapshot_path = Path::new(&workspace_path) .join("tests/snapshots") .join(snapshot_name); if snapshot_path.exists() && snapshot_path.is_file() { // SAFETY: This is a convenience feature. That's why we don't want to abort // when deleting a no longer needed snapshot fails. fs::remove_file(&snapshot_path).ok(); } let new_snapshot_path = snapshot_path.with_extension("snap.new"); if new_snapshot_path.exists() && new_snapshot_path.is_file() { // SAFETY: This is a convenience feature. That's why we don't want to abort // when deleting a no longer needed snapshot fails. fs::remove_file(&new_snapshot_path).ok(); } } else { // Black and Ruff have different formatting. Write out a snapshot that covers the differences // today. let mut snapshot = String::new(); write!(snapshot, "{}", Header::new("Input")).unwrap(); write!(snapshot, "{}", CodeFrame::new("python", &content)).unwrap(); write!(snapshot, "{}", Header::new("Black Differences")).unwrap(); let diff = TextDiff::from_lines(expected_output.as_str(), &formatted_code) .unified_diff() .header("Black", "Ruff") .to_string(); write!(snapshot, "{}", CodeFrame::new("diff", &diff)).unwrap(); write!(snapshot, "{}", Header::new("Ruff Output")).unwrap(); write!(snapshot, "{}", CodeFrame::new("python", &formatted_code)).unwrap(); write!(snapshot, "{}", Header::new("Black Output")).unwrap(); write!(snapshot, "{}", CodeFrame::new("python", &expected_output)).unwrap(); insta::with_settings!({ omit_expression => true, input_file => input_path, prepend_module_to_snapshot => false, }, { insta::assert_snapshot!(snapshot); }); } }; insta::glob!( "../resources", "test/fixtures/black/**/*.{py,pyi}", test_file ); } #[test] fn format() { let test_file = |input_path: &Path| { let content = fs::read_to_string(input_path).unwrap(); let mut snapshot = format!("## Input\n{}", CodeFrame::new("python", &content)); let options_path = input_path.with_extension("options.json"); if let Ok(options_file) = fs::File::open(&options_path) { let reader = BufReader::new(options_file); let options: Vec = serde_json::from_reader(reader).unwrap_or_else(|_| { panic!("Expected option file {options_path:?} to be a valid Json file") }); writeln!(snapshot, "## Outputs").unwrap(); for (i, options) in options.into_iter().enumerate() { let formatted_code = format_file(&content, &options, input_path); writeln!( snapshot, "### Output {}\n{}{}", i + 1, CodeFrame::new("", &DisplayPyOptions(&options)), CodeFrame::new("python", &formatted_code) ) .unwrap(); if options.preview().is_enabled() { continue; } // We want to capture the differences in the preview style in our fixtures let options_preview = options.with_preview(PreviewMode::Enabled); let formatted_preview = format_file(&content, &options_preview, input_path); if formatted_code != formatted_preview { // Having both snapshots makes it hard to see the difference, so we're keeping only // diff. writeln!( snapshot, "#### Preview changes\n{}", CodeFrame::new( "diff", TextDiff::from_lines(&formatted_code, &formatted_preview) .unified_diff() .header("Stable", "Preview") ) ) .unwrap(); } } } else { // We want to capture the differences in the preview style in our fixtures let options = PyFormatOptions::from_extension(input_path); let formatted_code = format_file(&content, &options, input_path); let options_preview = options.with_preview(PreviewMode::Enabled); let formatted_preview = format_file(&content, &options_preview, input_path); if formatted_code == formatted_preview { writeln!( snapshot, "## Output\n{}", CodeFrame::new("python", &formatted_code) ) .unwrap(); } else { // Having both snapshots makes it hard to see the difference, so we're keeping only // diff. writeln!( snapshot, "## Output\n{}\n## Preview changes\n{}", CodeFrame::new("python", &formatted_code), CodeFrame::new( "diff", TextDiff::from_lines(&formatted_code, &formatted_preview) .unified_diff() .header("Stable", "Preview") ) ) .unwrap(); } } insta::with_settings!({ omit_expression => true, input_file => input_path, prepend_module_to_snapshot => false, }, { insta::assert_snapshot!(snapshot); }); }; insta::glob!( "../resources", "test/fixtures/ruff/**/*.{py,pyi}", test_file ); } fn format_file(source: &str, options: &PyFormatOptions, input_path: &Path) -> String { let (unformatted, formatted_code) = if source.contains("") { let mut content = source.to_string(); let without_markers = content .replace("", "") .replace("", ""); while let Some(range_start_marker) = content.find("") { // Remove the start marker content.replace_range( range_start_marker..range_start_marker + "".len(), "", ); let range_end_marker = content[range_start_marker..] .find("") .expect("Matching marker for to exist") + range_start_marker; content.replace_range(range_end_marker..range_end_marker + "".len(), ""); // Replace all other markers to get a valid Python input let format_input = content .replace("", "") .replace("", ""); let range = TextRange::new( TextSize::try_from(range_start_marker).unwrap(), TextSize::try_from(range_end_marker).unwrap(), ); let formatted = format_range(&format_input, range, options.clone()).unwrap_or_else(|err| { panic!( "Range-formatting of {} to succeed but encountered error {err}", input_path.display() ) }); content.replace_range( Range::::from(formatted.source_range()), formatted.as_code(), ); } (Cow::Owned(without_markers), content) } else { let printed = format_module_source(source, options.clone()).unwrap_or_else(|err| { panic!( "Formatting `{input_path} was expected to succeed but it failed: {err}", input_path = input_path.display() ) }); let formatted_code = printed.into_code(); ensure_stability_when_formatting_twice(&formatted_code, options, input_path); (Cow::Borrowed(source), formatted_code) }; ensure_unchanged_ast(&unformatted, &formatted_code, options, input_path); formatted_code } /// Format another time and make sure that there are no changes anymore fn ensure_stability_when_formatting_twice( formatted_code: &str, options: &PyFormatOptions, input_path: &Path, ) { let reformatted = match format_module_source(formatted_code, options.clone()) { Ok(reformatted) => reformatted, Err(err) => { panic!( "Expected formatted code of {} to be valid syntax: {err}:\ \n---\n{formatted_code}---\n", input_path.display() ); } }; if reformatted.as_code() != formatted_code { let diff = TextDiff::from_lines(formatted_code, reformatted.as_code()) .unified_diff() .header("Formatted once", "Formatted twice") .to_string(); panic!( r#"Reformatting the formatted code of {input_path} a second time resulted in formatting changes. Options: {options} --- {diff}--- Formatted once: --- {formatted_code}--- Formatted twice: --- {reformatted}---"#, input_path = input_path.display(), options = &DisplayPyOptions(options), reformatted = reformatted.as_code(), ); } } /// Ensure that formatting doesn't change the AST and doesn't introduce any new unsupported syntax errors. /// /// Like Black, there are a few exceptions to this "invariant" which are encoded in /// [`NormalizedMod`] and related structs. Namely, formatting can change indentation within strings, /// and can also flatten tuples within `del` statements. fn ensure_unchanged_ast( unformatted_code: &str, formatted_code: &str, options: &PyFormatOptions, input_path: &Path, ) { let source_type = options.source_type(); // Parse the unformatted code. let unformatted_parsed = parse( unformatted_code, ParseOptions::from(source_type).with_target_version(options.target_version()), ) .expect("Unformatted code to be valid syntax"); let unformatted_unsupported_syntax_errors = collect_unsupported_syntax_errors(unformatted_parsed.unsupported_syntax_errors()); let mut unformatted_ast = unformatted_parsed.into_syntax(); Normalizer.visit_module(&mut unformatted_ast); let unformatted_ast = ComparableMod::from(&unformatted_ast); // Parse the formatted code. let formatted_parsed = parse( formatted_code, ParseOptions::from(source_type).with_target_version(options.target_version()), ) .expect("Formatted code to be valid syntax"); // Assert that there are no new unsupported syntax errors let mut formatted_unsupported_syntax_errors = collect_unsupported_syntax_errors(formatted_parsed.unsupported_syntax_errors()); formatted_unsupported_syntax_errors .retain(|fingerprint, _| !unformatted_unsupported_syntax_errors.contains_key(fingerprint)); if !formatted_unsupported_syntax_errors.is_empty() { let index = LineIndex::from_source_text(formatted_code); panic!( "Formatted code `{}` introduced new unsupported syntax errors:\n---\n{}\n---", input_path.display(), formatted_unsupported_syntax_errors .into_values() .map(|error| { let location = index.line_column(error.start(), formatted_code); format!( "{row}:{col} {error}", row = location.line, col = location.column ) }) .join("\n") ); } let mut formatted_ast = formatted_parsed.into_syntax(); Normalizer.visit_module(&mut formatted_ast); let formatted_ast = ComparableMod::from(&formatted_ast); if formatted_ast != unformatted_ast { let diff = TextDiff::from_lines( &format!("{unformatted_ast:#?}"), &format!("{formatted_ast:#?}"), ) .unified_diff() .header("Unformatted", "Formatted") .to_string(); panic!( r#"Reformatting the unformatted code of {} resulted in AST changes. --- {diff} "#, input_path.display(), ); } } struct Header<'a> { title: &'a str, } impl<'a> Header<'a> { fn new(title: &'a str) -> Self { Self { title } } } impl std::fmt::Display for Header<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { writeln!(f, "## {}", self.title)?; writeln!(f) } } struct CodeFrame<'a> { language: &'a str, code: &'a dyn std::fmt::Display, } impl<'a> CodeFrame<'a> { fn new(language: &'a str, code: &'a dyn std::fmt::Display) -> Self { Self { language, code } } } impl std::fmt::Display for CodeFrame<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result { writeln!(f, "```{}", self.language)?; write!(f, "{}", self.code)?; writeln!(f, "```")?; writeln!(f) } } struct DisplayPyOptions<'a>(&'a PyFormatOptions); impl fmt::Display for DisplayPyOptions<'_> { fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result { writeln!( f, r#"indent-style = {indent_style} line-width = {line_width} indent-width = {indent_width} quote-style = {quote_style:?} line-ending = {line_ending:?} magic-trailing-comma = {magic_trailing_comma:?} docstring-code = {docstring_code:?} docstring-code-line-width = {docstring_code_line_width:?} preview = {preview:?} target_version = {target_version} source_type = {source_type:?}"#, indent_style = self.0.indent_style(), indent_width = self.0.indent_width().value(), line_width = self.0.line_width().value(), quote_style = self.0.quote_style(), line_ending = self.0.line_ending(), magic_trailing_comma = self.0.magic_trailing_comma(), docstring_code = self.0.docstring_code(), docstring_code_line_width = self.0.docstring_code_line_width(), preview = self.0.preview(), target_version = self.0.target_version(), source_type = self.0.source_type() ) } } /// Collects the unsupported syntax errors and assigns a unique hash to each error. fn collect_unsupported_syntax_errors( errors: &[UnsupportedSyntaxError], ) -> FxHashMap { let mut collected = FxHashMap::default(); for error in errors { let mut error_fingerprint = fingerprint_unsupported_syntax_error(error, 0); // Make sure that we do not get a fingerprint that is already in use // by adding in the previously generated one. loop { match collected.entry(error_fingerprint) { Entry::Occupied(_) => { error_fingerprint = fingerprint_unsupported_syntax_error(error, error_fingerprint); } Entry::Vacant(entry) => { entry.insert(error.clone()); break; } } } } collected } fn fingerprint_unsupported_syntax_error(error: &UnsupportedSyntaxError, salt: u64) -> u64 { let mut hasher = DefaultHasher::new(); let UnsupportedSyntaxError { kind, target_version, // Don't hash the range because the location between the formatted and unformatted code // is likely to be different range: _, } = error; salt.hash(&mut hasher); kind.hash(&mut hasher); target_version.hash(&mut hasher); hasher.finish() }