ruff/crates/ruff_python_formatter/tests/fixtures.rs

use crate::normalizer::Normalizer;
use ruff_db::diagnostic::{
    Annotation, Diagnostic, DiagnosticFormat, DiagnosticId, DisplayDiagnosticConfig,
    DisplayDiagnostics, DummyFileResolver, Severity, Span, SubDiagnostic, SubDiagnosticSeverity,
};
use ruff_formatter::FormatOptions;
use ruff_python_ast::Mod;
use ruff_python_ast::comparable::ComparableMod;
use ruff_python_ast::visitor::source_order::SourceOrderVisitor;
use ruff_python_formatter::{PreviewMode, PyFormatOptions, format_module_source, format_range};
use ruff_python_parser::{ParseOptions, Parsed, UnsupportedSyntaxError, parse};
use ruff_source_file::{LineIndex, OneIndexed, SourceFileBuilder};
use ruff_text_size::{Ranged, TextRange, TextSize};
use rustc_hash::FxHashMap;
use similar::TextDiff;
use std::borrow::Cow;
use std::collections::hash_map::Entry;
use std::fmt::{Formatter, Write};
use std::hash::{DefaultHasher, Hash, Hasher};
use std::io::BufReader;
use std::ops::Range;
use std::path::Path;
use std::{fmt, fs};

mod normalizer;

/// Formats every fixture under `test/fixtures/black` and compares Ruff's output with the
/// output Black produced for the same input (stored next to the input as `<name>.<ext>.expect`).
///
/// * If both outputs are identical, any stale snapshot for the fixture is deleted.
/// * Otherwise a snapshot documenting the input, the diff and both outputs is asserted.
///
/// Options are read from a sibling `<name>.options.json` file when it exists; otherwise they are
/// derived from the file extension.
#[test]
fn black_compatibility() {
    let test_file = |input_path: &Path| {
        let content = fs::read_to_string(input_path).unwrap();

        let options_path = input_path.with_extension("options.json");

        let options: PyFormatOptions = if let Ok(options_file) = fs::File::open(&options_path) {
            let reader = BufReader::new(options_file);
            serde_json::from_reader(reader).unwrap_or_else(|_| {
                panic!("Expected option file {options_path:?} to be a valid Json file")
            })
        } else {
            PyFormatOptions::from_extension(input_path)
        };

        // Black fixtures request range formatting with a `# flags: --line-ranges=A-B` first line.
        let first_line = content.lines().next().unwrap_or_default();
        let formatted_code = if first_line.starts_with("# flags:")
            && first_line.contains("--line-ranges=")
        {
            let line_index = LineIndex::from_source_text(&content);

            // The ranges are applied back to front: every range is computed against the
            // unmodified `content`, so replacing a later range first keeps the offsets of the
            // earlier ones usable (presumably the flags list the ranges in ascending order).
            let ranges = first_line
                .split_ascii_whitespace()
                .filter_map(|chunk| {
                    let (_, lines) = chunk.split_once("--line-ranges=")?;
                    let (lower, upper) = lines.split_once('-')?;

                    let lower = lower
                        .parse::<OneIndexed>()
                        .expect("Expected a valid line number");
                    let upper = upper
                        .parse::<OneIndexed>()
                        .expect("Expected a valid line number");

                    let range_start = line_index.line_start(lower, &content);
                    let range_end = line_index.line_end(upper, &content);

                    Some(TextRange::new(range_start, range_end))
                })
                .rev();

            let mut formatted_code = content.clone();

            for range in ranges {
                let formatted =
                    format_range(&content, range, options.clone()).unwrap_or_else(|err| {
                        panic!(
                            "Expected range-formatting of {} to succeed but encountered error {err}",
                            input_path.display()
                        )
                    });

                // The formatter reports which part of the source its output replaces.
                let source_range = formatted.source_range();

                formatted_code
                    .replace_range(Range::<usize>::from(source_range), formatted.as_code());
            }

            // We can't do stability checks for range formatting because we don't know the updated ranges.

            formatted_code
        } else {
            let printed = format_module_source(&content, options.clone()).unwrap_or_else(|err| {
                panic!(
                    "Expected formatting of {} to succeed but encountered error {err}",
                    input_path.display()
                )
            });

            let formatted_code = printed.into_code();

            ensure_stability_when_formatting_twice(&formatted_code, &options, input_path);

            formatted_code
        };

        let extension = input_path
            .extension()
            .expect("Test file to have py or pyi extension")
            .to_string_lossy();
        let expected_path = input_path.with_extension(format!("{extension}.expect"));
        let expected_output = fs::read_to_string(&expected_path).unwrap_or_else(|_| {
            panic!(
                "Expected Black output file '{}' to exist",
                expected_path.display()
            )
        });

        let unsupported_syntax_errors =
            ensure_unchanged_ast(&content, &formatted_code, &options, input_path);

        if formatted_code == expected_output {
            // Black and Ruff formatting matches. Delete any existing snapshot files because the Black output
            // already perfectly captures the expected output.
            // The following code mimics insta's logic generating the snapshot name for a test.
            let workspace_path = std::env::var("CARGO_MANIFEST_DIR").unwrap();

            let mut components = input_path.components().rev();
            let file_name = components.next().unwrap();
            let test_suite = components.next().unwrap();

            let snapshot_name = format!(
                "black_compatibility@{}__{}.snap",
                test_suite.as_os_str().to_string_lossy(),
                file_name.as_os_str().to_string_lossy()
            );

            let snapshot_path = Path::new(&workspace_path)
                .join("tests/snapshots")
                .join(snapshot_name);
            let new_snapshot_path = snapshot_path.with_extension("snap.new");

            for stale_snapshot in [&snapshot_path, &new_snapshot_path] {
                // `is_file` is already `false` for paths that don't exist.
                if stale_snapshot.is_file() {
                    // Deleting a no longer needed snapshot is a convenience feature. That's why
                    // a failure to remove the file is ignored instead of aborting the test.
                    fs::remove_file(stale_snapshot).ok();
                }
            }
        } else {
            // Black and Ruff have different formatting. Write out a snapshot that covers the differences
            // today.
            let mut snapshot = String::new();
            write!(snapshot, "{}", Header::new("Input")).unwrap();
            write!(snapshot, "{}", CodeFrame::new("python", &content)).unwrap();

            write!(snapshot, "{}", Header::new("Black Differences")).unwrap();

            let diff = TextDiff::from_lines(expected_output.as_str(), &formatted_code)
                .unified_diff()
                .header("Black", "Ruff")
                .to_string();

            write!(snapshot, "{}", CodeFrame::new("diff", &diff)).unwrap();

            write!(snapshot, "{}", Header::new("Ruff Output")).unwrap();
            write!(snapshot, "{}", CodeFrame::new("python", &formatted_code)).unwrap();

            write!(snapshot, "{}", Header::new("Black Output")).unwrap();
            write!(snapshot, "{}", CodeFrame::new("python", &expected_output)).unwrap();

            if !unsupported_syntax_errors.is_empty() {
                write!(snapshot, "{}", Header::new("New Unsupported Syntax Errors")).unwrap();
                writeln!(
                    snapshot,
                    "{}",
                    DisplayDiagnostics::new(
                        &DummyFileResolver,
                        &DisplayDiagnosticConfig::default().format(DiagnosticFormat::Full),
                        &unsupported_syntax_errors
                    )
                )
                .unwrap();
            }

            insta::with_settings!({
                omit_expression => true,
                input_file => input_path,
                prepend_module_to_snapshot => false,
            }, {
                insta::assert_snapshot!(snapshot);
            });
        }
    };

    insta::glob!(
        "../resources",
        "test/fixtures/black/**/*.{py,pyi}",
        test_file
    );
}

/// Formats every fixture under `test/fixtures/ruff` and snapshots the result.
///
/// When a sibling `<name>.options.json` file exists it must contain a JSON array of formatter
/// options; the fixture is formatted once per entry and every output is written to the snapshot.
/// Without an options file the options are derived from the file extension.
///
/// For options that don't already enable the preview style, the fixture is formatted a second
/// time with preview enabled and the diff (if any) is added to the snapshot. New unsupported
/// syntax errors found in the formatted output are appended for every set of options.
#[test]
fn format() {
    let test_file = |input_path: &Path| {
        let content = fs::read_to_string(input_path).unwrap();

        let mut snapshot = format!("## Input\n{}", CodeFrame::new("python", &content));
        let options_path = input_path.with_extension("options.json");

        if let Ok(options_file) = fs::File::open(&options_path) {
            let reader = BufReader::new(options_file);
            let options: Vec<PyFormatOptions> =
                serde_json::from_reader(reader).unwrap_or_else(|_| {
                    panic!("Expected option file {options_path:?} to be a valid Json file")
                });

            writeln!(snapshot, "## Outputs").unwrap();

            for (i, options) in options.into_iter().enumerate() {
                let (formatted_code, unsupported_syntax_errors) =
                    format_file(&content, &options, input_path);

                writeln!(
                    snapshot,
                    "### Output {}\n{}{}",
                    i + 1,
                    CodeFrame::new("", &DisplayPyOptions(&options)),
                    CodeFrame::new("python", &formatted_code)
                )
                .unwrap();

                // Only the preview comparison is skipped when the options already enable preview.
                // The unsupported syntax errors below must still be reported for such options;
                // a `continue` here would silently drop them from the snapshot.
                if !options.preview().is_enabled() {
                    // We want to capture the differences in the preview style in our fixtures
                    let options_preview = options.with_preview(PreviewMode::Enabled);
                    let (formatted_preview, _) =
                        format_file(&content, &options_preview, input_path);

                    if formatted_code != formatted_preview {
                        // Having both snapshots makes it hard to see the difference, so we're keeping only
                        // diff.
                        writeln!(
                            snapshot,
                            "#### Preview changes\n{}",
                            CodeFrame::new(
                                "diff",
                                TextDiff::from_lines(&formatted_code, &formatted_preview)
                                    .unified_diff()
                                    .header("Stable", "Preview")
                            )
                        )
                        .unwrap();
                    }
                }

                if !unsupported_syntax_errors.is_empty() {
                    writeln!(
                        snapshot,
                        "### Unsupported Syntax Errors\n{}",
                        DisplayDiagnostics::new(
                            &DummyFileResolver,
                            &DisplayDiagnosticConfig::default().format(DiagnosticFormat::Full),
                            &unsupported_syntax_errors
                        )
                    )
                    .unwrap();
                }
            }
        } else {
            // We want to capture the differences in the preview style in our fixtures
            let options = PyFormatOptions::from_extension(input_path);
            let (formatted_code, unsupported_syntax_errors) =
                format_file(&content, &options, input_path);

            let options_preview = options.with_preview(PreviewMode::Enabled);
            let (formatted_preview, _) = format_file(&content, &options_preview, input_path);

            if formatted_code == formatted_preview {
                writeln!(
                    snapshot,
                    "## Output\n{}",
                    CodeFrame::new("python", &formatted_code)
                )
                .unwrap();
            } else {
                // Having both snapshots makes it hard to see the difference, so we're keeping only
                // diff.
                writeln!(
                    snapshot,
                    "## Output\n{}\n## Preview changes\n{}",
                    CodeFrame::new("python", &formatted_code),
                    CodeFrame::new(
                        "diff",
                        TextDiff::from_lines(&formatted_code, &formatted_preview)
                            .unified_diff()
                            .header("Stable", "Preview")
                    )
                )
                .unwrap();
            }

            if !unsupported_syntax_errors.is_empty() {
                writeln!(
                    snapshot,
                    "## Unsupported Syntax Errors\n{}",
                    DisplayDiagnostics::new(
                        &DummyFileResolver,
                        &DisplayDiagnosticConfig::default().format(DiagnosticFormat::Full),
                        &unsupported_syntax_errors
                    )
                )
                .unwrap();
            }
        }

        insta::with_settings!({
            omit_expression => true,
            input_file => input_path,
            prepend_module_to_snapshot => false,
        }, {
            insta::assert_snapshot!(snapshot);
        });
    };

    insta::glob!(
        "../resources",
        "test/fixtures/ruff/**/*.{py,pyi}",
        test_file
    );
}

/// Formats `source` with `options` and verifies that formatting left the AST unchanged.
///
/// If the source contains `<RANGE_START>` / `<RANGE_END>` marker pairs, each marked range is
/// range-formatted in turn (in source order) and the markers are removed from the result.
/// Otherwise the entire module is formatted and the output is additionally checked for
/// stability when formatted a second time.
///
/// Returns the formatted code together with the diagnostics that [`ensure_unchanged_ast`]
/// reports for the formatted output.
///
/// # Panics
///
/// Panics if formatting fails, if a `<RANGE_START>` marker has no `<RANGE_END>` marker after it,
/// or if one of the checks performed on the output fails.
fn format_file(
    source: &str,
    options: &PyFormatOptions,
    input_path: &Path,
) -> (String, Vec<Diagnostic>) {
    const RANGE_START: &str = "<RANGE_START>";
    const RANGE_END: &str = "<RANGE_END>";

    let (unformatted, formatted_code) = if source.contains(RANGE_START) {
        let mut content = source.to_string();
        // The marker-free input that the formatted output's AST is compared against.
        let without_markers = content.replace(RANGE_START, "").replace(RANGE_END, "");

        while let Some(range_start_marker) = content.find(RANGE_START) {
            // Remove the start marker
            content.replace_range(
                range_start_marker..range_start_marker + RANGE_START.len(),
                "",
            );

            // The search starts at the (now removed) start marker, so the offset found in the
            // slice has to be translated back into an offset into `content`.
            let range_end_marker = content[range_start_marker..]
                .find(RANGE_END)
                .expect("Matching <RANGE_END> marker for <RANGE_START> to exist")
                + range_start_marker;

            content.replace_range(range_end_marker..range_end_marker + RANGE_END.len(), "");

            // Replace all other markers to get a valid Python input
            let format_input = content.replace(RANGE_START, "").replace(RANGE_END, "");

            let range = TextRange::new(
                TextSize::try_from(range_start_marker).unwrap(),
                TextSize::try_from(range_end_marker).unwrap(),
            );

            let formatted =
                format_range(&format_input, range, options.clone()).unwrap_or_else(|err| {
                    panic!(
                        "Expected range-formatting of {} to succeed but encountered error {err}",
                        input_path.display()
                    )
                });

            // NOTE(review): the reported source range refers to `format_input`, whereas
            // `content` may still contain markers of later ranges. The offsets only agree as
            // long as the reported range ends before the next remaining marker.
            content.replace_range(
                Range::<usize>::from(formatted.source_range()),
                formatted.as_code(),
            );
        }

        (Cow::Owned(without_markers), content)
    } else {
        let printed = format_module_source(source, options.clone()).unwrap_or_else(|err| {
            panic!(
                "Formatting `{input_path}` was expected to succeed but it failed: {err}",
                input_path = input_path.display()
            )
        });
        let formatted_code = printed.into_code();

        ensure_stability_when_formatting_twice(&formatted_code, options, input_path);

        (Cow::Borrowed(source), formatted_code)
    };

    let unsupported_syntax_errors =
        ensure_unchanged_ast(&unformatted, &formatted_code, options, input_path);

    (formatted_code, unsupported_syntax_errors)
}

/// Runs the formatter over already formatted code and checks that the output is unchanged.
///
/// # Panics
///
/// Panics if `formatted_code` can't be formatted, or if the second formatting pass produces
/// output that differs from `formatted_code`. The panic message contains the options, a diff
/// and both versions of the code.
fn ensure_stability_when_formatting_twice(
    formatted_code: &str,
    options: &PyFormatOptions,
    input_path: &Path,
) {
    let second_pass =
        format_module_source(formatted_code, options.clone()).unwrap_or_else(|err| {
            panic!(
                "Expected formatted code of {} to be valid syntax: {err}:\
                    \n---\n{formatted_code}---\n",
                input_path.display()
            )
        });

    let reformatted = second_pass.as_code();

    // Identical output: the formatting is stable and there is nothing to report.
    if reformatted == formatted_code {
        return;
    }

    let diff = TextDiff::from_lines(formatted_code, reformatted)
        .unified_diff()
        .header("Formatted once", "Formatted twice")
        .to_string();

    panic!(
        r#"Reformatting the formatted code of {input_path} a second time resulted in formatting changes.

Options:
{options}
---
{diff}---

Formatted once:
---
{formatted_code}---

Formatted twice:
---
{reformatted}---"#,
        input_path = input_path.display(),
        options = &DisplayPyOptions(options),
    );
}

/// Ensure that formatting doesn't change the AST and doesn't introduce any new unsupported syntax errors.
///
/// Like Black, there are a few exceptions to this "invariant" which are encoded in
/// [`NormalizedMod`] and related structs. Namely, formatting can change indentation within strings,
/// and can also flatten tuples within `del` statements.
///
/// Returns any new [`UnsupportedSyntaxError`]s in the formatted code as [`Diagnostic`]s for
/// snapshotting.
///
/// As noted in the sub-diagnostic message, new syntax errors should only be accepted when they are
/// the result of an existing syntax error in the input. For example, the formatter knows that
/// escapes in f-strings are only allowed after Python 3.12, so it can replace escaped quotes with
/// reused outer quote characters, which are also valid after 3.12, even if the configured Python
/// version is lower. Such cases disrupt the fingerprint filter because the syntax error, and thus
/// its fingerprint, is different from the input syntax error. More typical cases like using a
/// t-string before 3.14 will be filtered out and not included in snapshots.
fn ensure_unchanged_ast(
    unformatted_code: &str,
    formatted_code: &str,
    options: &PyFormatOptions,
    input_path: &Path,
) -> Vec<Diagnostic> {
    let source_type = options.source_type();

    // Parse the unformatted code.
    let unformatted_parsed = parse(
        unformatted_code,
        ParseOptions::from(source_type).with_target_version(options.target_version()),
    )
    .expect("Unformatted code to be valid syntax");

    let unformatted_unsupported_syntax_errors =
        collect_unsupported_syntax_errors(&unformatted_parsed);
    let mut unformatted_ast = unformatted_parsed.into_syntax();

    Normalizer.visit_module(&mut unformatted_ast);
    let unformatted_ast = ComparableMod::from(&unformatted_ast);

    // Parse the formatted code.
    let formatted_parsed = parse(
        formatted_code,
        ParseOptions::from(source_type).with_target_version(options.target_version()),
    )
    .expect("Formatted code to be valid syntax");

    // Assert that there are no new unsupported syntax errors
    let mut formatted_unsupported_syntax_errors =
        collect_unsupported_syntax_errors(&formatted_parsed);

    formatted_unsupported_syntax_errors
        .retain(|fingerprint, _| !unformatted_unsupported_syntax_errors.contains_key(fingerprint));

    let file = SourceFileBuilder::new(
        input_path.file_name().unwrap().to_string_lossy(),
        formatted_code,
    )
    .finish();
    let diagnostics = formatted_unsupported_syntax_errors
        .values()
        .map(|error| {
            let mut diag = Diagnostic::new(DiagnosticId::InvalidSyntax, Severity::Error, error);
            let span = Span::from(file.clone()).with_range(error.range());
            diag.annotate(Annotation::primary(span));
            let sub = SubDiagnostic::new(
                SubDiagnosticSeverity::Warning,
                "Only accept new syntax errors if they are also present in the input. \
                    The formatter should not introduce syntax errors.",
            );
            diag.sub(sub);
            diag
        })
        .collect::<Vec<_>>();

    let mut formatted_ast = formatted_parsed.into_syntax();
    Normalizer.visit_module(&mut formatted_ast);
    let formatted_ast = ComparableMod::from(&formatted_ast);

    if formatted_ast != unformatted_ast {
        let diff = TextDiff::from_lines(
            &format!("{unformatted_ast:#?}"),
            &format!("{formatted_ast:#?}"),
        )
        .unified_diff()
        .header("Unformatted", "Formatted")
        .to_string();
        panic!(
            r#"Reformatting the unformatted code of {} resulted in AST changes.
---
{diff}
"#,
            input_path.display(),
        );
    }

    diagnostics
}

/// A second-level Markdown heading (`## title`) that is followed by a blank line.
struct Header<'a> {
    /// The text of the heading.
    title: &'a str,
}

impl<'a> Header<'a> {
    /// Creates a heading with the given `title`.
    fn new(title: &'a str) -> Self {
        Header { title }
    }
}

impl std::fmt::Display for Header<'_> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        // The newline in the format string terminates the heading line, the one added by
        // `writeln!` produces the blank line after it.
        writeln!(f, "## {}\n", self.title)
    }
}

/// A fenced Markdown code block that is followed by a blank line.
///
/// The code is written as is: no newline is inserted between the code and the closing fence.
struct CodeFrame<'a> {
    /// The info string written directly after the opening fence (may be empty).
    language: &'a str,
    /// The content of the code block.
    code: &'a dyn std::fmt::Display,
}

impl<'a> CodeFrame<'a> {
    /// Creates a code block for `code` that is tagged with `language`.
    fn new(language: &'a str, code: &'a dyn std::fmt::Display) -> Self {
        CodeFrame { language, code }
    }
}

impl std::fmt::Display for CodeFrame<'_> {
    fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
        // Opening fence line, the code, the closing fence line and a trailing blank line
        // (the final newline comes from `writeln!`).
        writeln!(f, "```{}\n{}```\n", self.language, self.code)
    }
}

/// Display adapter that renders [`PyFormatOptions`] as one `name = value` line per option.
struct DisplayPyOptions<'a>(&'a PyFormatOptions);

impl fmt::Display for DisplayPyOptions<'_> {
    fn fmt(&self, f: &mut Formatter<'_>) -> fmt::Result {
        let options = self.0;

        // Read every option up front; the template below refers to these locals by name.
        let indent_style = options.indent_style();
        let line_width = options.line_width().value();
        let indent_width = options.indent_width().value();
        let quote_style = options.quote_style();
        let line_ending = options.line_ending();
        let magic_trailing_comma = options.magic_trailing_comma();
        let docstring_code = options.docstring_code();
        let docstring_code_line_width = options.docstring_code_line_width();
        let preview = options.preview();
        let target_version = options.target_version();
        let source_type = options.source_type();

        writeln!(
            f,
            r#"indent-style               = {indent_style}
line-width                 = {line_width}
indent-width               = {indent_width}
quote-style                = {quote_style:?}
line-ending                = {line_ending:?}
magic-trailing-comma       = {magic_trailing_comma:?}
docstring-code             = {docstring_code:?}
docstring-code-line-width  = {docstring_code_line_width:?}
preview                    = {preview:?}
target_version             = {target_version}
source_type                = {source_type:?}"#
        )
    }
}

/// Records the range of every statement so that [`UnsupportedSyntaxError`]s can be
/// fingerprinted by the statement they occur in.
///
/// The statements are visited in source order and their ranges are stored in that order. The
/// position of the statement enclosing a syntax error's range can be looked up with `node_id`.
/// That position should be stable across formatting runs since the formatter won't add or
/// remove statements.
struct StmtVisitor {
    /// The ranges of all visited statements, in visiting order.
    nodes: Vec<TextRange>,
}

impl StmtVisitor {
    /// Collects the statement ranges of the parsed module.
    fn new(parsed: &Parsed<Mod>) -> Self {
        let mut collector = StmtVisitor { nodes: Vec::new() };
        collector.visit_mod(parsed.syntax());
        collector
    }

    /// Return the index of the shortest statement node that contains `range`.
    ///
    /// If multiple enclosing statements are equally short, the one visited first is returned.
    ///
    /// # Panics
    ///
    /// Panics if no recorded statement contains `range`.
    fn node_id(&self, range: TextRange) -> usize {
        self.nodes
            .iter()
            .enumerate()
            .filter_map(|(index, node)| node.contains_range(range).then_some((index, node.len())))
            .min_by_key(|&(_, len)| len)
            .map(|(index, _)| index)
            .expect("Expected an enclosing node in the AST")
    }
}

impl<'a> SourceOrderVisitor<'a> for StmtVisitor {
    fn visit_stmt(&mut self, stmt: &'a ruff_python_ast::Stmt) {
        // Record the statement itself before descending into the statements nested in it.
        self.nodes.push(stmt.range());
        ruff_python_ast::visitor::source_order::walk_stmt(self, stmt);
    }
}

/// Collects the unsupported syntax errors and assigns a unique hash to each error.
fn collect_unsupported_syntax_errors(
    parsed: &Parsed<Mod>,
) -> FxHashMap<u64, UnsupportedSyntaxError> {
    let mut collected = FxHashMap::default();

    if parsed.unsupported_syntax_errors().is_empty() {
        return collected;
    }

    let visitor = StmtVisitor::new(parsed);

    for error in parsed.unsupported_syntax_errors() {
        let node_id = visitor.node_id(error.range);
        let mut error_fingerprint = fingerprint_unsupported_syntax_error(error, node_id, 0);

        // Make sure that we do not get a fingerprint that is already in use
        // by adding in the previously generated one.
        loop {
            match collected.entry(error_fingerprint) {
                Entry::Occupied(_) => {
                    error_fingerprint =
                        fingerprint_unsupported_syntax_error(error, node_id, error_fingerprint);
                }
                Entry::Vacant(entry) => {
                    entry.insert(error.clone());
                    break;
                }
            }
        }
    }

    collected
}

/// Hashes `salt`, the error's kind and target version, and `node_id` (in that order) into a
/// fingerprint for `error`.
fn fingerprint_unsupported_syntax_error(
    error: &UnsupportedSyntaxError,
    node_id: usize,
    salt: u64,
) -> u64 {
    // Every field is named in the pattern so that a decision has to be made for each of them.
    let UnsupportedSyntaxError {
        kind,
        target_version,
        // Don't hash the range because the location between the formatted and unformatted code
        // is likely to be different
        range: _,
    } = error;

    let mut state = DefaultHasher::new();
    // A tuple feeds its elements to the hasher one after the other, from left to right.
    (salt, kind, target_version, node_id).hash(&mut state);
    state.finish()
}