Assert that formatted code doesn't introduce any new unsupported syntax errors (#16549)

## Summary

This should give us better coverage for the unsupported syntax error
features and
increase our confidence that the formatter doesn't accidentally
introduce new unsupported
syntax errors.

A feature like this would have been very useful when working on f-string
formatting
where it took a lot of iteration to find all Python 3.11 or older
incompatibilities.

## Test Plan

I applied my changes on top of
https://github.com/astral-sh/ruff/pull/16523 and
removed the target version check in the with-statement formatting code.
As expected,
the integration tests now failed.
This commit is contained in:
Micha Reiser 2025-03-07 09:12:00 +01:00 committed by GitHub
parent 05a4c29344
commit 9cd0cdefd3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 111 additions and 20 deletions

View file

@ -1,20 +1,22 @@
use crate::normalizer::Normalizer;
use itertools::Itertools;
use ruff_formatter::FormatOptions;
use ruff_python_ast::comparable::ComparableMod;
use ruff_python_formatter::{format_module_source, format_range, PreviewMode, PyFormatOptions};
use ruff_python_parser::{parse, ParseOptions, UnsupportedSyntaxError};
use ruff_source_file::{LineIndex, OneIndexed};
use ruff_text_size::{Ranged, TextRange, TextSize};
use rustc_hash::FxHashMap;
use similar::TextDiff;
use std::borrow::Cow;
use std::collections::hash_map::Entry;
use std::fmt::{Formatter, Write};
use std::hash::{DefaultHasher, Hash, Hasher};
use std::io::BufReader;
use std::ops::Range;
use std::path::Path;
use std::{fmt, fs};
use similar::TextDiff;
use crate::normalizer::Normalizer;
use ruff_formatter::FormatOptions;
use ruff_python_ast::comparable::ComparableMod;
use ruff_python_formatter::{format_module_source, format_range, PreviewMode, PyFormatOptions};
use ruff_python_parser::{parse, ParseOptions};
use ruff_source_file::{LineIndex, OneIndexed};
use ruff_text_size::{TextRange, TextSize};
mod normalizer;
#[test]
@ -379,7 +381,7 @@ Formatted twice:
}
}
/// Ensure that formatting doesn't change the AST.
/// Ensure that formatting doesn't change the AST and doesn't introduce any new unsupported syntax errors.
///
/// Like Black, there are a few exceptions to this "invariant" which are encoded in
/// [`NormalizedMod`] and related structs. Namely, formatting can change indentation within strings,
@ -393,16 +395,53 @@ fn ensure_unchanged_ast(
let source_type = options.source_type();
// Parse the unformatted code.
let mut unformatted_ast = parse(unformatted_code, ParseOptions::from(source_type))
.expect("Unformatted code to be valid syntax")
.into_syntax();
let unformatted_parsed = parse(
unformatted_code,
ParseOptions::from(source_type).with_target_version(options.target_version()),
)
.expect("Unformatted code to be valid syntax");
let unformatted_unsupported_syntax_errors =
collect_unsupported_syntax_errors(unformatted_parsed.unsupported_syntax_errors());
let mut unformatted_ast = unformatted_parsed.into_syntax();
Normalizer.visit_module(&mut unformatted_ast);
let unformatted_ast = ComparableMod::from(&unformatted_ast);
// Parse the formatted code.
let mut formatted_ast = parse(formatted_code, ParseOptions::from(source_type))
.expect("Formatted code to be valid syntax")
.into_syntax();
let formatted_parsed = parse(
formatted_code,
ParseOptions::from(source_type).with_target_version(options.target_version()),
)
.expect("Formatted code to be valid syntax");
// Assert that there are no new unsupported syntax errors
let mut formatted_unsupported_syntax_errors =
collect_unsupported_syntax_errors(formatted_parsed.unsupported_syntax_errors());
formatted_unsupported_syntax_errors
.retain(|fingerprint, _| !unformatted_unsupported_syntax_errors.contains_key(fingerprint));
if !formatted_unsupported_syntax_errors.is_empty() {
let index = LineIndex::from_source_text(formatted_code);
panic!(
"Formatted code `{}` introduced new unsupported syntax errors:\n---\n{}\n---",
input_path.display(),
formatted_unsupported_syntax_errors
.into_values()
.map(|error| {
let location = index.source_location(error.start(), formatted_code);
format!(
"{row}:{col} {error}",
row = location.row,
col = location.column
)
})
.join("\n")
);
}
let mut formatted_ast = formatted_parsed.into_syntax();
Normalizer.visit_module(&mut formatted_ast);
let formatted_ast = ComparableMod::from(&formatted_ast);
@ -492,3 +531,49 @@ source_type = {source_type:?}"#,
)
}
}
/// Collects the unsupported syntax errors and assigns a unique hash to each error.
fn collect_unsupported_syntax_errors(
errors: &[UnsupportedSyntaxError],
) -> FxHashMap<u64, UnsupportedSyntaxError> {
let mut collected = FxHashMap::default();
for error in errors {
let mut error_fingerprint = fingerprint_unsupported_syntax_error(error, 0);
// Make sure that we do not get a fingerprint that is already in use
// by adding in the previously generated one.
loop {
match collected.entry(error_fingerprint) {
Entry::Occupied(_) => {
error_fingerprint =
fingerprint_unsupported_syntax_error(error, error_fingerprint);
}
Entry::Vacant(entry) => {
entry.insert(error.clone());
break;
}
}
}
}
collected
}
/// Computes a `u64` fingerprint for `error`, mixed with the caller-provided `salt`.
///
/// The salt, the error kind, and the target version are fed to a [`DefaultHasher`] in that
/// order. The error's range is intentionally left out.
fn fingerprint_unsupported_syntax_error(error: &UnsupportedSyntaxError, salt: u64) -> u64 {
    // Destructure without `..` so that every field has to be named here.
    let UnsupportedSyntaxError {
        kind,
        target_version,
        // The range is not part of the fingerprint: the same error is likely to sit at a
        // different location in the formatted code than in the unformatted code.
        range: _,
    } = error;

    let mut state = DefaultHasher::new();
    salt.hash(&mut state);
    kind.hash(&mut state);
    target_version.hash(&mut state);
    state.finish()
}