Compare formatted and unformatted ASTs during formatter tests (#8624)

## Summary

This PR implements validation in the formatter tests to ensure that we
don't modify the AST during formatting. Black has similar logic.

In implementing this, I learned that Black actually _does_ modify the
AST, and their test infrastructure normalizes the AST to wipe away those
differences. Specifically, Black changes the indentation of docstrings,
which _does_ modify the AST; and it also inserts parentheses in `del`
statements, which changes the AST too.

Ruff also does both these things, so we _also_ implement the same
normalization using a new visitor that allows for modifying the AST.

Closes https://github.com/astral-sh/ruff/issues/8184.

## Test Plan

`cargo test`
This commit is contained in:
Charlie Marsh 2023-11-13 09:43:27 -08:00 committed by GitHub
parent 3592f44ade
commit d574fcd1ac
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 930 additions and 19 deletions

View file

@ -1,11 +1,18 @@
use ruff_formatter::FormatOptions;
use ruff_python_formatter::{format_module_source, PreviewMode, PyFormatOptions};
use similar::TextDiff;
use std::fmt::{Formatter, Write};
use std::io::BufReader;
use std::path::Path;
use std::{fmt, fs};
use similar::TextDiff;
use crate::normalizer::Normalizer;
use ruff_formatter::FormatOptions;
use ruff_python_ast::comparable::ComparableMod;
use ruff_python_formatter::{format_module_source, PreviewMode, PyFormatOptions};
use ruff_python_parser::{parse, AsMode};
mod normalizer;
#[test]
fn black_compatibility() {
let test_file = |input_path: &Path| {
@ -33,6 +40,7 @@ fn black_compatibility() {
let formatted_code = printed.as_code();
ensure_unchanged_ast(&content, formatted_code, &options, input_path);
ensure_stability_when_formatting_twice(formatted_code, options, input_path);
if formatted_code == expected_output {
@ -111,6 +119,7 @@ fn format() {
format_module_source(&content, options.clone()).expect("Formatting to succeed");
let formatted_code = printed.as_code();
ensure_unchanged_ast(&content, formatted_code, &options, input_path);
ensure_stability_when_formatting_twice(formatted_code, options.clone(), input_path);
let mut snapshot = format!("## Input\n{}", CodeFrame::new("python", &content));
@ -128,6 +137,7 @@ fn format() {
format_module_source(&content, options.clone()).expect("Formatting to succeed");
let formatted_code = printed.as_code();
ensure_unchanged_ast(&content, formatted_code, &options, input_path);
ensure_stability_when_formatting_twice(formatted_code, options.clone(), input_path);
writeln!(
@ -140,29 +150,20 @@ fn format() {
.unwrap();
}
} else {
let printed =
format_module_source(&content, options.clone()).expect("Formatting to succeed");
let formatted = printed.as_code();
ensure_stability_when_formatting_twice(formatted, options.clone(), input_path);
// We want to capture the differences in the preview style in our fixtures
let options_preview = options.with_preview(PreviewMode::Enabled);
let printed_preview = format_module_source(&content, options_preview.clone())
.expect("Formatting to succeed");
let formatted_preview = printed_preview.as_code();
ensure_stability_when_formatting_twice(
formatted_preview,
options_preview.clone(),
input_path,
);
ensure_unchanged_ast(&content, formatted_preview, &options_preview, input_path);
ensure_stability_when_formatting_twice(formatted_preview, options_preview, input_path);
if formatted == formatted_preview {
if formatted_code == formatted_preview {
writeln!(
snapshot,
"## Output\n{}",
CodeFrame::new("python", &formatted)
CodeFrame::new("python", &formatted_code)
)
.unwrap();
} else {
@ -171,10 +172,10 @@ fn format() {
writeln!(
snapshot,
"## Output\n{}\n## Preview changes\n{}",
CodeFrame::new("python", &formatted),
CodeFrame::new("python", &formatted_code),
CodeFrame::new(
"diff",
TextDiff::from_lines(formatted, formatted_preview)
TextDiff::from_lines(formatted_code, formatted_preview)
.unified_diff()
.header("Stable", "Preview")
)
@ -239,6 +240,57 @@ Formatted twice:
}
}
/// Ensure that formatting doesn't change the AST.
///
/// Like Black, there are a few exceptions to this "invariant" which are encoded in
/// [`NormalizedMod`] and related structs. Namely, formatting can change indentation within strings,
/// and can also flatten tuples within `del` statements.
fn ensure_unchanged_ast(
unformatted_code: &str,
formatted_code: &str,
options: &PyFormatOptions,
input_path: &Path,
) {
let source_type = options.source_type();
// Parse the unformatted code.
let mut unformatted_ast = parse(
unformatted_code,
source_type.as_mode(),
&input_path.to_string_lossy(),
)
.expect("Unformatted code to be valid syntax");
Normalizer.visit_module(&mut unformatted_ast);
let unformatted_ast = ComparableMod::from(&unformatted_ast);
// Parse the formatted code.
let mut formatted_ast = parse(
formatted_code,
source_type.as_mode(),
&input_path.to_string_lossy(),
)
.expect("Formatted code to be valid syntax");
Normalizer.visit_module(&mut formatted_ast);
let formatted_ast = ComparableMod::from(&formatted_ast);
if formatted_ast != unformatted_ast {
let diff = TextDiff::from_lines(
&format!("{unformatted_ast:#?}"),
&format!("{formatted_ast:#?}"),
)
.unified_diff()
.header("Unformatted", "Formatted")
.to_string();
panic!(
r#"Reformatting the unformatted code of {} resulted in AST changes.
---
{diff}
"#,
input_path.display(),
);
}
}
struct Header<'a> {
title: &'a str,
}

View file

@ -0,0 +1,83 @@
use itertools::Either::{Left, Right};
use ruff_python_ast::visitor::transformer;
use ruff_python_ast::visitor::transformer::Transformer;
use ruff_python_ast::{self as ast, Expr, Stmt};
/// A struct to normalize AST nodes for the purpose of comparing formatted representations for
/// semantic equivalence.
///
/// Vis-à-vis comparing ASTs, comparing these normalized representations does the following:
/// - Ignores non-abstraction information that we've encoded into the AST, e.g., the difference
/// between `class C: ...` and `class C(): ...`, which is part of our AST but not `CPython`'s.
/// - Normalize strings. The formatter can re-indent docstrings, so we need to compare string
/// contents ignoring whitespace. (Black does the same.)
/// - Ignores nested tuples in deletions. (Black does the same.)
pub(crate) struct Normalizer;
impl Normalizer {
/// Transform an AST module into a normalized representation.
#[allow(dead_code)]
pub(crate) fn visit_module(&self, module: &mut ast::Mod) {
match module {
ast::Mod::Module(module) => {
self.visit_body(&mut module.body);
}
ast::Mod::Expression(expression) => {
self.visit_expr(&mut expression.body);
}
}
}
}
impl Transformer for Normalizer {
fn visit_stmt(&self, stmt: &mut Stmt) {
match stmt {
Stmt::ClassDef(class_def) => {
// Treat `class C: ...` and `class C(): ...` equivalently.
if class_def
.arguments
.as_ref()
.is_some_and(|arguments| arguments.is_empty())
{
class_def.arguments = None;
}
}
Stmt::Delete(delete) => {
// Treat `del a, b` and `del (a, b)` equivalently.
delete.targets = delete
.targets
.clone()
.into_iter()
.flat_map(|target| {
if let Expr::Tuple(tuple) = target {
Left(tuple.elts.into_iter())
} else {
Right(std::iter::once(target))
}
})
.collect();
}
_ => {}
}
transformer::walk_stmt(self, stmt);
}
fn visit_expr(&self, expr: &mut Expr) {
if let Expr::StringLiteral(string_literal) = expr {
// Normalize a string by (1) stripping any leading and trailing space from each
// line, and (2) removing any blank lines from the start and end of the string.
string_literal.value = string_literal
.value
.lines()
.map(str::trim)
.collect::<Vec<_>>()
.join("\n")
.trim()
.to_owned();
}
transformer::walk_expr(self, expr);
}
}