mirror of
				https://github.com/astral-sh/ruff.git
				synced 2025-11-04 05:34:54 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			281 lines
		
	
	
	
		
			12 KiB
		
	
	
	
		
			Rust
		
	
	
	
	
	
			
		
		
	
	
			281 lines
		
	
	
	
		
			12 KiB
		
	
	
	
		
			Rust
		
	
	
	
	
	
use std::sync::LazyLock;
 | 
						|
use {
 | 
						|
    itertools::Either::{Left, Right},
 | 
						|
    regex::Regex,
 | 
						|
};
 | 
						|
 | 
						|
use ruff_python_ast::{
 | 
						|
    self as ast, BytesLiteralFlags, Expr, FStringFlags, FStringPart, InterpolatedStringElement,
 | 
						|
    InterpolatedStringLiteralElement, Stmt, StringFlags,
 | 
						|
};
 | 
						|
use ruff_python_ast::{AtomicNodeIndex, visitor::transformer::Transformer};
 | 
						|
use ruff_python_ast::{StringLiteralFlags, visitor::transformer};
 | 
						|
use ruff_text_size::{Ranged, TextRange};
 | 
						|
 | 
						|
/// A struct to normalize AST nodes for the purpose of comparing formatted representations for
 | 
						|
/// semantic equivalence.
 | 
						|
///
 | 
						|
/// Vis-à-vis comparing ASTs, comparing these normalized representations does the following:
 | 
						|
/// - Ignores non-abstraction information that we've encoded into the AST, e.g., the difference
 | 
						|
///   between `class C: ...` and `class C(): ...`, which is part of our AST but not `CPython`'s.
 | 
						|
/// - Normalize strings. The formatter can re-indent docstrings, so we need to compare string
 | 
						|
///   contents ignoring whitespace. (Black does the same.)
 | 
						|
/// - The formatter can also reformat code snippets when they're Python code, which can of
 | 
						|
///   course change the string in arbitrary ways. Black itself does not reformat code snippets,
 | 
						|
///   so we carve our own path here by stripping everything that looks like code snippets from
 | 
						|
///   string literals.
 | 
						|
/// - Ignores nested tuples in deletions. (Black does the same.)
 | 
						|
pub(crate) struct Normalizer;
 | 
						|
 | 
						|
impl Normalizer {
 | 
						|
    /// Transform an AST module into a normalized representation.
 | 
						|
    #[allow(dead_code)]
 | 
						|
    pub(crate) fn visit_module(&self, module: &mut ast::Mod) {
 | 
						|
        match module {
 | 
						|
            ast::Mod::Module(module) => {
 | 
						|
                self.visit_body(&mut module.body);
 | 
						|
            }
 | 
						|
            ast::Mod::Expression(expression) => {
 | 
						|
                self.visit_expr(&mut expression.body);
 | 
						|
            }
 | 
						|
        }
 | 
						|
    }
 | 
						|
}
 | 
						|
 | 
						|
impl Transformer for Normalizer {
 | 
						|
    fn visit_stmt(&self, stmt: &mut Stmt) {
 | 
						|
        if let Stmt::Delete(delete) = stmt {
 | 
						|
            // Treat `del a, b` and `del (a, b)` equivalently.
 | 
						|
            delete.targets = delete
 | 
						|
                .targets
 | 
						|
                .clone()
 | 
						|
                .into_iter()
 | 
						|
                .flat_map(|target| {
 | 
						|
                    if let Expr::Tuple(tuple) = target {
 | 
						|
                        Left(tuple.elts.into_iter())
 | 
						|
                    } else {
 | 
						|
                        Right(std::iter::once(target))
 | 
						|
                    }
 | 
						|
                })
 | 
						|
                .collect();
 | 
						|
        }
 | 
						|
 | 
						|
        transformer::walk_stmt(self, stmt);
 | 
						|
    }
 | 
						|
 | 
						|
    fn visit_expr(&self, expr: &mut Expr) {
 | 
						|
        // Ruff supports joining implicitly concatenated strings. The code below implements this
 | 
						|
        // at an AST level by joining the string literals in the AST if they can be joined (it doesn't mean that
 | 
						|
        // they'll be joined in the formatted output but they could).
 | 
						|
        // Comparable expression handles some of this by comparing the concatenated string
 | 
						|
        // but not joining here doesn't play nicely with other string normalizations done in the
 | 
						|
        // Normalizer.
 | 
						|
        match expr {
 | 
						|
            Expr::StringLiteral(string) => {
 | 
						|
                if string.value.is_implicit_concatenated() {
 | 
						|
                    let can_join = string.value.iter().all(|literal| {
 | 
						|
                        !literal.flags.is_triple_quoted() && !literal.flags.prefix().is_raw()
 | 
						|
                    });
 | 
						|
 | 
						|
                    if can_join {
 | 
						|
                        string.value = ast::StringLiteralValue::single(ast::StringLiteral {
 | 
						|
                            value: Box::from(string.value.to_str()),
 | 
						|
                            range: string.range,
 | 
						|
                            flags: StringLiteralFlags::empty(),
 | 
						|
                            node_index: AtomicNodeIndex::dummy(),
 | 
						|
                        });
 | 
						|
                    }
 | 
						|
                }
 | 
						|
            }
 | 
						|
 | 
						|
            Expr::BytesLiteral(bytes) => {
 | 
						|
                if bytes.value.is_implicit_concatenated() {
 | 
						|
                    let can_join = bytes.value.iter().all(|literal| {
 | 
						|
                        !literal.flags.is_triple_quoted() && !literal.flags.prefix().is_raw()
 | 
						|
                    });
 | 
						|
 | 
						|
                    if can_join {
 | 
						|
                        bytes.value = ast::BytesLiteralValue::single(ast::BytesLiteral {
 | 
						|
                            value: bytes.value.bytes().collect(),
 | 
						|
                            range: bytes.range,
 | 
						|
                            flags: BytesLiteralFlags::empty(),
 | 
						|
                            node_index: AtomicNodeIndex::dummy(),
 | 
						|
                        });
 | 
						|
                    }
 | 
						|
                }
 | 
						|
            }
 | 
						|
 | 
						|
            Expr::FString(fstring) => {
 | 
						|
                if fstring.value.is_implicit_concatenated() {
 | 
						|
                    let can_join = fstring.value.iter().all(|part| match part {
 | 
						|
                        FStringPart::Literal(literal) => {
 | 
						|
                            !literal.flags.is_triple_quoted() && !literal.flags.prefix().is_raw()
 | 
						|
                        }
 | 
						|
                        FStringPart::FString(string) => {
 | 
						|
                            !string.flags.is_triple_quoted() && !string.flags.prefix().is_raw()
 | 
						|
                        }
 | 
						|
                    });
 | 
						|
 | 
						|
                    if can_join {
 | 
						|
                        #[derive(Default)]
 | 
						|
                        struct Collector {
 | 
						|
                            elements: Vec<InterpolatedStringElement>,
 | 
						|
                        }
 | 
						|
 | 
						|
                        impl Collector {
 | 
						|
                            // The logic for concatenating adjacent string literals
 | 
						|
                            // occurs here, implicitly: when we encounter a sequence
 | 
						|
                            // of string literals, the first gets pushed to the
 | 
						|
                            // `elements` vector, while subsequent strings
 | 
						|
                            // are concatenated onto this top string.
 | 
						|
                            fn push_literal(&mut self, literal: &str, range: TextRange) {
 | 
						|
                                if let Some(InterpolatedStringElement::Literal(existing_literal)) =
 | 
						|
                                    self.elements.last_mut()
 | 
						|
                                {
 | 
						|
                                    let value = std::mem::take(&mut existing_literal.value);
 | 
						|
                                    let mut value = value.into_string();
 | 
						|
                                    value.push_str(literal);
 | 
						|
                                    existing_literal.value = value.into_boxed_str();
 | 
						|
                                    existing_literal.range =
 | 
						|
                                        TextRange::new(existing_literal.start(), range.end());
 | 
						|
                                } else {
 | 
						|
                                    self.elements.push(InterpolatedStringElement::Literal(
 | 
						|
                                        InterpolatedStringLiteralElement {
 | 
						|
                                            range,
 | 
						|
                                            value: literal.into(),
 | 
						|
                                            node_index: AtomicNodeIndex::dummy(),
 | 
						|
                                        },
 | 
						|
                                    ));
 | 
						|
                                }
 | 
						|
                            }
 | 
						|
 | 
						|
                            fn push_expression(&mut self, expression: ast::InterpolatedElement) {
 | 
						|
                                self.elements
 | 
						|
                                    .push(InterpolatedStringElement::Interpolation(expression));
 | 
						|
                            }
 | 
						|
                        }
 | 
						|
 | 
						|
                        let mut collector = Collector::default();
 | 
						|
 | 
						|
                        for part in &fstring.value {
 | 
						|
                            match part {
 | 
						|
                                ast::FStringPart::Literal(string_literal) => {
 | 
						|
                                    collector
 | 
						|
                                        .push_literal(&string_literal.value, string_literal.range);
 | 
						|
                                }
 | 
						|
                                ast::FStringPart::FString(fstring) => {
 | 
						|
                                    for element in &fstring.elements {
 | 
						|
                                        match element {
 | 
						|
                                            ast::InterpolatedStringElement::Literal(literal) => {
 | 
						|
                                                collector
 | 
						|
                                                    .push_literal(&literal.value, literal.range);
 | 
						|
                                            }
 | 
						|
                                            ast::InterpolatedStringElement::Interpolation(
 | 
						|
                                                expression,
 | 
						|
                                            ) => {
 | 
						|
                                                collector.push_expression(expression.clone());
 | 
						|
                                            }
 | 
						|
                                        }
 | 
						|
                                    }
 | 
						|
                                }
 | 
						|
                            }
 | 
						|
                        }
 | 
						|
 | 
						|
                        fstring.value = ast::FStringValue::single(ast::FString {
 | 
						|
                            elements: collector.elements.into(),
 | 
						|
                            range: fstring.range,
 | 
						|
                            flags: FStringFlags::empty(),
 | 
						|
                            node_index: AtomicNodeIndex::dummy(),
 | 
						|
                        });
 | 
						|
                    }
 | 
						|
                }
 | 
						|
            }
 | 
						|
 | 
						|
            _ => {}
 | 
						|
        }
 | 
						|
        transformer::walk_expr(self, expr);
 | 
						|
    }
 | 
						|
 | 
						|
    fn visit_interpolated_string_element(
 | 
						|
        &self,
 | 
						|
        interpolated_string_element: &mut InterpolatedStringElement,
 | 
						|
    ) {
 | 
						|
        let InterpolatedStringElement::Interpolation(interpolation) = interpolated_string_element
 | 
						|
        else {
 | 
						|
            return;
 | 
						|
        };
 | 
						|
 | 
						|
        let Some(debug) = &mut interpolation.debug_text else {
 | 
						|
            return;
 | 
						|
        };
 | 
						|
 | 
						|
        // Changing the newlines to the configured newline is okay because Python normalizes all newlines to `\n`
 | 
						|
        debug.leading = debug.leading.replace("\r\n", "\n").replace('\r', "\n");
 | 
						|
        debug.trailing = debug.trailing.replace("\r\n", "\n").replace('\r', "\n");
 | 
						|
    }
 | 
						|
 | 
						|
    fn visit_string_literal(&self, string_literal: &mut ast::StringLiteral) {
 | 
						|
        static STRIP_DOC_TESTS: LazyLock<Regex> = LazyLock::new(|| {
 | 
						|
            Regex::new(
 | 
						|
                r"(?mx)
 | 
						|
                    (
 | 
						|
                        # strip doctest PS1 prompt lines
 | 
						|
                        ^\s*>>>\s.*(\n|$)
 | 
						|
                        |
 | 
						|
                        # strip doctest PS2 prompt lines
 | 
						|
                        # Also handles the case of an empty ... line.
 | 
						|
                        ^\s*\.\.\.((\n|$)|\s.*(\n|$))
 | 
						|
                    )+
 | 
						|
                ",
 | 
						|
            )
 | 
						|
            .unwrap()
 | 
						|
        });
 | 
						|
        static STRIP_RST_BLOCKS: LazyLock<Regex> = LazyLock::new(|| {
 | 
						|
            // This is kind of unfortunate, but it's pretty tricky (likely
 | 
						|
            // impossible) to detect a reStructuredText block with a simple
 | 
						|
            // regex. So we just look for the start of a block and remove
 | 
						|
            // everything after it. Talk about a hammer.
 | 
						|
            Regex::new(r"::(?s:.*)").unwrap()
 | 
						|
        });
 | 
						|
        static STRIP_MARKDOWN_BLOCKS: LazyLock<Regex> = LazyLock::new(|| {
 | 
						|
            // This covers more than valid Markdown blocks, but that's OK.
 | 
						|
            Regex::new(r"(```|~~~)\p{any}*(```|~~~|$)").unwrap()
 | 
						|
        });
 | 
						|
 | 
						|
        // Start by (1) stripping everything that looks like a code
 | 
						|
        // snippet, since code snippets may be completely reformatted if
 | 
						|
        // they are Python code.
 | 
						|
        string_literal.value = STRIP_DOC_TESTS
 | 
						|
            .replace_all(
 | 
						|
                &string_literal.value,
 | 
						|
                "<DOCTEST-CODE-SNIPPET: Removed by normalizer>\n",
 | 
						|
            )
 | 
						|
            .into_owned()
 | 
						|
            .into_boxed_str();
 | 
						|
        string_literal.value = STRIP_RST_BLOCKS
 | 
						|
            .replace_all(
 | 
						|
                &string_literal.value,
 | 
						|
                "<RSTBLOCK-CODE-SNIPPET: Removed by normalizer>\n",
 | 
						|
            )
 | 
						|
            .into_owned()
 | 
						|
            .into_boxed_str();
 | 
						|
        string_literal.value = STRIP_MARKDOWN_BLOCKS
 | 
						|
            .replace_all(
 | 
						|
                &string_literal.value,
 | 
						|
                "<MARKDOWN-CODE-SNIPPET: Removed by normalizer>\n",
 | 
						|
            )
 | 
						|
            .into_owned()
 | 
						|
            .into_boxed_str();
 | 
						|
        // Normalize a string by (2) stripping any leading and trailing space from each
 | 
						|
        // line, and (3) removing any blank lines from the start and end of the string.
 | 
						|
        string_literal.value = string_literal
 | 
						|
            .value
 | 
						|
            .lines()
 | 
						|
            .map(str::trim)
 | 
						|
            .collect::<Vec<_>>()
 | 
						|
            .join("\n")
 | 
						|
            .trim()
 | 
						|
            .to_owned()
 | 
						|
            .into_boxed_str();
 | 
						|
    }
 | 
						|
}
 |