Perf: Skip string normalization when possible (#10116)

2025-09-29 21:34:57 +00:00 · 2024-02-26 18:35:29 +01:00 · 2024-02-26 18:35:29 +01:00 · 8dc22d5793
commit 8dc22d5793
parent 15b87ea8be
3 changed files with 170 additions and 88 deletions
--- a/crates/ruff_python_formatter/src/other/f_string.rs
+++ b/crates/ruff_python_formatter/src/other/f_string.rs
@ -59,16 +59,16 @@ impl Format<PyFormatContext<'_>> for FormatFString<'_> {
            return result;
        }
-        let quotes = normalizer.choose_quotes(&string, &locator);
+        let quote_selection = normalizer.choose_quotes(&string, &locator);
        let context = FStringContext::new(
            string.prefix(),
-            quotes,
+            quote_selection.quotes(),
            FStringLayout::from_f_string(self.value, &locator),
        );
        // Starting prefix and quote
-        write!(f, [string.prefix(), quotes])?;
+        write!(f, [string.prefix(), quote_selection.quotes()])?;
        f.join()
            .entries(
@ -80,7 +80,7 @@ impl Format<PyFormatContext<'_>> for FormatFString<'_> {
            .finish()?;
        // Ending quote
-        quotes.fmt(f)
+        quote_selection.quotes().fmt(f)
    }
 }
--- a/crates/ruff_python_formatter/src/other/f_string_element.rs
+++ b/crates/ruff_python_formatter/src/other/f_string_element.rs
@ -59,6 +59,7 @@ impl Format<PyFormatContext<'_>> for FormatFStringLiteralElement<'_> {
        let literal_content = f.context().locator().slice(self.element.range());
        let normalized = normalize_string(
            literal_content,
            0,
            self.context.quotes(),
            self.context.prefix(),
            is_hex_codes_in_unicode_sequences_enabled(f.context()),
--- a/crates/ruff_python_formatter/src/string/normalize.rs
+++ b/crates/ruff_python_formatter/src/string/normalize.rs
@ -1,4 +1,5 @@
 use std::borrow::Cow;
 use std::iter::FusedIterator;
 use ruff_formatter::FormatContext;
 use ruff_source_file::Locator;
@ -44,8 +45,44 @@ impl StringNormalizer {
        self
    }
    /// Determines whether the quotes of `string` may be changed or must be kept as written.
    ///
    /// Outside of an f-string expression element the normalizer's configured
    /// quoting is returned as is. Inside one, the configured quoting only
    /// applies when changing the quotes cannot invalidate the enclosing
    /// f-string; otherwise the existing quotes are preserved.
    fn quoting(&self, string: &StringPart) -> Quoting {
        // Strings that aren't nested inside an f-string expression element
        // always use the configured quoting.
        let FStringState::InsideExpressionElement(context) = self.f_string_state else {
            return self.quoting;
        };

        // The original f-string is assumed to be valid in terms of quoting, so
        // the inner quotes are only allowed to change when that can't make it
        // invalid: the outer f-string is triple-quoted while the inner string
        // is not, e.g.
        //
        // ```python
        // f"""outer {"inner"}"""  # Valid
        // f"""outer {"""inner"""}"""  # Invalid
        // ```
        let only_outer_is_triple_quoted =
            context.quotes().is_triple() && !string.quotes().is_triple();

        // ...or the target version supports PEP 701.
        if only_outer_is_triple_quoted || self.target_version.supports_pep_701() {
            self.quoting
        } else {
            Quoting::Preserve
        }
    }
    /// Computes the string's preferred quotes.
-    pub(crate) fn choose_quotes(&self, string: &StringPart, locator: &Locator) -> StringQuotes {
+    pub(crate) fn choose_quotes(&self, string: &StringPart, locator: &Locator) -> QuoteSelection {
        let raw_content = locator.slice(string.content_range());
        let first_quote_or_normalized_char_offset = raw_content
            .bytes()
            .position(|b| matches!(b, b'\\' | b'"' | b'\'' | b'\r' | b'{'));
        let quotes = match self.quoting(string) {
            Quoting::Preserve => string.quotes(),
            Quoting::CanChange => {
                // Per PEP 8, always prefer double quotes for triple-quoted strings.
                // Except when using quote-style-preserve.
                let preferred_style = if string.quotes().triple {
@ -105,46 +142,38 @@ impl StringNormalizer {
                    self.preferred_quote_style
                };
-        let quoting = if let FStringState::InsideExpressionElement(context) = self.f_string_state {
+                if let Some(preferred_quote) = QuoteChar::from_style(preferred_style) {
-            // If we're inside an f-string, we need to make sure to preserve the
+                    if let Some(first_quote_or_normalized_char_offset) =
-            // existing quotes unless we're inside a triple-quoted f-string and
+                        first_quote_or_normalized_char_offset
            // the inner string itself isn't triple-quoted. For example:
            //
            // ```python
            // f"""outer {"inner"}"""  # Valid
            // f"""outer {"""inner"""}"""  # Invalid
            // ```
            //
            // Or, if the target version supports PEP 701.
            //
            // The reason to preserve the quotes is based on the assumption that
            // the original f-string is valid in terms of quoting, and we don't
            // want to change that to make it invalid.
            if (context.quotes().is_triple() && !string.quotes().is_triple())
                || self.target_version.supports_pep_701()
                    {
-                self.quoting
+                        if string.prefix().is_raw_string() {
                            choose_quotes_for_raw_string(
                                &raw_content[first_quote_or_normalized_char_offset..],
                                string.quotes(),
                                preferred_quote,
                            )
                        } else {
-                Quoting::Preserve
+                            choose_quotes_impl(
                                &raw_content[first_quote_or_normalized_char_offset..],
                                string.quotes(),
                                preferred_quote,
                            )
                        }
                    } else {
-            self.quoting
+                        StringQuotes {
-        };
+                            quote_char: preferred_quote,
-
+                            triple: string.quotes().is_triple(),
-        match quoting {
+                        }
            Quoting::Preserve => string.quotes(),
            Quoting::CanChange => {
                if let Some(preferred_quote) = QuoteChar::from_style(preferred_style) {
                    let raw_content = locator.slice(string.content_range());
                    if string.prefix().is_raw_string() {
                        choose_quotes_for_raw_string(raw_content, string.quotes(), preferred_quote)
                    } else {
                        choose_quotes_impl(raw_content, string.quotes(), preferred_quote)
                    }
                } else {
                    string.quotes()
                }
            }
        };
        QuoteSelection {
            quotes,
            first_quote_or_normalized_char_offset,
        }
    }
@ -156,25 +185,48 @@ impl StringNormalizer {
    ) -> NormalizedString<'a> {
        let raw_content = locator.slice(string.content_range());
-        let quotes = self.choose_quotes(string, locator);
+        let quote_selection = self.choose_quotes(string, locator);
-        let normalized = normalize_string(
+        let normalized = if let Some(first_quote_or_escape_offset) =
            quote_selection.first_quote_or_normalized_char_offset
        {
            normalize_string(
                raw_content,
-            quotes,
+                first_quote_or_escape_offset,
                quote_selection.quotes,
                string.prefix(),
                self.normalize_hex,
                // TODO: Remove the `b'{'` in `choose_quotes` when promoting the
                // `format_fstring` preview style
                self.format_fstring,
-        );
+            )
        } else {
            Cow::Borrowed(raw_content)
        };
        NormalizedString {
            prefix: string.prefix(),
            content_range: string.content_range(),
            text: normalized,
-            quotes,
+            quotes: quote_selection.quotes,
        }
    }
 }
 /// Result of `StringNormalizer::choose_quotes`: the quotes picked for a string
 /// together with the position from which the string's content needs to be
 /// looked at by [`normalize_string`].
 #[derive(Debug)]
 pub(crate) struct QuoteSelection {
    /// The quotes that were selected for the string.
    quotes: StringQuotes,
    /// Offset to the first quote character or character that needs special handling in [`normalize_string`].
    ///
    /// `None` if the string's content contains no such character, in which
    /// case `normalize` borrows the raw content without calling
    /// [`normalize_string`].
    first_quote_or_normalized_char_offset: Option<usize>,
 }
 impl QuoteSelection {
    /// Returns the selected quotes.
    pub(crate) fn quotes(&self) -> StringQuotes {
        let Self { quotes, .. } = self;
        *quotes
    }
 }
 #[derive(Debug)]
 pub(crate) struct NormalizedString<'a> {
    prefix: crate::string::StringPrefix,
@ -399,6 +451,7 @@ fn choose_quotes_impl(
 /// Returns the normalized string. Scanning of `input` starts at the byte offset `start_offset`.
 pub(crate) fn normalize_string(
    input: &str,
    start_offset: usize,
    quotes: StringQuotes,
    prefix: StringPrefix,
    normalize_hex: bool,
@ -415,7 +468,7 @@ pub(crate) fn normalize_string(
    let preferred_quote = quote.as_char();
    let opposite_quote = quote.invert().as_char();
-    let mut chars = input.char_indices().peekable();
+    let mut chars = CharIndicesWithOffset::new(input, start_offset).peekable();
    let is_raw = prefix.is_raw_string();
    let is_fstring = !format_fstring && prefix.is_fstring();
@ -454,13 +507,11 @@ pub(crate) fn normalize_string(
                        // Skip over escaped backslashes
                        chars.next();
                    } else if normalize_hex {
                        if let Some(normalised) = UnicodeEscape::new(next, !prefix.is_byte())
                            .and_then(|escape| {
                                escape.normalize(&input[index + c.len_utf8() + next.len_utf8()..])
                            })
                        {
                        // Length of the `\` plus the length of the escape sequence character (`u` | `U` | `x`)
                        let escape_start_len = '\\'.len_utf8() + next.len_utf8();
                        if let Some(normalised) = UnicodeEscape::new(next, !prefix.is_byte())
                            .and_then(|escape| escape.normalize(&input[index + escape_start_len..]))
                        {
                            let escape_start_offset = index + escape_start_len;
                            if let Cow::Owned(normalised) = &normalised {
                                output.push_str(&input[last_index..escape_start_offset]);
@ -510,6 +561,35 @@ pub(crate) fn normalize_string(
    normalized
 }
 /// Iterator over the characters of a string, starting at a byte offset.
 ///
 /// Yields `(byte offset, char)` pairs where the offset is relative to the
 /// start of the whole input rather than to the starting position, i.e. the
 /// same pairs [`str::char_indices`] yields from that position onwards.
 #[derive(Clone, Debug)]
 struct CharIndicesWithOffset<'str> {
    /// The characters that haven't been yielded yet.
    chars: std::str::Chars<'str>,
    /// Byte offset, into the whole input, of the next character in `chars`.
    next_offset: usize,
 }
 impl<'str> CharIndicesWithOffset<'str> {
    /// Creates an iterator over the characters of `input` from `start_offset` onwards.
    ///
    /// # Panics
    ///
    /// Panics if `start_offset` is past the end of `input` or doesn't lie on
    /// a character boundary (regular string slicing rules).
    fn new(input: &'str str, start_offset: usize) -> Self {
        let chars = input[start_offset..].chars();
        Self {
            chars,
            next_offset: start_offset,
        }
    }
 }
 impl<'str> Iterator for CharIndicesWithOffset<'str> {
    type Item = (usize, char);
    fn next(&mut self) -> Option<Self::Item> {
        let ch = self.chars.next()?;
        // The yielded character starts where the previous one ended; the
        // following one starts right after its UTF-8 encoding.
        let index = self.next_offset;
        self.next_offset = index + ch.len_utf8();
        Some((index, ch))
    }
 }
 // `std::str::Chars` is fused, so this iterator keeps returning `None` once exhausted.
 impl FusedIterator for CharIndicesWithOffset<'_> {}
 #[derive(Copy, Clone, Debug, PartialEq, Eq)]
 enum UnicodeEscape {
    /// A hex escape sequence of either 2 (`\x`), 4 (`\u`) or 8 (`\U`) hex characters.
@ -651,6 +731,7 @@ mod tests {
        let normalized = normalize_string(
            input,
            0,
            StringQuotes {
                triple: false,
                quote_char: QuoteChar::Double,