Separate StringNormalizer from StringPart (#9954)

## Summary This PR is a small refactor to extract out the logic for normalizing string in the formatter from the `StringPart` struct. It also separates the quote selection into a separate method on the new `StringNormalizer`. Both of these will help in the f-string formatting to use `StringPart` and `choose_quotes` irrespective of normalization. The reason for having separate quote selection and normalization step is so that the f-string formatting can perform quote selection on its own. Unlike string and byte literals, the f-string formatting would require that the normalization happens only for the literal elements of it i.e., the "foo" and "bar" in `f"foo {x + y} bar"`. This will automatically be handled by the already separate `normalize_string` function. Another use-case in the f-string formatting is to extract out the relevant information from the `StringPart` like quotes and prefix which is to be passed as context while formatting each element of an f-string. ## Test Plan Ensure that clippy is happy and all tests pass.
2025-09-26 03:50:19 +00:00 · 2024-02-13 18:14:56 +05:30 · 2024-02-13 18:14:56 +05:30 · 6f9c128d77
commit 6f9c128d77
parent 6380c90031
4 changed files with 96 additions and 59 deletions
--- a/crates/ruff_python_formatter/src/other/bytes_literal.rs
+++ b/crates/ruff_python_formatter/src/other/bytes_literal.rs
@ -2,8 +2,7 @@ use ruff_python_ast::BytesLiteral;
 use ruff_text_size::Ranged;

 use crate::prelude::*;
-use crate::preview::is_hex_codes_in_unicode_sequences_enabled;
-use crate::string::{Quoting, StringPart};
+use crate::string::{StringNormalizer, StringPart};

 #[derive(Default)]
 pub struct FormatBytesLiteral;
@ -12,14 +11,9 @@ impl FormatNodeRule<BytesLiteral> for FormatBytesLiteral {
    fn fmt_fields(&self, item: &BytesLiteral, f: &mut PyFormatter) -> FormatResult<()> {
        let locator = f.context().locator();

-        StringPart::from_source(item.range(), &locator)
-            .normalize(
-                Quoting::CanChange,
-                &locator,
-                f.options().quote_style(),
-                f.context().docstring(),
-                is_hex_codes_in_unicode_sequences_enabled(f.context()),
-            )
+        StringNormalizer::from_context(f.context())
+            .with_preferred_quote_style(f.options().quote_style())
+            .normalize(&StringPart::from_source(item.range(), &locator), &locator)
            .fmt(f)
    }
 }
--- a/crates/ruff_python_formatter/src/other/f_string.rs
+++ b/crates/ruff_python_formatter/src/other/f_string.rs
@ -2,8 +2,7 @@ use ruff_python_ast::FString;
 use ruff_text_size::Ranged;

 use crate::prelude::*;
-use crate::preview::is_hex_codes_in_unicode_sequences_enabled;
-use crate::string::{Quoting, StringPart};
+use crate::string::{Quoting, StringNormalizer, StringPart};

 /// Formats an f-string which is part of a larger f-string expression.
 ///
@ -26,13 +25,12 @@ impl Format<PyFormatContext<'_>> for FormatFString<'_> {
    fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
        let locator = f.context().locator();

-        let result = StringPart::from_source(self.value.range(), &locator)
+        let result = StringNormalizer::from_context(f.context())
+            .with_quoting(self.quoting)
+            .with_preferred_quote_style(f.options().quote_style())
            .normalize(
-                self.quoting,
+                &StringPart::from_source(self.value.range(), &locator),
                &locator,
-                f.options().quote_style(),
-                f.context().docstring(),
-                is_hex_codes_in_unicode_sequences_enabled(f.context()),
            )
            .fmt(f);

--- a/crates/ruff_python_formatter/src/other/string_literal.rs
+++ b/crates/ruff_python_formatter/src/other/string_literal.rs
@ -2,8 +2,7 @@ use ruff_python_ast::StringLiteral;
 use ruff_text_size::Ranged;

 use crate::prelude::*;
-use crate::preview::is_hex_codes_in_unicode_sequences_enabled;
-use crate::string::{docstring, Quoting, StringPart};
+use crate::string::{docstring, Quoting, StringNormalizer, StringPart};
 use crate::QuoteStyle;

 pub(crate) struct FormatStringLiteral<'a> {
@ -59,13 +58,13 @@ impl Format<PyFormatContext<'_>> for FormatStringLiteral<'_> {
            quote_style
        };

-        let normalized = StringPart::from_source(self.value.range(), &locator).normalize(
-            self.layout.quoting(),
-            &locator,
-            quote_style,
-            f.context().docstring(),
-            is_hex_codes_in_unicode_sequences_enabled(f.context()),
-        );
+        let normalized = StringNormalizer::from_context(f.context())
+            .with_quoting(self.layout.quoting())
+            .with_preferred_quote_style(quote_style)
+            .normalize(
+                &StringPart::from_source(self.value.range(), &locator),
+                &locator,
+            );

        if self.layout.is_docstring() {
            docstring::format(&normalized, f)
--- a/crates/ruff_python_formatter/src/string/mod.rs
+++ b/crates/ruff_python_formatter/src/string/mod.rs
@ -18,6 +18,7 @@ use crate::expression::parentheses::in_parentheses_only_soft_line_break_or_space
 use crate::other::f_string::FormatFString;
 use crate::other::string_literal::{FormatStringLiteral, StringLiteralKind};
 use crate::prelude::*;
+use crate::preview::is_hex_codes_in_unicode_sequences_enabled;
 use crate::QuoteStyle;

 pub(crate) mod docstring;
@ -291,23 +292,54 @@ impl StringPart {
        }
    }

-    /// Computes the strings preferred quotes and normalizes its content.
-    ///
-    /// The parent docstring quote style should be set when formatting a code
-    /// snippet within the docstring. The quote style should correspond to the
-    /// style of quotes used by said docstring. Normalization will ensure the
-    /// quoting styles don't conflict.
-    pub(crate) fn normalize<'a>(
-        self,
-        quoting: Quoting,
-        locator: &'a Locator,
-        configured_style: QuoteStyle,
-        parent_docstring_quote_char: Option<QuoteChar>,
-        normalize_hex: bool,
-    ) -> NormalizedString<'a> {
+    /// Returns the prefix of the string part.
+    pub(crate) const fn prefix(&self) -> StringPrefix {
+        self.prefix
+    }
+
+    /// Returns the surrounding quotes of the string part.
+    pub(crate) const fn quotes(&self) -> StringQuotes {
+        self.quotes
+    }
+
+    /// Returns the range of the string's content in the source (minus prefix and quotes).
+    pub(crate) const fn content_range(&self) -> TextRange {
+        self.content_range
+    }
+}
+
+pub(crate) struct StringNormalizer {
+    quoting: Quoting,
+    preferred_quote_style: QuoteStyle,
+    parent_docstring_quote_char: Option<QuoteChar>,
+    normalize_hex: bool,
+}
+
+impl StringNormalizer {
+    pub(crate) fn from_context(context: &PyFormatContext<'_>) -> Self {
+        Self {
+            quoting: Quoting::default(),
+            preferred_quote_style: QuoteStyle::default(),
+            parent_docstring_quote_char: context.docstring(),
+            normalize_hex: is_hex_codes_in_unicode_sequences_enabled(context),
+        }
+    }
+
+    pub(crate) fn with_preferred_quote_style(mut self, quote_style: QuoteStyle) -> Self {
+        self.preferred_quote_style = quote_style;
+        self
+    }
+
+    pub(crate) fn with_quoting(mut self, quoting: Quoting) -> Self {
+        self.quoting = quoting;
+        self
+    }
+
+    /// Computes the strings preferred quotes.
+    pub(crate) fn choose_quotes(&self, string: &StringPart, locator: &Locator) -> StringQuotes {
        // Per PEP 8, always prefer double quotes for triple-quoted strings.
        // Except when using quote-style-preserve.
-        let preferred_style = if self.quotes.triple {
+        let preferred_style = if string.quotes().triple {
            // ... unless we're formatting a code snippet inside a docstring,
            // then we specifically want to invert our quote style to avoid
            // writing out invalid Python.
@ -353,39 +385,49 @@ impl StringPart {
            // Overall this is a bit of a corner case and just inverting the
            // style from what the parent ultimately decided upon works, even
            // if it doesn't have perfect alignment with PEP8.
-            if let Some(quote) = parent_docstring_quote_char {
+            if let Some(quote) = self.parent_docstring_quote_char {
                QuoteStyle::from(quote.invert())
-            } else if configured_style.is_preserve() {
+            } else if self.preferred_quote_style.is_preserve() {
                QuoteStyle::Preserve
            } else {
                QuoteStyle::Double
            }
        } else {
-            configured_style
+            self.preferred_quote_style
        };

-        let raw_content = &locator.slice(self.content_range);
-
-        let quotes = match quoting {
-            Quoting::Preserve => self.quotes,
+        match self.quoting {
+            Quoting::Preserve => string.quotes(),
            Quoting::CanChange => {
                if let Some(preferred_quote) = QuoteChar::from_style(preferred_style) {
-                    if self.prefix.is_raw_string() {
-                        choose_quotes_raw(raw_content, self.quotes, preferred_quote)
+                    let raw_content = locator.slice(string.content_range());
+                    if string.prefix().is_raw_string() {
+                        choose_quotes_for_raw_string(raw_content, string.quotes(), preferred_quote)
                    } else {
-                        choose_quotes(raw_content, self.quotes, preferred_quote)
+                        choose_quotes_impl(raw_content, string.quotes(), preferred_quote)
                    }
                } else {
-                    self.quotes
+                    string.quotes()
                }
            }
-        };
+        }
+    }

-        let normalized = normalize_string(raw_content, quotes, self.prefix, normalize_hex);
+    /// Computes the strings preferred quotes and normalizes its content.
+    pub(crate) fn normalize<'a>(
+        &self,
+        string: &StringPart,
+        locator: &'a Locator,
+    ) -> NormalizedString<'a> {
+        let raw_content = locator.slice(string.content_range());
+
+        let quotes = self.choose_quotes(string, locator);
+
+        let normalized = normalize_string(raw_content, quotes, string.prefix(), self.normalize_hex);

        NormalizedString {
-            prefix: self.prefix,
-            content_range: self.content_range,
+            prefix: string.prefix(),
+            content_range: string.content_range(),
            text: normalized,
            quotes,
        }
@ -512,7 +554,7 @@ impl Format<PyFormatContext<'_>> for StringPrefix {
 /// The preferred quote style is chosen unless the string contains unescaped quotes of the
 /// preferred style. For example, `r"foo"` is chosen over `r'foo'` if the preferred quote
 /// style is double quotes.
-fn choose_quotes_raw(
+fn choose_quotes_for_raw_string(
    input: &str,
    quotes: StringQuotes,
    preferred_quote: QuoteChar,
@ -571,7 +613,11 @@ fn choose_quotes_raw(
 /// For triple quoted strings, the preferred quote style is always used, unless the string contains
 /// a triplet of the quote character (e.g., if double quotes are preferred, double quotes will be
 /// used unless the string contains `"""`).
-fn choose_quotes(input: &str, quotes: StringQuotes, preferred_quote: QuoteChar) -> StringQuotes {
+fn choose_quotes_impl(
+    input: &str,
+    quotes: StringQuotes,
+    preferred_quote: QuoteChar,
+) -> StringQuotes {
    let quote = if quotes.triple {
        // True if the string contains a triple quote sequence of the configured quote style.
        let mut uses_triple_quotes = false;
@ -780,7 +826,7 @@ impl TryFrom<char> for QuoteChar {
 /// with the provided [`StringQuotes`] style.
 ///
 /// Returns the normalized string and whether it contains new lines.
-fn normalize_string(
+pub(crate) fn normalize_string(
    input: &str,
    quotes: StringQuotes,
    prefix: StringPrefix,