Extract string part and normalized string (#7219)

2025-08-19 01:51:30 +00:00 · 2023-09-08 12:56:55 +02:00 · 2023-09-08 12:56:55 +02:00 · 0a07a2ca62
commit 0a07a2ca62
parent 47a253fb62
1 changed files with 102 additions and 85 deletions
--- a/crates/ruff_python_formatter/src/expression/string.rs
+++ b/crates/ruff_python_formatter/src/expression/string.rs
@ -138,29 +138,26 @@ impl<'a> FormatString<'a> {
 impl<'a> Format<PyFormatContext<'_>> for FormatString<'a> {
    fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
        let locator = f.context().locator();
        match self.layout {
            StringLayout::Default => {
                if self.string.is_implicit_concatenated() {
                    in_parentheses_only_group(&FormatStringContinuation::new(self.string)).fmt(f)
                } else {
-                    FormatStringPart::new(
+                    StringPart::from_source(self.string.range(), &locator)
-                        self.string.range(),
+                        .normalize(
-                        self.string.quoting(&f.context().locator()),
+                            self.string.quoting(&locator),
-                        &f.context().locator(),
+                            &locator,
                            f.options().quote_style(),
                        )
                        .fmt(f)
                }
            }
            StringLayout::DocString => {
-                let string_part = FormatStringPart::new(
+                let string_part = StringPart::from_source(self.string.range(), &locator);
-                    self.string.range(),
+                let normalized =
-                    // f-strings can't be docstrings
+                    string_part.normalize(Quoting::CanChange, &locator, f.options().quote_style());
-                    Quoting::CanChange,
+                format_docstring(&normalized, f)
                    &f.context().locator(),
                    f.options().quote_style(),
                );
                format_docstring(&string_part, f)
            }
            StringLayout::ImplicitConcatenatedStringInBinaryLike => {
                FormatStringContinuation::new(self.string).fmt(f)
@ -259,16 +256,14 @@ impl Format<PyFormatContext<'_>> for FormatStringContinuation<'_> {
                    });
                    let (trailing_part_comments, rest) = rest.split_at(trailing_comments_end);
                    let part = StringPart::from_source(token_range, &locator);
                    let normalized =
                        part.normalize(self.string.quoting(&locator), &locator, quote_style);
                    joiner.entry(&format_args![
                        line_suffix_boundary(),
                        leading_comments(leading_part_comments),
-                        FormatStringPart::new(
+                        normalized,
                            token_range,
                            self.string.quoting(&locator),
                            &locator,
                            quote_style,
                        ),
                        trailing_comments(trailing_part_comments)
                    ]);
@ -289,21 +284,20 @@ impl Format<PyFormatContext<'_>> for FormatStringContinuation<'_> {
    }
 }
-struct FormatStringPart {
+#[derive(Debug)]
 struct StringPart {
    /// The prefix.
    prefix: StringPrefix,
-    preferred_quotes: StringQuotes,
+
-    range: TextRange,
+    /// The actual quotes of the string in the source
-    is_raw_string: bool,
+    quotes: StringQuotes,
    /// The range of the string's content (full range minus quotes and prefix)
    content_range: TextRange,
 }
-impl Ranged for FormatStringPart {
+impl StringPart {
-    fn range(&self) -> TextRange {
+    fn from_source(range: TextRange, locator: &Locator) -> Self {
        self.range
    }
 }
 impl FormatStringPart {
    fn new(range: TextRange, quoting: Quoting, locator: &Locator, quote_style: QuoteStyle) -> Self {
        let string_content = locator.slice(range);
        let prefix = StringPrefix::parse(string_content);
@ -317,46 +311,80 @@ impl FormatStringPart {
        );
        let raw_content_range = relative_raw_content_range + range.start();
-        let raw_content = &string_content[relative_raw_content_range];
+        Self {
-        let is_raw_string = prefix.is_raw_string();
+            prefix,
            content_range: raw_content_range,
            quotes,
        }
    }
    /// Computes the strings preferred quotes and normalizes its content.
    fn normalize<'a>(
        self,
        quoting: Quoting,
        locator: &'a Locator,
        quote_style: QuoteStyle,
    ) -> NormalizedString<'a> {
        let raw_content = locator.slice(self.content_range);
        let preferred_quotes = match quoting {
-            Quoting::Preserve => quotes,
+            Quoting::Preserve => self.quotes,
            Quoting::CanChange => {
-                if is_raw_string {
+                if self.prefix.is_raw_string() {
-                    preferred_quotes_raw(raw_content, quotes, quote_style)
+                    preferred_quotes_raw(raw_content, self.quotes, quote_style)
                } else {
-                    preferred_quotes(raw_content, quotes, quote_style)
+                    preferred_quotes(raw_content, self.quotes, quote_style)
                }
            }
        };
        Self {
            prefix,
            range: raw_content_range,
            preferred_quotes,
            is_raw_string,
        }
    }
 }
 impl Format<PyFormatContext<'_>> for FormatStringPart {
    fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
        let normalized = normalize_string(
-            f.context().locator().slice(self.range),
+            locator.slice(self.content_range),
-            self.preferred_quotes,
+            preferred_quotes,
-            self.is_raw_string,
+            self.prefix.is_raw_string(),
        );
-        write!(f, [self.prefix, self.preferred_quotes])?;
+        NormalizedString {
-        match normalized {
+            prefix: self.prefix,
            content_range: self.content_range,
            text: normalized,
            quotes: preferred_quotes,
        }
    }
 }
 #[derive(Debug)]
 struct NormalizedString<'a> {
    prefix: StringPrefix,
    /// The quotes of the normalized string (preferred quotes)
    quotes: StringQuotes,
    /// The range of the string's content in the source (minus prefix and quotes).
    content_range: TextRange,
    /// The normalized text
    text: Cow<'a, str>,
 }
 impl Ranged for NormalizedString<'_> {
    fn range(&self) -> TextRange {
        self.content_range
    }
 }
 impl Format<PyFormatContext<'_>> for NormalizedString<'_> {
    fn fmt(&self, f: &mut Formatter<PyFormatContext<'_>>) -> FormatResult<()> {
        write!(f, [self.prefix, self.quotes])?;
        match &self.text {
            Cow::Borrowed(_) => {
                source_text_slice(self.range()).fmt(f)?;
            }
            Cow::Owned(normalized) => {
-                text(&normalized, Some(self.start())).fmt(f)?;
+                text(normalized, Some(self.start())).fmt(f)?;
            }
        }
-        self.preferred_quotes.fmt(f)
+        self.quotes.fmt(f)
    }
 }
@ -802,35 +830,30 @@ fn count_indentation_like_black(line: &str, tab_width: TabWidth) -> TextSize {
 ///         line c
 ///    """
 /// ```
-fn format_docstring(string_part: &FormatStringPart, f: &mut PyFormatter) -> FormatResult<()> {
+fn format_docstring(normalized: &NormalizedString, f: &mut PyFormatter) -> FormatResult<()> {
-    let locator = f.context().locator();
+    let docstring = &normalized.text;
    // Black doesn't change the indentation of docstrings that contain an escaped newline
-    if locator.slice(string_part).contains("\\\n") {
+    if docstring.contains("\\\n") {
-        return string_part.fmt(f);
+        return normalized.fmt(f);
    }
    let normalized = normalize_string(
        locator.slice(string_part),
        string_part.preferred_quotes,
        string_part.is_raw_string,
    );
    // is_borrowed is unstable :/
-    let already_normalized = matches!(normalized, Cow::Borrowed(_));
+    let already_normalized = matches!(docstring, Cow::Borrowed(_));
-    let mut lines = normalized.lines().peekable();
+    let mut lines = docstring.lines().peekable();
    // Start the string
    write!(
        f,
        [
-            source_position(string_part.start()),
+            normalized.prefix,
-            string_part.prefix,
+            normalized.quotes,
-            string_part.preferred_quotes
+            source_position(normalized.start()),
        ]
    )?;
    // We track where in the source docstring we are (in source code byte offsets)
-    let mut offset = string_part.start();
+    let mut offset = normalized.start();
    // The first line directly after the opening quotes has different rules than the rest, mainly
    // that we remove all leading whitespace as there's no indentation
@ -844,7 +867,7 @@ fn format_docstring(string_part: &FormatStringPart, f: &mut PyFormatter) -> Form
    // Edge case: The first line is `""" "content`, so we need to insert chaperone space that keep
    // inner quotes and closing quotes from getting to close to avoid `""""content`
-    if trim_both.starts_with(string_part.preferred_quotes.style.as_char()) {
+    if trim_both.starts_with(normalized.quotes.style.as_char()) {
        space().fmt(f)?;
    }
@ -863,15 +886,15 @@ fn format_docstring(string_part: &FormatStringPart, f: &mut PyFormatter) -> Form
    offset += first.text_len();
    // Check if we have a single line (or empty) docstring
-    if normalized[first.len()..].trim().is_empty() {
+    if docstring[first.len()..].trim().is_empty() {
        // For `"""\n"""` or other whitespace between the quotes, black keeps a single whitespace,
        // but `""""""` doesn't get one inserted.
-        if needs_chaperone_space(string_part, trim_end)
+        if needs_chaperone_space(normalized, trim_end)
-            || (trim_end.is_empty() && !normalized.is_empty())
+            || (trim_end.is_empty() && !docstring.is_empty())
        {
            space().fmt(f)?;
        }
-        string_part.preferred_quotes.fmt(f)?;
+        normalized.quotes.fmt(f)?;
        return Ok(());
    }
@ -906,27 +929,21 @@ fn format_docstring(string_part: &FormatStringPart, f: &mut PyFormatter) -> Form
    }
    // Same special case in the last line as for the first line
-    let trim_end = normalized
+    let trim_end = docstring
        .as_ref()
        .trim_end_matches(|c: char| c.is_whitespace() && c != '\n');
-    if needs_chaperone_space(string_part, trim_end) {
+    if needs_chaperone_space(normalized, trim_end) {
        space().fmt(f)?;
    }
-    write!(
+    write!(f, [source_position(normalized.end()), normalized.quotes])
        f,
        [
            string_part.preferred_quotes,
            source_position(string_part.end())
        ]
    )
 }
 /// If the last line of the docstring is `content" """` or `content\ """`, we need a chaperone space
 /// that avoids `content""""` and `content\"""`. This does only applies to un-escaped backslashes,
 /// so `content\\ """` doesn't need a space while `content\\\ """` does.
-fn needs_chaperone_space(string_part: &FormatStringPart, trim_end: &str) -> bool {
+fn needs_chaperone_space(normalized: &NormalizedString, trim_end: &str) -> bool {
-    trim_end.ends_with(string_part.preferred_quotes.style.as_char())
+    trim_end.ends_with(normalized.quotes.style.as_char())
        || trim_end.chars().rev().take_while(|c| *c == '\\').count() % 2 == 1
 }