Memoize text width (#6552)

2025-11-25 22:29:02 +00:00 · 2023-09-06 09:10:13 +02:00 · 2023-09-06 09:10:13 +02:00 · 5f59101811
commit 5f59101811
parent fa6bff0078
14 changed files with 213 additions and 184 deletions
--- a/crates/ruff_python_formatter/src/comments/format.rs
+++ b/crates/ruff_python_formatter/src/comments/format.rs
@ -1,7 +1,5 @@
 use std::borrow::Cow;

-use unicode_width::UnicodeWidthChar;
-
 use ruff_formatter::{format_args, write, FormatError, FormatOptions, SourceCode};
 use ruff_python_ast::node::{AnyNodeRef, AstNode};
 use ruff_python_trivia::{lines_after, lines_after_ignoring_trivia, lines_before};
@ -377,16 +375,12 @@ impl Format<PyFormatContext<'_>> for FormatTrailingEndOfLineComment<'_> {
            0
        } else {
            // Start with 2 because of the two leading spaces.
-            let mut width = 2;
-
-            // SAFETY: The formatted file is <= 4GB, and each comment should as well.
-            #[allow(clippy::cast_possible_truncation)]
-            for c in normalized_comment.chars() {
-                width += match c {
-                    '\t' => f.options().tab_width().value(),
-                    c => c.width().unwrap_or(0) as u32,
-                }
-            }
+            let width = 2u32.saturating_add(
+                TextWidth::from_text(&normalized_comment, f.options().tab_width())
+                    .width()
+                    .expect("Expected comment not to contain any newlines")
+                    .value(),
+            );

            width
        };
@ -430,11 +424,9 @@ pub(crate) struct FormatNormalizedComment<'a> {
 impl Format<PyFormatContext<'_>> for FormatNormalizedComment<'_> {
    fn fmt(&self, f: &mut Formatter<PyFormatContext>) -> FormatResult<()> {
        match self.comment {
-            Cow::Borrowed(borrowed) => source_text_slice(
-                TextRange::at(self.range.start(), borrowed.text_len()),
-                ContainsNewlines::No,
-            )
-            .fmt(f),
+            Cow::Borrowed(borrowed) => {
+                source_text_slice(TextRange::at(self.range.start(), borrowed.text_len())).fmt(f)
+            }

            Cow::Owned(ref owned) => {
                write!(
--- a/crates/ruff_python_formatter/src/expression/expr_ipy_escape_command.rs
+++ b/crates/ruff_python_formatter/src/expression/expr_ipy_escape_command.rs
@ -8,6 +8,6 @@ pub struct FormatExprIpyEscapeCommand;

 impl FormatNodeRule<ExprIpyEscapeCommand> for FormatExprIpyEscapeCommand {
    fn fmt_fields(&self, item: &ExprIpyEscapeCommand, f: &mut PyFormatter) -> FormatResult<()> {
-        source_text_slice(item.range(), ContainsNewlines::No).fmt(f)
+        source_text_slice(item.range()).fmt(f)
    }
 }
--- a/crates/ruff_python_formatter/src/expression/expr_name.rs
+++ b/crates/ruff_python_formatter/src/expression/expr_name.rs
@ -21,7 +21,7 @@ impl FormatNodeRule<ExprName> for FormatExprName {
                .text(f.context().source_code())
        );

-        write!(f, [source_text_slice(*range, ContainsNewlines::No)])
+        write!(f, [source_text_slice(*range)])
    }

    fn fmt_dangling_comments(
--- a/crates/ruff_python_formatter/src/expression/number.rs
+++ b/crates/ruff_python_formatter/src/expression/number.rs
@ -24,7 +24,7 @@ impl Format<PyFormatContext<'_>> for FormatInt<'_> {
        let normalized = normalize_integer(content);

        match normalized {
-            Cow::Borrowed(_) => source_text_slice(range, ContainsNewlines::No).fmt(f),
+            Cow::Borrowed(_) => source_text_slice(range).fmt(f),
            Cow::Owned(normalized) => text(&normalized, Some(range.start())).fmt(f),
        }
    }
@ -49,7 +49,7 @@ impl Format<PyFormatContext<'_>> for FormatFloat<'_> {
        let normalized = normalize_floating_number(content);

        match normalized {
-            Cow::Borrowed(_) => source_text_slice(range, ContainsNewlines::No).fmt(f),
+            Cow::Borrowed(_) => source_text_slice(range).fmt(f),
            Cow::Owned(normalized) => text(&normalized, Some(range.start())).fmt(f),
        }
    }
@ -75,7 +75,7 @@ impl Format<PyFormatContext<'_>> for FormatComplex<'_> {

        match normalized {
            Cow::Borrowed(_) => {
-                source_text_slice(range.sub_end(TextSize::from(1)), ContainsNewlines::No).fmt(f)?;
+                source_text_slice(range.sub_end(TextSize::from(1))).fmt(f)?;
            }
            Cow::Owned(normalized) => {
                text(&normalized, Some(range.start())).fmt(f)?;
--- a/crates/ruff_python_formatter/src/expression/string.rs
+++ b/crates/ruff_python_formatter/src/expression/string.rs
@ -314,7 +314,7 @@ impl FormatStringPart {

 impl Format<PyFormatContext<'_>> for FormatStringPart {
    fn fmt(&self, f: &mut PyFormatter) -> FormatResult<()> {
-        let (normalized, contains_newlines) = normalize_string(
+        let normalized = normalize_string(
            f.context().locator().slice(self.range),
            self.preferred_quotes,
            self.is_raw_string,
@ -323,7 +323,7 @@ impl Format<PyFormatContext<'_>> for FormatStringPart {
        write!(f, [self.prefix, self.preferred_quotes])?;
        match normalized {
            Cow::Borrowed(_) => {
-                source_text_slice(self.range(), contains_newlines).fmt(f)?;
+                source_text_slice(self.range()).fmt(f)?;
            }
            Cow::Owned(normalized) => {
                text(&normalized, Some(self.start())).fmt(f)?;
@ -604,11 +604,7 @@ impl Format<PyFormatContext<'_>> for StringQuotes {
 /// with the provided `style`.
 ///
-/// Returns the normalized string and whether it contains new lines.
+/// Returns the normalized string, borrowing `input` when it is already normalized.
-fn normalize_string(
-    input: &str,
-    quotes: StringQuotes,
-    is_raw: bool,
-) -> (Cow<str>, ContainsNewlines) {
+fn normalize_string(input: &str, quotes: StringQuotes, is_raw: bool) -> Cow<str> {
    // The normalized string if `input` is not yet normalized.
    // `output` must remain empty if `input` is already normalized.
    let mut output = String::new();
@ -616,8 +612,6 @@ fn normalize_string(
    // If `last_index` is `0` at the end, then the input is already normalized and can be returned as is.
    let mut last_index = 0;

-    let mut newlines = ContainsNewlines::No;
-
    let style = quotes.style;
    let preferred_quote = style.as_char();
    let opposite_quote = style.invert().as_char();
@ -638,9 +632,6 @@ fn normalize_string(
            }

            last_index = index + '\r'.len_utf8();
-            newlines = ContainsNewlines::Yes;
-        } else if c == '\n' {
-            newlines = ContainsNewlines::Yes;
        } else if !quotes.triple && !is_raw {
            if c == '\\' {
                if let Some(next) = input.as_bytes().get(index + 1).copied().map(char::from) {
@ -675,7 +666,7 @@ fn normalize_string(
        Cow::Owned(output)
    };

-    (normalized, newlines)
+    normalized
 }

 /// For docstring indentation, black counts spaces as 1 and tabs by increasing the indentation up
@ -792,7 +783,7 @@ fn format_docstring(string_part: &FormatStringPart, f: &mut PyFormatter) -> Form
        return string_part.fmt(f);
    }

-    let (normalized, _) = normalize_string(
+    let normalized = normalize_string(
        locator.slice(string_part),
        string_part.preferred_quotes,
        string_part.is_raw_string,
@ -837,7 +828,7 @@ fn format_docstring(string_part: &FormatStringPart, f: &mut PyFormatter) -> Form
        let trimmed_line_range =
            TextRange::at(offset, trim_end.text_len()).add_start(leading_whitespace);
        if already_normalized {
-            source_text_slice(trimmed_line_range, ContainsNewlines::No).fmt(f)?;
+            source_text_slice(trimmed_line_range).fmt(f)?;
        } else {
            text(trim_both, Some(trimmed_line_range.start())).fmt(f)?;
        }
@ -954,7 +945,7 @@ fn format_docstring_line(
        let trimmed_line_range =
            TextRange::at(offset, trim_end.text_len()).add_start(stripped_indentation);
        if already_normalized {
-            source_text_slice(trimmed_line_range, ContainsNewlines::No).fmt(f)?;
+            source_text_slice(trimmed_line_range).fmt(f)?;
        } else {
            // All indents are ascii spaces, so the slicing is correct
            text(
--- a/crates/ruff_python_formatter/src/other/identifier.rs
+++ b/crates/ruff_python_formatter/src/other/identifier.rs
@ -8,7 +8,7 @@ pub struct FormatIdentifier;

 impl FormatRule<Identifier, PyFormatContext<'_>> for FormatIdentifier {
    fn fmt(&self, item: &Identifier, f: &mut PyFormatter) -> FormatResult<()> {
-        source_text_slice(item.range(), ContainsNewlines::No).fmt(f)
+        source_text_slice(item.range()).fmt(f)
    }
 }

--- a/crates/ruff_python_formatter/src/statement/stmt_ipy_escape_command.rs
+++ b/crates/ruff_python_formatter/src/statement/stmt_ipy_escape_command.rs
@ -9,7 +9,7 @@ pub struct FormatStmtIpyEscapeCommand;

 impl FormatNodeRule<StmtIpyEscapeCommand> for FormatStmtIpyEscapeCommand {
    fn fmt_fields(&self, item: &StmtIpyEscapeCommand, f: &mut PyFormatter) -> FormatResult<()> {
-        source_text_slice(item.range(), ContainsNewlines::No).fmt(f)
+        source_text_slice(item.range()).fmt(f)
    }

    fn is_suppressed(
--- a/crates/ruff_python_formatter/src/verbatim.rs
+++ b/crates/ruff_python_formatter/src/verbatim.rs
@ -709,7 +709,7 @@ impl Format<PyFormatContext<'_>> for FormatVerbatimStatementRange {
                }
            } else {
                // Non empty line, write the text of the line
-                verbatim_text(trimmed_line_range, logical_line.contains_newlines).fmt(f)?;
+                verbatim_text(trimmed_line_range).fmt(f)?;

                // Write the line separator that terminates the line, except if it is the last line (that isn't separated by a hard line break).
                if logical_line.has_trailing_newline {
@ -760,7 +760,6 @@ where

    fn next(&mut self) -> Option<Self::Item> {
        let mut parens = 0u32;
-        let mut contains_newlines = ContainsNewlines::No;

        let (content_end, full_end) = loop {
            match self.lexer.next() {
@ -768,18 +767,12 @@ where
                    Tok::Newline => break (range.start(), range.end()),
                    // Ignore if inside an expression
                    Tok::NonLogicalNewline if parens == 0 => break (range.start(), range.end()),
-                    Tok::NonLogicalNewline => {
-                        contains_newlines = ContainsNewlines::Yes;
-                    }
                    Tok::Lbrace | Tok::Lpar | Tok::Lsqb => {
                        parens = parens.saturating_add(1);
                    }
                    Tok::Rbrace | Tok::Rpar | Tok::Rsqb => {
                        parens = parens.saturating_sub(1);
                    }
-                    Tok::String { value, .. } if value.contains(['\n', '\r']) => {
-                        contains_newlines = ContainsNewlines::Yes;
-                    }
                    _ => {}
                },
                None => {
@ -790,7 +783,6 @@ where
                        self.last_line_end = self.content_end;
                        Some(Ok(LogicalLine {
                            content_range: TextRange::new(content_start, self.content_end),
-                            contains_newlines: ContainsNewlines::No,
                            has_trailing_newline: false,
                        }))
                    } else {
@ -810,7 +802,6 @@ where

        Some(Ok(LogicalLine {
            content_range: TextRange::new(line_start, content_end),
-            contains_newlines,
            has_trailing_newline: true,
        }))
    }
@ -822,8 +813,6 @@ impl<I> FusedIterator for LogicalLinesIter<I> where I: Iterator<Item = LexResult
 struct LogicalLine {
    /// The range of this lines content (excluding the trailing newline)
    content_range: TextRange,
-    /// Whether the content in `content_range` contains any newlines.
-    contains_newlines: ContainsNewlines,
    /// Does this logical line have a trailing newline or does it just happen to be the last line.
    has_trailing_newline: bool,
 }
@ -836,16 +825,14 @@ impl Ranged for LogicalLine {

 struct VerbatimText {
    verbatim_range: TextRange,
-    contains_newlines: ContainsNewlines,
 }

-fn verbatim_text<T>(item: T, contains_newlines: ContainsNewlines) -> VerbatimText
+fn verbatim_text<T>(item: T) -> VerbatimText
 where
    T: Ranged,
 {
    VerbatimText {
        verbatim_range: item.range(),
-        contains_newlines,
    }
 }

@ -859,13 +846,7 @@ impl Format<PyFormatContext<'_>> for VerbatimText {

        match normalize_newlines(f.context().locator().slice(self.verbatim_range), ['\r']) {
            Cow::Borrowed(_) => {
-                write!(
-                    f,
-                    [source_text_slice(
-                        self.verbatim_range,
-                        self.contains_newlines
-                    )]
-                )?;
+                write!(f, [source_text_slice(self.verbatim_range,)])?;
            }
            Cow::Owned(cleaned) => {
                write!(
@ -924,7 +905,7 @@ impl Format<PyFormatContext<'_>> for FormatSuppressedNode<'_> {
            f,
            [
                leading_comments(node_comments.leading),
-                verbatim_text(self.node, ContainsNewlines::Detect),
+                verbatim_text(self.node),
                trailing_comments(node_comments.trailing)
            ]
        )
@ -937,13 +918,7 @@ pub(crate) fn write_suppressed_clause_header(
    f: &mut PyFormatter,
 ) -> FormatResult<()> {
    // Write the outer comments and format the node as verbatim
-    write!(
-        f,
-        [verbatim_text(
-            header.range(f.context().source())?,
-            ContainsNewlines::Detect
-        )]
-    )?;
+    write!(f, [verbatim_text(header.range(f.context().source())?)])?;

    let comments = f.context().comments();
    header.visit(&mut |child| {