Skip LibCST parsing for standard dedent adjustments (#9769)

## Summary

Often, when fixing, we need to dedent a block of code (e.g., if we remove an `if` and dedent its body). Today, we use LibCST to parse and adjust the indentation, which is really expensive -- but this is only really necessary if the block contains a multiline string, since naively adjusting the indentation for such a string can change the whitespace _within_ the string.

This PR uses a simple dedent implementation for cases in which the block doesn't intersect with a multi-line string (or an f-string, since we don't support tracking multi-line strings for f-strings right now).

We could improve this even further by using the ranges to guide the dedent function, such that we don't apply the dedent if the line starts within a multiline string. But that would also need to take f-strings into account, which is a little tricky.

## Test Plan

`cargo test`
2025-09-26 11:59:10 +00:00 · 2024-02-02 10:13:46 -08:00 · 2024-02-02 10:13:46 -08:00 · c3ca34543f
commit c3ca34543f
parent 4f7fb566f0
12 changed files with 193 additions and 19 deletions
--- a/crates/ruff_linter/resources/test/fixtures/pyupgrade/UP036_0.py
+++ b/crates/ruff_linter/resources/test/fixtures/pyupgrade/UP036_0.py
@@ -215,3 +215,13 @@ if sys.version_info[:2] > (3,13):

 if sys.version_info[:3] > (3,13):
    print("py3")
+
+if sys.version_info > (3,0):
+    f"this is\
+    allowed too"
+
+    f"""the indentation on
+    this line is significant"""
+
+    "this is\
+    allowed too"
--- a/crates/ruff_linter/src/fix/edits.rs
+++ b/crates/ruff_linter/src/fix/edits.rs
@@ -8,6 +8,7 @@ use ruff_python_ast::{self as ast, Arguments, ExceptHandler, Stmt};
 use ruff_python_ast::{AnyNodeRef, ArgOrKeyword};
 use ruff_python_codegen::Stylist;
 use ruff_python_index::Indexer;
+use ruff_python_trivia::textwrap::dedent_to;
 use ruff_python_trivia::{
    has_leading_content, is_python_whitespace, CommentRanges, PythonWhitespace, SimpleTokenKind,
    SimpleTokenizer,
@@ -169,29 +170,47 @@ pub(crate) fn add_argument(
 }

 /// Safely adjust the indentation of the indented block at [`TextRange`].
+///
+/// The [`TextRange`] is assumed to represent an entire indented block, including the leading
+/// indentation of that block. For example, to dedent the body here:
+/// ```python
+/// if True:
+///     print("Hello, world!")
+/// ```
+///
+/// The range would be the entirety of `    print("Hello, world!")`.
 pub(crate) fn adjust_indentation(
    range: TextRange,
    indentation: &str,
    locator: &Locator,
+    indexer: &Indexer,
    stylist: &Stylist,
 ) -> Result<String> {
-    let contents = locator.slice(range);
+    // If the range includes a multi-line string, use LibCST to ensure that we don't adjust the
+    // whitespace _within_ the string.
+    if indexer.multiline_ranges().intersects(range) || indexer.fstring_ranges().intersects(range) {
+        let contents = locator.slice(range);

-    let module_text = format!("def f():{}{contents}", stylist.line_ending().as_str());
+        let module_text = format!("def f():{}{contents}", stylist.line_ending().as_str());

-    let mut tree = match_statement(&module_text)?;
+        let mut tree = match_statement(&module_text)?;

-    let embedding = match_function_def(&mut tree)?;
+        let embedding = match_function_def(&mut tree)?;

-    let indented_block = match_indented_block(&mut embedding.body)?;
-    indented_block.indent = Some(indentation);
+        let indented_block = match_indented_block(&mut embedding.body)?;
+        indented_block.indent = Some(indentation);

-    let module_text = indented_block.codegen_stylist(stylist);
-    let module_text = module_text
-        .strip_prefix(stylist.line_ending().as_str())
-        .unwrap()
-        .to_string();
-    Ok(module_text)
+        let module_text = indented_block.codegen_stylist(stylist);
+        let module_text = module_text
+            .strip_prefix(stylist.line_ending().as_str())
+            .unwrap()
+            .to_string();
+        Ok(module_text)
+    } else {
+        // Otherwise, we can do a simple adjustment ourselves.
+        let contents = locator.slice(range);
+        Ok(dedent_to(contents, indentation))
+    }
 }

 /// Determine if a vector contains only one, specific element.
--- a/crates/ruff_linter/src/rules/flake8_return/rules/function.rs
+++ b/crates/ruff_linter/src/rules/flake8_return/rules/function.rs
@@ -852,6 +852,7 @@ fn remove_else(
            TextRange::new(else_colon_end, elif_else.end()),
            desired_indentation,
            locator,
+            indexer,
            stylist,
        )?;

--- a/crates/ruff_linter/src/rules/pycodestyle/rules/trailing_whitespace.rs
+++ b/crates/ruff_linter/src/rules/pycodestyle/rules/trailing_whitespace.rs
@@ -105,7 +105,7 @@ pub(crate) fn trailing_whitespace(
            diagnostic.set_fix(Fix::applicable_edit(
                Edit::range_deletion(range),
                // Removing trailing whitespace is not safe inside multiline strings.
-                if indexer.multiline_ranges().intersects(range) {
+                if indexer.multiline_ranges().contains_range(range) {
                    Applicability::Unsafe
                } else {
                    Applicability::Safe
--- a/crates/ruff_linter/src/rules/pylint/rules/collapsible_else_if.rs
+++ b/crates/ruff_linter/src/rules/pylint/rules/collapsible_else_if.rs
@@ -5,6 +5,7 @@ use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation};
 use ruff_macros::{derive_message_formats, violation};
 use ruff_python_ast::{self as ast, ElifElseClause, Stmt};
 use ruff_python_codegen::Stylist;
+use ruff_python_index::Indexer;
 use ruff_source_file::Locator;
 use ruff_text_size::{Ranged, TextRange};

@@ -84,8 +85,15 @@ pub(crate) fn collapsible_else_if(checker: &mut Checker, stmt: &Stmt) {
        CollapsibleElseIf,
        TextRange::new(else_clause.start(), first.start()),
    );
-    diagnostic
-        .try_set_fix(|| convert_to_elif(first, else_clause, checker.locator(), checker.stylist()));
+    diagnostic.try_set_fix(|| {
+        convert_to_elif(
+            first,
+            else_clause,
+            checker.locator(),
+            checker.indexer(),
+            checker.stylist(),
+        )
+    });
    checker.diagnostics.push(diagnostic);
 }

@@ -94,6 +102,7 @@ fn convert_to_elif(
    first: &Stmt,
    else_clause: &ElifElseClause,
    locator: &Locator,
+    indexer: &Indexer,
    stylist: &Stylist,
 ) -> Result<Fix> {
    let inner_if_line_start = locator.line_start(first.start());
@@ -109,6 +118,7 @@ fn convert_to_elif(
        TextRange::new(inner_if_line_start, inner_if_line_end),
        indentation,
        locator,
+        indexer,
        stylist,
    )?;

--- a/crates/ruff_linter/src/rules/pylint/rules/useless_else_on_loop.rs
+++ b/crates/ruff_linter/src/rules/pylint/rules/useless_else_on_loop.rs
@@ -6,6 +6,7 @@ use ruff_macros::{derive_message_formats, violation};
 use ruff_python_ast::identifier;
 use ruff_python_ast::{self as ast, ExceptHandler, MatchCase, Stmt};
 use ruff_python_codegen::Stylist;
+use ruff_python_index::Indexer;
 use ruff_source_file::Locator;
 use ruff_text_size::{Ranged, TextRange};

@@ -81,6 +82,7 @@ pub(crate) fn useless_else_on_loop(
            orelse,
            else_range,
            checker.locator(),
+            checker.indexer(),
            checker.stylist(),
        )
    });
@@ -134,6 +136,7 @@ fn remove_else(
    orelse: &[Stmt],
    else_range: TextRange,
    locator: &Locator,
+    indexer: &Indexer,
    stylist: &Stylist,
 ) -> Result<Fix> {
    let Some(start) = orelse.first() else {
@@ -164,6 +167,7 @@ fn remove_else(
            ),
            desired_indentation,
            locator,
+            indexer,
            stylist,
        )?;

--- a/crates/ruff_linter/src/rules/pyupgrade/rules/outdated_version_block.rs
+++ b/crates/ruff_linter/src/rules/pyupgrade/rules/outdated_version_block.rs
@@ -302,6 +302,7 @@ fn fix_always_false_branch(
                                ),
                                indentation,
                                checker.locator(),
+                                checker.indexer(),
                                checker.stylist(),
                            )
                            .ok()
@@ -376,6 +377,7 @@ fn fix_always_true_branch(
                            TextRange::new(checker.locator().line_start(start.start()), end.end()),
                            indentation,
                            checker.locator(),
+                            checker.indexer(),
                            checker.stylist(),
                        )
                        .ok()
--- a/crates/ruff_linter/src/rules/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP036_0.py.snap
+++ b/crates/ruff_linter/src/rules/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP036_0.py.snap
@@ -774,4 +774,32 @@ UP036_0.py:210:4: UP036 [*] Version block is outdated for minimum Python version
 213 212 | if sys.version_info[:2] > (3,13):
 214 213 |     print("py3")

+UP036_0.py:219:4: UP036 [*] Version block is outdated for minimum Python version
+    |
+217 |     print("py3")
+218 | 
+219 | if sys.version_info > (3,0):
+    |    ^^^^^^^^^^^^^^^^^^^^^^^^ UP036
+220 |     f"this is\
+221 |     allowed too"
+    |
+    = help: Remove outdated version block
+
+ℹ Unsafe fix
+216 216 | if sys.version_info[:3] > (3,13):
+217 217 |     print("py3")
+218 218 | 
+219     |-if sys.version_info > (3,0):
+220     |-    f"this is\
+    219 |+f"this is\
+221 220 |     allowed too"
+222 221 | 
+223     |-    f"""the indentation on
+    222 |+f"""the indentation on
+224 223 |     this line is significant"""
+225 224 | 
+226     |-    "this is\
+    225 |+"this is\
+227 226 |     allowed too"
+

--- a/crates/ruff_python_index/src/fstring_ranges.rs
+++ b/crates/ruff_python_index/src/fstring_ranges.rs
@@ -14,6 +14,14 @@ pub struct FStringRanges {
 }

 impl FStringRanges {
+    /// Returns `true` if the given range intersects with any f-string range.
+    pub fn intersects(&self, target: TextRange) -> bool {
+        self.raw
+            .values()
+            .take_while(|range| range.start() < target.end())
+            .any(|range| target.intersect(*range).is_some())
+    }
+
    /// Return the [`TextRange`] of the innermost f-string at the given offset.
    pub fn innermost(&self, offset: TextSize) -> Option<TextRange> {
        self.raw
--- a/crates/ruff_python_index/src/multiline_ranges.rs
+++ b/crates/ruff_python_index/src/multiline_ranges.rs
@@ -9,7 +9,7 @@ pub struct MultilineRanges {

 impl MultilineRanges {
    /// Returns `true` if the given range is inside a multiline string.
-    pub fn intersects(&self, target: TextRange) -> bool {
+    pub fn contains_range(&self, target: TextRange) -> bool {
        self.ranges
            .binary_search_by(|range| {
                if range.contains_range(target) {
@@ -22,6 +22,21 @@ impl MultilineRanges {
            })
            .is_ok()
    }
+
+    /// Returns `true` if the given range intersects with any multiline string.
+    pub fn intersects(&self, target: TextRange) -> bool {
+        self.ranges
+            .binary_search_by(|range| {
+                if target.intersect(*range).is_some() {
+                    std::cmp::Ordering::Equal
+                } else if range.end() < target.start() {
+                    std::cmp::Ordering::Less
+                } else {
+                    std::cmp::Ordering::Greater
+                }
+            })
+            .is_ok()
+    }
 }

 #[derive(Default)]
--- a/crates/ruff_python_trivia/src/comment_ranges.rs
+++ b/crates/ruff_python_trivia/src/comment_ranges.rs
@@ -19,11 +19,11 @@ impl CommentRanges {
        Self { raw: ranges }
    }

-    /// Returns `true` if the given range includes a comment.
+    /// Returns `true` if the given range intersects with any comment range.
    pub fn intersects(&self, target: TextRange) -> bool {
        self.raw
            .binary_search_by(|range| {
-                if target.contains_range(*range) {
+                if target.intersect(*range).is_some() {
                    std::cmp::Ordering::Equal
                } else if range.end() < target.start() {
                    std::cmp::Ordering::Less
--- a/crates/ruff_python_trivia/src/textwrap.rs
+++ b/crates/ruff_python_trivia/src/textwrap.rs
@@ -74,7 +74,9 @@ pub fn indent<'a>(text: &'a str, prefix: &str) -> Cow<'a, str> {
 /// Removes common leading whitespace from each line.
 ///
 /// This function will look at each non-empty line and determine the
-/// maximum amount of whitespace that can be removed from all lines:
+/// maximum amount of whitespace that can be removed from all lines.
+///
+/// Lines that consist solely of whitespace are trimmed to a blank line.
 ///
 /// ```
 /// # use ruff_python_trivia::textwrap::dedent;
@@ -122,6 +124,51 @@ pub fn dedent(text: &str) -> Cow<'_, str> {
    Cow::Owned(result)
 }

+/// Reduce a block's indentation to match the provided indentation.
+///
+/// This function looks at the first line in the block to determine the
+/// current indentation, then removes whitespace from each line to
+/// match the provided indentation.
+///
+/// Lines that are indented by _less_ than the indent of the first line
+/// are left unchanged.
+///
+/// Lines that consist solely of whitespace are trimmed to a blank line.
+///
+/// # Panics
+/// If the first line is indented by less than the provided indent.
+pub fn dedent_to(text: &str, indent: &str) -> String {
+    // Look at the indentation of the first line, to determine the "baseline" indentation.
+    let existing_indent_len = text
+        .universal_newlines()
+        .next()
+        .map_or(0, |line| line.len() - line.trim_start().len());
+
+    // Determine the amount of indentation to remove.
+    let dedent_len = existing_indent_len - indent.len();
+
+    let mut result = String::with_capacity(text.len() + indent.len());
+    for line in text.universal_newlines() {
+        let trimmed = line.trim_whitespace_start();
+        if trimmed.is_empty() {
+            if let Some(line_ending) = line.line_ending() {
+                result.push_str(&line_ending);
+            }
+        } else {
+            // Determine the current indentation level.
+            let current_indent_len = line.len() - trimmed.len();
+            if current_indent_len < existing_indent_len {
+                // If the current indentation level is less than the baseline, keep it as is.
+                result.push_str(line.as_full_str());
+            } else {
+                // Otherwise, reduce the indentation level.
+                result.push_str(&line.as_full_str()[dedent_len..]);
+            }
+        }
+    }
+    result
+}
+
 #[cfg(test)]
 mod tests {
    use super::*;
@@ -344,4 +391,34 @@ mod tests {
             ]";
        assert_eq!(dedent(text), text);
    }
+
+    #[test]
+    #[rustfmt::skip]
+    fn adjust_indent() {
+        let x = [
+            "    foo",
+            "  bar",
+            "   ",
+            "    baz"
+        ].join("\n");
+        let y = [
+            "  foo",
+            "  bar",
+            "",
+            "  baz"
+        ].join("\n");
+        assert_eq!(dedent_to(&x, "  "), y);
+
+        let x = [
+            "    foo",
+            "        bar",
+            "    baz",
+        ].join("\n");
+        let y = [
+            "foo",
+            "    bar",
+            "baz"
+        ].join("\n");
+        assert_eq!(dedent_to(&x, ""), y);
+    }
 }