Skip LibCST parsing for standard dedent adjustments (#9769)

## Summary

Often, when applying a fix, we need to dedent a block of code (e.g., if
we remove an `if` and dedent its body). Today, we use LibCST to parse and
adjust the indentation, which is really expensive -- but this is only
really necessary if the block contains a multi-line string, since naively
adjusting the indentation for such a string can change the whitespace
_within_ the string.

This PR uses a simple dedent implementation for cases in which the block
doesn't intersect with a multi-line string (or an f-string, since we
don't support tracking multi-line strings for f-strings right now).

We could improve this even further by using the ranges to guide the
dedent function, such that we don't apply the dedent if the line starts
within a multi-line string. But that would also need to take f-strings
into account, which is a little tricky.

## Test Plan

`cargo test`
This commit is contained in:
Charlie Marsh 2024-02-02 10:13:46 -08:00 committed by GitHub
parent 4f7fb566f0
commit c3ca34543f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 193 additions and 19 deletions

View file

@ -8,6 +8,7 @@ use ruff_python_ast::{self as ast, Arguments, ExceptHandler, Stmt};
use ruff_python_ast::{AnyNodeRef, ArgOrKeyword};
use ruff_python_codegen::Stylist;
use ruff_python_index::Indexer;
use ruff_python_trivia::textwrap::dedent_to;
use ruff_python_trivia::{
has_leading_content, is_python_whitespace, CommentRanges, PythonWhitespace, SimpleTokenKind,
SimpleTokenizer,
@ -169,29 +170,47 @@ pub(crate) fn add_argument(
}
/// Safely adjust the indentation of the indented block at [`TextRange`].
///
/// The [`TextRange`] is assumed to represent an entire indented block, including the leading
/// indentation of that block. For example, to dedent the body here:
/// ```python
/// if True:
///     print("Hello, world!")
/// ```
///
/// The range would be the entirety of `    print("Hello, world!")`.
///
/// Two strategies are used, chosen by what the [`Indexer`] reports for `range`:
///
/// * If the range intersects a multi-line string or an f-string, the block is wrapped in a
///   synthetic `def f():` and re-parsed, so that only the block's own indentation is replaced
///   and whitespace _within_ string literals is left untouched.
/// * Otherwise, the text is passed directly to [`dedent_to`], avoiding a parse altogether.
///
/// # Errors
///
/// Propagates any error returned by `match_statement`, `match_function_def`, or
/// `match_indented_block` while re-parsing the wrapped block.
///
/// # Panics
///
/// Panics if the regenerated block does not start with the stylist's line ending.
pub(crate) fn adjust_indentation(
    range: TextRange,
    indentation: &str,
    locator: &Locator,
    indexer: &Indexer,
    stylist: &Stylist,
) -> Result<String> {
    // Slice once; both strategies operate on the same source text.
    let contents = locator.slice(range);

    // If the range includes a multi-line string, use LibCST to ensure that we don't adjust the
    // whitespace _within_ the string. F-strings are treated the same way.
    if indexer.multiline_ranges().intersects(range) || indexer.fstring_ranges().intersects(range) {
        // Embed the block as the body of a throwaway function so that it parses as a statement.
        let module_text = format!("def f():{}{contents}", stylist.line_ending().as_str());

        let mut tree = match_statement(&module_text)?;

        let embedding = match_function_def(&mut tree)?;

        // Swap in the requested indentation for the function body.
        let indented_block = match_indented_block(&mut embedding.body)?;
        indented_block.indent = Some(indentation);

        // The generated block is expected to begin with the line ending that followed
        // `def f():`; strip it so that only the re-indented body is returned.
        let module_text = indented_block.codegen_stylist(stylist);
        let module_text = module_text
            .strip_prefix(stylist.line_ending().as_str())
            .unwrap()
            .to_string();
        Ok(module_text)
    } else {
        // Otherwise, we can do a simple adjustment ourselves.
        Ok(dedent_to(contents, indentation))
    }
}
/// Determine if a vector contains only one, specific element.