Skip LibCST parsing for standard dedent adjustments (#9769)

## Summary

Often, when fixing, we need to dedent a block of code (e.g., if we
remove an `if` and dedent its body). Today, we use LibCST to parse and
adjust the indentation, which is really expensive -- but this is only
really necessary if the block contains a multiline string, since naively
adjusting the indentation for such a string can change the whitespace
_within_ the string.

This PR uses a simple dedent implementation for cases in which the block
doesn't intersect with a multi-line string (or an f-string, since we
don't support tracking multi-line strings for f-strings right now).

We could improve this even further by using the ranges to guide the
dedent function, such that we don't apply the dedent if the line starts
within a multiline string. But that would also need to take f-strings
into account, which is a little tricky.

## Test Plan

`cargo test`
This commit is contained in:
Charlie Marsh 2024-02-02 10:13:46 -08:00 committed by GitHub
parent 4f7fb566f0
commit c3ca34543f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 193 additions and 19 deletions

View file

@ -8,6 +8,7 @@ use ruff_python_ast::{self as ast, Arguments, ExceptHandler, Stmt};
use ruff_python_ast::{AnyNodeRef, ArgOrKeyword};
use ruff_python_codegen::Stylist;
use ruff_python_index::Indexer;
use ruff_python_trivia::textwrap::dedent_to;
use ruff_python_trivia::{
has_leading_content, is_python_whitespace, CommentRanges, PythonWhitespace, SimpleTokenKind,
SimpleTokenizer,
@ -169,29 +170,47 @@ pub(crate) fn add_argument(
}
/// Safely adjust the indentation of the indented block at [`TextRange`].
///
/// The [`TextRange`] is assumed to represent an entire indented block, including the leading
/// indentation of that block. For example, to dedent the body here:
/// ```python
/// if True:
/// print("Hello, world!")
/// ```
///
/// The range would be the entirety of ` print("Hello, world!")`.
pub(crate) fn adjust_indentation(
range: TextRange,
indentation: &str,
locator: &Locator,
indexer: &Indexer,
stylist: &Stylist,
) -> Result<String> {
let contents = locator.slice(range);
// If the range includes a multi-line string, use LibCST to ensure that we don't adjust the
// whitespace _within_ the string.
if indexer.multiline_ranges().intersects(range) || indexer.fstring_ranges().intersects(range) {
let contents = locator.slice(range);
let module_text = format!("def f():{}{contents}", stylist.line_ending().as_str());
let module_text = format!("def f():{}{contents}", stylist.line_ending().as_str());
let mut tree = match_statement(&module_text)?;
let mut tree = match_statement(&module_text)?;
let embedding = match_function_def(&mut tree)?;
let embedding = match_function_def(&mut tree)?;
let indented_block = match_indented_block(&mut embedding.body)?;
indented_block.indent = Some(indentation);
let indented_block = match_indented_block(&mut embedding.body)?;
indented_block.indent = Some(indentation);
let module_text = indented_block.codegen_stylist(stylist);
let module_text = module_text
.strip_prefix(stylist.line_ending().as_str())
.unwrap()
.to_string();
Ok(module_text)
let module_text = indented_block.codegen_stylist(stylist);
let module_text = module_text
.strip_prefix(stylist.line_ending().as_str())
.unwrap()
.to_string();
Ok(module_text)
} else {
// Otherwise, we can do a simple adjustment ourselves.
let contents = locator.slice(range);
Ok(dedent_to(contents, indentation))
}
}
/// Determine if a vector contains only one, specific element.

View file

@ -852,6 +852,7 @@ fn remove_else(
TextRange::new(else_colon_end, elif_else.end()),
desired_indentation,
locator,
indexer,
stylist,
)?;

View file

@ -105,7 +105,7 @@ pub(crate) fn trailing_whitespace(
diagnostic.set_fix(Fix::applicable_edit(
Edit::range_deletion(range),
// Removing trailing whitespace is not safe inside multiline strings.
if indexer.multiline_ranges().intersects(range) {
if indexer.multiline_ranges().contains_range(range) {
Applicability::Unsafe
} else {
Applicability::Safe

View file

@ -5,6 +5,7 @@ use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::{self as ast, ElifElseClause, Stmt};
use ruff_python_codegen::Stylist;
use ruff_python_index::Indexer;
use ruff_source_file::Locator;
use ruff_text_size::{Ranged, TextRange};
@ -84,8 +85,15 @@ pub(crate) fn collapsible_else_if(checker: &mut Checker, stmt: &Stmt) {
CollapsibleElseIf,
TextRange::new(else_clause.start(), first.start()),
);
diagnostic
.try_set_fix(|| convert_to_elif(first, else_clause, checker.locator(), checker.stylist()));
diagnostic.try_set_fix(|| {
convert_to_elif(
first,
else_clause,
checker.locator(),
checker.indexer(),
checker.stylist(),
)
});
checker.diagnostics.push(diagnostic);
}
@ -94,6 +102,7 @@ fn convert_to_elif(
first: &Stmt,
else_clause: &ElifElseClause,
locator: &Locator,
indexer: &Indexer,
stylist: &Stylist,
) -> Result<Fix> {
let inner_if_line_start = locator.line_start(first.start());
@ -109,6 +118,7 @@ fn convert_to_elif(
TextRange::new(inner_if_line_start, inner_if_line_end),
indentation,
locator,
indexer,
stylist,
)?;

View file

@ -6,6 +6,7 @@ use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::identifier;
use ruff_python_ast::{self as ast, ExceptHandler, MatchCase, Stmt};
use ruff_python_codegen::Stylist;
use ruff_python_index::Indexer;
use ruff_source_file::Locator;
use ruff_text_size::{Ranged, TextRange};
@ -81,6 +82,7 @@ pub(crate) fn useless_else_on_loop(
orelse,
else_range,
checker.locator(),
checker.indexer(),
checker.stylist(),
)
});
@ -134,6 +136,7 @@ fn remove_else(
orelse: &[Stmt],
else_range: TextRange,
locator: &Locator,
indexer: &Indexer,
stylist: &Stylist,
) -> Result<Fix> {
let Some(start) = orelse.first() else {
@ -164,6 +167,7 @@ fn remove_else(
),
desired_indentation,
locator,
indexer,
stylist,
)?;

View file

@ -302,6 +302,7 @@ fn fix_always_false_branch(
),
indentation,
checker.locator(),
checker.indexer(),
checker.stylist(),
)
.ok()
@ -376,6 +377,7 @@ fn fix_always_true_branch(
TextRange::new(checker.locator().line_start(start.start()), end.end()),
indentation,
checker.locator(),
checker.indexer(),
checker.stylist(),
)
.ok()

View file

@ -774,4 +774,32 @@ UP036_0.py:210:4: UP036 [*] Version block is outdated for minimum Python version
213 212 | if sys.version_info[:2] > (3,13):
214 213 | print("py3")
UP036_0.py:219:4: UP036 [*] Version block is outdated for minimum Python version
|
217 | print("py3")
218 |
219 | if sys.version_info > (3,0):
| ^^^^^^^^^^^^^^^^^^^^^^^^ UP036
220 | f"this is\
221 | allowed too"
|
= help: Remove outdated version block
Unsafe fix
216 216 | if sys.version_info[:3] > (3,13):
217 217 | print("py3")
218 218 |
219 |-if sys.version_info > (3,0):
220 |- f"this is\
219 |+f"this is\
221 220 | allowed too"
222 221 |
223 |- f"""the indentation on
222 |+f"""the indentation on
224 223 | this line is significant"""
225 224 |
226 |- "this is\
225 |+"this is\
227 226 | allowed too"