mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-04 10:49:50 +00:00
Skip LibCST parsing for standard dedent adjustments (#9769)
## Summary Often, when fixing, we need to dedent a block of code (e.g., if we remove an `if` and dedent its body). Today, we use LibCST to parse and adjust the indentation, which is really expensive -- but this is only really necessary if the block contains a multiline string, since naively adjusting the indentation for such a string can change the whitespace _within_ the string. This PR uses a simple dedent implementation for cases in which the block doesn't intersect with a multi-line string (or an f-string, since we don't support tracking multi-line strings for f-strings right now). We could improve this even further by using the ranges to guide the dedent function, such that we don't apply the dedent if the line starts within a multiline string. But that would also need to take f-strings into account, which is a little tricky. ## Test Plan `cargo test`
This commit is contained in:
parent
4f7fb566f0
commit
c3ca34543f
12 changed files with 193 additions and 19 deletions
|
@ -215,3 +215,13 @@ if sys.version_info[:2] > (3,13):
|
|||
|
||||
if sys.version_info[:3] > (3,13):
|
||||
print("py3")
|
||||
|
||||
if sys.version_info > (3,0):
|
||||
f"this is\
|
||||
allowed too"
|
||||
|
||||
f"""the indentation on
|
||||
this line is significant"""
|
||||
|
||||
"this is\
|
||||
allowed too"
|
||||
|
|
|
@ -8,6 +8,7 @@ use ruff_python_ast::{self as ast, Arguments, ExceptHandler, Stmt};
|
|||
use ruff_python_ast::{AnyNodeRef, ArgOrKeyword};
|
||||
use ruff_python_codegen::Stylist;
|
||||
use ruff_python_index::Indexer;
|
||||
use ruff_python_trivia::textwrap::dedent_to;
|
||||
use ruff_python_trivia::{
|
||||
has_leading_content, is_python_whitespace, CommentRanges, PythonWhitespace, SimpleTokenKind,
|
||||
SimpleTokenizer,
|
||||
|
@ -169,29 +170,47 @@ pub(crate) fn add_argument(
|
|||
}
|
||||
|
||||
/// Safely adjust the indentation of the indented block at [`TextRange`].
|
||||
///
|
||||
/// The [`TextRange`] is assumed to represent an entire indented block, including the leading
|
||||
/// indentation of that block. For example, to dedent the body here:
|
||||
/// ```python
|
||||
/// if True:
|
||||
/// print("Hello, world!")
|
||||
/// ```
|
||||
///
|
||||
/// The range would be the entirety of ` print("Hello, world!")`.
|
||||
pub(crate) fn adjust_indentation(
|
||||
range: TextRange,
|
||||
indentation: &str,
|
||||
locator: &Locator,
|
||||
indexer: &Indexer,
|
||||
stylist: &Stylist,
|
||||
) -> Result<String> {
|
||||
let contents = locator.slice(range);
|
||||
// If the range includes a multi-line string, use LibCST to ensure that we don't adjust the
|
||||
// whitespace _within_ the string.
|
||||
if indexer.multiline_ranges().intersects(range) || indexer.fstring_ranges().intersects(range) {
|
||||
let contents = locator.slice(range);
|
||||
|
||||
let module_text = format!("def f():{}{contents}", stylist.line_ending().as_str());
|
||||
let module_text = format!("def f():{}{contents}", stylist.line_ending().as_str());
|
||||
|
||||
let mut tree = match_statement(&module_text)?;
|
||||
let mut tree = match_statement(&module_text)?;
|
||||
|
||||
let embedding = match_function_def(&mut tree)?;
|
||||
let embedding = match_function_def(&mut tree)?;
|
||||
|
||||
let indented_block = match_indented_block(&mut embedding.body)?;
|
||||
indented_block.indent = Some(indentation);
|
||||
let indented_block = match_indented_block(&mut embedding.body)?;
|
||||
indented_block.indent = Some(indentation);
|
||||
|
||||
let module_text = indented_block.codegen_stylist(stylist);
|
||||
let module_text = module_text
|
||||
.strip_prefix(stylist.line_ending().as_str())
|
||||
.unwrap()
|
||||
.to_string();
|
||||
Ok(module_text)
|
||||
let module_text = indented_block.codegen_stylist(stylist);
|
||||
let module_text = module_text
|
||||
.strip_prefix(stylist.line_ending().as_str())
|
||||
.unwrap()
|
||||
.to_string();
|
||||
Ok(module_text)
|
||||
} else {
|
||||
// Otherwise, we can do a simple adjustment ourselves.
|
||||
let contents = locator.slice(range);
|
||||
Ok(dedent_to(contents, indentation))
|
||||
}
|
||||
}
|
||||
|
||||
/// Determine if a vector contains only one, specific element.
|
||||
|
|
|
@ -852,6 +852,7 @@ fn remove_else(
|
|||
TextRange::new(else_colon_end, elif_else.end()),
|
||||
desired_indentation,
|
||||
locator,
|
||||
indexer,
|
||||
stylist,
|
||||
)?;
|
||||
|
||||
|
|
|
@ -105,7 +105,7 @@ pub(crate) fn trailing_whitespace(
|
|||
diagnostic.set_fix(Fix::applicable_edit(
|
||||
Edit::range_deletion(range),
|
||||
// Removing trailing whitespace is not safe inside multiline strings.
|
||||
if indexer.multiline_ranges().intersects(range) {
|
||||
if indexer.multiline_ranges().contains_range(range) {
|
||||
Applicability::Unsafe
|
||||
} else {
|
||||
Applicability::Safe
|
||||
|
|
|
@ -5,6 +5,7 @@ use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation};
|
|||
use ruff_macros::{derive_message_formats, violation};
|
||||
use ruff_python_ast::{self as ast, ElifElseClause, Stmt};
|
||||
use ruff_python_codegen::Stylist;
|
||||
use ruff_python_index::Indexer;
|
||||
use ruff_source_file::Locator;
|
||||
use ruff_text_size::{Ranged, TextRange};
|
||||
|
||||
|
@ -84,8 +85,15 @@ pub(crate) fn collapsible_else_if(checker: &mut Checker, stmt: &Stmt) {
|
|||
CollapsibleElseIf,
|
||||
TextRange::new(else_clause.start(), first.start()),
|
||||
);
|
||||
diagnostic
|
||||
.try_set_fix(|| convert_to_elif(first, else_clause, checker.locator(), checker.stylist()));
|
||||
diagnostic.try_set_fix(|| {
|
||||
convert_to_elif(
|
||||
first,
|
||||
else_clause,
|
||||
checker.locator(),
|
||||
checker.indexer(),
|
||||
checker.stylist(),
|
||||
)
|
||||
});
|
||||
checker.diagnostics.push(diagnostic);
|
||||
}
|
||||
|
||||
|
@ -94,6 +102,7 @@ fn convert_to_elif(
|
|||
first: &Stmt,
|
||||
else_clause: &ElifElseClause,
|
||||
locator: &Locator,
|
||||
indexer: &Indexer,
|
||||
stylist: &Stylist,
|
||||
) -> Result<Fix> {
|
||||
let inner_if_line_start = locator.line_start(first.start());
|
||||
|
@ -109,6 +118,7 @@ fn convert_to_elif(
|
|||
TextRange::new(inner_if_line_start, inner_if_line_end),
|
||||
indentation,
|
||||
locator,
|
||||
indexer,
|
||||
stylist,
|
||||
)?;
|
||||
|
||||
|
|
|
@ -6,6 +6,7 @@ use ruff_macros::{derive_message_formats, violation};
|
|||
use ruff_python_ast::identifier;
|
||||
use ruff_python_ast::{self as ast, ExceptHandler, MatchCase, Stmt};
|
||||
use ruff_python_codegen::Stylist;
|
||||
use ruff_python_index::Indexer;
|
||||
use ruff_source_file::Locator;
|
||||
use ruff_text_size::{Ranged, TextRange};
|
||||
|
||||
|
@ -81,6 +82,7 @@ pub(crate) fn useless_else_on_loop(
|
|||
orelse,
|
||||
else_range,
|
||||
checker.locator(),
|
||||
checker.indexer(),
|
||||
checker.stylist(),
|
||||
)
|
||||
});
|
||||
|
@ -134,6 +136,7 @@ fn remove_else(
|
|||
orelse: &[Stmt],
|
||||
else_range: TextRange,
|
||||
locator: &Locator,
|
||||
indexer: &Indexer,
|
||||
stylist: &Stylist,
|
||||
) -> Result<Fix> {
|
||||
let Some(start) = orelse.first() else {
|
||||
|
@ -164,6 +167,7 @@ fn remove_else(
|
|||
),
|
||||
desired_indentation,
|
||||
locator,
|
||||
indexer,
|
||||
stylist,
|
||||
)?;
|
||||
|
||||
|
|
|
@ -302,6 +302,7 @@ fn fix_always_false_branch(
|
|||
),
|
||||
indentation,
|
||||
checker.locator(),
|
||||
checker.indexer(),
|
||||
checker.stylist(),
|
||||
)
|
||||
.ok()
|
||||
|
@ -376,6 +377,7 @@ fn fix_always_true_branch(
|
|||
TextRange::new(checker.locator().line_start(start.start()), end.end()),
|
||||
indentation,
|
||||
checker.locator(),
|
||||
checker.indexer(),
|
||||
checker.stylist(),
|
||||
)
|
||||
.ok()
|
||||
|
|
|
@ -774,4 +774,32 @@ UP036_0.py:210:4: UP036 [*] Version block is outdated for minimum Python version
|
|||
213 212 | if sys.version_info[:2] > (3,13):
|
||||
214 213 | print("py3")
|
||||
|
||||
UP036_0.py:219:4: UP036 [*] Version block is outdated for minimum Python version
|
||||
|
|
||||
217 | print("py3")
|
||||
218 |
|
||||
219 | if sys.version_info > (3,0):
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^^ UP036
|
||||
220 | f"this is\
|
||||
221 | allowed too"
|
||||
|
|
||||
= help: Remove outdated version block
|
||||
|
||||
ℹ Unsafe fix
|
||||
216 216 | if sys.version_info[:3] > (3,13):
|
||||
217 217 | print("py3")
|
||||
218 218 |
|
||||
219 |-if sys.version_info > (3,0):
|
||||
220 |- f"this is\
|
||||
219 |+f"this is\
|
||||
221 220 | allowed too"
|
||||
222 221 |
|
||||
223 |- f"""the indentation on
|
||||
222 |+f"""the indentation on
|
||||
224 223 | this line is significant"""
|
||||
225 224 |
|
||||
226 |- "this is\
|
||||
225 |+"this is\
|
||||
227 226 | allowed too"
|
||||
|
||||
|
||||
|
|
|
@ -14,6 +14,14 @@ pub struct FStringRanges {
|
|||
}
|
||||
|
||||
impl FStringRanges {
|
||||
/// Returns `true` if the given range intersects with any f-string range.
|
||||
pub fn intersects(&self, target: TextRange) -> bool {
|
||||
self.raw
|
||||
.values()
|
||||
.take_while(|range| range.start() < target.end())
|
||||
.any(|range| target.intersect(*range).is_some())
|
||||
}
|
||||
|
||||
/// Return the [`TextRange`] of the innermost f-string at the given offset.
|
||||
pub fn innermost(&self, offset: TextSize) -> Option<TextRange> {
|
||||
self.raw
|
||||
|
|
|
@ -9,7 +9,7 @@ pub struct MultilineRanges {
|
|||
|
||||
impl MultilineRanges {
|
||||
/// Returns `true` if the given range is inside a multiline string.
|
||||
pub fn intersects(&self, target: TextRange) -> bool {
|
||||
pub fn contains_range(&self, target: TextRange) -> bool {
|
||||
self.ranges
|
||||
.binary_search_by(|range| {
|
||||
if range.contains_range(target) {
|
||||
|
@ -22,6 +22,21 @@ impl MultilineRanges {
|
|||
})
|
||||
.is_ok()
|
||||
}
|
||||
|
||||
/// Returns `true` if the given range intersects with any multiline string.
|
||||
pub fn intersects(&self, target: TextRange) -> bool {
|
||||
self.ranges
|
||||
.binary_search_by(|range| {
|
||||
if target.intersect(*range).is_some() {
|
||||
std::cmp::Ordering::Equal
|
||||
} else if range.end() < target.start() {
|
||||
std::cmp::Ordering::Less
|
||||
} else {
|
||||
std::cmp::Ordering::Greater
|
||||
}
|
||||
})
|
||||
.is_ok()
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Default)]
|
||||
|
|
|
@ -19,11 +19,11 @@ impl CommentRanges {
|
|||
Self { raw: ranges }
|
||||
}
|
||||
|
||||
/// Returns `true` if the given range includes a comment.
|
||||
/// Returns `true` if the given range intersects with any comment range.
|
||||
pub fn intersects(&self, target: TextRange) -> bool {
|
||||
self.raw
|
||||
.binary_search_by(|range| {
|
||||
if target.contains_range(*range) {
|
||||
if target.intersect(*range).is_some() {
|
||||
std::cmp::Ordering::Equal
|
||||
} else if range.end() < target.start() {
|
||||
std::cmp::Ordering::Less
|
||||
|
|
|
@ -74,7 +74,9 @@ pub fn indent<'a>(text: &'a str, prefix: &str) -> Cow<'a, str> {
|
|||
/// Removes common leading whitespace from each line.
|
||||
///
|
||||
/// This function will look at each non-empty line and determine the
|
||||
/// maximum amount of whitespace that can be removed from all lines:
|
||||
/// maximum amount of whitespace that can be removed from all lines.
|
||||
///
|
||||
/// Lines that consist solely of whitespace are trimmed to a blank line.
|
||||
///
|
||||
/// ```
|
||||
/// # use ruff_python_trivia::textwrap::dedent;
|
||||
|
@ -122,6 +124,51 @@ pub fn dedent(text: &str) -> Cow<'_, str> {
|
|||
Cow::Owned(result)
|
||||
}
|
||||
|
||||
/// Reduce a block's indentation to match the provided indentation.
|
||||
///
|
||||
/// This function looks at the first line in the block to determine the
|
||||
/// current indentation, then removes whitespace from each line to
|
||||
/// match the provided indentation.
|
||||
///
|
||||
/// Lines that are indented by _less_ than the indent of the first line
|
||||
/// are left unchanged.
|
||||
///
|
||||
/// Lines that consist solely of whitespace are trimmed to a blank line.
|
||||
///
|
||||
/// # Panics
|
||||
/// If the first line is indented by less than the provided indent.
|
||||
pub fn dedent_to(text: &str, indent: &str) -> String {
|
||||
// Look at the indentation of the first line, to determine the "baseline" indentation.
|
||||
let existing_indent_len = text
|
||||
.universal_newlines()
|
||||
.next()
|
||||
.map_or(0, |line| line.len() - line.trim_start().len());
|
||||
|
||||
// Determine the amount of indentation to remove.
|
||||
let dedent_len = existing_indent_len - indent.len();
|
||||
|
||||
let mut result = String::with_capacity(text.len() + indent.len());
|
||||
for line in text.universal_newlines() {
|
||||
let trimmed = line.trim_whitespace_start();
|
||||
if trimmed.is_empty() {
|
||||
if let Some(line_ending) = line.line_ending() {
|
||||
result.push_str(&line_ending);
|
||||
}
|
||||
} else {
|
||||
// Determine the current indentation level.
|
||||
let current_indent_len = line.len() - trimmed.len();
|
||||
if current_indent_len < existing_indent_len {
|
||||
// If the current indentation level is less than the baseline, keep it as is.
|
||||
result.push_str(line.as_full_str());
|
||||
} else {
|
||||
// Otherwise, reduce the indentation level.
|
||||
result.push_str(&line.as_full_str()[dedent_len..]);
|
||||
}
|
||||
}
|
||||
}
|
||||
result
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
@ -344,4 +391,34 @@ mod tests {
|
|||
]";
|
||||
assert_eq!(dedent(text), text);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[rustfmt::skip]
|
||||
fn adjust_indent() {
|
||||
let x = [
|
||||
" foo",
|
||||
" bar",
|
||||
" ",
|
||||
" baz"
|
||||
].join("\n");
|
||||
let y = [
|
||||
" foo",
|
||||
" bar",
|
||||
"",
|
||||
" baz"
|
||||
].join("\n");
|
||||
assert_eq!(dedent_to(&x, " "), y);
|
||||
|
||||
let x = [
|
||||
" foo",
|
||||
" bar",
|
||||
" baz",
|
||||
].join("\n");
|
||||
let y = [
|
||||
"foo",
|
||||
" bar",
|
||||
"baz"
|
||||
].join("\n");
|
||||
assert_eq!(dedent_to(&x, ""), y);
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue