Skip LibCST parsing for standard dedent adjustments (#9769)

## Summary

Often, when applying a fix, we need to dedent a block of code (e.g., if we
remove an `if` and dedent its body). Today, we use LibCST to parse and
adjust the indentation, which is really expensive -- but the full parse is
only necessary if the block contains a multiline string, since naively
adjusting the indentation for such a string can change the whitespace
_within_ the string.

This PR uses a simple dedent implementation for cases in which the block
doesn't intersect with a multiline string (or with any f-string, since we
don't currently track multiline string ranges for f-strings).

We could improve this even further by using the ranges to guide the
dedent function, such that we don't apply the dedent if the line starts
within a multiline string. But that would also need to take f-strings
into account, which is a little tricky.

## Test Plan

`cargo test`
This commit is contained in:
Charlie Marsh 2024-02-02 10:13:46 -08:00 committed by GitHub
parent 4f7fb566f0
commit c3ca34543f
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
12 changed files with 193 additions and 19 deletions

View file

@ -19,11 +19,11 @@ impl CommentRanges {
Self { raw: ranges }
}
/// Returns `true` if the given range includes a comment.
/// Returns `true` if the given range intersects with any comment range.
pub fn intersects(&self, target: TextRange) -> bool {
self.raw
.binary_search_by(|range| {
if target.contains_range(*range) {
if target.intersect(*range).is_some() {
std::cmp::Ordering::Equal
} else if range.end() < target.start() {
std::cmp::Ordering::Less

View file

@ -74,7 +74,9 @@ pub fn indent<'a>(text: &'a str, prefix: &str) -> Cow<'a, str> {
/// Removes common leading whitespace from each line.
///
/// This function will look at each non-empty line and determine the
/// maximum amount of whitespace that can be removed from all lines:
/// maximum amount of whitespace that can be removed from all lines.
///
/// Lines that consist solely of whitespace are trimmed to a blank line.
///
/// ```
/// # use ruff_python_trivia::textwrap::dedent;
@ -122,6 +124,51 @@ pub fn dedent(text: &str) -> Cow<'_, str> {
Cow::Owned(result)
}
/// Reduce a block's indentation to match the provided indentation.
///
/// This function looks at the first non-blank line in the block to determine
/// the current ("baseline") indentation, then removes whitespace from each
/// line so that the baseline matches the provided indentation.
///
/// Lines that are indented by _less_ than the baseline are left unchanged.
///
/// Lines that consist solely of whitespace are trimmed to a blank line.
///
/// If the block contains no non-blank lines, there is nothing to dedent and
/// only the line endings are kept.
///
/// # Panics
/// If the first non-blank line is indented by less than the provided indent.
pub fn dedent_to(text: &str, indent: &str) -> String {
    // Use the first line that has content to determine the "baseline" indentation. A blank or
    // whitespace-only line carries no indentation information, so it is skipped. The same
    // whitespace definition is used here and in the loop below, so both measurements agree.
    let baseline = text.universal_newlines().find_map(|line| {
        let trimmed = line.trim_whitespace_start();
        if trimmed.is_empty() {
            None
        } else {
            Some(line.len() - trimmed.len())
        }
    });

    // Determine the amount of indentation to remove. The subtraction is checked so that a
    // too-shallow block fails loudly in every build profile, rather than wrapping around.
    let (existing_indent_len, dedent_len) = match baseline {
        Some(len) => (
            len,
            len.checked_sub(indent.len())
                .expect("block is indented by less than the requested indentation"),
        ),
        // Only blank lines: the dedent branch below is never reached.
        None => (0, 0),
    };

    let mut result = String::with_capacity(text.len() + indent.len());
    for line in text.universal_newlines() {
        let trimmed = line.trim_whitespace_start();
        if trimmed.is_empty() {
            // Whitespace-only line: drop the whitespace, keep the line ending (if any).
            if let Some(line_ending) = line.line_ending() {
                result.push_str(&line_ending);
            }
        } else {
            // Determine the current indentation level.
            let current_indent_len = line.len() - trimmed.len();
            if current_indent_len < existing_indent_len {
                // If the current indentation level is less than the baseline, keep it as is.
                result.push_str(line.as_full_str());
            } else {
                // Otherwise, reduce the indentation level. The first `current_indent_len`
                // bytes are whitespace and `dedent_len <= current_indent_len`, so the slice
                // only removes leading whitespace.
                result.push_str(&line.as_full_str()[dedent_len..]);
            }
        }
    }
    result
}
#[cfg(test)]
mod tests {
use super::*;
@ -344,4 +391,34 @@ mod tests {
]";
assert_eq!(dedent(text), text);
}
#[test]
#[rustfmt::skip]
fn adjust_indent() {
    // Dedent a four-space block to two spaces; the whitespace-only line
    // becomes a blank line.
    let x = [
        "    foo",
        "    bar",
        "   ",
        "    baz"
    ].join("\n");
    let y = [
        "  foo",
        "  bar",
        "",
        "  baz"
    ].join("\n");
    assert_eq!(dedent_to(&x, "  "), y);

    // Dedent to column zero; a more deeply indented line keeps its
    // indentation relative to the baseline.
    let x = [
        "    foo",
        "        bar",
        "    baz",
    ].join("\n");
    let y = [
        "foo",
        "    bar",
        "baz"
    ].join("\n");
    assert_eq!(dedent_to(&x, ""), y);
}
}