Removing trailing whitespace inside multiline strings is unsafe (#9744)

Fix #8037.
This commit is contained in:
Seo Sanghyeon 2024-02-01 06:45:23 +09:00 committed by GitHub
parent 7992583908
commit 6e225cb57c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 89 additions and 2 deletions

View file

@ -11,6 +11,7 @@ use ruff_source_file::Locator;
use ruff_text_size::{Ranged, TextRange, TextSize};
use crate::fstring_ranges::{FStringRanges, FStringRangesBuilder};
use crate::multiline_ranges::{MultilineRanges, MultilineRangesBuilder};
use crate::CommentRangesBuilder;
pub struct Indexer {
@ -21,6 +22,9 @@ pub struct Indexer {
/// The ranges of all f-strings in the source document.
fstring_ranges: FStringRanges,
/// The range of all multiline strings in the source document.
multiline_ranges: MultilineRanges,
}
impl Indexer {
@ -29,6 +33,7 @@ impl Indexer {
let mut comment_ranges_builder = CommentRangesBuilder::default();
let mut fstring_ranges_builder = FStringRangesBuilder::default();
let mut multiline_ranges_builder = MultilineRangesBuilder::default();
let mut continuation_lines = Vec::new();
// Token, end
let mut prev_end = TextSize::default();
@ -61,6 +66,7 @@ impl Indexer {
comment_ranges_builder.visit_token(tok, *range);
fstring_ranges_builder.visit_token(tok, *range);
multiline_ranges_builder.visit_token(tok, *range);
match tok {
Tok::Newline | Tok::NonLogicalNewline => {
@ -82,6 +88,7 @@ impl Indexer {
comment_ranges: comment_ranges_builder.finish(),
continuation_lines,
fstring_ranges: fstring_ranges_builder.finish(),
multiline_ranges: multiline_ranges_builder.finish(),
}
}
@ -95,6 +102,11 @@ impl Indexer {
&self.fstring_ranges
}
/// Returns the byte offset ranges of multiline strings.
///
/// This is a borrow of the `multiline_ranges` field stored on the indexer;
/// nothing is recomputed on access.
pub const fn multiline_ranges(&self) -> &MultilineRanges {
&self.multiline_ranges
}
/// Returns the line start positions of continuations (backslash).
pub fn continuation_line_starts(&self) -> &[TextSize] {
&self.continuation_lines

View file

@ -1,6 +1,7 @@
mod comment_ranges;
mod fstring_ranges;
mod indexer;
mod multiline_ranges;
pub use comment_ranges::{tokens_and_ranges, CommentRangesBuilder};
pub use indexer::Indexer;

View file

@ -0,0 +1,46 @@
use ruff_python_parser::Tok;
use ruff_text_size::TextRange;
/// Stores the range of all multiline strings in a file sorted by
/// [`TextRange::start`].
///
/// Instances are produced by `MultilineRangesBuilder::finish`, which hands
/// over the ranges it recorded for triple-quoted string tokens. The ranges are
/// kept in the order the builder received them, so the sortedness stated above
/// presumably relies on tokens being visited in source order (confirm against
/// the caller).
pub struct MultilineRanges {
// One entry per recorded triple-quoted string token; `intersects` performs
// a binary search over this vector.
ranges: Vec<TextRange>,
}
impl MultilineRanges {
    /// Returns `true` if `target` lies entirely inside one of the stored
    /// multiline string ranges.
    ///
    /// Despite the method name, this is a containment test rather than a
    /// general overlap test: a `target` that only partially overlaps a
    /// multiline string yields `false`.
    ///
    /// The lookup is a binary search, so it relies on `ranges` being sorted by
    /// start offset and free of overlaps.
    pub fn intersects(&self, target: TextRange) -> bool {
        self.ranges
            .binary_search_by(|range| {
                if range.contains_range(target) {
                    std::cmp::Ordering::Equal
                } else if range.end() <= target.start() {
                    // Text ranges are half-open, so a string that ends exactly
                    // where `target` begins lies wholly before it. Classifying
                    // it as `Less` (rather than `Greater`) keeps the comparator
                    // consistent with the sort order when two multiline strings
                    // are directly adjacent; otherwise the search could step
                    // left and miss the string that actually contains `target`.
                    std::cmp::Ordering::Less
                } else {
                    std::cmp::Ordering::Greater
                }
            })
            .is_ok()
    }
}
/// Accumulates the ranges of triple-quoted string tokens, one `visit_token`
/// call at a time, and is turned into a [`MultilineRanges`] by `finish`.
#[derive(Default)]
pub(crate) struct MultilineRangesBuilder {
// Ranges recorded so far, in the order the tokens were visited.
ranges: Vec<TextRange>,
}
impl MultilineRangesBuilder {
    /// Records `range` when `token` is a triple-quoted string; every other
    /// token is ignored.
    pub(crate) fn visit_token(&mut self, token: &Tok, range: TextRange) {
        let is_triple_quoted_string = matches!(
            token,
            Tok::String {
                triple_quoted: true,
                ..
            }
        );
        if is_triple_quoted_string {
            self.ranges.push(range);
        }
    }

    /// Consumes the builder and wraps the collected ranges in a
    /// [`MultilineRanges`], preserving the order in which they were recorded.
    pub(crate) fn finish(self) -> MultilineRanges {
        let Self { ranges } = self;
        MultilineRanges { ranges }
    }
}