mirror of
https://github.com/astral-sh/ruff.git
synced 2025-09-29 13:24:57 +00:00
Track t-strings and f-strings for token-based rules and suppression comments (#20357)
Our token-based rules and `noqa` extraction used an `Indexer` that kept track of f-string ranges but not t-string ranges. We've updated the `Indexer` and downstream uses thereof to handle both f-strings and t-strings. Most of the diff is renaming and adding tests. Note that much of the "new" logic gets to be naive because the lexer has already ensured that f-string and t-string "starts" are paired with their respective "ends", even amidst nesting and so on. Finally: one could imagine wanting to know if a given interpolated string range corresponds to an f-string or a t-string, but I didn't find a place where we actually needed this. Closes #20310
This commit is contained in:
parent
ec863bcde7
commit
b6bd32d9dc
18 changed files with 519 additions and 89 deletions
|
@ -9,15 +9,17 @@ use ruff_python_trivia::{
|
|||
use ruff_source_file::LineRanges;
|
||||
use ruff_text_size::{Ranged, TextRange, TextSize};
|
||||
|
||||
use crate::fstring_ranges::{FStringRanges, FStringRangesBuilder};
|
||||
use crate::interpolated_string_ranges::{
|
||||
InterpolatedStringRanges, InterpolatedStringRangesBuilder,
|
||||
};
|
||||
use crate::multiline_ranges::{MultilineRanges, MultilineRangesBuilder};
|
||||
|
||||
pub struct Indexer {
|
||||
/// Stores the start offset of continuation lines.
|
||||
continuation_lines: Vec<TextSize>,
|
||||
|
||||
/// The range of all f-string in the source document.
|
||||
fstring_ranges: FStringRanges,
|
||||
/// The range of all interpolated strings in the source document.
|
||||
interpolated_string_ranges: InterpolatedStringRanges,
|
||||
|
||||
/// The range of all multiline strings in the source document.
|
||||
multiline_ranges: MultilineRanges,
|
||||
|
@ -30,7 +32,7 @@ impl Indexer {
|
|||
pub fn from_tokens(tokens: &Tokens, source: &str) -> Self {
|
||||
assert!(TextSize::try_from(source.len()).is_ok());
|
||||
|
||||
let mut fstring_ranges_builder = FStringRangesBuilder::default();
|
||||
let mut interpolated_string_ranges_builder = InterpolatedStringRangesBuilder::default();
|
||||
let mut multiline_ranges_builder = MultilineRangesBuilder::default();
|
||||
let mut continuation_lines = Vec::new();
|
||||
let mut comment_ranges = Vec::new();
|
||||
|
@ -59,7 +61,7 @@ impl Indexer {
|
|||
}
|
||||
}
|
||||
|
||||
fstring_ranges_builder.visit_token(token);
|
||||
interpolated_string_ranges_builder.visit_token(token);
|
||||
multiline_ranges_builder.visit_token(token);
|
||||
|
||||
match token.kind() {
|
||||
|
@ -82,7 +84,7 @@ impl Indexer {
|
|||
|
||||
Self {
|
||||
continuation_lines,
|
||||
fstring_ranges: fstring_ranges_builder.finish(),
|
||||
interpolated_string_ranges: interpolated_string_ranges_builder.finish(),
|
||||
multiline_ranges: multiline_ranges_builder.finish(),
|
||||
comment_ranges: CommentRanges::new(comment_ranges),
|
||||
}
|
||||
|
@ -93,9 +95,9 @@ impl Indexer {
|
|||
&self.comment_ranges
|
||||
}
|
||||
|
||||
/// Returns the byte offset ranges of f-strings.
|
||||
pub const fn fstring_ranges(&self) -> &FStringRanges {
|
||||
&self.fstring_ranges
|
||||
/// Returns the byte offset ranges of interpolated strings.
|
||||
pub const fn interpolated_string_ranges(&self) -> &InterpolatedStringRanges {
|
||||
&self.interpolated_string_ranges
|
||||
}
|
||||
|
||||
/// Returns the byte offset ranges of multiline strings.
|
||||
|
@ -356,7 +358,7 @@ f"implicit " f"concatenation"
|
|||
.trim();
|
||||
assert_eq!(
|
||||
new_indexer(contents)
|
||||
.fstring_ranges()
|
||||
.interpolated_string_ranges()
|
||||
.values()
|
||||
.copied()
|
||||
.collect::<Vec<_>>(),
|
||||
|
@ -390,7 +392,7 @@ f-string"""}
|
|||
.trim();
|
||||
assert_eq!(
|
||||
new_indexer(contents)
|
||||
.fstring_ranges()
|
||||
.interpolated_string_ranges()
|
||||
.values()
|
||||
.copied()
|
||||
.collect::<Vec<_>>(),
|
||||
|
@ -504,11 +506,17 @@ the end"""
|
|||
),
|
||||
] {
|
||||
assert_eq!(
|
||||
indexer.fstring_ranges().innermost(offset).unwrap(),
|
||||
indexer
|
||||
.interpolated_string_ranges()
|
||||
.innermost(offset)
|
||||
.unwrap(),
|
||||
innermost_range
|
||||
);
|
||||
assert_eq!(
|
||||
indexer.fstring_ranges().outermost(offset).unwrap(),
|
||||
indexer
|
||||
.interpolated_string_ranges()
|
||||
.outermost(offset)
|
||||
.unwrap(),
|
||||
outermost_range
|
||||
);
|
||||
}
|
||||
|
|
|
@ -3,17 +3,17 @@ use std::collections::BTreeMap;
|
|||
use ruff_python_parser::{Token, TokenKind};
|
||||
use ruff_text_size::{Ranged, TextRange, TextSize};
|
||||
|
||||
/// Stores the ranges of all f-strings in a file sorted by [`TextRange::start`].
|
||||
/// There can be multiple overlapping ranges for nested f-strings.
|
||||
/// Stores the ranges of all interpolated strings in a file sorted by [`TextRange::start`].
|
||||
/// There can be multiple overlapping ranges for nested interpolated strings.
|
||||
///
|
||||
/// Note that the ranges for all unterminated f-strings are not stored.
|
||||
/// Note that the ranges for all unterminated interpolated strings are not stored.
|
||||
#[derive(Debug)]
|
||||
pub struct FStringRanges {
|
||||
// Mapping from the f-string start location to its range.
|
||||
pub struct InterpolatedStringRanges {
|
||||
// Mapping from the interpolated string start location to its range.
|
||||
raw: BTreeMap<TextSize, TextRange>,
|
||||
}
|
||||
|
||||
impl FStringRanges {
|
||||
impl InterpolatedStringRanges {
|
||||
/// Returns `true` if the given range intersects with any interpolated string (f-string or t-string) range.
|
||||
pub fn intersects(&self, target: TextRange) -> bool {
|
||||
self.raw
|
||||
|
@ -61,17 +61,17 @@ impl FStringRanges {
|
|||
.map(|(_, range)| *range)
|
||||
}
|
||||
|
||||
/// Returns an iterator over all f-string [`TextRange`] sorted by their
|
||||
/// Returns an iterator over all interpolated string [`TextRange`] sorted by their
|
||||
/// start location.
|
||||
///
|
||||
/// For nested f-strings, the outermost f-string is yielded first, moving
|
||||
/// For nested interpolated strings, the outermost interpolated string is yielded first, moving
|
||||
/// inwards with each iteration.
|
||||
#[inline]
|
||||
pub fn values(&self) -> impl Iterator<Item = &TextRange> + '_ {
|
||||
self.raw.values()
|
||||
}
|
||||
|
||||
/// Returns the number of f-string ranges stored.
|
||||
/// Returns the number of interpolated string ranges stored.
|
||||
#[inline]
|
||||
pub fn len(&self) -> usize {
|
||||
self.raw.len()
|
||||
|
@ -79,18 +79,21 @@ impl FStringRanges {
|
|||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub(crate) struct FStringRangesBuilder {
|
||||
pub(crate) struct InterpolatedStringRangesBuilder {
|
||||
start_locations: Vec<TextSize>,
|
||||
raw: BTreeMap<TextSize, TextRange>,
|
||||
}
|
||||
|
||||
impl FStringRangesBuilder {
|
||||
impl InterpolatedStringRangesBuilder {
|
||||
pub(crate) fn visit_token(&mut self, token: &Token) {
|
||||
// While the logic of this visitor makes it seem possible to pair, say,
|
||||
// an `FStringStart` with a `TStringEnd`, it is not actually possible to
|
||||
// encounter this in tokenized code free from lexical errors.
|
||||
match token.kind() {
|
||||
TokenKind::FStringStart => {
|
||||
TokenKind::FStringStart | TokenKind::TStringStart => {
|
||||
self.start_locations.push(token.start());
|
||||
}
|
||||
TokenKind::FStringEnd => {
|
||||
TokenKind::FStringEnd | TokenKind::TStringEnd => {
|
||||
if let Some(start) = self.start_locations.pop() {
|
||||
self.raw.insert(start, TextRange::new(start, token.end()));
|
||||
}
|
||||
|
@ -99,7 +102,7 @@ impl FStringRangesBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
pub(crate) fn finish(self) -> FStringRanges {
|
||||
FStringRanges { raw: self.raw }
|
||||
pub(crate) fn finish(self) -> InterpolatedStringRanges {
|
||||
InterpolatedStringRanges { raw: self.raw }
|
||||
}
|
||||
}
|
|
@ -1,5 +1,5 @@
|
|||
mod fstring_ranges;
|
||||
mod indexer;
|
||||
mod interpolated_string_ranges;
|
||||
mod multiline_ranges;
|
||||
|
||||
pub use indexer::Indexer;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue