Track t-strings and f-strings for token-based rules and suppression comments (#20357)

Our token-based rules and `noqa` extraction used an `Indexer` that kept track of f-string ranges but not t-string ranges. We've updated the `Indexer` and downstream uses thereof to handle both f-strings and t-strings. Most of the diff is renaming and adding tests. Note that much of the "new" logic gets to be naive because the lexer has already ensured that f-string and t-string "starts" are paired with their respective "ends", even amidst nesting and so on. Finally: one could imagine wanting to know if a given interpolated string range corresponds to an f-string or a t-string, but I didn't find a place where we actually needed this. Closes #20310
2025-09-29 13:24:57 +00:00 · 2025-09-12 13:00:12 -05:00 · 2025-09-12 13:00:12 -05:00 · b6bd32d9dc
commit b6bd32d9dc
parent ec863bcde7
18 changed files with 519 additions and 89 deletions
--- a/crates/ruff_python_index/src/indexer.rs
+++ b/crates/ruff_python_index/src/indexer.rs
@ -9,15 +9,17 @@ use ruff_python_trivia::{
 use ruff_source_file::LineRanges;
 use ruff_text_size::{Ranged, TextRange, TextSize};

-use crate::fstring_ranges::{FStringRanges, FStringRangesBuilder};
+use crate::interpolated_string_ranges::{
+    InterpolatedStringRanges, InterpolatedStringRangesBuilder,
+};
 use crate::multiline_ranges::{MultilineRanges, MultilineRangesBuilder};

 pub struct Indexer {
    /// Stores the start offset of continuation lines.
    continuation_lines: Vec<TextSize>,

-    /// The range of all f-string in the source document.
-    fstring_ranges: FStringRanges,
+    /// The range of all interpolated strings in the source document.
+    interpolated_string_ranges: InterpolatedStringRanges,

    /// The range of all multiline strings in the source document.
    multiline_ranges: MultilineRanges,
@ -30,7 +32,7 @@ impl Indexer {
    pub fn from_tokens(tokens: &Tokens, source: &str) -> Self {
        assert!(TextSize::try_from(source.len()).is_ok());

-        let mut fstring_ranges_builder = FStringRangesBuilder::default();
+        let mut interpolated_string_ranges_builder = InterpolatedStringRangesBuilder::default();
        let mut multiline_ranges_builder = MultilineRangesBuilder::default();
        let mut continuation_lines = Vec::new();
        let mut comment_ranges = Vec::new();
@ -59,7 +61,7 @@ impl Indexer {
                }
            }

-            fstring_ranges_builder.visit_token(token);
+            interpolated_string_ranges_builder.visit_token(token);
            multiline_ranges_builder.visit_token(token);

            match token.kind() {
@ -82,7 +84,7 @@ impl Indexer {

        Self {
            continuation_lines,
-            fstring_ranges: fstring_ranges_builder.finish(),
+            interpolated_string_ranges: interpolated_string_ranges_builder.finish(),
            multiline_ranges: multiline_ranges_builder.finish(),
            comment_ranges: CommentRanges::new(comment_ranges),
        }
@ -93,9 +95,9 @@ impl Indexer {
        &self.comment_ranges
    }

-    /// Returns the byte offset ranges of f-strings.
-    pub const fn fstring_ranges(&self) -> &FStringRanges {
-        &self.fstring_ranges
+    /// Returns the byte offset ranges of interpolated strings.
+    pub const fn interpolated_string_ranges(&self) -> &InterpolatedStringRanges {
+        &self.interpolated_string_ranges
    }

    /// Returns the byte offset ranges of multiline strings.
@ -356,7 +358,7 @@ f"implicit " f"concatenation"
        .trim();
        assert_eq!(
            new_indexer(contents)
-                .fstring_ranges()
+                .interpolated_string_ranges()
                .values()
                .copied()
                .collect::<Vec<_>>(),
@ -390,7 +392,7 @@ f-string"""}
        .trim();
        assert_eq!(
            new_indexer(contents)
-                .fstring_ranges()
+                .interpolated_string_ranges()
                .values()
                .copied()
                .collect::<Vec<_>>(),
@ -504,11 +506,17 @@ the end"""
            ),
        ] {
            assert_eq!(
-                indexer.fstring_ranges().innermost(offset).unwrap(),
+                indexer
+                    .interpolated_string_ranges()
+                    .innermost(offset)
+                    .unwrap(),
                innermost_range
            );
            assert_eq!(
-                indexer.fstring_ranges().outermost(offset).unwrap(),
+                indexer
+                    .interpolated_string_ranges()
+                    .outermost(offset)
+                    .unwrap(),
                outermost_range
            );
        }
--- a/crates/ruff_python_index/src/interpolated_string_ranges.rs
+++ b/crates/ruff_python_index/src/interpolated_string_ranges.rs
@ -3,17 +3,17 @@ use std::collections::BTreeMap;
 use ruff_python_parser::{Token, TokenKind};
 use ruff_text_size::{Ranged, TextRange, TextSize};

-/// Stores the ranges of all f-strings in a file sorted by [`TextRange::start`].
-/// There can be multiple overlapping ranges for nested f-strings.
+/// Stores the ranges of all interpolated strings in a file sorted by [`TextRange::start`].
+/// There can be multiple overlapping ranges for nested interpolated strings.
 ///
-/// Note that the ranges for all unterminated f-strings are not stored.
+/// Note that the ranges for all unterminated interpolated strings are not stored.
 #[derive(Debug)]
-pub struct FStringRanges {
-    // Mapping from the f-string start location to its range.
+pub struct InterpolatedStringRanges {
+    // Mapping from the interpolated string start location to its range.
    raw: BTreeMap<TextSize, TextRange>,
 }

-impl FStringRanges {
+impl InterpolatedStringRanges {
    /// Returns `true` if the given range intersects with any f-string range.
    pub fn intersects(&self, target: TextRange) -> bool {
        self.raw
@ -61,17 +61,17 @@ impl FStringRanges {
            .map(|(_, range)| *range)
    }

-    /// Returns an iterator over all f-string [`TextRange`] sorted by their
+    /// Returns an iterator over all interpolated string [`TextRange`] sorted by their
    /// start location.
    ///
-    /// For nested f-strings, the outermost f-string is yielded first, moving
+    /// For nested interpolated strings, the outermost interpolated string is yielded first, moving
    /// inwards with each iteration.
    #[inline]
    pub fn values(&self) -> impl Iterator<Item = &TextRange> + '_ {
        self.raw.values()
    }

-    /// Returns the number of f-string ranges stored.
+    /// Returns the number of interpolated string ranges stored.
    #[inline]
    pub fn len(&self) -> usize {
        self.raw.len()
@ -79,18 +79,21 @@ impl FStringRanges {
 }

 #[derive(Default)]
-pub(crate) struct FStringRangesBuilder {
+pub(crate) struct InterpolatedStringRangesBuilder {
    start_locations: Vec<TextSize>,
    raw: BTreeMap<TextSize, TextRange>,
 }

-impl FStringRangesBuilder {
+impl InterpolatedStringRangesBuilder {
    pub(crate) fn visit_token(&mut self, token: &Token) {
+        // While the logic of this visitor makes it seem possible to pair, say,
+        // an `FStringStart` with a `TStringEnd`, it is not actually possible to
+        // encounter this in tokenized code free from lexical errors.
        match token.kind() {
-            TokenKind::FStringStart => {
+            TokenKind::FStringStart | TokenKind::TStringStart => {
                self.start_locations.push(token.start());
            }
-            TokenKind::FStringEnd => {
+            TokenKind::FStringEnd | TokenKind::TStringEnd => {
                if let Some(start) = self.start_locations.pop() {
                    self.raw.insert(start, TextRange::new(start, token.end()));
                }
@ -99,7 +102,7 @@ impl FStringRangesBuilder {
        }
    }

-    pub(crate) fn finish(self) -> FStringRanges {
-        FStringRanges { raw: self.raw }
+    pub(crate) fn finish(self) -> InterpolatedStringRanges {
+        InterpolatedStringRanges { raw: self.raw }
    }
 }
--- a/crates/ruff_python_index/src/lib.rs
+++ b/crates/ruff_python_index/src/lib.rs
@ -1,5 +1,5 @@
-mod fstring_ranges;
 mod indexer;
+mod interpolated_string_ranges;
 mod multiline_ranges;

 pub use indexer::Indexer;