Track t-strings and f-strings for token-based rules and suppression comments (#20357)

Our token-based rules and `noqa` extraction used an `Indexer` that kept track of f-string ranges but not t-string ranges. We've updated the `Indexer` and its downstream uses to handle both f-strings and t-strings. Most of the diff is renaming and adding tests. Note that much of the "new" logic gets to be naive because the lexer has already ensured that f-string and t-string "starts" are paired with their respective "ends", even amidst nesting and so on. Finally: one could imagine wanting to know whether a given interpolated string range corresponds to an f-string or a t-string, but I didn't find a place where we actually needed this. Closes #20310
2025-10-01 22:31:23 +00:00 · 2025-09-12 13:00:12 -05:00 · 2025-09-12 13:00:12 -05:00 · b6bd32d9dc
commit b6bd32d9dc
parent ec863bcde7
18 changed files with 519 additions and 89 deletions
--- a/crates/ruff_python_ast/src/str.rs
+++ b/crates/ruff_python_ast/src/str.rs
@ -210,6 +210,58 @@ pub const SINGLE_QUOTE_BYTE_PREFIXES: &[&str] = &[
    "b'",
 ];

+/// Includes all permutations of `t` and `rt`, in every possible order and casing, for
+/// triple-quoted template strings (both `"""` and `'''`). The single-quoted equivalents are
+/// listed in [`SINGLE_QUOTE_TEMPLATE_PREFIXES`].
+///
+/// See: <https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals>
+#[rustfmt::skip]
+pub const TRIPLE_QUOTE_TEMPLATE_PREFIXES: &[&str] = &[
+    "TR\"\"\"",
+    "Tr\"\"\"",
+    "tR\"\"\"",
+    "tr\"\"\"",
+    "RT\"\"\"",
+    "Rt\"\"\"",
+    "rT\"\"\"",
+    "rt\"\"\"",
+    "TR'''",
+    "Tr'''",
+    "tR'''",
+    "tr'''",
+    "RT'''",
+    "Rt'''",
+    "rT'''",
+    "rt'''",
+    "T\"\"\"",
+    "t\"\"\"",
+    "T'''",
+    "t'''",
+];
+
+#[rustfmt::skip]
+pub const SINGLE_QUOTE_TEMPLATE_PREFIXES: &[&str] = &[
+    "TR\"",
+    "Tr\"",
+    "tR\"",
+    "tr\"",
+    "RT\"",
+    "Rt\"",
+    "rT\"",
+    "rt\"",
+    "TR'",
+    "Tr'",
+    "tR'",
+    "tr'",
+    "RT'",
+    "Rt'",
+    "rT'",
+    "rt'",
+    "T\"",
+    "t\"",
+    "T'",
+    "t'",
+];
+
 /// Strip the leading and trailing quotes from a string.
 /// Assumes that the string is a valid string literal, but does not verify that the string
 /// is a "simple" string literal (i.e., that it does not contain any implicit concatenations).
@ -229,7 +281,7 @@ pub fn raw_contents_range(contents: &str) -> Option<TextRange> {
    ))
 }

-/// An [`AhoCorasick`] matcher for string and byte literal prefixes.
+/// An [`AhoCorasick`] matcher for string, template, and bytes literal prefixes.
 static PREFIX_MATCHER: LazyLock<AhoCorasick> = LazyLock::new(|| {
    AhoCorasick::builder()
        .start_kind(StartKind::Anchored)
@ -239,19 +291,21 @@ static PREFIX_MATCHER: LazyLock<AhoCorasick> = LazyLock::new(|| {
            TRIPLE_QUOTE_STR_PREFIXES
                .iter()
                .chain(TRIPLE_QUOTE_BYTE_PREFIXES)
+                .chain(TRIPLE_QUOTE_TEMPLATE_PREFIXES)
                .chain(SINGLE_QUOTE_STR_PREFIXES)
-                .chain(SINGLE_QUOTE_BYTE_PREFIXES),
+                .chain(SINGLE_QUOTE_BYTE_PREFIXES)
+                .chain(SINGLE_QUOTE_TEMPLATE_PREFIXES),
        )
        .unwrap()
 });

-/// Return the leading quote for a string or byte literal (e.g., `"""`).
+/// Return the leading quote for a string, template, or bytes literal (e.g., `"""`).
 pub fn leading_quote(content: &str) -> Option<&str> {
    let mat = PREFIX_MATCHER.find(Input::new(content).anchored(Anchored::Yes))?;
    Some(&content[mat.start()..mat.end()])
 }

-/// Return the trailing quote string for a string or byte literal (e.g., `"""`).
+/// Return the trailing quote string for a string, template, or bytes literal (e.g., `"""`).
 pub fn trailing_quote(content: &str) -> Option<&str> {
    if content.ends_with("'''") {
        Some("'''")
@ -268,14 +322,16 @@ pub fn trailing_quote(content: &str) -> Option<&str> {

 /// Return `true` if the string is a triple-quote string, template, or bytes prefix.
 pub fn is_triple_quote(content: &str) -> bool {
-    TRIPLE_QUOTE_STR_PREFIXES.contains(&content) || TRIPLE_QUOTE_BYTE_PREFIXES.contains(&content)
+    TRIPLE_QUOTE_STR_PREFIXES.contains(&content)
+        || TRIPLE_QUOTE_BYTE_PREFIXES.contains(&content)
+        || TRIPLE_QUOTE_TEMPLATE_PREFIXES.contains(&content)
 }

 #[cfg(test)]
 mod tests {
    use super::{
-        SINGLE_QUOTE_BYTE_PREFIXES, SINGLE_QUOTE_STR_PREFIXES, TRIPLE_QUOTE_BYTE_PREFIXES,
-        TRIPLE_QUOTE_STR_PREFIXES,
+        SINGLE_QUOTE_BYTE_PREFIXES, SINGLE_QUOTE_STR_PREFIXES, SINGLE_QUOTE_TEMPLATE_PREFIXES,
+        TRIPLE_QUOTE_BYTE_PREFIXES, TRIPLE_QUOTE_STR_PREFIXES, TRIPLE_QUOTE_TEMPLATE_PREFIXES,
    };

    #[test]
@ -283,8 +339,10 @@ mod tests {
        let prefixes = TRIPLE_QUOTE_STR_PREFIXES
            .iter()
            .chain(TRIPLE_QUOTE_BYTE_PREFIXES)
+            .chain(TRIPLE_QUOTE_TEMPLATE_PREFIXES)
            .chain(SINGLE_QUOTE_STR_PREFIXES)
            .chain(SINGLE_QUOTE_BYTE_PREFIXES)
+            .chain(SINGLE_QUOTE_TEMPLATE_PREFIXES)
            .collect::<Vec<_>>();
        for (i, prefix_i) in prefixes.iter().enumerate() {
            for (j, prefix_j) in prefixes.iter().enumerate() {