Handle t-string prefixes in SimpleTokenizer (#20578)

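The simple tokenizer is meant to skip strings, but it was recording a `Name` token for t-strings (from the `t`). This PR fixes that by adding the t-string prefixes (`t`, `tr`, and `rt`, in every case combination) to the tokenizer's list of string prefixes, and adds snapshot tests for f-strings and t-strings.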
2025-09-29 13:24:57 +00:00 · 2025-09-25 14:33:37 -05:00 · 2025-09-25 14:33:37 -05:00 · f2b7c82534
commit f2b7c82534
parent cfc64d1707
4 changed files with 54 additions and 0 deletions
--- a/crates/ruff_python_trivia/src/tokenizer.rs
+++ b/crates/ruff_python_trivia/src/tokenizer.rs
@@ -599,6 +599,16 @@ impl<'a> SimpleTokenizer<'a> {
                            | "rb"
                            | "rf"
                            | "u"
+                            | "T"
+                            | "TR"
+                            | "Tr"
+                            | "RT"
+                            | "Rt"
+                            | "t"
+                            | "tR"
+                            | "tr"
+                            | "rT"
+                            | "rt"
                    )
                {
                    self.bogus = true;
--- a/crates/ruff_python_trivia_integration_tests/tests/simple_tokenizer.rs
+++ b/crates/ruff_python_trivia_integration_tests/tests/simple_tokenizer.rs
@@ -169,6 +169,22 @@ fn string_with_byte_kind() {
    // note: not reversible: [other, bogus] vs [bogus, other]
 }

+#[test]
+fn fstring() {
+    let source = "f'foo'";
+
+    let test_case = tokenize(source);
+    assert_debug_snapshot!(test_case.tokens());
+}
+
+#[test]
+fn tstring() {
+    let source = "t'foo'";
+
+    let test_case = tokenize(source);
+    assert_debug_snapshot!(test_case.tokens());
+}
+
 #[test]
 fn string_with_invalid_kind() {
    let source = "abc'foo'";
--- a/crates/ruff_python_trivia_integration_tests/tests/snapshots/simple_tokenizer__fstring.snap
+++ b/crates/ruff_python_trivia_integration_tests/tests/snapshots/simple_tokenizer__fstring.snap
@@ -0,0 +1,14 @@
+---
+source: crates/ruff_python_trivia_integration_tests/tests/simple_tokenizer.rs
+expression: test_case.tokens()
+---
+[
+    SimpleToken {
+        kind: Other,
+        range: 0..1,
+    },
+    SimpleToken {
+        kind: Bogus,
+        range: 1..6,
+    },
+]
--- a/crates/ruff_python_trivia_integration_tests/tests/snapshots/simple_tokenizer__tstring.snap
+++ b/crates/ruff_python_trivia_integration_tests/tests/snapshots/simple_tokenizer__tstring.snap
@@ -0,0 +1,14 @@
+---
+source: crates/ruff_python_trivia_integration_tests/tests/simple_tokenizer.rs
+expression: test_case.tokens()
+---
+[
+    SimpleToken {
+        kind: Other,
+        range: 0..1,
+    },
+    SimpleToken {
+        kind: Bogus,
+        range: 1..6,
+    },
+]