Handle t-string prefixes in SimpleTokenizer (#20578)

The simple tokenizer is meant to skip strings, but it was recording a
`Name` token for t-strings (from the `t`). This PR fixes that.
This commit is contained in:
Dylan 2025-09-25 14:33:37 -05:00 committed by GitHub
parent cfc64d1707
commit f2b7c82534
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 54 additions and 0 deletions

View file

@ -599,6 +599,16 @@ impl<'a> SimpleTokenizer<'a> {
| "rb"
| "rf"
| "u"
| "T"
| "TR"
| "Tr"
| "RT"
| "Rt"
| "t"
| "tR"
| "tr"
| "rT"
| "rt"
)
{
self.bogus = true;

View file

@ -169,6 +169,22 @@ fn string_with_byte_kind() {
// note: not reversible: [other, bogus] vs [bogus, other]
}
// Snapshot test: runs the `tokenize` helper on a single-quoted string literal
// that carries the `f` prefix and records the resulting tokens.
#[test]
fn fstring() {
// The prefix letter is immediately followed by the opening quote.
let source = "f'foo'";
let test_case = tokenize(source);
// The debug rendering of the tokens is compared against the stored snapshot.
assert_debug_snapshot!(test_case.tokens());
}
// Snapshot test: runs the `tokenize` helper on a single-quoted string literal
// that carries the `t` (t-string) prefix and records the resulting tokens.
// Regression coverage for the `t` prefix, which previously produced a `Name`
// token instead of being treated as part of a string.
#[test]
fn tstring() {
// Same shape as the `fstring` input, with `t` as the prefix letter.
let source = "t'foo'";
let test_case = tokenize(source);
// The debug rendering of the tokens is compared against the stored snapshot.
assert_debug_snapshot!(test_case.tokens());
}
#[test]
fn string_with_invalid_kind() {
let source = "abc'foo'";

View file

@ -0,0 +1,14 @@
---
source: crates/ruff_python_trivia_integration_tests/tests/simple_tokenizer.rs
expression: test_case.tokens()
---
[
SimpleToken {
kind: Other,
range: 0..1,
},
SimpleToken {
kind: Bogus,
range: 1..6,
},
]

View file

@ -0,0 +1,14 @@
---
source: crates/ruff_python_trivia_integration_tests/tests/simple_tokenizer.rs
expression: test_case.tokens()
---
[
SimpleToken {
kind: Other,
range: 0..1,
},
SimpleToken {
kind: Bogus,
range: 1..6,
},
]