Track t-strings and f-strings for token-based rules and suppression comments (#20357)

Our token-based rules and `noqa` extraction used an `Indexer` that kept
track of f-string ranges but not t-strings. We've updated the `Indexer`
and downstream uses thereof to handle both f-strings and t-strings.

Most of the diff is renaming and adding tests.

Note that much of the "new" logic gets to be naive because the lexer has
already ensured that f and t-string "starts" are paired with their
respective "ends", even amidst nesting and so on.

Finally: one could imagine wanting to know if a given interpolated
string range corresponds to an f-string or a t-string, but I didn't find
a place where we actually needed this.

Closes #20310
This commit is contained in:
Dylan 2025-09-12 13:00:12 -05:00 committed by GitHub
parent ec863bcde7
commit b6bd32d9dc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 519 additions and 89 deletions

View file

@ -210,6 +210,58 @@ pub const SINGLE_QUOTE_BYTE_PREFIXES: &[&str] = &[
"b'",
];
/// Triple-quoted template-string (t-string) prefixes.
///
/// Includes all permutations of `t` and `rt` (every order and every casing), each followed by
/// either `"""` or `'''`. The single-quoted counterparts live in
/// [`SINGLE_QUOTE_TEMPLATE_PREFIXES`].
///
/// See: <https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals>
#[rustfmt::skip]
pub const TRIPLE_QUOTE_TEMPLATE_PREFIXES: &[&str] = &[
"TR\"\"\"",
"Tr\"\"\"",
"tR\"\"\"",
"tr\"\"\"",
"RT\"\"\"",
"Rt\"\"\"",
"rT\"\"\"",
"rt\"\"\"",
"TR'''",
"Tr'''",
"tR'''",
"tr'''",
"RT'''",
"Rt'''",
"rT'''",
"rt'''",
"T\"\"\"",
"t\"\"\"",
"T'''",
"t'''",
];
/// Single-quoted template-string (t-string) prefixes.
///
/// Includes all permutations of `t` and `rt` (every order and every casing), each followed by
/// either `"` or `'`. The triple-quoted counterparts live in
/// [`TRIPLE_QUOTE_TEMPLATE_PREFIXES`].
#[rustfmt::skip]
pub const SINGLE_QUOTE_TEMPLATE_PREFIXES: &[&str] = &[
"TR\"",
"Tr\"",
"tR\"",
"tr\"",
"RT\"",
"Rt\"",
"rT\"",
"rt\"",
"TR'",
"Tr'",
"tR'",
"tr'",
"RT'",
"Rt'",
"rT'",
"rt'",
"T\"",
"t\"",
"T'",
"t'",
];
/// Strip the leading and trailing quotes from a string.
/// Assumes that the string is a valid string literal, but does not verify that the string
/// is a "simple" string literal (i.e., that it does not contain any implicit concatenations).
@ -229,7 +281,7 @@ pub fn raw_contents_range(contents: &str) -> Option<TextRange> {
))
}
/// An [`AhoCorasick`] matcher for string and byte literal prefixes.
/// An [`AhoCorasick`] matcher for string, template, and bytes literal prefixes.
static PREFIX_MATCHER: LazyLock<AhoCorasick> = LazyLock::new(|| {
AhoCorasick::builder()
.start_kind(StartKind::Anchored)
@ -239,19 +291,21 @@ static PREFIX_MATCHER: LazyLock<AhoCorasick> = LazyLock::new(|| {
TRIPLE_QUOTE_STR_PREFIXES
.iter()
.chain(TRIPLE_QUOTE_BYTE_PREFIXES)
.chain(TRIPLE_QUOTE_TEMPLATE_PREFIXES)
.chain(SINGLE_QUOTE_STR_PREFIXES)
.chain(SINGLE_QUOTE_BYTE_PREFIXES),
.chain(SINGLE_QUOTE_BYTE_PREFIXES)
.chain(SINGLE_QUOTE_TEMPLATE_PREFIXES),
)
.unwrap()
});
/// Return the leading quote for a string, template, or bytes literal (e.g., `"""`).
///
/// Runs a search with [`PREFIX_MATCHER`] anchored at the start of `content` and hands back the
/// matched slice of `content`. Returns `None` when no known prefix matches there.
pub fn leading_quote(content: &str) -> Option<&str> {
    let anchored_input = Input::new(content).anchored(Anchored::Yes);
    PREFIX_MATCHER
        .find(anchored_input)
        .map(|found| &content[found.start()..found.end()])
}
/// Return the trailing quote string for a string or byte literal (e.g., `"""`).
/// Return the trailing quote string for a string, template, or bytes literal (e.g., `"""`).
pub fn trailing_quote(content: &str) -> Option<&str> {
if content.ends_with("'''") {
Some("'''")
@ -268,14 +322,16 @@ pub fn trailing_quote(content: &str) -> Option<&str> {
/// Return `true` if `content` is a triple-quote string, bytes, or template prefix.
///
/// `content` is compared against the prefix tables by exact equality, so pass the prefix itself
/// (e.g., `rt"""`) rather than a whole literal.
pub fn is_triple_quote(content: &str) -> bool {
TRIPLE_QUOTE_STR_PREFIXES.contains(&content) || TRIPLE_QUOTE_BYTE_PREFIXES.contains(&content)
TRIPLE_QUOTE_STR_PREFIXES.contains(&content)
|| TRIPLE_QUOTE_BYTE_PREFIXES.contains(&content)
|| TRIPLE_QUOTE_TEMPLATE_PREFIXES.contains(&content)
}
#[cfg(test)]
mod tests {
use super::{
SINGLE_QUOTE_BYTE_PREFIXES, SINGLE_QUOTE_STR_PREFIXES, TRIPLE_QUOTE_BYTE_PREFIXES,
TRIPLE_QUOTE_STR_PREFIXES,
SINGLE_QUOTE_BYTE_PREFIXES, SINGLE_QUOTE_STR_PREFIXES, SINGLE_QUOTE_TEMPLATE_PREFIXES,
TRIPLE_QUOTE_BYTE_PREFIXES, TRIPLE_QUOTE_STR_PREFIXES, TRIPLE_QUOTE_TEMPLATE_PREFIXES,
};
#[test]
@ -283,8 +339,10 @@ mod tests {
let prefixes = TRIPLE_QUOTE_STR_PREFIXES
.iter()
.chain(TRIPLE_QUOTE_BYTE_PREFIXES)
.chain(TRIPLE_QUOTE_TEMPLATE_PREFIXES)
.chain(SINGLE_QUOTE_STR_PREFIXES)
.chain(SINGLE_QUOTE_BYTE_PREFIXES)
.chain(SINGLE_QUOTE_TEMPLATE_PREFIXES)
.collect::<Vec<_>>();
for (i, prefix_i) in prefixes.iter().enumerate() {
for (j, prefix_j) in prefixes.iter().enumerate() {