Track t-strings and f-strings for token-based rules and suppression comments (#20357)

Our token-based rules and `noqa` extraction used an `Indexer` that kept
track of f-string ranges but not t-string ranges. We've updated the
`Indexer` and downstream uses thereof to handle both f-strings and
t-strings.

Most of the diff is renaming and adding tests.

Note that much of the "new" logic gets to be naive because the lexer has
already ensured that f-string and t-string "starts" are paired with
their respective "ends", even amidst nesting and so on.

Finally: one could imagine wanting to know if a given interpolated
string range corresponds to an f-string or a t-string, but I didn't find
a place where we actually needed this.

Closes #20310
This commit is contained in:
Dylan 2025-09-12 13:00:12 -05:00 committed by GitHub
parent ec863bcde7
commit b6bd32d9dc
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 519 additions and 89 deletions

View file

@ -664,3 +664,14 @@ class C[
type X[T,] = T type X[T,] = T
def f[T,](): pass def f[T,](): pass
class C[T,]: pass class C[T,]: pass
# t-string examples
kwargs.pop("remove", t"this {trailing_comma}",)
kwargs.pop("remove", t"this {f"{trailing_comma}"}",)
t"""This is a test. {
"Another sentence."
if True else
"Don't add a trailing comma here ->"
}"""

View file

@ -187,3 +187,24 @@ _ = (
# leading comment # leading comment
"end" "end"
) )
# https://github.com/astral-sh/ruff/issues/20310
# ISC001
t"The quick " t"brown fox."
# ISC002
t"The quick brown fox jumps over the lazy "\
t"dog."
# ISC003
(
t"The quick brown fox jumps over the lazy "
+ t"dog"
)
# nested examples with both t and f-strings
_ = "a" f"b {t"c" t"d"} e" "f"
_ = t"b {f"c" f"d {t"e" t"f"} g"} h"
_ = f"b {t"abc" \
t"def"} g"

View file

@ -125,26 +125,27 @@ fn extract_noqa_line_for(tokens: &Tokens, locator: &Locator, indexer: &Indexer)
} }
// The capacity allocated here might be more than we need if there are // The capacity allocated here might be more than we need if there are
// nested f-strings. // nested interpolated strings.
let mut fstring_mappings = Vec::with_capacity(indexer.fstring_ranges().len()); let mut interpolated_string_mappings =
Vec::with_capacity(indexer.interpolated_string_ranges().len());
// For nested f-strings, we expect `noqa` directives on the last line of the // For nested interpolated strings, we expect `noqa` directives on the last line of the
// outermost f-string. The last f-string range will be used to skip over // outermost interpolated string. The last interpolated string range will be used to skip over
// the inner f-strings. // the inner interpolated strings.
let mut last_fstring_range: TextRange = TextRange::default(); let mut last_interpolated_string_range: TextRange = TextRange::default();
for fstring_range in indexer.fstring_ranges().values() { for interpolated_string_range in indexer.interpolated_string_ranges().values() {
if !locator.contains_line_break(*fstring_range) { if !locator.contains_line_break(*interpolated_string_range) {
continue; continue;
} }
if last_fstring_range.contains_range(*fstring_range) { if last_interpolated_string_range.contains_range(*interpolated_string_range) {
continue; continue;
} }
let new_range = TextRange::new( let new_range = TextRange::new(
locator.line_start(fstring_range.start()), locator.line_start(interpolated_string_range.start()),
fstring_range.end(), interpolated_string_range.end(),
); );
fstring_mappings.push(new_range); interpolated_string_mappings.push(new_range);
last_fstring_range = new_range; last_interpolated_string_range = new_range;
} }
let mut continuation_mappings = Vec::new(); let mut continuation_mappings = Vec::new();
@ -172,11 +173,11 @@ fn extract_noqa_line_for(tokens: &Tokens, locator: &Locator, indexer: &Indexer)
// Merge the mappings in sorted order // Merge the mappings in sorted order
let mut mappings = NoqaMapping::with_capacity( let mut mappings = NoqaMapping::with_capacity(
continuation_mappings.len() + string_mappings.len() + fstring_mappings.len(), continuation_mappings.len() + string_mappings.len() + interpolated_string_mappings.len(),
); );
let string_mappings = SortedMergeIter { let string_mappings = SortedMergeIter {
left: fstring_mappings.into_iter().peekable(), left: interpolated_string_mappings.into_iter().peekable(),
right: string_mappings.into_iter().peekable(), right: string_mappings.into_iter().peekable(),
}; };
let all_mappings = SortedMergeIter { let all_mappings = SortedMergeIter {
@ -497,12 +498,35 @@ end'''
NoqaMapping::from_iter([TextRange::new(TextSize::from(6), TextSize::from(70))]) NoqaMapping::from_iter([TextRange::new(TextSize::from(6), TextSize::from(70))])
); );
let contents = "x = 1
y = t'''abc
def {f'''nested
interpolated string''' f'another nested'}
end'''
";
assert_eq!(
noqa_mappings(contents),
NoqaMapping::from_iter([TextRange::new(TextSize::from(6), TextSize::from(82))])
);
let contents = "x = 1 let contents = "x = 1
y = f'normal' y = f'normal'
z = f'another but {f'nested but {f'still single line'} nested'}' z = f'another but {f'nested but {f'still single line'} nested'}'
"; ";
assert_eq!(noqa_mappings(contents), NoqaMapping::default()); assert_eq!(noqa_mappings(contents), NoqaMapping::default());
let contents = "x = 1
y = t'normal'
z = t'another but {t'nested but {t'still single line'} nested'}'
";
assert_eq!(noqa_mappings(contents), NoqaMapping::default());
let contents = "x = 1
y = f'normal'
z = f'another but {t'nested but {f'still single line'} nested'}'
";
assert_eq!(noqa_mappings(contents), NoqaMapping::default());
let contents = r"x = \ let contents = r"x = \
1"; 1";
assert_eq!( assert_eq!(

View file

@ -370,8 +370,8 @@ pub(crate) fn adjust_indentation(
// If the range includes a multi-line string, use LibCST to ensure that we don't adjust the // If the range includes a multi-line string, use LibCST to ensure that we don't adjust the
// whitespace _within_ the string. // whitespace _within_ the string.
let contains_multiline_string = let contains_multiline_string = indexer.multiline_ranges().intersects(range)
indexer.multiline_ranges().intersects(range) || indexer.fstring_ranges().intersects(range); || indexer.interpolated_string_ranges().intersects(range);
// If the range has mixed indentation, we will use LibCST as well. // If the range has mixed indentation, we will use LibCST as well.
let mixed_indentation = contents.universal_newlines().any(|line| { let mixed_indentation = contents.universal_newlines().any(|line| {

View file

@ -250,23 +250,23 @@ pub(crate) fn trailing_commas(
locator: &Locator, locator: &Locator,
indexer: &Indexer, indexer: &Indexer,
) { ) {
let mut fstrings = 0u32; let mut interpolated_strings = 0u32;
let simple_tokens = tokens.iter().filter_map(|token| { let simple_tokens = tokens.iter().filter_map(|token| {
match token.kind() { match token.kind() {
// Completely ignore comments -- they just interfere with the logic. // Completely ignore comments -- they just interfere with the logic.
TokenKind::Comment => None, TokenKind::Comment => None,
// F-strings are handled as `String` token type with the complete range // F-strings and t-strings are handled as `String` token type with the complete range
// of the outermost f-string. This means that the expression inside the // of the outermost interpolated string. This means that the expression inside the
// f-string is not checked for trailing commas. // interpolated string is not checked for trailing commas.
TokenKind::FStringStart => { TokenKind::FStringStart | TokenKind::TStringStart => {
fstrings = fstrings.saturating_add(1); interpolated_strings = interpolated_strings.saturating_add(1);
None None
} }
TokenKind::FStringEnd => { TokenKind::FStringEnd | TokenKind::TStringEnd => {
fstrings = fstrings.saturating_sub(1); interpolated_strings = interpolated_strings.saturating_sub(1);
if fstrings == 0 { if interpolated_strings == 0 {
indexer indexer
.fstring_ranges() .interpolated_string_ranges()
.outermost(token.start()) .outermost(token.start())
.map(|range| SimpleToken::new(TokenType::String, range)) .map(|range| SimpleToken::new(TokenType::String, range))
} else { } else {
@ -274,7 +274,7 @@ pub(crate) fn trailing_commas(
} }
} }
_ => { _ => {
if fstrings == 0 { if interpolated_strings == 0 {
Some(SimpleToken::from(token.as_tuple())) Some(SimpleToken::from(token.as_tuple()))
} else { } else {
None None

View file

@ -1016,6 +1016,7 @@ help: Remove trailing comma
664 + type X[T] = T 664 + type X[T] = T
665 | def f[T,](): pass 665 | def f[T,](): pass
666 | class C[T,]: pass 666 | class C[T,]: pass
667 |
COM819 [*] Trailing comma prohibited COM819 [*] Trailing comma prohibited
--> COM81.py:665:8 --> COM81.py:665:8
@ -1032,6 +1033,8 @@ help: Remove trailing comma
- def f[T,](): pass - def f[T,](): pass
665 + def f[T](): pass 665 + def f[T](): pass
666 | class C[T,]: pass 666 | class C[T,]: pass
667 |
668 | # t-string examples
COM819 [*] Trailing comma prohibited COM819 [*] Trailing comma prohibited
--> COM81.py:666:10 --> COM81.py:666:10
@ -1040,6 +1043,8 @@ COM819 [*] Trailing comma prohibited
665 | def f[T,](): pass 665 | def f[T,](): pass
666 | class C[T,]: pass 666 | class C[T,]: pass
| ^ | ^
667 |
668 | # t-string examples
| |
help: Remove trailing comma help: Remove trailing comma
663 | 663 |
@ -1047,3 +1052,44 @@ help: Remove trailing comma
665 | def f[T,](): pass 665 | def f[T,](): pass
- class C[T,]: pass - class C[T,]: pass
666 + class C[T]: pass 666 + class C[T]: pass
667 |
668 | # t-string examples
669 | kwargs.pop("remove", t"this {trailing_comma}",)
COM819 [*] Trailing comma prohibited
--> COM81.py:669:46
|
668 | # t-string examples
669 | kwargs.pop("remove", t"this {trailing_comma}",)
| ^
670 | kwargs.pop("remove", t"this {f"{trailing_comma}"}",)
|
help: Remove trailing comma
666 | class C[T,]: pass
667 |
668 | # t-string examples
- kwargs.pop("remove", t"this {trailing_comma}",)
669 + kwargs.pop("remove", t"this {trailing_comma}")
670 | kwargs.pop("remove", t"this {f"{trailing_comma}"}",)
671 |
672 | t"""This is a test. {
COM819 [*] Trailing comma prohibited
--> COM81.py:670:51
|
668 | # t-string examples
669 | kwargs.pop("remove", t"this {trailing_comma}",)
670 | kwargs.pop("remove", t"this {f"{trailing_comma}"}",)
| ^
671 |
672 | t"""This is a test. {
|
help: Remove trailing comma
667 |
668 | # t-string examples
669 | kwargs.pop("remove", t"this {trailing_comma}",)
- kwargs.pop("remove", t"this {f"{trailing_comma}"}",)
670 + kwargs.pop("remove", t"this {f"{trailing_comma}"}")
671 |
672 | t"""This is a test. {
673 | "Another sentence."

View file

@ -74,6 +74,7 @@ pub(crate) fn explicit(checker: &Checker, expr: &Expr) {
Expr::StringLiteral(_) | Expr::FString(_), Expr::StringLiteral(_) | Expr::FString(_),
Expr::StringLiteral(_) | Expr::FString(_) Expr::StringLiteral(_) | Expr::FString(_)
) | (Expr::BytesLiteral(_), Expr::BytesLiteral(_)) ) | (Expr::BytesLiteral(_), Expr::BytesLiteral(_))
| (Expr::TString(_), Expr::TString(_))
); );
if concatable if concatable
&& checker && checker

View file

@ -123,21 +123,32 @@ pub(crate) fn implicit(
let (a_range, b_range) = match (a_token.kind(), b_token.kind()) { let (a_range, b_range) = match (a_token.kind(), b_token.kind()) {
(TokenKind::String, TokenKind::String) => (a_token.range(), b_token.range()), (TokenKind::String, TokenKind::String) => (a_token.range(), b_token.range()),
(TokenKind::String, TokenKind::FStringStart) => { (TokenKind::String, TokenKind::FStringStart) => {
match indexer.fstring_ranges().innermost(b_token.start()) { match indexer
.interpolated_string_ranges()
.innermost(b_token.start())
{
Some(b_range) => (a_token.range(), b_range), Some(b_range) => (a_token.range(), b_range),
None => continue, None => continue,
} }
} }
(TokenKind::FStringEnd, TokenKind::String) => { (TokenKind::FStringEnd, TokenKind::String) => {
match indexer.fstring_ranges().innermost(a_token.start()) { match indexer
.interpolated_string_ranges()
.innermost(a_token.start())
{
Some(a_range) => (a_range, b_token.range()), Some(a_range) => (a_range, b_token.range()),
None => continue, None => continue,
} }
} }
(TokenKind::FStringEnd, TokenKind::FStringStart) => { (TokenKind::FStringEnd, TokenKind::FStringStart)
| (TokenKind::TStringEnd, TokenKind::TStringStart) => {
match ( match (
indexer.fstring_ranges().innermost(a_token.start()), indexer
indexer.fstring_ranges().innermost(b_token.start()), .interpolated_string_ranges()
.innermost(a_token.start()),
indexer
.interpolated_string_ranges()
.innermost(b_token.start()),
) { ) {
(Some(a_range), Some(b_range)) => (a_range, b_range), (Some(a_range), Some(b_range)) => (a_range, b_range),
_ => continue, _ => continue,

View file

@ -484,3 +484,104 @@ help: Combine string literals
94 | 94 |
95 | 95 |
96 | # Mixed literal + non-literal scenarios 96 | # Mixed literal + non-literal scenarios
ISC001 [*] Implicitly concatenated string literals on one line
--> ISC.py:193:1
|
191 | # https://github.com/astral-sh/ruff/issues/20310
192 | # ISC001
193 | t"The quick " t"brown fox."
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^
194 |
195 | # ISC002
|
help: Combine string literals
190 |
191 | # https://github.com/astral-sh/ruff/issues/20310
192 | # ISC001
- t"The quick " t"brown fox."
193 + t"The quick brown fox."
194 |
195 | # ISC002
196 | t"The quick brown fox jumps over the lazy "\
ISC001 Implicitly concatenated string literals on one line
--> ISC.py:206:5
|
205 | # nested examples with both t and f-strings
206 | _ = "a" f"b {t"c" t"d"} e" "f"
| ^^^^^^^^^^^^^^^^^^^^^^
207 | _ = t"b {f"c" f"d {t"e" t"f"} g"} h"
208 | _ = f"b {t"abc" \
|
help: Combine string literals
ISC001 Implicitly concatenated string literals on one line
--> ISC.py:206:9
|
205 | # nested examples with both t and f-strings
206 | _ = "a" f"b {t"c" t"d"} e" "f"
| ^^^^^^^^^^^^^^^^^^^^^^
207 | _ = t"b {f"c" f"d {t"e" t"f"} g"} h"
208 | _ = f"b {t"abc" \
|
help: Combine string literals
ISC001 [*] Implicitly concatenated string literals on one line
--> ISC.py:206:14
|
205 | # nested examples with both t and f-strings
206 | _ = "a" f"b {t"c" t"d"} e" "f"
| ^^^^^^^^^
207 | _ = t"b {f"c" f"d {t"e" t"f"} g"} h"
208 | _ = f"b {t"abc" \
|
help: Combine string literals
203 | )
204 |
205 | # nested examples with both t and f-strings
- _ = "a" f"b {t"c" t"d"} e" "f"
206 + _ = "a" f"b {t"cd"} e" "f"
207 | _ = t"b {f"c" f"d {t"e" t"f"} g"} h"
208 | _ = f"b {t"abc" \
209 | t"def"} g"
ISC001 [*] Implicitly concatenated string literals on one line
--> ISC.py:207:10
|
205 | # nested examples with both t and f-strings
206 | _ = "a" f"b {t"c" t"d"} e" "f"
207 | _ = t"b {f"c" f"d {t"e" t"f"} g"} h"
| ^^^^^^^^^^^^^^^^^^^^^^^
208 | _ = f"b {t"abc" \
209 | t"def"} g"
|
help: Combine string literals
204 |
205 | # nested examples with both t and f-strings
206 | _ = "a" f"b {t"c" t"d"} e" "f"
- _ = t"b {f"c" f"d {t"e" t"f"} g"} h"
207 + _ = t"b {f"cd {t"e" t"f"} g"} h"
208 | _ = f"b {t"abc" \
209 | t"def"} g"
210 |
ISC001 [*] Implicitly concatenated string literals on one line
--> ISC.py:207:20
|
205 | # nested examples with both t and f-strings
206 | _ = "a" f"b {t"c" t"d"} e" "f"
207 | _ = t"b {f"c" f"d {t"e" t"f"} g"} h"
| ^^^^^^^^^
208 | _ = f"b {t"abc" \
209 | t"def"} g"
|
help: Combine string literals
204 |
205 | # nested examples with both t and f-strings
206 | _ = "a" f"b {t"c" t"d"} e" "f"
- _ = t"b {f"c" f"d {t"e" t"f"} g"} h"
207 + _ = t"b {f"c" f"d {t"ef"} g"} h"
208 | _ = f"b {t"abc" \
209 | t"def"} g"
210 |

View file

@ -26,3 +26,25 @@ ISC002 Implicitly concatenated string literals over multiple lines
76 | 76 |
77 | # Explicitly concatenated nested f-strings 77 | # Explicitly concatenated nested f-strings
| |
ISC002 Implicitly concatenated string literals over multiple lines
--> ISC.py:196:1
|
195 | # ISC002
196 | / t"The quick brown fox jumps over the lazy "\
197 | | t"dog."
| |_______^
198 |
199 | # ISC003
|
ISC002 Implicitly concatenated string literals over multiple lines
--> ISC.py:208:10
|
206 | _ = "a" f"b {t"c" t"d"} e" "f"
207 | _ = t"b {f"c" f"d {t"e" t"f"} g"} h"
208 | _ = f"b {t"abc" \
| __________^
209 | | t"def"} g"
| |__________^
|

View file

@ -337,3 +337,23 @@ help: Remove redundant '+' operator to implicitly concatenate
187 | # leading comment 187 | # leading comment
188 | "end" 188 | "end"
189 | ) 189 | )
ISC003 [*] Explicitly concatenated string should be implicitly concatenated
--> ISC.py:201:5
|
199 | # ISC003
200 | (
201 | / t"The quick brown fox jumps over the lazy "
202 | | + t"dog"
| |____________^
203 | )
|
help: Remove redundant '+' operator to implicitly concatenate
199 | # ISC003
200 | (
201 | t"The quick brown fox jumps over the lazy "
- + t"dog"
202 + t"dog"
203 | )
204 |
205 | # nested examples with both t and f-strings

View file

@ -484,3 +484,104 @@ help: Combine string literals
94 | 94 |
95 | 95 |
96 | # Mixed literal + non-literal scenarios 96 | # Mixed literal + non-literal scenarios
ISC001 [*] Implicitly concatenated string literals on one line
--> ISC.py:193:1
|
191 | # https://github.com/astral-sh/ruff/issues/20310
192 | # ISC001
193 | t"The quick " t"brown fox."
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^
194 |
195 | # ISC002
|
help: Combine string literals
190 |
191 | # https://github.com/astral-sh/ruff/issues/20310
192 | # ISC001
- t"The quick " t"brown fox."
193 + t"The quick brown fox."
194 |
195 | # ISC002
196 | t"The quick brown fox jumps over the lazy "\
ISC001 Implicitly concatenated string literals on one line
--> ISC.py:206:5
|
205 | # nested examples with both t and f-strings
206 | _ = "a" f"b {t"c" t"d"} e" "f"
| ^^^^^^^^^^^^^^^^^^^^^^
207 | _ = t"b {f"c" f"d {t"e" t"f"} g"} h"
208 | _ = f"b {t"abc" \
|
help: Combine string literals
ISC001 Implicitly concatenated string literals on one line
--> ISC.py:206:9
|
205 | # nested examples with both t and f-strings
206 | _ = "a" f"b {t"c" t"d"} e" "f"
| ^^^^^^^^^^^^^^^^^^^^^^
207 | _ = t"b {f"c" f"d {t"e" t"f"} g"} h"
208 | _ = f"b {t"abc" \
|
help: Combine string literals
ISC001 [*] Implicitly concatenated string literals on one line
--> ISC.py:206:14
|
205 | # nested examples with both t and f-strings
206 | _ = "a" f"b {t"c" t"d"} e" "f"
| ^^^^^^^^^
207 | _ = t"b {f"c" f"d {t"e" t"f"} g"} h"
208 | _ = f"b {t"abc" \
|
help: Combine string literals
203 | )
204 |
205 | # nested examples with both t and f-strings
- _ = "a" f"b {t"c" t"d"} e" "f"
206 + _ = "a" f"b {t"cd"} e" "f"
207 | _ = t"b {f"c" f"d {t"e" t"f"} g"} h"
208 | _ = f"b {t"abc" \
209 | t"def"} g"
ISC001 [*] Implicitly concatenated string literals on one line
--> ISC.py:207:10
|
205 | # nested examples with both t and f-strings
206 | _ = "a" f"b {t"c" t"d"} e" "f"
207 | _ = t"b {f"c" f"d {t"e" t"f"} g"} h"
| ^^^^^^^^^^^^^^^^^^^^^^^
208 | _ = f"b {t"abc" \
209 | t"def"} g"
|
help: Combine string literals
204 |
205 | # nested examples with both t and f-strings
206 | _ = "a" f"b {t"c" t"d"} e" "f"
- _ = t"b {f"c" f"d {t"e" t"f"} g"} h"
207 + _ = t"b {f"cd {t"e" t"f"} g"} h"
208 | _ = f"b {t"abc" \
209 | t"def"} g"
210 |
ISC001 [*] Implicitly concatenated string literals on one line
--> ISC.py:207:20
|
205 | # nested examples with both t and f-strings
206 | _ = "a" f"b {t"c" t"d"} e" "f"
207 | _ = t"b {f"c" f"d {t"e" t"f"} g"} h"
| ^^^^^^^^^
208 | _ = f"b {t"abc" \
209 | t"def"} g"
|
help: Combine string literals
204 |
205 | # nested examples with both t and f-strings
206 | _ = "a" f"b {t"c" t"d"} e" "f"
- _ = t"b {f"c" f"d {t"e" t"f"} g"} h"
207 + _ = t"b {f"c" f"d {t"ef"} g"} h"
208 | _ = f"b {t"abc" \
209 | t"def"} g"
210 |

View file

@ -67,3 +67,25 @@ ISC002 Implicitly concatenated string literals over multiple lines
76 | 76 |
77 | # Explicitly concatenated nested f-strings 77 | # Explicitly concatenated nested f-strings
| |
ISC002 Implicitly concatenated string literals over multiple lines
--> ISC.py:196:1
|
195 | # ISC002
196 | / t"The quick brown fox jumps over the lazy "\
197 | | t"dog."
| |_______^
198 |
199 | # ISC003
|
ISC002 Implicitly concatenated string literals over multiple lines
--> ISC.py:208:10
|
206 | _ = "a" f"b {t"c" t"d"} e" "f"
207 | _ = t"b {f"c" f"d {t"e" t"f"} g"} h"
208 | _ = f"b {t"abc" \
| __________^
209 | | t"def"} g"
| |__________^
|

View file

@ -402,25 +402,6 @@ help: Add backslash to escape sequence
96 | 96 |
97 | #: Okay 97 | #: Okay
W605 [*] Invalid escape sequence: `\w`
--> W605_1.py:109:1
|
107 | regex = t'\w' # noqa
108 | regex = t'''
109 | \w
| ^^
110 | ''' # noqa
|
help: Use a raw string literal
105 | '''
106 | s = t'\\'
107 | regex = t'\w' # noqa
- regex = t'''
108 + regex = rt'''
109 | \w
110 | ''' # noqa
111 |
W605 [*] Invalid escape sequence: `\_` W605 [*] Invalid escape sequence: `\_`
--> W605_1.py:112:13 --> W605_1.py:112:13
| |

View file

@ -210,6 +210,58 @@ pub const SINGLE_QUOTE_BYTE_PREFIXES: &[&str] = &[
"b'", "b'",
]; ];
/// Includes all permutations of `t` and `rt`. This includes all possible orders, and all possible
/// casings, for both single and triple quotes.
///
/// See: <https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals>
#[rustfmt::skip]
pub const TRIPLE_QUOTE_TEMPLATE_PREFIXES: &[&str] = &[
"TR\"\"\"",
"Tr\"\"\"",
"tR\"\"\"",
"tr\"\"\"",
"RT\"\"\"",
"Rt\"\"\"",
"rT\"\"\"",
"rt\"\"\"",
"TR'''",
"Tr'''",
"tR'''",
"tr'''",
"RT'''",
"Rt'''",
"rT'''",
"rt'''",
"T\"\"\"",
"t\"\"\"",
"T'''",
"t'''",
];
#[rustfmt::skip]
pub const SINGLE_QUOTE_TEMPLATE_PREFIXES: &[&str] = &[
"TR\"",
"Tr\"",
"tR\"",
"tr\"",
"RT\"",
"Rt\"",
"rT\"",
"rt\"",
"TR'",
"Tr'",
"tR'",
"tr'",
"RT'",
"Rt'",
"rT'",
"rt'",
"T\"",
"t\"",
"T'",
"t'",
];
/// Strip the leading and trailing quotes from a string. /// Strip the leading and trailing quotes from a string.
/// Assumes that the string is a valid string literal, but does not verify that the string /// Assumes that the string is a valid string literal, but does not verify that the string
/// is a "simple" string literal (i.e., that it does not contain any implicit concatenations). /// is a "simple" string literal (i.e., that it does not contain any implicit concatenations).
@ -229,7 +281,7 @@ pub fn raw_contents_range(contents: &str) -> Option<TextRange> {
)) ))
} }
/// An [`AhoCorasick`] matcher for string and byte literal prefixes. /// An [`AhoCorasick`] matcher for string, template, and bytes literal prefixes.
static PREFIX_MATCHER: LazyLock<AhoCorasick> = LazyLock::new(|| { static PREFIX_MATCHER: LazyLock<AhoCorasick> = LazyLock::new(|| {
AhoCorasick::builder() AhoCorasick::builder()
.start_kind(StartKind::Anchored) .start_kind(StartKind::Anchored)
@ -239,19 +291,21 @@ static PREFIX_MATCHER: LazyLock<AhoCorasick> = LazyLock::new(|| {
TRIPLE_QUOTE_STR_PREFIXES TRIPLE_QUOTE_STR_PREFIXES
.iter() .iter()
.chain(TRIPLE_QUOTE_BYTE_PREFIXES) .chain(TRIPLE_QUOTE_BYTE_PREFIXES)
.chain(TRIPLE_QUOTE_TEMPLATE_PREFIXES)
.chain(SINGLE_QUOTE_STR_PREFIXES) .chain(SINGLE_QUOTE_STR_PREFIXES)
.chain(SINGLE_QUOTE_BYTE_PREFIXES), .chain(SINGLE_QUOTE_BYTE_PREFIXES)
.chain(SINGLE_QUOTE_TEMPLATE_PREFIXES),
) )
.unwrap() .unwrap()
}); });
/// Return the leading quote for a string or byte literal (e.g., `"""`). /// Return the leading quote for a string, template, or bytes literal (e.g., `"""`).
pub fn leading_quote(content: &str) -> Option<&str> { pub fn leading_quote(content: &str) -> Option<&str> {
let mat = PREFIX_MATCHER.find(Input::new(content).anchored(Anchored::Yes))?; let mat = PREFIX_MATCHER.find(Input::new(content).anchored(Anchored::Yes))?;
Some(&content[mat.start()..mat.end()]) Some(&content[mat.start()..mat.end()])
} }
/// Return the trailing quote string for a string or byte literal (e.g., `"""`). /// Return the trailing quote string for a string, template, or bytes literal (e.g., `"""`).
pub fn trailing_quote(content: &str) -> Option<&str> { pub fn trailing_quote(content: &str) -> Option<&str> {
if content.ends_with("'''") { if content.ends_with("'''") {
Some("'''") Some("'''")
@ -268,14 +322,16 @@ pub fn trailing_quote(content: &str) -> Option<&str> {
/// Return `true` if the string is a triple-quote string or byte prefix. /// Return `true` if the string is a triple-quote string or byte prefix.
pub fn is_triple_quote(content: &str) -> bool { pub fn is_triple_quote(content: &str) -> bool {
TRIPLE_QUOTE_STR_PREFIXES.contains(&content) || TRIPLE_QUOTE_BYTE_PREFIXES.contains(&content) TRIPLE_QUOTE_STR_PREFIXES.contains(&content)
|| TRIPLE_QUOTE_BYTE_PREFIXES.contains(&content)
|| TRIPLE_QUOTE_TEMPLATE_PREFIXES.contains(&content)
} }
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
use super::{ use super::{
SINGLE_QUOTE_BYTE_PREFIXES, SINGLE_QUOTE_STR_PREFIXES, TRIPLE_QUOTE_BYTE_PREFIXES, SINGLE_QUOTE_BYTE_PREFIXES, SINGLE_QUOTE_STR_PREFIXES, SINGLE_QUOTE_TEMPLATE_PREFIXES,
TRIPLE_QUOTE_STR_PREFIXES, TRIPLE_QUOTE_BYTE_PREFIXES, TRIPLE_QUOTE_STR_PREFIXES, TRIPLE_QUOTE_TEMPLATE_PREFIXES,
}; };
#[test] #[test]
@ -283,8 +339,10 @@ mod tests {
let prefixes = TRIPLE_QUOTE_STR_PREFIXES let prefixes = TRIPLE_QUOTE_STR_PREFIXES
.iter() .iter()
.chain(TRIPLE_QUOTE_BYTE_PREFIXES) .chain(TRIPLE_QUOTE_BYTE_PREFIXES)
.chain(TRIPLE_QUOTE_TEMPLATE_PREFIXES)
.chain(SINGLE_QUOTE_STR_PREFIXES) .chain(SINGLE_QUOTE_STR_PREFIXES)
.chain(SINGLE_QUOTE_BYTE_PREFIXES) .chain(SINGLE_QUOTE_BYTE_PREFIXES)
.chain(SINGLE_QUOTE_TEMPLATE_PREFIXES)
.collect::<Vec<_>>(); .collect::<Vec<_>>();
for (i, prefix_i) in prefixes.iter().enumerate() { for (i, prefix_i) in prefixes.iter().enumerate() {
for (j, prefix_j) in prefixes.iter().enumerate() { for (j, prefix_j) in prefixes.iter().enumerate() {

View file

@ -9,15 +9,17 @@ use ruff_python_trivia::{
use ruff_source_file::LineRanges; use ruff_source_file::LineRanges;
use ruff_text_size::{Ranged, TextRange, TextSize}; use ruff_text_size::{Ranged, TextRange, TextSize};
use crate::fstring_ranges::{FStringRanges, FStringRangesBuilder}; use crate::interpolated_string_ranges::{
InterpolatedStringRanges, InterpolatedStringRangesBuilder,
};
use crate::multiline_ranges::{MultilineRanges, MultilineRangesBuilder}; use crate::multiline_ranges::{MultilineRanges, MultilineRangesBuilder};
pub struct Indexer { pub struct Indexer {
/// Stores the start offset of continuation lines. /// Stores the start offset of continuation lines.
continuation_lines: Vec<TextSize>, continuation_lines: Vec<TextSize>,
/// The range of all f-string in the source document. /// The range of all interpolated strings in the source document.
fstring_ranges: FStringRanges, interpolated_string_ranges: InterpolatedStringRanges,
/// The range of all multiline strings in the source document. /// The range of all multiline strings in the source document.
multiline_ranges: MultilineRanges, multiline_ranges: MultilineRanges,
@ -30,7 +32,7 @@ impl Indexer {
pub fn from_tokens(tokens: &Tokens, source: &str) -> Self { pub fn from_tokens(tokens: &Tokens, source: &str) -> Self {
assert!(TextSize::try_from(source.len()).is_ok()); assert!(TextSize::try_from(source.len()).is_ok());
let mut fstring_ranges_builder = FStringRangesBuilder::default(); let mut interpolated_string_ranges_builder = InterpolatedStringRangesBuilder::default();
let mut multiline_ranges_builder = MultilineRangesBuilder::default(); let mut multiline_ranges_builder = MultilineRangesBuilder::default();
let mut continuation_lines = Vec::new(); let mut continuation_lines = Vec::new();
let mut comment_ranges = Vec::new(); let mut comment_ranges = Vec::new();
@ -59,7 +61,7 @@ impl Indexer {
} }
} }
fstring_ranges_builder.visit_token(token); interpolated_string_ranges_builder.visit_token(token);
multiline_ranges_builder.visit_token(token); multiline_ranges_builder.visit_token(token);
match token.kind() { match token.kind() {
@ -82,7 +84,7 @@ impl Indexer {
Self { Self {
continuation_lines, continuation_lines,
fstring_ranges: fstring_ranges_builder.finish(), interpolated_string_ranges: interpolated_string_ranges_builder.finish(),
multiline_ranges: multiline_ranges_builder.finish(), multiline_ranges: multiline_ranges_builder.finish(),
comment_ranges: CommentRanges::new(comment_ranges), comment_ranges: CommentRanges::new(comment_ranges),
} }
@ -93,9 +95,9 @@ impl Indexer {
&self.comment_ranges &self.comment_ranges
} }
/// Returns the byte offset ranges of f-strings. /// Returns the byte offset ranges of interpolated strings.
pub const fn fstring_ranges(&self) -> &FStringRanges { pub const fn interpolated_string_ranges(&self) -> &InterpolatedStringRanges {
&self.fstring_ranges &self.interpolated_string_ranges
} }
/// Returns the byte offset ranges of multiline strings. /// Returns the byte offset ranges of multiline strings.
@ -356,7 +358,7 @@ f"implicit " f"concatenation"
.trim(); .trim();
assert_eq!( assert_eq!(
new_indexer(contents) new_indexer(contents)
.fstring_ranges() .interpolated_string_ranges()
.values() .values()
.copied() .copied()
.collect::<Vec<_>>(), .collect::<Vec<_>>(),
@ -390,7 +392,7 @@ f-string"""}
.trim(); .trim();
assert_eq!( assert_eq!(
new_indexer(contents) new_indexer(contents)
.fstring_ranges() .interpolated_string_ranges()
.values() .values()
.copied() .copied()
.collect::<Vec<_>>(), .collect::<Vec<_>>(),
@ -504,11 +506,17 @@ the end"""
), ),
] { ] {
assert_eq!( assert_eq!(
indexer.fstring_ranges().innermost(offset).unwrap(), indexer
.interpolated_string_ranges()
.innermost(offset)
.unwrap(),
innermost_range innermost_range
); );
assert_eq!( assert_eq!(
indexer.fstring_ranges().outermost(offset).unwrap(), indexer
.interpolated_string_ranges()
.outermost(offset)
.unwrap(),
outermost_range outermost_range
); );
} }

View file

@ -3,17 +3,17 @@ use std::collections::BTreeMap;
use ruff_python_parser::{Token, TokenKind}; use ruff_python_parser::{Token, TokenKind};
use ruff_text_size::{Ranged, TextRange, TextSize}; use ruff_text_size::{Ranged, TextRange, TextSize};
/// Stores the ranges of all f-strings in a file sorted by [`TextRange::start`]. /// Stores the ranges of all interpolated strings in a file sorted by [`TextRange::start`].
/// There can be multiple overlapping ranges for nested f-strings. /// There can be multiple overlapping ranges for nested interpolated strings.
/// ///
/// Note that the ranges for all unterminated f-strings are not stored. /// Note that the ranges for all unterminated interpolated strings are not stored.
#[derive(Debug)] #[derive(Debug)]
pub struct FStringRanges { pub struct InterpolatedStringRanges {
// Mapping from the f-string start location to its range. // Mapping from the interpolated string start location to its range.
raw: BTreeMap<TextSize, TextRange>, raw: BTreeMap<TextSize, TextRange>,
} }
impl FStringRanges { impl InterpolatedStringRanges {
/// Returns `true` if the given range intersects with any f-string range. /// Returns `true` if the given range intersects with any f-string range.
pub fn intersects(&self, target: TextRange) -> bool { pub fn intersects(&self, target: TextRange) -> bool {
self.raw self.raw
@ -61,17 +61,17 @@ impl FStringRanges {
.map(|(_, range)| *range) .map(|(_, range)| *range)
} }
/// Returns an iterator over all f-string [`TextRange`] sorted by their /// Returns an iterator over all interpolated string [`TextRange`] sorted by their
/// start location. /// start location.
/// ///
/// For nested f-strings, the outermost f-string is yielded first, moving /// For nested interpolated strings, the outermost interpolated string is yielded first, moving
/// inwards with each iteration. /// inwards with each iteration.
#[inline] #[inline]
pub fn values(&self) -> impl Iterator<Item = &TextRange> + '_ { pub fn values(&self) -> impl Iterator<Item = &TextRange> + '_ {
self.raw.values() self.raw.values()
} }
/// Returns the number of f-string ranges stored. /// Returns the number of interpolated string ranges stored.
#[inline] #[inline]
pub fn len(&self) -> usize { pub fn len(&self) -> usize {
self.raw.len() self.raw.len()
@ -79,18 +79,21 @@ impl FStringRanges {
} }
#[derive(Default)] #[derive(Default)]
pub(crate) struct FStringRangesBuilder { pub(crate) struct InterpolatedStringRangesBuilder {
start_locations: Vec<TextSize>, start_locations: Vec<TextSize>,
raw: BTreeMap<TextSize, TextRange>, raw: BTreeMap<TextSize, TextRange>,
} }
impl FStringRangesBuilder { impl InterpolatedStringRangesBuilder {
pub(crate) fn visit_token(&mut self, token: &Token) { pub(crate) fn visit_token(&mut self, token: &Token) {
// While the logic of this visitor makes it seem possible to pair, say,
// an `FStringStart` with a `TStringEnd`, it is not actually possible to
// encounter this in tokenized code free from lexical errors.
match token.kind() { match token.kind() {
TokenKind::FStringStart => { TokenKind::FStringStart | TokenKind::TStringStart => {
self.start_locations.push(token.start()); self.start_locations.push(token.start());
} }
TokenKind::FStringEnd => { TokenKind::FStringEnd | TokenKind::TStringEnd => {
if let Some(start) = self.start_locations.pop() { if let Some(start) = self.start_locations.pop() {
self.raw.insert(start, TextRange::new(start, token.end())); self.raw.insert(start, TextRange::new(start, token.end()));
} }
@ -99,7 +102,7 @@ impl FStringRangesBuilder {
} }
} }
pub(crate) fn finish(self) -> FStringRanges { pub(crate) fn finish(self) -> InterpolatedStringRanges {
FStringRanges { raw: self.raw } InterpolatedStringRanges { raw: self.raw }
} }
} }

View file

@ -1,5 +1,5 @@
mod fstring_ranges;
mod indexer; mod indexer;
mod interpolated_string_ranges;
mod multiline_ranges; mod multiline_ranges;
pub use indexer::Indexer; pub use indexer::Indexer;