mirror of
https://github.com/astral-sh/ruff.git
synced 2025-09-29 13:24:57 +00:00
Track t-strings and f-strings for token-based rules and suppression comments (#20357)
Our token-based rules and `noqa` extraction used an `Indexer` that kept track of f-string ranges but not t-string ranges. We've updated the `Indexer`, and its downstream uses, to handle both f-strings and t-strings. Most of the diff consists of renaming and added tests. Note that much of the "new" logic can remain naive because the lexer has already guaranteed that f-string and t-string "start" tokens are paired with their respective "end" tokens, even amidst nesting and so on. Finally: one could imagine wanting to know whether a given interpolated string range corresponds to an f-string or a t-string, but we didn't find a place where this distinction was actually needed. Closes #20310
This commit is contained in:
parent
ec863bcde7
commit
b6bd32d9dc
18 changed files with 519 additions and 89 deletions
|
@ -664,3 +664,14 @@ class C[
|
|||
type X[T,] = T
|
||||
def f[T,](): pass
|
||||
class C[T,]: pass
|
||||
|
||||
# t-string examples
|
||||
kwargs.pop("remove", t"this {trailing_comma}",)
|
||||
kwargs.pop("remove", t"this {f"{trailing_comma}"}",)
|
||||
|
||||
t"""This is a test. {
|
||||
"Another sentence."
|
||||
if True else
|
||||
"Don't add a trailing comma here ->"
|
||||
}"""
|
||||
|
||||
|
|
|
@ -187,3 +187,24 @@ _ = (
|
|||
# leading comment
|
||||
"end"
|
||||
)
|
||||
|
||||
# https://github.com/astral-sh/ruff/issues/20310
|
||||
# ISC001
|
||||
t"The quick " t"brown fox."
|
||||
|
||||
# ISC002
|
||||
t"The quick brown fox jumps over the lazy "\
|
||||
t"dog."
|
||||
|
||||
# ISC003
|
||||
(
|
||||
t"The quick brown fox jumps over the lazy "
|
||||
+ t"dog"
|
||||
)
|
||||
|
||||
# nested examples with both t and f-strings
|
||||
_ = "a" f"b {t"c" t"d"} e" "f"
|
||||
_ = t"b {f"c" f"d {t"e" t"f"} g"} h"
|
||||
_ = f"b {t"abc" \
|
||||
t"def"} g"
|
||||
|
||||
|
|
|
@ -125,26 +125,27 @@ fn extract_noqa_line_for(tokens: &Tokens, locator: &Locator, indexer: &Indexer)
|
|||
}
|
||||
|
||||
// The capacity allocated here might be more than we need if there are
|
||||
// nested f-strings.
|
||||
let mut fstring_mappings = Vec::with_capacity(indexer.fstring_ranges().len());
|
||||
// nested interpolated strings.
|
||||
let mut interpolated_string_mappings =
|
||||
Vec::with_capacity(indexer.interpolated_string_ranges().len());
|
||||
|
||||
// For nested f-strings, we expect `noqa` directives on the last line of the
|
||||
// outermost f-string. The last f-string range will be used to skip over
|
||||
// the inner f-strings.
|
||||
let mut last_fstring_range: TextRange = TextRange::default();
|
||||
for fstring_range in indexer.fstring_ranges().values() {
|
||||
if !locator.contains_line_break(*fstring_range) {
|
||||
// For nested interpolated strings, we expect `noqa` directives on the last line of the
|
||||
// outermost interpolated string. The last interpolated string range will be used to skip over
|
||||
// the inner interpolated strings.
|
||||
let mut last_interpolated_string_range: TextRange = TextRange::default();
|
||||
for interpolated_string_range in indexer.interpolated_string_ranges().values() {
|
||||
if !locator.contains_line_break(*interpolated_string_range) {
|
||||
continue;
|
||||
}
|
||||
if last_fstring_range.contains_range(*fstring_range) {
|
||||
if last_interpolated_string_range.contains_range(*interpolated_string_range) {
|
||||
continue;
|
||||
}
|
||||
let new_range = TextRange::new(
|
||||
locator.line_start(fstring_range.start()),
|
||||
fstring_range.end(),
|
||||
locator.line_start(interpolated_string_range.start()),
|
||||
interpolated_string_range.end(),
|
||||
);
|
||||
fstring_mappings.push(new_range);
|
||||
last_fstring_range = new_range;
|
||||
interpolated_string_mappings.push(new_range);
|
||||
last_interpolated_string_range = new_range;
|
||||
}
|
||||
|
||||
let mut continuation_mappings = Vec::new();
|
||||
|
@ -172,11 +173,11 @@ fn extract_noqa_line_for(tokens: &Tokens, locator: &Locator, indexer: &Indexer)
|
|||
|
||||
// Merge the mappings in sorted order
|
||||
let mut mappings = NoqaMapping::with_capacity(
|
||||
continuation_mappings.len() + string_mappings.len() + fstring_mappings.len(),
|
||||
continuation_mappings.len() + string_mappings.len() + interpolated_string_mappings.len(),
|
||||
);
|
||||
|
||||
let string_mappings = SortedMergeIter {
|
||||
left: fstring_mappings.into_iter().peekable(),
|
||||
left: interpolated_string_mappings.into_iter().peekable(),
|
||||
right: string_mappings.into_iter().peekable(),
|
||||
};
|
||||
let all_mappings = SortedMergeIter {
|
||||
|
@ -497,12 +498,35 @@ end'''
|
|||
NoqaMapping::from_iter([TextRange::new(TextSize::from(6), TextSize::from(70))])
|
||||
);
|
||||
|
||||
let contents = "x = 1
|
||||
y = t'''abc
|
||||
def {f'''nested
|
||||
interpolated string''' f'another nested'}
|
||||
end'''
|
||||
";
|
||||
assert_eq!(
|
||||
noqa_mappings(contents),
|
||||
NoqaMapping::from_iter([TextRange::new(TextSize::from(6), TextSize::from(82))])
|
||||
);
|
||||
|
||||
let contents = "x = 1
|
||||
y = f'normal'
|
||||
z = f'another but {f'nested but {f'still single line'} nested'}'
|
||||
";
|
||||
assert_eq!(noqa_mappings(contents), NoqaMapping::default());
|
||||
|
||||
let contents = "x = 1
|
||||
y = t'normal'
|
||||
z = t'another but {t'nested but {t'still single line'} nested'}'
|
||||
";
|
||||
assert_eq!(noqa_mappings(contents), NoqaMapping::default());
|
||||
|
||||
let contents = "x = 1
|
||||
y = f'normal'
|
||||
z = f'another but {t'nested but {f'still single line'} nested'}'
|
||||
";
|
||||
assert_eq!(noqa_mappings(contents), NoqaMapping::default());
|
||||
|
||||
let contents = r"x = \
|
||||
1";
|
||||
assert_eq!(
|
||||
|
|
|
@ -370,8 +370,8 @@ pub(crate) fn adjust_indentation(
|
|||
|
||||
// If the range includes a multi-line string, use LibCST to ensure that we don't adjust the
|
||||
// whitespace _within_ the string.
|
||||
let contains_multiline_string =
|
||||
indexer.multiline_ranges().intersects(range) || indexer.fstring_ranges().intersects(range);
|
||||
let contains_multiline_string = indexer.multiline_ranges().intersects(range)
|
||||
|| indexer.interpolated_string_ranges().intersects(range);
|
||||
|
||||
// If the range has mixed indentation, we will use LibCST as well.
|
||||
let mixed_indentation = contents.universal_newlines().any(|line| {
|
||||
|
|
|
@ -250,23 +250,23 @@ pub(crate) fn trailing_commas(
|
|||
locator: &Locator,
|
||||
indexer: &Indexer,
|
||||
) {
|
||||
let mut fstrings = 0u32;
|
||||
let mut interpolated_strings = 0u32;
|
||||
let simple_tokens = tokens.iter().filter_map(|token| {
|
||||
match token.kind() {
|
||||
// Completely ignore comments -- they just interfere with the logic.
|
||||
TokenKind::Comment => None,
|
||||
// F-strings are handled as `String` token type with the complete range
|
||||
// of the outermost f-string. This means that the expression inside the
|
||||
// f-string is not checked for trailing commas.
|
||||
TokenKind::FStringStart => {
|
||||
fstrings = fstrings.saturating_add(1);
|
||||
// F-strings and t-strings are handled as `String` token type with the complete range
|
||||
// of the outermost interpolated string. This means that the expression inside the
|
||||
// interpolated string is not checked for trailing commas.
|
||||
TokenKind::FStringStart | TokenKind::TStringStart => {
|
||||
interpolated_strings = interpolated_strings.saturating_add(1);
|
||||
None
|
||||
}
|
||||
TokenKind::FStringEnd => {
|
||||
fstrings = fstrings.saturating_sub(1);
|
||||
if fstrings == 0 {
|
||||
TokenKind::FStringEnd | TokenKind::TStringEnd => {
|
||||
interpolated_strings = interpolated_strings.saturating_sub(1);
|
||||
if interpolated_strings == 0 {
|
||||
indexer
|
||||
.fstring_ranges()
|
||||
.interpolated_string_ranges()
|
||||
.outermost(token.start())
|
||||
.map(|range| SimpleToken::new(TokenType::String, range))
|
||||
} else {
|
||||
|
@ -274,7 +274,7 @@ pub(crate) fn trailing_commas(
|
|||
}
|
||||
}
|
||||
_ => {
|
||||
if fstrings == 0 {
|
||||
if interpolated_strings == 0 {
|
||||
Some(SimpleToken::from(token.as_tuple()))
|
||||
} else {
|
||||
None
|
||||
|
|
|
@ -1016,6 +1016,7 @@ help: Remove trailing comma
|
|||
664 + type X[T] = T
|
||||
665 | def f[T,](): pass
|
||||
666 | class C[T,]: pass
|
||||
667 |
|
||||
|
||||
COM819 [*] Trailing comma prohibited
|
||||
--> COM81.py:665:8
|
||||
|
@ -1032,6 +1033,8 @@ help: Remove trailing comma
|
|||
- def f[T,](): pass
|
||||
665 + def f[T](): pass
|
||||
666 | class C[T,]: pass
|
||||
667 |
|
||||
668 | # t-string examples
|
||||
|
||||
COM819 [*] Trailing comma prohibited
|
||||
--> COM81.py:666:10
|
||||
|
@ -1040,6 +1043,8 @@ COM819 [*] Trailing comma prohibited
|
|||
665 | def f[T,](): pass
|
||||
666 | class C[T,]: pass
|
||||
| ^
|
||||
667 |
|
||||
668 | # t-string examples
|
||||
|
|
||||
help: Remove trailing comma
|
||||
663 |
|
||||
|
@ -1047,3 +1052,44 @@ help: Remove trailing comma
|
|||
665 | def f[T,](): pass
|
||||
- class C[T,]: pass
|
||||
666 + class C[T]: pass
|
||||
667 |
|
||||
668 | # t-string examples
|
||||
669 | kwargs.pop("remove", t"this {trailing_comma}",)
|
||||
|
||||
COM819 [*] Trailing comma prohibited
|
||||
--> COM81.py:669:46
|
||||
|
|
||||
668 | # t-string examples
|
||||
669 | kwargs.pop("remove", t"this {trailing_comma}",)
|
||||
| ^
|
||||
670 | kwargs.pop("remove", t"this {f"{trailing_comma}"}",)
|
||||
|
|
||||
help: Remove trailing comma
|
||||
666 | class C[T,]: pass
|
||||
667 |
|
||||
668 | # t-string examples
|
||||
- kwargs.pop("remove", t"this {trailing_comma}",)
|
||||
669 + kwargs.pop("remove", t"this {trailing_comma}")
|
||||
670 | kwargs.pop("remove", t"this {f"{trailing_comma}"}",)
|
||||
671 |
|
||||
672 | t"""This is a test. {
|
||||
|
||||
COM819 [*] Trailing comma prohibited
|
||||
--> COM81.py:670:51
|
||||
|
|
||||
668 | # t-string examples
|
||||
669 | kwargs.pop("remove", t"this {trailing_comma}",)
|
||||
670 | kwargs.pop("remove", t"this {f"{trailing_comma}"}",)
|
||||
| ^
|
||||
671 |
|
||||
672 | t"""This is a test. {
|
||||
|
|
||||
help: Remove trailing comma
|
||||
667 |
|
||||
668 | # t-string examples
|
||||
669 | kwargs.pop("remove", t"this {trailing_comma}",)
|
||||
- kwargs.pop("remove", t"this {f"{trailing_comma}"}",)
|
||||
670 + kwargs.pop("remove", t"this {f"{trailing_comma}"}")
|
||||
671 |
|
||||
672 | t"""This is a test. {
|
||||
673 | "Another sentence."
|
||||
|
|
|
@ -74,6 +74,7 @@ pub(crate) fn explicit(checker: &Checker, expr: &Expr) {
|
|||
Expr::StringLiteral(_) | Expr::FString(_),
|
||||
Expr::StringLiteral(_) | Expr::FString(_)
|
||||
) | (Expr::BytesLiteral(_), Expr::BytesLiteral(_))
|
||||
| (Expr::TString(_), Expr::TString(_))
|
||||
);
|
||||
if concatable
|
||||
&& checker
|
||||
|
|
|
@ -123,21 +123,32 @@ pub(crate) fn implicit(
|
|||
let (a_range, b_range) = match (a_token.kind(), b_token.kind()) {
|
||||
(TokenKind::String, TokenKind::String) => (a_token.range(), b_token.range()),
|
||||
(TokenKind::String, TokenKind::FStringStart) => {
|
||||
match indexer.fstring_ranges().innermost(b_token.start()) {
|
||||
match indexer
|
||||
.interpolated_string_ranges()
|
||||
.innermost(b_token.start())
|
||||
{
|
||||
Some(b_range) => (a_token.range(), b_range),
|
||||
None => continue,
|
||||
}
|
||||
}
|
||||
(TokenKind::FStringEnd, TokenKind::String) => {
|
||||
match indexer.fstring_ranges().innermost(a_token.start()) {
|
||||
match indexer
|
||||
.interpolated_string_ranges()
|
||||
.innermost(a_token.start())
|
||||
{
|
||||
Some(a_range) => (a_range, b_token.range()),
|
||||
None => continue,
|
||||
}
|
||||
}
|
||||
(TokenKind::FStringEnd, TokenKind::FStringStart) => {
|
||||
(TokenKind::FStringEnd, TokenKind::FStringStart)
|
||||
| (TokenKind::TStringEnd, TokenKind::TStringStart) => {
|
||||
match (
|
||||
indexer.fstring_ranges().innermost(a_token.start()),
|
||||
indexer.fstring_ranges().innermost(b_token.start()),
|
||||
indexer
|
||||
.interpolated_string_ranges()
|
||||
.innermost(a_token.start()),
|
||||
indexer
|
||||
.interpolated_string_ranges()
|
||||
.innermost(b_token.start()),
|
||||
) {
|
||||
(Some(a_range), Some(b_range)) => (a_range, b_range),
|
||||
_ => continue,
|
||||
|
|
|
@ -484,3 +484,104 @@ help: Combine string literals
|
|||
94 |
|
||||
95 |
|
||||
96 | # Mixed literal + non-literal scenarios
|
||||
|
||||
ISC001 [*] Implicitly concatenated string literals on one line
|
||||
--> ISC.py:193:1
|
||||
|
|
||||
191 | # https://github.com/astral-sh/ruff/issues/20310
|
||||
192 | # ISC001
|
||||
193 | t"The quick " t"brown fox."
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
194 |
|
||||
195 | # ISC002
|
||||
|
|
||||
help: Combine string literals
|
||||
190 |
|
||||
191 | # https://github.com/astral-sh/ruff/issues/20310
|
||||
192 | # ISC001
|
||||
- t"The quick " t"brown fox."
|
||||
193 + t"The quick brown fox."
|
||||
194 |
|
||||
195 | # ISC002
|
||||
196 | t"The quick brown fox jumps over the lazy "\
|
||||
|
||||
ISC001 Implicitly concatenated string literals on one line
|
||||
--> ISC.py:206:5
|
||||
|
|
||||
205 | # nested examples with both t and f-strings
|
||||
206 | _ = "a" f"b {t"c" t"d"} e" "f"
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^
|
||||
207 | _ = t"b {f"c" f"d {t"e" t"f"} g"} h"
|
||||
208 | _ = f"b {t"abc" \
|
||||
|
|
||||
help: Combine string literals
|
||||
|
||||
ISC001 Implicitly concatenated string literals on one line
|
||||
--> ISC.py:206:9
|
||||
|
|
||||
205 | # nested examples with both t and f-strings
|
||||
206 | _ = "a" f"b {t"c" t"d"} e" "f"
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^
|
||||
207 | _ = t"b {f"c" f"d {t"e" t"f"} g"} h"
|
||||
208 | _ = f"b {t"abc" \
|
||||
|
|
||||
help: Combine string literals
|
||||
|
||||
ISC001 [*] Implicitly concatenated string literals on one line
|
||||
--> ISC.py:206:14
|
||||
|
|
||||
205 | # nested examples with both t and f-strings
|
||||
206 | _ = "a" f"b {t"c" t"d"} e" "f"
|
||||
| ^^^^^^^^^
|
||||
207 | _ = t"b {f"c" f"d {t"e" t"f"} g"} h"
|
||||
208 | _ = f"b {t"abc" \
|
||||
|
|
||||
help: Combine string literals
|
||||
203 | )
|
||||
204 |
|
||||
205 | # nested examples with both t and f-strings
|
||||
- _ = "a" f"b {t"c" t"d"} e" "f"
|
||||
206 + _ = "a" f"b {t"cd"} e" "f"
|
||||
207 | _ = t"b {f"c" f"d {t"e" t"f"} g"} h"
|
||||
208 | _ = f"b {t"abc" \
|
||||
209 | t"def"} g"
|
||||
|
||||
ISC001 [*] Implicitly concatenated string literals on one line
|
||||
--> ISC.py:207:10
|
||||
|
|
||||
205 | # nested examples with both t and f-strings
|
||||
206 | _ = "a" f"b {t"c" t"d"} e" "f"
|
||||
207 | _ = t"b {f"c" f"d {t"e" t"f"} g"} h"
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^
|
||||
208 | _ = f"b {t"abc" \
|
||||
209 | t"def"} g"
|
||||
|
|
||||
help: Combine string literals
|
||||
204 |
|
||||
205 | # nested examples with both t and f-strings
|
||||
206 | _ = "a" f"b {t"c" t"d"} e" "f"
|
||||
- _ = t"b {f"c" f"d {t"e" t"f"} g"} h"
|
||||
207 + _ = t"b {f"cd {t"e" t"f"} g"} h"
|
||||
208 | _ = f"b {t"abc" \
|
||||
209 | t"def"} g"
|
||||
210 |
|
||||
|
||||
ISC001 [*] Implicitly concatenated string literals on one line
|
||||
--> ISC.py:207:20
|
||||
|
|
||||
205 | # nested examples with both t and f-strings
|
||||
206 | _ = "a" f"b {t"c" t"d"} e" "f"
|
||||
207 | _ = t"b {f"c" f"d {t"e" t"f"} g"} h"
|
||||
| ^^^^^^^^^
|
||||
208 | _ = f"b {t"abc" \
|
||||
209 | t"def"} g"
|
||||
|
|
||||
help: Combine string literals
|
||||
204 |
|
||||
205 | # nested examples with both t and f-strings
|
||||
206 | _ = "a" f"b {t"c" t"d"} e" "f"
|
||||
- _ = t"b {f"c" f"d {t"e" t"f"} g"} h"
|
||||
207 + _ = t"b {f"c" f"d {t"ef"} g"} h"
|
||||
208 | _ = f"b {t"abc" \
|
||||
209 | t"def"} g"
|
||||
210 |
|
||||
|
|
|
@ -26,3 +26,25 @@ ISC002 Implicitly concatenated string literals over multiple lines
|
|||
76 |
|
||||
77 | # Explicitly concatenated nested f-strings
|
||||
|
|
||||
|
||||
ISC002 Implicitly concatenated string literals over multiple lines
|
||||
--> ISC.py:196:1
|
||||
|
|
||||
195 | # ISC002
|
||||
196 | / t"The quick brown fox jumps over the lazy "\
|
||||
197 | | t"dog."
|
||||
| |_______^
|
||||
198 |
|
||||
199 | # ISC003
|
||||
|
|
||||
|
||||
ISC002 Implicitly concatenated string literals over multiple lines
|
||||
--> ISC.py:208:10
|
||||
|
|
||||
206 | _ = "a" f"b {t"c" t"d"} e" "f"
|
||||
207 | _ = t"b {f"c" f"d {t"e" t"f"} g"} h"
|
||||
208 | _ = f"b {t"abc" \
|
||||
| __________^
|
||||
209 | | t"def"} g"
|
||||
| |__________^
|
||||
|
|
||||
|
|
|
@ -337,3 +337,23 @@ help: Remove redundant '+' operator to implicitly concatenate
|
|||
187 | # leading comment
|
||||
188 | "end"
|
||||
189 | )
|
||||
|
||||
ISC003 [*] Explicitly concatenated string should be implicitly concatenated
|
||||
--> ISC.py:201:5
|
||||
|
|
||||
199 | # ISC003
|
||||
200 | (
|
||||
201 | / t"The quick brown fox jumps over the lazy "
|
||||
202 | | + t"dog"
|
||||
| |____________^
|
||||
203 | )
|
||||
|
|
||||
help: Remove redundant '+' operator to implicitly concatenate
|
||||
199 | # ISC003
|
||||
200 | (
|
||||
201 | t"The quick brown fox jumps over the lazy "
|
||||
- + t"dog"
|
||||
202 + t"dog"
|
||||
203 | )
|
||||
204 |
|
||||
205 | # nested examples with both t and f-strings
|
||||
|
|
|
@ -484,3 +484,104 @@ help: Combine string literals
|
|||
94 |
|
||||
95 |
|
||||
96 | # Mixed literal + non-literal scenarios
|
||||
|
||||
ISC001 [*] Implicitly concatenated string literals on one line
|
||||
--> ISC.py:193:1
|
||||
|
|
||||
191 | # https://github.com/astral-sh/ruff/issues/20310
|
||||
192 | # ISC001
|
||||
193 | t"The quick " t"brown fox."
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^
|
||||
194 |
|
||||
195 | # ISC002
|
||||
|
|
||||
help: Combine string literals
|
||||
190 |
|
||||
191 | # https://github.com/astral-sh/ruff/issues/20310
|
||||
192 | # ISC001
|
||||
- t"The quick " t"brown fox."
|
||||
193 + t"The quick brown fox."
|
||||
194 |
|
||||
195 | # ISC002
|
||||
196 | t"The quick brown fox jumps over the lazy "\
|
||||
|
||||
ISC001 Implicitly concatenated string literals on one line
|
||||
--> ISC.py:206:5
|
||||
|
|
||||
205 | # nested examples with both t and f-strings
|
||||
206 | _ = "a" f"b {t"c" t"d"} e" "f"
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^
|
||||
207 | _ = t"b {f"c" f"d {t"e" t"f"} g"} h"
|
||||
208 | _ = f"b {t"abc" \
|
||||
|
|
||||
help: Combine string literals
|
||||
|
||||
ISC001 Implicitly concatenated string literals on one line
|
||||
--> ISC.py:206:9
|
||||
|
|
||||
205 | # nested examples with both t and f-strings
|
||||
206 | _ = "a" f"b {t"c" t"d"} e" "f"
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^
|
||||
207 | _ = t"b {f"c" f"d {t"e" t"f"} g"} h"
|
||||
208 | _ = f"b {t"abc" \
|
||||
|
|
||||
help: Combine string literals
|
||||
|
||||
ISC001 [*] Implicitly concatenated string literals on one line
|
||||
--> ISC.py:206:14
|
||||
|
|
||||
205 | # nested examples with both t and f-strings
|
||||
206 | _ = "a" f"b {t"c" t"d"} e" "f"
|
||||
| ^^^^^^^^^
|
||||
207 | _ = t"b {f"c" f"d {t"e" t"f"} g"} h"
|
||||
208 | _ = f"b {t"abc" \
|
||||
|
|
||||
help: Combine string literals
|
||||
203 | )
|
||||
204 |
|
||||
205 | # nested examples with both t and f-strings
|
||||
- _ = "a" f"b {t"c" t"d"} e" "f"
|
||||
206 + _ = "a" f"b {t"cd"} e" "f"
|
||||
207 | _ = t"b {f"c" f"d {t"e" t"f"} g"} h"
|
||||
208 | _ = f"b {t"abc" \
|
||||
209 | t"def"} g"
|
||||
|
||||
ISC001 [*] Implicitly concatenated string literals on one line
|
||||
--> ISC.py:207:10
|
||||
|
|
||||
205 | # nested examples with both t and f-strings
|
||||
206 | _ = "a" f"b {t"c" t"d"} e" "f"
|
||||
207 | _ = t"b {f"c" f"d {t"e" t"f"} g"} h"
|
||||
| ^^^^^^^^^^^^^^^^^^^^^^^
|
||||
208 | _ = f"b {t"abc" \
|
||||
209 | t"def"} g"
|
||||
|
|
||||
help: Combine string literals
|
||||
204 |
|
||||
205 | # nested examples with both t and f-strings
|
||||
206 | _ = "a" f"b {t"c" t"d"} e" "f"
|
||||
- _ = t"b {f"c" f"d {t"e" t"f"} g"} h"
|
||||
207 + _ = t"b {f"cd {t"e" t"f"} g"} h"
|
||||
208 | _ = f"b {t"abc" \
|
||||
209 | t"def"} g"
|
||||
210 |
|
||||
|
||||
ISC001 [*] Implicitly concatenated string literals on one line
|
||||
--> ISC.py:207:20
|
||||
|
|
||||
205 | # nested examples with both t and f-strings
|
||||
206 | _ = "a" f"b {t"c" t"d"} e" "f"
|
||||
207 | _ = t"b {f"c" f"d {t"e" t"f"} g"} h"
|
||||
| ^^^^^^^^^
|
||||
208 | _ = f"b {t"abc" \
|
||||
209 | t"def"} g"
|
||||
|
|
||||
help: Combine string literals
|
||||
204 |
|
||||
205 | # nested examples with both t and f-strings
|
||||
206 | _ = "a" f"b {t"c" t"d"} e" "f"
|
||||
- _ = t"b {f"c" f"d {t"e" t"f"} g"} h"
|
||||
207 + _ = t"b {f"c" f"d {t"ef"} g"} h"
|
||||
208 | _ = f"b {t"abc" \
|
||||
209 | t"def"} g"
|
||||
210 |
|
||||
|
|
|
@ -67,3 +67,25 @@ ISC002 Implicitly concatenated string literals over multiple lines
|
|||
76 |
|
||||
77 | # Explicitly concatenated nested f-strings
|
||||
|
|
||||
|
||||
ISC002 Implicitly concatenated string literals over multiple lines
|
||||
--> ISC.py:196:1
|
||||
|
|
||||
195 | # ISC002
|
||||
196 | / t"The quick brown fox jumps over the lazy "\
|
||||
197 | | t"dog."
|
||||
| |_______^
|
||||
198 |
|
||||
199 | # ISC003
|
||||
|
|
||||
|
||||
ISC002 Implicitly concatenated string literals over multiple lines
|
||||
--> ISC.py:208:10
|
||||
|
|
||||
206 | _ = "a" f"b {t"c" t"d"} e" "f"
|
||||
207 | _ = t"b {f"c" f"d {t"e" t"f"} g"} h"
|
||||
208 | _ = f"b {t"abc" \
|
||||
| __________^
|
||||
209 | | t"def"} g"
|
||||
| |__________^
|
||||
|
|
||||
|
|
|
@ -402,25 +402,6 @@ help: Add backslash to escape sequence
|
|||
96 |
|
||||
97 | #: Okay
|
||||
|
||||
W605 [*] Invalid escape sequence: `\w`
|
||||
--> W605_1.py:109:1
|
||||
|
|
||||
107 | regex = t'\w' # noqa
|
||||
108 | regex = t'''
|
||||
109 | \w
|
||||
| ^^
|
||||
110 | ''' # noqa
|
||||
|
|
||||
help: Use a raw string literal
|
||||
105 | '''
|
||||
106 | s = t'\\'
|
||||
107 | regex = t'\w' # noqa
|
||||
- regex = t'''
|
||||
108 + regex = rt'''
|
||||
109 | \w
|
||||
110 | ''' # noqa
|
||||
111 |
|
||||
|
||||
W605 [*] Invalid escape sequence: `\_`
|
||||
--> W605_1.py:112:13
|
||||
|
|
||||
|
|
|
@ -210,6 +210,58 @@ pub const SINGLE_QUOTE_BYTE_PREFIXES: &[&str] = &[
|
|||
"b'",
|
||||
];
|
||||
|
||||
/// Includes all permutations of `t` and `rt`. This includes all possible orders, and all possible
|
||||
/// casings, for both single and triple quotes.
|
||||
///
|
||||
/// See: <https://docs.python.org/3/reference/lexical_analysis.html#string-and-bytes-literals>
|
||||
#[rustfmt::skip]
|
||||
pub const TRIPLE_QUOTE_TEMPLATE_PREFIXES: &[&str] = &[
|
||||
"TR\"\"\"",
|
||||
"Tr\"\"\"",
|
||||
"tR\"\"\"",
|
||||
"tr\"\"\"",
|
||||
"RT\"\"\"",
|
||||
"Rt\"\"\"",
|
||||
"rT\"\"\"",
|
||||
"rt\"\"\"",
|
||||
"TR'''",
|
||||
"Tr'''",
|
||||
"tR'''",
|
||||
"tr'''",
|
||||
"RT'''",
|
||||
"Rt'''",
|
||||
"rT'''",
|
||||
"rt'''",
|
||||
"T\"\"\"",
|
||||
"t\"\"\"",
|
||||
"T'''",
|
||||
"t'''",
|
||||
];
|
||||
|
||||
#[rustfmt::skip]
|
||||
pub const SINGLE_QUOTE_TEMPLATE_PREFIXES: &[&str] = &[
|
||||
"TR\"",
|
||||
"Tr\"",
|
||||
"tR\"",
|
||||
"tr\"",
|
||||
"RT\"",
|
||||
"Rt\"",
|
||||
"rT\"",
|
||||
"rt\"",
|
||||
"TR'",
|
||||
"Tr'",
|
||||
"tR'",
|
||||
"tr'",
|
||||
"RT'",
|
||||
"Rt'",
|
||||
"rT'",
|
||||
"rt'",
|
||||
"T\"",
|
||||
"t\"",
|
||||
"T'",
|
||||
"t'",
|
||||
];
|
||||
|
||||
/// Strip the leading and trailing quotes from a string.
|
||||
/// Assumes that the string is a valid string literal, but does not verify that the string
|
||||
/// is a "simple" string literal (i.e., that it does not contain any implicit concatenations).
|
||||
|
@ -229,7 +281,7 @@ pub fn raw_contents_range(contents: &str) -> Option<TextRange> {
|
|||
))
|
||||
}
|
||||
|
||||
/// An [`AhoCorasick`] matcher for string and byte literal prefixes.
|
||||
/// An [`AhoCorasick`] matcher for string, template, and bytes literal prefixes.
|
||||
static PREFIX_MATCHER: LazyLock<AhoCorasick> = LazyLock::new(|| {
|
||||
AhoCorasick::builder()
|
||||
.start_kind(StartKind::Anchored)
|
||||
|
@ -239,19 +291,21 @@ static PREFIX_MATCHER: LazyLock<AhoCorasick> = LazyLock::new(|| {
|
|||
TRIPLE_QUOTE_STR_PREFIXES
|
||||
.iter()
|
||||
.chain(TRIPLE_QUOTE_BYTE_PREFIXES)
|
||||
.chain(TRIPLE_QUOTE_TEMPLATE_PREFIXES)
|
||||
.chain(SINGLE_QUOTE_STR_PREFIXES)
|
||||
.chain(SINGLE_QUOTE_BYTE_PREFIXES),
|
||||
.chain(SINGLE_QUOTE_BYTE_PREFIXES)
|
||||
.chain(SINGLE_QUOTE_TEMPLATE_PREFIXES),
|
||||
)
|
||||
.unwrap()
|
||||
});
|
||||
|
||||
/// Return the leading quote for a string or byte literal (e.g., `"""`).
|
||||
/// Return the leading quote for a string, template, or bytes literal (e.g., `"""`).
|
||||
pub fn leading_quote(content: &str) -> Option<&str> {
|
||||
let mat = PREFIX_MATCHER.find(Input::new(content).anchored(Anchored::Yes))?;
|
||||
Some(&content[mat.start()..mat.end()])
|
||||
}
|
||||
|
||||
/// Return the trailing quote string for a string or byte literal (e.g., `"""`).
|
||||
/// Return the trailing quote string for a string, template, or bytes literal (e.g., `"""`).
|
||||
pub fn trailing_quote(content: &str) -> Option<&str> {
|
||||
if content.ends_with("'''") {
|
||||
Some("'''")
|
||||
|
@ -268,14 +322,16 @@ pub fn trailing_quote(content: &str) -> Option<&str> {
|
|||
|
||||
/// Return `true` if the string is a triple-quote string or byte prefix.
|
||||
pub fn is_triple_quote(content: &str) -> bool {
|
||||
TRIPLE_QUOTE_STR_PREFIXES.contains(&content) || TRIPLE_QUOTE_BYTE_PREFIXES.contains(&content)
|
||||
TRIPLE_QUOTE_STR_PREFIXES.contains(&content)
|
||||
|| TRIPLE_QUOTE_BYTE_PREFIXES.contains(&content)
|
||||
|| TRIPLE_QUOTE_TEMPLATE_PREFIXES.contains(&content)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::{
|
||||
SINGLE_QUOTE_BYTE_PREFIXES, SINGLE_QUOTE_STR_PREFIXES, TRIPLE_QUOTE_BYTE_PREFIXES,
|
||||
TRIPLE_QUOTE_STR_PREFIXES,
|
||||
SINGLE_QUOTE_BYTE_PREFIXES, SINGLE_QUOTE_STR_PREFIXES, SINGLE_QUOTE_TEMPLATE_PREFIXES,
|
||||
TRIPLE_QUOTE_BYTE_PREFIXES, TRIPLE_QUOTE_STR_PREFIXES, TRIPLE_QUOTE_TEMPLATE_PREFIXES,
|
||||
};
|
||||
|
||||
#[test]
|
||||
|
@ -283,8 +339,10 @@ mod tests {
|
|||
let prefixes = TRIPLE_QUOTE_STR_PREFIXES
|
||||
.iter()
|
||||
.chain(TRIPLE_QUOTE_BYTE_PREFIXES)
|
||||
.chain(TRIPLE_QUOTE_TEMPLATE_PREFIXES)
|
||||
.chain(SINGLE_QUOTE_STR_PREFIXES)
|
||||
.chain(SINGLE_QUOTE_BYTE_PREFIXES)
|
||||
.chain(SINGLE_QUOTE_TEMPLATE_PREFIXES)
|
||||
.collect::<Vec<_>>();
|
||||
for (i, prefix_i) in prefixes.iter().enumerate() {
|
||||
for (j, prefix_j) in prefixes.iter().enumerate() {
|
||||
|
|
|
@ -9,15 +9,17 @@ use ruff_python_trivia::{
|
|||
use ruff_source_file::LineRanges;
|
||||
use ruff_text_size::{Ranged, TextRange, TextSize};
|
||||
|
||||
use crate::fstring_ranges::{FStringRanges, FStringRangesBuilder};
|
||||
use crate::interpolated_string_ranges::{
|
||||
InterpolatedStringRanges, InterpolatedStringRangesBuilder,
|
||||
};
|
||||
use crate::multiline_ranges::{MultilineRanges, MultilineRangesBuilder};
|
||||
|
||||
pub struct Indexer {
|
||||
/// Stores the start offset of continuation lines.
|
||||
continuation_lines: Vec<TextSize>,
|
||||
|
||||
/// The range of all f-string in the source document.
|
||||
fstring_ranges: FStringRanges,
|
||||
/// The range of all interpolated strings in the source document.
|
||||
interpolated_string_ranges: InterpolatedStringRanges,
|
||||
|
||||
/// The range of all multiline strings in the source document.
|
||||
multiline_ranges: MultilineRanges,
|
||||
|
@ -30,7 +32,7 @@ impl Indexer {
|
|||
pub fn from_tokens(tokens: &Tokens, source: &str) -> Self {
|
||||
assert!(TextSize::try_from(source.len()).is_ok());
|
||||
|
||||
let mut fstring_ranges_builder = FStringRangesBuilder::default();
|
||||
let mut interpolated_string_ranges_builder = InterpolatedStringRangesBuilder::default();
|
||||
let mut multiline_ranges_builder = MultilineRangesBuilder::default();
|
||||
let mut continuation_lines = Vec::new();
|
||||
let mut comment_ranges = Vec::new();
|
||||
|
@ -59,7 +61,7 @@ impl Indexer {
|
|||
}
|
||||
}
|
||||
|
||||
fstring_ranges_builder.visit_token(token);
|
||||
interpolated_string_ranges_builder.visit_token(token);
|
||||
multiline_ranges_builder.visit_token(token);
|
||||
|
||||
match token.kind() {
|
||||
|
@ -82,7 +84,7 @@ impl Indexer {
|
|||
|
||||
Self {
|
||||
continuation_lines,
|
||||
fstring_ranges: fstring_ranges_builder.finish(),
|
||||
interpolated_string_ranges: interpolated_string_ranges_builder.finish(),
|
||||
multiline_ranges: multiline_ranges_builder.finish(),
|
||||
comment_ranges: CommentRanges::new(comment_ranges),
|
||||
}
|
||||
|
@ -93,9 +95,9 @@ impl Indexer {
|
|||
&self.comment_ranges
|
||||
}
|
||||
|
||||
/// Returns the byte offset ranges of f-strings.
|
||||
pub const fn fstring_ranges(&self) -> &FStringRanges {
|
||||
&self.fstring_ranges
|
||||
/// Returns the byte offset ranges of interpolated strings.
|
||||
pub const fn interpolated_string_ranges(&self) -> &InterpolatedStringRanges {
|
||||
&self.interpolated_string_ranges
|
||||
}
|
||||
|
||||
/// Returns the byte offset ranges of multiline strings.
|
||||
|
@ -356,7 +358,7 @@ f"implicit " f"concatenation"
|
|||
.trim();
|
||||
assert_eq!(
|
||||
new_indexer(contents)
|
||||
.fstring_ranges()
|
||||
.interpolated_string_ranges()
|
||||
.values()
|
||||
.copied()
|
||||
.collect::<Vec<_>>(),
|
||||
|
@ -390,7 +392,7 @@ f-string"""}
|
|||
.trim();
|
||||
assert_eq!(
|
||||
new_indexer(contents)
|
||||
.fstring_ranges()
|
||||
.interpolated_string_ranges()
|
||||
.values()
|
||||
.copied()
|
||||
.collect::<Vec<_>>(),
|
||||
|
@ -504,11 +506,17 @@ the end"""
|
|||
),
|
||||
] {
|
||||
assert_eq!(
|
||||
indexer.fstring_ranges().innermost(offset).unwrap(),
|
||||
indexer
|
||||
.interpolated_string_ranges()
|
||||
.innermost(offset)
|
||||
.unwrap(),
|
||||
innermost_range
|
||||
);
|
||||
assert_eq!(
|
||||
indexer.fstring_ranges().outermost(offset).unwrap(),
|
||||
indexer
|
||||
.interpolated_string_ranges()
|
||||
.outermost(offset)
|
||||
.unwrap(),
|
||||
outermost_range
|
||||
);
|
||||
}
|
||||
|
|
|
@ -3,17 +3,17 @@ use std::collections::BTreeMap;
|
|||
use ruff_python_parser::{Token, TokenKind};
|
||||
use ruff_text_size::{Ranged, TextRange, TextSize};
|
||||
|
||||
/// Stores the ranges of all f-strings in a file sorted by [`TextRange::start`].
|
||||
/// There can be multiple overlapping ranges for nested f-strings.
|
||||
/// Stores the ranges of all interpolated strings in a file sorted by [`TextRange::start`].
|
||||
/// There can be multiple overlapping ranges for nested interpolated strings.
|
||||
///
|
||||
/// Note that the ranges for all unterminated f-strings are not stored.
|
||||
/// Note that the ranges for all unterminated interpolated strings are not stored.
|
||||
#[derive(Debug)]
|
||||
pub struct FStringRanges {
|
||||
// Mapping from the f-string start location to its range.
|
||||
pub struct InterpolatedStringRanges {
|
||||
// Mapping from the interpolated string start location to its range.
|
||||
raw: BTreeMap<TextSize, TextRange>,
|
||||
}
|
||||
|
||||
impl FStringRanges {
|
||||
impl InterpolatedStringRanges {
|
||||
/// Returns `true` if the given range intersects with any f-string range.
|
||||
pub fn intersects(&self, target: TextRange) -> bool {
|
||||
self.raw
|
||||
|
@ -61,17 +61,17 @@ impl FStringRanges {
|
|||
.map(|(_, range)| *range)
|
||||
}
|
||||
|
||||
/// Returns an iterator over all f-string [`TextRange`] sorted by their
|
||||
/// Returns an iterator over all interpolated string [`TextRange`] sorted by their
|
||||
/// start location.
|
||||
///
|
||||
/// For nested f-strings, the outermost f-string is yielded first, moving
|
||||
/// For nested interpolated strings, the outermost interpolated string is yielded first, moving
|
||||
/// inwards with each iteration.
|
||||
#[inline]
|
||||
pub fn values(&self) -> impl Iterator<Item = &TextRange> + '_ {
|
||||
self.raw.values()
|
||||
}
|
||||
|
||||
/// Returns the number of f-string ranges stored.
|
||||
/// Returns the number of interpolated string ranges stored.
|
||||
#[inline]
|
||||
pub fn len(&self) -> usize {
|
||||
self.raw.len()
|
||||
|
@ -79,18 +79,21 @@ impl FStringRanges {
|
|||
}
|
||||
|
||||
#[derive(Default)]
|
||||
pub(crate) struct FStringRangesBuilder {
|
||||
pub(crate) struct InterpolatedStringRangesBuilder {
|
||||
start_locations: Vec<TextSize>,
|
||||
raw: BTreeMap<TextSize, TextRange>,
|
||||
}
|
||||
|
||||
impl FStringRangesBuilder {
|
||||
impl InterpolatedStringRangesBuilder {
|
||||
pub(crate) fn visit_token(&mut self, token: &Token) {
|
||||
// While the logic of this visitor makes it seem possible to pair, say,
|
||||
// an `FStringStart` with a `TStringEnd`, it is not actually possible to
|
||||
// encounter this in tokenized code free from lexical errors.
|
||||
match token.kind() {
|
||||
TokenKind::FStringStart => {
|
||||
TokenKind::FStringStart | TokenKind::TStringStart => {
|
||||
self.start_locations.push(token.start());
|
||||
}
|
||||
TokenKind::FStringEnd => {
|
||||
TokenKind::FStringEnd | TokenKind::TStringEnd => {
|
||||
if let Some(start) = self.start_locations.pop() {
|
||||
self.raw.insert(start, TextRange::new(start, token.end()));
|
||||
}
|
||||
|
@ -99,7 +102,7 @@ impl FStringRangesBuilder {
|
|||
}
|
||||
}
|
||||
|
||||
pub(crate) fn finish(self) -> FStringRanges {
|
||||
FStringRanges { raw: self.raw }
|
||||
pub(crate) fn finish(self) -> InterpolatedStringRanges {
|
||||
InterpolatedStringRanges { raw: self.raw }
|
||||
}
|
||||
}
|
|
@ -1,5 +1,5 @@
|
|||
mod fstring_ranges;
|
||||
mod indexer;
|
||||
mod interpolated_string_ranges;
|
||||
mod multiline_ranges;
|
||||
|
||||
pub use indexer::Indexer;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue