[pycodestyle] Handle f-strings properly for invalid-escape-sequence (W605) (#14748)

When fixing an invalid escape sequence in an f-string, each f-string
element is analyzed for valid escape characters prior to creating the
diagnostic and fix. This allows us to safely prefix with `r` to create a
raw string if no valid escape characters were found anywhere in the
f-string, and otherwise insert backslashes.

This fixes a bug in the original implementation: each "f-string part"
was treated separately, so it was not possible to tell whether a valid
escape character was or would be used elsewhere in the f-string.

Progress towards #11491 but format specifiers are not handled in this
PR.
This commit is contained in:
Dylan 2024-12-04 06:59:14 -06:00 committed by GitHub
parent 1685d95ed2
commit c617b2a48a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 117 additions and 36 deletions

View file

@ -57,3 +57,12 @@ value = f"{rf"\{1}"}"
f"{{}}+-\d"
f"\n{{}}+-\d+"
f"\n{{}}<EFBFBD>+-\d+"
# See https://github.com/astral-sh/ruff/issues/11491
total = 10
ok = 7
incomplete = 3
s = f"TOTAL: {total}\nOK: {ok}\INCOMPLETE: {incomplete}\n"
# Debug text (should trigger)
t = f"{'\InHere'=}"

View file

@ -66,70 +66,75 @@ pub(crate) fn invalid_escape_sequence(checker: &mut Checker, string_like: String
if part.flags().is_raw_string() {
continue;
}
match part {
StringLikePart::String(string_literal) => {
check(
&mut checker.diagnostics,
locator,
string_literal.start(),
string_literal.range(),
AnyStringFlags::from(string_literal.flags),
);
}
StringLikePart::Bytes(bytes_literal) => {
check(
&mut checker.diagnostics,
locator,
bytes_literal.start(),
bytes_literal.range(),
AnyStringFlags::from(bytes_literal.flags),
);
let state = match part {
StringLikePart::String(_) | StringLikePart::Bytes(_) => {
analyze_escape_chars(locator, part.range(), part.flags())
}
StringLikePart::FString(f_string) => {
let flags = AnyStringFlags::from(f_string.flags);
let mut escape_chars_state = EscapeCharsState::default();
// Whether we suggest converting to a raw string or
// adding backslashes depends on the presence of valid
// escape characters in the entire f-string. Therefore,
// we must analyze escape characters in each f-string
// element before pushing a diagnostic and fix.
for element in &f_string.elements {
match element {
FStringElement::Literal(literal) => {
check(
&mut checker.diagnostics,
escape_chars_state.update(analyze_escape_chars(
locator,
f_string.start(),
literal.range(),
flags,
);
));
}
FStringElement::Expression(expression) => {
let Some(format_spec) = expression.format_spec.as_ref() else {
continue;
};
for literal in format_spec.elements.literals() {
check(
&mut checker.diagnostics,
escape_chars_state.update(analyze_escape_chars(
locator,
f_string.start(),
literal.range(),
flags,
);
));
}
}
}
}
escape_chars_state
}
}
};
check(
&mut checker.diagnostics,
locator,
part.start(),
part.flags(),
state,
);
}
}
fn check(
diagnostics: &mut Vec<Diagnostic>,
#[derive(Default)]
struct EscapeCharsState {
contains_valid_escape_sequence: bool,
invalid_escape_chars: Vec<InvalidEscapeChar>,
}
impl EscapeCharsState {
fn update(&mut self, other: Self) {
self.contains_valid_escape_sequence |= other.contains_valid_escape_sequence;
self.invalid_escape_chars.extend(other.invalid_escape_chars);
}
}
/// Traverses string, collects invalid escape characters, and flags if a valid
/// escape character is found.
fn analyze_escape_chars(
locator: &Locator,
// Start position of the expression that contains the source range. This is used to generate
// the fix when the source range is part of the expression like in f-string which contains
// other f-string literal elements.
expr_start: TextSize,
// Range in the source code to perform the check on.
// Range in the source code to perform the analysis on.
source_range: TextRange,
flags: AnyStringFlags,
) {
) -> EscapeCharsState {
let source = locator.slice(source_range);
let mut contains_valid_escape_sequence = false;
let mut invalid_escape_chars = Vec::new();
@ -225,7 +230,31 @@ fn check(
range,
});
}
EscapeCharsState {
contains_valid_escape_sequence,
invalid_escape_chars,
}
}
/// Pushes a diagnostic and fix depending on escape characters seen so far.
///
/// If we have not seen any valid escape characters, we convert to
/// a raw string. If we have seen valid escape characters,
/// we manually add backslashes to each invalid escape character found.
fn check(
diagnostics: &mut Vec<Diagnostic>,
locator: &Locator,
// Start position of the expression that contains the source range. This is used to generate
// the fix when the source range is part of the expression like in f-string which contains
// other f-string literal elements.
expr_start: TextSize,
flags: AnyStringFlags,
escape_chars_state: EscapeCharsState,
) {
let EscapeCharsState {
contains_valid_escape_sequence,
invalid_escape_chars,
} = escape_chars_state;
if contains_valid_escape_sequence {
// Escape with backslash.
for invalid_escape_char in &invalid_escape_chars {

View file

@ -1,6 +1,5 @@
---
source: crates/ruff_linter/src/rules/pycodestyle/mod.rs
snapshot_kind: text
---
W605_1.py:4:11: W605 [*] Invalid escape sequence: `\.`
|
@ -243,6 +242,7 @@ W605_1.py:57:9: W605 [*] Invalid escape sequence: `\d`
57 |+rf"{{}}+-\d"
58 58 | f"\n{{}}+-\d+"
59 59 | f"\n{{}}<7D>+-\d+"
60 60 |
W605_1.py:58:11: W605 [*] Invalid escape sequence: `\d`
|
@ -261,6 +261,8 @@ W605_1.py:58:11: W605 [*] Invalid escape sequence: `\d`
58 |-f"\n{{}}+-\d+"
58 |+f"\n{{}}+-\\d+"
59 59 | f"\n{{}}<7D>+-\d+"
60 60 |
61 61 | # See https://github.com/astral-sh/ruff/issues/11491
W605_1.py:59:12: W605 [*] Invalid escape sequence: `\d`
|
@ -268,6 +270,8 @@ W605_1.py:59:12: W605 [*] Invalid escape sequence: `\d`
58 | f"\n{{}}+-\d+"
59 | f"\n{{}}<7D>+-\d+"
| ^^ W605
60 |
61 | # See https://github.com/astral-sh/ruff/issues/11491
|
= help: Add backslash to escape sequence
@ -277,3 +281,42 @@ W605_1.py:59:12: W605 [*] Invalid escape sequence: `\d`
58 58 | f"\n{{}}+-\d+"
59 |-f"\n{{}}<7D>+-\d+"
59 |+f"\n{{}}<7D>+-\\d+"
60 60 |
61 61 | # See https://github.com/astral-sh/ruff/issues/11491
62 62 | total = 10
W605_1.py:65:31: W605 [*] Invalid escape sequence: `\I`
|
63 | ok = 7
64 | incomplete = 3
65 | s = f"TOTAL: {total}\nOK: {ok}\INCOMPLETE: {incomplete}\n"
| ^^ W605
66 |
67 | # Debug text (should trigger)
|
= help: Add backslash to escape sequence
Safe fix
62 62 | total = 10
63 63 | ok = 7
64 64 | incomplete = 3
65 |-s = f"TOTAL: {total}\nOK: {ok}\INCOMPLETE: {incomplete}\n"
65 |+s = f"TOTAL: {total}\nOK: {ok}\\INCOMPLETE: {incomplete}\n"
66 66 |
67 67 | # Debug text (should trigger)
68 68 | t = f"{'\InHere'=}"
W605_1.py:68:9: W605 [*] Invalid escape sequence: `\I`
|
67 | # Debug text (should trigger)
68 | t = f"{'\InHere'=}"
| ^^ W605
|
= help: Use a raw string literal
Safe fix
65 65 | s = f"TOTAL: {total}\nOK: {ok}\INCOMPLETE: {incomplete}\n"
66 66 |
67 67 | # Debug text (should trigger)
68 |-t = f"{'\InHere'=}"
68 |+t = f"{r'\InHere'=}"