Use memchr for invalid-escape-sequence (#5994)

2025-09-27 04:19:18 +00:00 · 2023-07-22 20:57:36 -04:00 · 2023-07-22 20:57:36 -04:00 · 71f1643eda
commit 71f1643eda
parent 74dc137b30
4 changed files with 33 additions and 18 deletions
--- a/crates/ruff/resources/test/fixtures/pycodestyle/W605_0.py
+++ b/crates/ruff/resources/test/fixtures/pycodestyle/W605_0.py
@@ -41,3 +41,5 @@ regex = '\w'  # noqa
 regex = '''
 \w
 '''  # noqa
 regex = '\\\_'
--- a/crates/ruff/src/checkers/tokens.rs
+++ b/crates/ruff/src/checkers/tokens.rs
@@ -126,6 +126,7 @@ pub(crate) fn check_tokens(
            }
        }
    }
    // PLE2510, PLE2512, PLE2513
    if enforce_invalid_string_character {
        for (tok, range) in tokens.iter().flatten() {
@@ -177,7 +178,7 @@ pub(crate) fn check_tokens(
    }
    // TD001, TD002, TD003, TD004, TD005, TD006, TD007
-    // T001, T002, T003, T004
+    // FIX001, FIX002, FIX003, FIX004
    if enforce_todos {
        let todo_comments: Vec<TodoComment> = indexer
            .comment_ranges()
@@ -188,9 +189,7 @@ pub(crate) fn check_tokens(
                TodoComment::from_comment(comment, *comment_range, i)
            })
            .collect();
        flake8_todos::rules::todos(&mut diagnostics, &todo_comments, locator, indexer, settings);
        flake8_fixme::rules::todos(&mut diagnostics, &todo_comments);
    }
--- a/crates/ruff/src/rules/pycodestyle/rules/invalid_escape_sequence.rs
+++ b/crates/ruff/src/rules/pycodestyle/rules/invalid_escape_sequence.rs
@@ -1,3 +1,4 @@
 use memchr::memchr_iter;
 use ruff_text_size::{TextLen, TextRange, TextSize};
 use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix};
@@ -60,29 +61,26 @@ pub(crate) fn invalid_escape_sequence(
        return;
    }
    let start_offset = range.start() + TextSize::try_from(leading_quote.len()).unwrap();
    let mut chars_iter = body.char_indices().peekable();
    let mut contains_valid_escape_sequence = false;
    let mut invalid_escape_sequence = Vec::new();
    while let Some((i, c)) = chars_iter.next() {
        if c != '\\' {
            continue;
        }
    let mut prev = None;
    let bytes = body.as_bytes();
    for i in memchr_iter(b'\\', bytes) {
        // If the previous character was also a backslash, skip.
-        if i > 0 && body.as_bytes()[i - 1] == b'\\' {
+        if prev.map_or(false, |prev| prev == i - 1) {
            prev = None;
            continue;
        }
-        // If we're at the end of the file, skip.
+        prev = Some(i);
-        let Some((_, next_char)) = chars_iter.peek() else {
+
        let Some(next_char) = body[i + 1..].chars().next() else {
            // If we're at the end of the file, skip.
            continue;
        };
-        // If we're at the end of the line, skip
+        // If we're at the end of line, skip.
        if matches!(next_char, '\n' | '\r') {
            continue;
        }
@@ -120,9 +118,9 @@ pub(crate) fn invalid_escape_sequence(
            continue;
        }
-        let location = start_offset + TextSize::try_from(i).unwrap();
+        let location = range.start() + leading_quote.text_len() + TextSize::try_from(i).unwrap();
        let range = TextRange::at(location, next_char.text_len() + TextSize::from(1));
-        invalid_escape_sequence.push(Diagnostic::new(InvalidEscapeSequence(*next_char), range));
+        invalid_escape_sequence.push(Diagnostic::new(InvalidEscapeSequence(next_char), range));
    }
    if autofix {
--- a/crates/ruff/src/rules/pycodestyle/snapshots/ruffrulespycodestyletestsW605_W605_0.py.snap
+++ b/crates/ruff/src/rules/pycodestyle/snapshots/ruffrulespycodestyletestsW605_W605_0.py.snap
@@ -119,4 +119,20 @@ W605_0.py:28:12: W605 [*] Invalid escape sequence: `\.`
 30 30 | #: Okay
 31 31 | regex = r'\.png$'
 W605_0.py:45:12: W605 [*] Invalid escape sequence: `\_`
   |
 43 | '''  # noqa
 44 | 
 45 | regex = '\\\_'
   |            ^^ W605
   |
   = help: Add backslash to escape sequence
 ℹ Fix
 42 42 | \w
 43 43 | '''  # noqa
 44 44 | 
 45    |-regex = '\\\_'
   45 |+regex = '\\\\_'