Use memchr for invalid-escape-sequence (#5994)

This commit is contained in:
Charlie Marsh 2023-07-22 20:57:36 -04:00 committed by GitHub
parent 74dc137b30
commit 71f1643eda
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 33 additions and 18 deletions

View file

@ -41,3 +41,5 @@ regex = '\w' # noqa
regex = ''' regex = '''
\w \w
''' # noqa ''' # noqa
regex = '\\\_'

View file

@ -126,6 +126,7 @@ pub(crate) fn check_tokens(
} }
} }
} }
// PLE2510, PLE2512, PLE2513 // PLE2510, PLE2512, PLE2513
if enforce_invalid_string_character { if enforce_invalid_string_character {
for (tok, range) in tokens.iter().flatten() { for (tok, range) in tokens.iter().flatten() {
@ -177,7 +178,7 @@ pub(crate) fn check_tokens(
} }
// TD001, TD002, TD003, TD004, TD005, TD006, TD007 // TD001, TD002, TD003, TD004, TD005, TD006, TD007
// T001, T002, T003, T004 // FIX001, FIX002, FIX003, FIX004
if enforce_todos { if enforce_todos {
let todo_comments: Vec<TodoComment> = indexer let todo_comments: Vec<TodoComment> = indexer
.comment_ranges() .comment_ranges()
@ -188,9 +189,7 @@ pub(crate) fn check_tokens(
TodoComment::from_comment(comment, *comment_range, i) TodoComment::from_comment(comment, *comment_range, i)
}) })
.collect(); .collect();
flake8_todos::rules::todos(&mut diagnostics, &todo_comments, locator, indexer, settings); flake8_todos::rules::todos(&mut diagnostics, &todo_comments, locator, indexer, settings);
flake8_fixme::rules::todos(&mut diagnostics, &todo_comments); flake8_fixme::rules::todos(&mut diagnostics, &todo_comments);
} }

View file

@ -1,3 +1,4 @@
use memchr::memchr_iter;
use ruff_text_size::{TextLen, TextRange, TextSize}; use ruff_text_size::{TextLen, TextRange, TextSize};
use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix}; use ruff_diagnostics::{AlwaysAutofixableViolation, Diagnostic, Edit, Fix};
@ -60,29 +61,26 @@ pub(crate) fn invalid_escape_sequence(
return; return;
} }
let start_offset = range.start() + TextSize::try_from(leading_quote.len()).unwrap();
let mut chars_iter = body.char_indices().peekable();
let mut contains_valid_escape_sequence = false; let mut contains_valid_escape_sequence = false;
let mut invalid_escape_sequence = Vec::new(); let mut invalid_escape_sequence = Vec::new();
while let Some((i, c)) = chars_iter.next() {
if c != '\\' {
continue;
}
let mut prev = None;
let bytes = body.as_bytes();
for i in memchr_iter(b'\\', bytes) {
// If the previous character was also a backslash, skip. // If the previous character was also a backslash, skip.
if i > 0 && body.as_bytes()[i - 1] == b'\\' { if prev.map_or(false, |prev| prev == i - 1) {
prev = None;
continue; continue;
} }
prev = Some(i);
let Some(next_char) = body[i + 1..].chars().next() else {
// If we're at the end of the file, skip. // If we're at the end of the file, skip.
let Some((_, next_char)) = chars_iter.peek() else {
continue; continue;
}; };
// If we're at the end of the line, skip // If we're at the end of line, skip.
if matches!(next_char, '\n' | '\r') { if matches!(next_char, '\n' | '\r') {
continue; continue;
} }
@ -120,9 +118,9 @@ pub(crate) fn invalid_escape_sequence(
continue; continue;
} }
let location = start_offset + TextSize::try_from(i).unwrap(); let location = range.start() + leading_quote.text_len() + TextSize::try_from(i).unwrap();
let range = TextRange::at(location, next_char.text_len() + TextSize::from(1)); let range = TextRange::at(location, next_char.text_len() + TextSize::from(1));
invalid_escape_sequence.push(Diagnostic::new(InvalidEscapeSequence(*next_char), range)); invalid_escape_sequence.push(Diagnostic::new(InvalidEscapeSequence(next_char), range));
} }
if autofix { if autofix {

View file

@ -119,4 +119,20 @@ W605_0.py:28:12: W605 [*] Invalid escape sequence: `\.`
30 30 | #: Okay 30 30 | #: Okay
31 31 | regex = r'\.png$' 31 31 | regex = r'\.png$'
W605_0.py:45:12: W605 [*] Invalid escape sequence: `\_`
|
43 | ''' # noqa
44 |
45 | regex = '\\\_'
| ^^ W605
|
= help: Add backslash to escape sequence
Fix
42 42 | \w
43 43 | ''' # noqa
44 44 |
45 |-regex = '\\\_'
45 |+regex = '\\\\_'