Fix invalid escape handling for CRLF files (#589)

This commit is contained in:
Charlie Marsh 2022-11-04 14:26:39 -04:00 committed by GitHub
parent 726e6c68cf
commit a75b1c85ee
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 98 additions and 26 deletions

35
resources/test/fixtures/W605_1.py vendored Normal file
View file

@ -0,0 +1,35 @@
#: W605:1:10
regex = '\.png$'
#: W605:2:1
regex = '''
\.png$
'''
#: W605:2:6
f(
'\_'
)
#: W605:4:6
"""
multi-line
literal
with \_ somewhere
in the middle
"""
#: Okay
regex = r'\.png$'
regex = '\\.png$'
regex = r'''
\.png$
'''
regex = r'''
\\.png$
'''
s = '\\'
regex = '\w' # noqa
regex = '''
\w
''' # noqa

View file

@ -439,7 +439,8 @@ mod tests {
#[test_case(CheckCode::W292, Path::new("W292_0.py"); "W292_0")]
#[test_case(CheckCode::W292, Path::new("W292_1.py"); "W292_1")]
#[test_case(CheckCode::W292, Path::new("W292_2.py"); "W292_2")]
#[test_case(CheckCode::W605, Path::new("W605.py"); "W605")]
#[test_case(CheckCode::W605, Path::new("W605_0.py"); "W605_0")]
#[test_case(CheckCode::W605, Path::new("W605_1.py"); "W605_1")]
#[test_case(CheckCode::X001, Path::new("X001.py"); "X001")]
fn checks(check_code: CheckCode, path: &Path) -> Result<()> {
let snapshot = format!("{}_{}", check_code.as_ref(), path.to_string_lossy());

View file

@ -277,20 +277,26 @@ pub fn invalid_escape_sequence(
let body = &text[(quote_pos + quote.len())..(text.len() - quote.len())];
if !prefix.contains('r') {
let mut col_offset = 0;
let mut row_offset = 0;
let mut in_escape = false;
let mut chars = body.chars();
let mut current = chars.next();
let mut next = chars.next();
while let (Some(current_char), Some(next_char)) = (current, next) {
// If we see an escaped backslash, avoid treating the character _after_ the
// escaped backslash as itself an escaped character.
if in_escape {
in_escape = false;
} else {
in_escape = current_char == '\\' && next_char == '\\';
if current_char == '\\' && !VALID_ESCAPE_SEQUENCES.contains(&next_char) {
for (row_offset, line) in body.lines().enumerate() {
let chars: Vec<char> = line.chars().collect();
for col_offset in 0..chars.len() {
if chars[col_offset] == '\\' {
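// If the previous character was also a backslash, skip.
// NOTE(review): only the immediately preceding character is inspected, so in a run of
// three backslashes (e.g. `\\\d`) the third backslash is skipped as well, even though it
// begins a new escape sequence after the escaped backslash — confirm this is intended.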
if col_offset > 0 && chars[col_offset - 1] == '\\' {
continue;
}
// If we're at the end of the line, skip.
if col_offset == chars.len() - 1 {
continue;
}
// If the next character is a valid escape sequence, skip.
let next_char = chars[col_offset + 1];
if VALID_ESCAPE_SEQUENCES.contains(&next_char) {
continue;
}
// Compute the location of the escape sequence by offsetting the location of the
// string token by the characters we've seen thus far.
let location = if row_offset == 0 {
@ -311,17 +317,6 @@ pub fn invalid_escape_sequence(
))
}
}
// Track the offset from the start position as we iterate over the body.
if current_char == '\n' {
col_offset = 0;
row_offset += 1;
} else {
col_offset += 1;
}
current = next;
next = chars.next();
}
}

View file

@ -0,0 +1,41 @@
---
source: src/linter.rs
expression: checks
---
- kind:
InvalidEscapeSequence: "."
location:
row: 2
column: 9
end_location:
row: 2
column: 11
fix: ~
- kind:
InvalidEscapeSequence: "."
location:
row: 6
column: 0
end_location:
row: 6
column: 2
fix: ~
- kind:
InvalidEscapeSequence: _
location:
row: 11
column: 5
end_location:
row: 11
column: 7
fix: ~
- kind:
InvalidEscapeSequence: _
location:
row: 18
column: 5
end_location:
row: 18
column: 7
fix: ~