Fix invalid escape handling for CRLF files (#589)

2025-09-27 12:29:48 +00:00 · 2022-11-04 14:26:39 -04:00 · 2022-11-04 14:26:39 -04:00 · a75b1c85ee
commit a75b1c85ee
parent 726e6c68cf
6 changed files with 98 additions and 26 deletions
--- a/resources/test/fixtures/W605_0.py
+++ b/resources/test/fixtures/W605_0.py
--- a/resources/test/fixtures/W605_1.py
+++ b/resources/test/fixtures/W605_1.py
@@ -0,0 +1,35 @@
+#: W605:1:10
+regex = '\.png$'
+
+#: W605:2:1
+regex = '''
+\.png$
+'''
+
+#: W605:2:6
+f(
+    '\_'
+)
+
+#: W605:4:6
+"""
+multi-line
+literal
+with \_ somewhere
+in the middle
+"""
+
+#: Okay
+regex = r'\.png$'
+regex = '\\.png$'
+regex = r'''
+\.png$
+'''
+regex = r'''
+\\.png$
+'''
+s = '\\'
+regex = '\w'  # noqa
+regex = '''
+\w
+'''  # noqa
--- a/src/linter.rs
+++ b/src/linter.rs
@@ -439,7 +439,8 @@ mod tests {
    #[test_case(CheckCode::W292, Path::new("W292_0.py"); "W292_0")]
    #[test_case(CheckCode::W292, Path::new("W292_1.py"); "W292_1")]
    #[test_case(CheckCode::W292, Path::new("W292_2.py"); "W292_2")]
-    #[test_case(CheckCode::W605, Path::new("W605.py"); "W605")]
+    #[test_case(CheckCode::W605, Path::new("W605_0.py"); "W605_0")]
+    #[test_case(CheckCode::W605, Path::new("W605_1.py"); "W605_1")]
    #[test_case(CheckCode::X001, Path::new("X001.py"); "X001")]
    fn checks(check_code: CheckCode, path: &Path) -> Result<()> {
        let snapshot = format!("{}_{}", check_code.as_ref(), path.to_string_lossy());
--- a/src/pycodestyle/checks.rs
+++ b/src/pycodestyle/checks.rs
@@ -277,20 +277,26 @@ pub fn invalid_escape_sequence(
    let body = &text[(quote_pos + quote.len())..(text.len() - quote.len())];

    if !prefix.contains('r') {
-        let mut col_offset = 0;
-        let mut row_offset = 0;
-        let mut in_escape = false;
-        let mut chars = body.chars();
-        let mut current = chars.next();
-        let mut next = chars.next();
-        while let (Some(current_char), Some(next_char)) = (current, next) {
-            // If we see an escaped backslash, avoid treating the character _after_ the
-            // escaped backslash as itself an escaped character.
-            if in_escape {
-                in_escape = false;
-            } else {
-                in_escape = current_char == '\\' && next_char == '\\';
-                if current_char == '\\' && !VALID_ESCAPE_SEQUENCES.contains(&next_char) {
+        for (row_offset, line) in body.lines().enumerate() {
+            let chars: Vec<char> = line.chars().collect();
+            for col_offset in 0..chars.len() {
+                if chars[col_offset] == '\\' {
+                    // If the previous character was also a backslash, skip.
+                    if col_offset > 0 && chars[col_offset - 1] == '\\' {
+                        continue;
+                    }
+
+                    // If we're at the end of the line, skip.
+                    if col_offset == chars.len() - 1 {
+                        continue;
+                    }
+
+                    // If the next character is a valid escape sequence, skip.
+                    let next_char = chars[col_offset + 1];
+                    if VALID_ESCAPE_SEQUENCES.contains(&next_char) {
+                        continue;
+                    }
+
                    // Compute the location of the escape sequence by offsetting the location of the
                    // string token by the characters we've seen thus far.
                    let location = if row_offset == 0 {
@@ -311,17 +317,6 @@ pub fn invalid_escape_sequence(
                    ))
                }
            }
-
-            // Track the offset from the start position as we iterate over the body.
-            if current_char == '\n' {
-                col_offset = 0;
-                row_offset += 1;
-            } else {
-                col_offset += 1;
-            }
-
-            current = next;
-            next = chars.next();
        }
    }

--- a/src/snapshots/rufflintertests__W605_W605_0.py.snap
+++ b/src/snapshots/rufflintertests__W605_W605_0.py.snap
--- a/src/snapshots/rufflintertests__W605_W605_1.py.snap
+++ b/src/snapshots/rufflintertests__W605_W605_1.py.snap
@@ -0,0 +1,41 @@
+---
+source: src/linter.rs
+expression: checks
+---
+- kind:
+    InvalidEscapeSequence: "."
+  location:
+    row: 2
+    column: 9
+  end_location:
+    row: 2
+    column: 11
+  fix: ~
+- kind:
+    InvalidEscapeSequence: "."
+  location:
+    row: 6
+    column: 0
+  end_location:
+    row: 6
+    column: 2
+  fix: ~
+- kind:
+    InvalidEscapeSequence: _
+  location:
+    row: 11
+    column: 5
+  end_location:
+    row: 11
+    column: 7
+  fix: ~
+- kind:
+    InvalidEscapeSequence: _
+  location:
+    row: 18
+    column: 5
+  end_location:
+    row: 18
+    column: 7
+  fix: ~
+