Consider 2-character EOL before line continuation (#12035)

## Summary

This PR fixes a bug introduced in
https://github.com/astral-sh/ruff/pull/12008 which didn't consider the
two character newline after the line continuation character.

For example, consider the following code highlighted with whitespaces:
```py
call(foo # comment \\r\n
\r\n
def bar():\r\n
....pass\r\n
```
The lexer is at `def` when it's running the re-lexing logic and trying
to move back to a newline character. It encounters `\n` and it's being
escaped (incorrect) but `\r` is being escaped, so it moves the lexer to
`\n` character. This creates an overlap in token ranges which causes the
panic.

```
Name 0..4
Lpar 4..5
Name 5..8
Comment 9..20
NonLogicalNewline 20..22 <-- overlap between
Newline 21..22           <-- these two tokens
NonLogicalNewline 22..23
Def 23..26
...
```

fixes: #12028 

## Test Plan

Add a test case with line continuation and windows style newline
character.
This commit is contained in:
Dhruv Manilawala 2024-06-26 14:00:48 +05:30 committed by GitHub
parent 7cb2619ef5
commit 47c9ed07f2
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 125 additions and 17 deletions

View file

@ -1393,26 +1393,40 @@ impl<'src> Lexer<'src> {
while let Some(ch) = reverse_chars.next() {
if is_python_whitespace(ch) {
current_position -= ch.text_len();
} else if matches!(ch, '\n' | '\r') {
current_position -= ch.text_len();
// Count the number of backslashes before the newline character.
let mut backslash_count = 0;
while reverse_chars.next_if_eq(&'\\').is_some() {
backslash_count += 1;
}
if backslash_count == 0 {
// No escapes: `\n`
newline_position = Some(current_position);
} else {
if backslash_count % 2 == 0 {
// Even number of backslashes i.e., all backslashes cancel each other out
// which means the newline character is not being escaped.
newline_position = Some(current_position);
continue;
}
match ch {
'\n' => {
current_position -= ch.text_len();
if let Some(carriage_return) = reverse_chars.next_if_eq(&'\r') {
current_position -= carriage_return.text_len();
}
current_position -= TextSize::new('\\'.text_len().to_u32() * backslash_count);
}
'\r' => {
current_position -= ch.text_len();
}
_ => break,
}
debug_assert!(matches!(ch, '\n' | '\r'));
// Count the number of backslashes before the newline character.
let mut backslash_count = 0;
while reverse_chars.next_if_eq(&'\\').is_some() {
backslash_count += 1;
}
if backslash_count == 0 {
// No escapes: `\n`
newline_position = Some(current_position);
} else {
break;
if backslash_count % 2 == 0 {
// Even number of backslashes i.e., all backslashes cancel each other out
// which means the newline character is not being escaped.
newline_position = Some(current_position);
}
current_position -= TextSize::new('\\'.text_len().to_u32() * backslash_count);
}
}