Raise syntax error when \ is at end of file (#17409)

## Summary

This PR fixes a bug in the lexer, specifically around a line continuation
character at the end of the file.

The bug occurred because the lexer checked for EOF only when it came
immediately after the line continuation character, and did not check for
EOF _after_ consuming the escaped newline.

fixes: #17398 

## Test Plan

Add tests for the scenarios where this should occur, mainly (a) when the
state is `AfterNewline` and (b) when the state is `Other`.
This commit is contained in:
Dhruv Manilawala 2025-04-15 21:26:12 +05:30 committed by GitHub
parent 942cb9e3ad
commit bfc17fecaa
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 223 additions and 7 deletions

View file

@ -246,17 +246,18 @@ impl<'src> Lexer<'src> {
self.cursor.bump();
if self.cursor.eat_char('\r') {
self.cursor.eat_char('\n');
} else if self.cursor.is_eof() {
return Some(self.push_error(LexicalError::new(
LexicalErrorType::Eof,
self.token_range(),
)));
} else if !self.cursor.eat_char('\n') {
return Some(self.push_error(LexicalError::new(
LexicalErrorType::LineContinuationError,
TextRange::at(self.offset() - '\\'.text_len(), '\\'.text_len()),
)));
}
if self.cursor.is_eof() {
return Some(self.push_error(LexicalError::new(
LexicalErrorType::Eof,
self.token_range(),
)));
}
indentation = Indentation::root();
}
// Form feed
@ -341,14 +342,15 @@ impl<'src> Lexer<'src> {
self.cursor.bump();
if self.cursor.eat_char('\r') {
self.cursor.eat_char('\n');
} else if self.cursor.is_eof() {
return Err(LexicalError::new(LexicalErrorType::Eof, self.token_range()));
} else if !self.cursor.eat_char('\n') {
return Err(LexicalError::new(
LexicalErrorType::LineContinuationError,
TextRange::at(self.offset() - '\\'.text_len(), '\\'.text_len()),
));
}
if self.cursor.is_eof() {
return Err(LexicalError::new(LexicalErrorType::Eof, self.token_range()));
}
}
// Form feed
'\x0C' => {
@ -2212,6 +2214,46 @@ if first:
assert_snapshot!(triple_quoted_eol(WINDOWS_EOL));
}
/// Lexes, in module mode, a source made up of nothing but a line-continuation
/// backslash followed by `eol`, and returns the `lex_invalid` output.
///
/// The backslash is the very first character of the input, so the
/// continuation is hit before any other token has been produced.
fn line_continuation_at_eof_after_newline(eol: &str) -> LexerOutput {
    // A single `\` immediately followed by the requested line ending.
    let input = ["\\", eol].concat();
    lex_invalid(&input, Mode::Module)
}
// Snapshot test: a lone `\` followed by `UNIX_EOL` and then end of input.
// The recorded snapshot holds one `Unknown` token and one `Eof` error, both
// spanning 0..2.
#[test]
fn test_line_continuation_at_eof_after_newline_unix_eol() {
assert_snapshot!(line_continuation_at_eof_after_newline(UNIX_EOL));
}
// Snapshot test: a lone `\` followed by `MAC_EOL` and then end of input.
// The recorded snapshot holds one `Unknown` token and one `Eof` error, both
// spanning 0..2.
#[test]
fn test_line_continuation_at_eof_after_newline_mac_eol() {
assert_snapshot!(line_continuation_at_eof_after_newline(MAC_EOL));
}
// Snapshot test: a lone `\` followed by `WINDOWS_EOL` and then end of input.
// The recorded snapshot holds one `Unknown` token and one `Eof` error, both
// spanning 0..3 (one byte wider than the Unix/Mac variants).
#[test]
fn test_line_continuation_at_eof_after_newline_windows_eol() {
assert_snapshot!(line_continuation_at_eof_after_newline(WINDOWS_EOL));
}
/// Lexes, in module mode, the source `1, \` followed by `eol`, and returns
/// the `lex_invalid` output.
///
/// Here the line-continuation backslash comes after other tokens on the same
/// line, and the escaped line ending is the last thing in the input.
fn line_continuation_at_eof(eol: &str) -> LexerOutput {
    // `1, ` then a single `\` immediately followed by the requested line ending.
    let input = ["1, \\", eol].concat();
    lex_invalid(&input, Mode::Module)
}
// Snapshot test: `1, \` followed by `UNIX_EOL` and then end of input.
// The recorded snapshot holds `Int(1)`, `Comma`, an `Unknown` token at 2..5,
// a zero-width `Newline` at 5..5, and an `Eof` error at 2..5.
#[test]
fn test_line_continuation_at_eof_unix_eol() {
assert_snapshot!(line_continuation_at_eof(UNIX_EOL));
}
// Snapshot test: `1, \` followed by `MAC_EOL` and then end of input.
// The recorded snapshot holds `Int(1)`, `Comma`, an `Unknown` token at 2..5,
// a zero-width `Newline` at 5..5, and an `Eof` error at 2..5.
#[test]
fn test_line_continuation_at_eof_mac_eol() {
assert_snapshot!(line_continuation_at_eof(MAC_EOL));
}
// Snapshot test: `1, \` followed by `WINDOWS_EOL` and then end of input.
// The recorded snapshot holds `Int(1)`, `Comma`, an `Unknown` token at 2..6,
// a zero-width `Newline` at 6..6, and an `Eof` error at 2..6.
#[test]
fn test_line_continuation_at_eof_windows_eol() {
assert_snapshot!(line_continuation_at_eof(WINDOWS_EOL));
}
// This test case is to just make sure that the lexer doesn't go into
// infinite loop on invalid input.
#[test]

View file

@ -0,0 +1,22 @@
---
source: crates/ruff_python_parser/src/lexer.rs
expression: line_continuation_at_eof_after_newline(MAC_EOL)
---
## Tokens
```
[
(
Unknown,
0..2,
),
]
```
## Errors
```
[
LexicalError {
error: Eof,
location: 0..2,
},
]
```

View file

@ -0,0 +1,22 @@
---
source: crates/ruff_python_parser/src/lexer.rs
expression: line_continuation_at_eof_after_newline(UNIX_EOL)
---
## Tokens
```
[
(
Unknown,
0..2,
),
]
```
## Errors
```
[
LexicalError {
error: Eof,
location: 0..2,
},
]
```

View file

@ -0,0 +1,22 @@
---
source: crates/ruff_python_parser/src/lexer.rs
expression: line_continuation_at_eof_after_newline(WINDOWS_EOL)
---
## Tokens
```
[
(
Unknown,
0..3,
),
]
```
## Errors
```
[
LexicalError {
error: Eof,
location: 0..3,
},
]
```

View file

@ -0,0 +1,36 @@
---
source: crates/ruff_python_parser/src/lexer.rs
expression: line_continuation_at_eof(MAC_EOL)
---
## Tokens
```
[
(
Int(
1,
),
0..1,
),
(
Comma,
1..2,
),
(
Unknown,
2..5,
),
(
Newline,
5..5,
),
]
```
## Errors
```
[
LexicalError {
error: Eof,
location: 2..5,
},
]
```

View file

@ -0,0 +1,36 @@
---
source: crates/ruff_python_parser/src/lexer.rs
expression: line_continuation_at_eof(UNIX_EOL)
---
## Tokens
```
[
(
Int(
1,
),
0..1,
),
(
Comma,
1..2,
),
(
Unknown,
2..5,
),
(
Newline,
5..5,
),
]
```
## Errors
```
[
LexicalError {
error: Eof,
location: 2..5,
},
]
```

View file

@ -0,0 +1,36 @@
---
source: crates/ruff_python_parser/src/lexer.rs
expression: line_continuation_at_eof(WINDOWS_EOL)
---
## Tokens
```
[
(
Int(
1,
),
0..1,
),
(
Comma,
1..2,
),
(
Unknown,
2..6,
),
(
Newline,
6..6,
),
]
```
## Errors
```
[
LexicalError {
error: Eof,
location: 2..6,
},
]
```