mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-04 02:38:25 +00:00
Raise syntax error when \
is at end of file (#17409)
## Summary This PR fixes a bug in the lexer around a line continuation character at the end of the file. It occurred because the lexer only checked for EOF immediately after the line continuation character, not _after_ consuming the escaped newline. Fixes: #17398 ## Test Plan Add tests for the two scenarios where this occurs: (a) when the lexer state is `AfterNewline` and (b) when the state is `Other`.
This commit is contained in:
parent
942cb9e3ad
commit
bfc17fecaa
7 changed files with 223 additions and 7 deletions
|
@ -246,17 +246,18 @@ impl<'src> Lexer<'src> {
|
|||
self.cursor.bump();
|
||||
if self.cursor.eat_char('\r') {
|
||||
self.cursor.eat_char('\n');
|
||||
} else if self.cursor.is_eof() {
|
||||
return Some(self.push_error(LexicalError::new(
|
||||
LexicalErrorType::Eof,
|
||||
self.token_range(),
|
||||
)));
|
||||
} else if !self.cursor.eat_char('\n') {
|
||||
return Some(self.push_error(LexicalError::new(
|
||||
LexicalErrorType::LineContinuationError,
|
||||
TextRange::at(self.offset() - '\\'.text_len(), '\\'.text_len()),
|
||||
)));
|
||||
}
|
||||
if self.cursor.is_eof() {
|
||||
return Some(self.push_error(LexicalError::new(
|
||||
LexicalErrorType::Eof,
|
||||
self.token_range(),
|
||||
)));
|
||||
}
|
||||
indentation = Indentation::root();
|
||||
}
|
||||
// Form feed
|
||||
|
@ -341,14 +342,15 @@ impl<'src> Lexer<'src> {
|
|||
self.cursor.bump();
|
||||
if self.cursor.eat_char('\r') {
|
||||
self.cursor.eat_char('\n');
|
||||
} else if self.cursor.is_eof() {
|
||||
return Err(LexicalError::new(LexicalErrorType::Eof, self.token_range()));
|
||||
} else if !self.cursor.eat_char('\n') {
|
||||
return Err(LexicalError::new(
|
||||
LexicalErrorType::LineContinuationError,
|
||||
TextRange::at(self.offset() - '\\'.text_len(), '\\'.text_len()),
|
||||
));
|
||||
}
|
||||
if self.cursor.is_eof() {
|
||||
return Err(LexicalError::new(LexicalErrorType::Eof, self.token_range()));
|
||||
}
|
||||
}
|
||||
// Form feed
|
||||
'\x0C' => {
|
||||
|
@ -2212,6 +2214,46 @@ if first:
|
|||
assert_snapshot!(triple_quoted_eol(WINDOWS_EOL));
|
||||
}
|
||||
|
||||
fn line_continuation_at_eof_after_newline(eol: &str) -> LexerOutput {
|
||||
let source = format!(r"\{eol}");
|
||||
lex_invalid(&source, Mode::Module)
|
||||
}
|
||||
|
||||
#[test]
fn test_line_continuation_at_eof_after_newline_unix_eol() {
    // `\` + LF at EOF must raise a lexical error rather than loop or pass.
    assert_snapshot!(line_continuation_at_eof_after_newline(UNIX_EOL));
}
|
||||
|
||||
#[test]
fn test_line_continuation_at_eof_after_newline_mac_eol() {
    // `\` + CR at EOF must raise a lexical error rather than loop or pass.
    assert_snapshot!(line_continuation_at_eof_after_newline(MAC_EOL));
}
|
||||
|
||||
#[test]
fn test_line_continuation_at_eof_after_newline_windows_eol() {
    // `\` + CRLF at EOF must raise a lexical error rather than loop or pass.
    assert_snapshot!(line_continuation_at_eof_after_newline(WINDOWS_EOL));
}
|
||||
|
||||
fn line_continuation_at_eof(eol: &str) -> LexerOutput {
|
||||
let source = format!(r"1, \{eol}");
|
||||
lex_invalid(&source, Mode::Module)
|
||||
}
|
||||
|
||||
#[test]
fn test_line_continuation_at_eof_unix_eol() {
    // Mid-line `\` + LF at EOF must raise a lexical error, not hang the lexer.
    assert_snapshot!(line_continuation_at_eof(UNIX_EOL));
}
|
||||
|
||||
#[test]
fn test_line_continuation_at_eof_mac_eol() {
    // Mid-line `\` + CR at EOF must raise a lexical error, not hang the lexer.
    assert_snapshot!(line_continuation_at_eof(MAC_EOL));
}
|
||||
|
||||
#[test]
fn test_line_continuation_at_eof_windows_eol() {
    // Mid-line `\` + CRLF at EOF must raise a lexical error, not hang the lexer.
    assert_snapshot!(line_continuation_at_eof(WINDOWS_EOL));
}
|
||||
|
||||
// This test case is to just make sure that the lexer doesn't go into
|
||||
// infinite loop on invalid input.
|
||||
#[test]
|
||||
|
|
|
@ -0,0 +1,22 @@
|
|||
---
|
||||
source: crates/ruff_python_parser/src/lexer.rs
|
||||
expression: line_continuation_at_eof_after_newline(MAC_EOL)
|
||||
---
|
||||
## Tokens
|
||||
```
|
||||
[
|
||||
(
|
||||
Unknown,
|
||||
0..2,
|
||||
),
|
||||
]
|
||||
```
|
||||
## Errors
|
||||
```
|
||||
[
|
||||
LexicalError {
|
||||
error: Eof,
|
||||
location: 0..2,
|
||||
},
|
||||
]
|
||||
```
|
|
@ -0,0 +1,22 @@
|
|||
---
|
||||
source: crates/ruff_python_parser/src/lexer.rs
|
||||
expression: line_continuation_at_eof_after_newline(UNIX_EOL)
|
||||
---
|
||||
## Tokens
|
||||
```
|
||||
[
|
||||
(
|
||||
Unknown,
|
||||
0..2,
|
||||
),
|
||||
]
|
||||
```
|
||||
## Errors
|
||||
```
|
||||
[
|
||||
LexicalError {
|
||||
error: Eof,
|
||||
location: 0..2,
|
||||
},
|
||||
]
|
||||
```
|
|
@ -0,0 +1,22 @@
|
|||
---
|
||||
source: crates/ruff_python_parser/src/lexer.rs
|
||||
expression: line_continuation_at_eof_after_newline(WINDOWS_EOL)
|
||||
---
|
||||
## Tokens
|
||||
```
|
||||
[
|
||||
(
|
||||
Unknown,
|
||||
0..3,
|
||||
),
|
||||
]
|
||||
```
|
||||
## Errors
|
||||
```
|
||||
[
|
||||
LexicalError {
|
||||
error: Eof,
|
||||
location: 0..3,
|
||||
},
|
||||
]
|
||||
```
|
|
@ -0,0 +1,36 @@
|
|||
---
|
||||
source: crates/ruff_python_parser/src/lexer.rs
|
||||
expression: line_continuation_at_eof(MAC_EOL)
|
||||
---
|
||||
## Tokens
|
||||
```
|
||||
[
|
||||
(
|
||||
Int(
|
||||
1,
|
||||
),
|
||||
0..1,
|
||||
),
|
||||
(
|
||||
Comma,
|
||||
1..2,
|
||||
),
|
||||
(
|
||||
Unknown,
|
||||
2..5,
|
||||
),
|
||||
(
|
||||
Newline,
|
||||
5..5,
|
||||
),
|
||||
]
|
||||
```
|
||||
## Errors
|
||||
```
|
||||
[
|
||||
LexicalError {
|
||||
error: Eof,
|
||||
location: 2..5,
|
||||
},
|
||||
]
|
||||
```
|
|
@ -0,0 +1,36 @@
|
|||
---
|
||||
source: crates/ruff_python_parser/src/lexer.rs
|
||||
expression: line_continuation_at_eof(UNIX_EOL)
|
||||
---
|
||||
## Tokens
|
||||
```
|
||||
[
|
||||
(
|
||||
Int(
|
||||
1,
|
||||
),
|
||||
0..1,
|
||||
),
|
||||
(
|
||||
Comma,
|
||||
1..2,
|
||||
),
|
||||
(
|
||||
Unknown,
|
||||
2..5,
|
||||
),
|
||||
(
|
||||
Newline,
|
||||
5..5,
|
||||
),
|
||||
]
|
||||
```
|
||||
## Errors
|
||||
```
|
||||
[
|
||||
LexicalError {
|
||||
error: Eof,
|
||||
location: 2..5,
|
||||
},
|
||||
]
|
||||
```
|
|
@ -0,0 +1,36 @@
|
|||
---
|
||||
source: crates/ruff_python_parser/src/lexer.rs
|
||||
expression: line_continuation_at_eof(WINDOWS_EOL)
|
||||
---
|
||||
## Tokens
|
||||
```
|
||||
[
|
||||
(
|
||||
Int(
|
||||
1,
|
||||
),
|
||||
0..1,
|
||||
),
|
||||
(
|
||||
Comma,
|
||||
1..2,
|
||||
),
|
||||
(
|
||||
Unknown,
|
||||
2..6,
|
||||
),
|
||||
(
|
||||
Newline,
|
||||
6..6,
|
||||
),
|
||||
]
|
||||
```
|
||||
## Errors
|
||||
```
|
||||
[
|
||||
LexicalError {
|
||||
error: Eof,
|
||||
location: 2..6,
|
||||
},
|
||||
]
|
||||
```
|
Loading…
Add table
Add a link
Reference in a new issue