Consider line continuation character for re-lexing (#12008)

## Summary

This PR fixes a bug where the re-lexing logic didn't account for a line
continuation character (`\`) immediately preceding the newline character. As a
result, the lexer was being moved back to a newline character that is escaped
by the `\` and should therefore have been skipped.

Consider the following code:
```py
f'middle {'string':\
        'format spec'}

```

The old token stream is:
```
...
Colon 18..19
FStringMiddle 19..29 (flags = F_STRING)
Newline 20..21
Indent 21..29
String 29..42
Rbrace 42..43
...
```

Notice how the range of the `FStringMiddle` token overlaps with the ranges of
the tokens emitted after moving the lexer backwards.

After this fix, the lexer is no longer moved backwards in this scenario, and
the new token stream is:
```
FStringStart 0..2 (flags = F_STRING)
FStringMiddle 2..9 (flags = F_STRING)
Lbrace 9..10
String 10..18
Colon 18..19
FStringMiddle 19..29 (flags = F_STRING)
FStringEnd 29..30 (flags = F_STRING)
Name 30..36
Name 37..41
Unknown 41..44
Newline 44..45
```

fixes: #12004 

## Test Plan

Add test cases and update the snapshots.
This commit is contained in:
Dhruv Manilawala 2024-06-25 07:43:54 +05:30 committed by GitHub
parent cd2af3be73
commit 68a8978454
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 567 additions and 3 deletions

View file

@ -0,0 +1,12 @@
# The newline character is being escaped which means that the lexer shouldn't be moved
# back to that position.
# https://github.com/astral-sh/ruff/issues/12004
f'middle {'string':\
'format spec'}
f'middle {'string':\\
'format spec'}
f'middle {'string':\\\
'format spec'}

View file

@ -0,0 +1,4 @@
call(a, b, \\\
def bar():
pass

View file

@ -1373,15 +1373,33 @@ impl<'src> Lexer<'src> {
}
let mut current_position = self.current_range().start();
let reverse_chars = self.source[..current_position.to_usize()].chars().rev();
let mut reverse_chars = self.source[..current_position.to_usize()]
.chars()
.rev()
.peekable();
let mut newline_position = None;
for ch in reverse_chars {
while let Some(ch) = reverse_chars.next() {
if is_python_whitespace(ch) {
current_position -= ch.text_len();
} else if matches!(ch, '\n' | '\r') {
current_position -= ch.text_len();
newline_position = Some(current_position);
// Count the number of backslashes before the newline character.
let mut backslash_count = 0;
while reverse_chars.next_if_eq(&'\\').is_some() {
backslash_count += 1;
}
if backslash_count == 0 {
// No escapes: `\n`
newline_position = Some(current_position);
} else {
if backslash_count % 2 == 0 {
// Even number of backslashes i.e., all backslashes cancel each other out
// which means the newline character is not being escaped.
newline_position = Some(current_position);
}
current_position -= TextSize::new('\\'.text_len().to_u32() * backslash_count);
}
} else {
break;
}

View file

@ -0,0 +1,425 @@
---
source: crates/ruff_python_parser/tests/fixtures.rs
input_file: crates/ruff_python_parser/resources/invalid/re_lexing/fstring_format_spec_1.py
---
## AST
```
Module(
ModModule {
range: 0..298,
body: [
Expr(
StmtExpr {
range: 162..192,
value: FString(
ExprFString {
range: 162..192,
value: FStringValue {
inner: Single(
FString(
FString {
range: 162..192,
elements: [
Literal(
FStringLiteralElement {
range: 164..171,
value: "middle ",
},
),
Expression(
FStringExpressionElement {
range: 171..191,
expression: StringLiteral(
ExprStringLiteral {
range: 172..180,
value: StringLiteralValue {
inner: Single(
StringLiteral {
range: 172..180,
value: "string",
flags: StringLiteralFlags {
quote_style: Single,
prefix: Empty,
triple_quoted: false,
},
},
),
},
},
),
debug_text: None,
conversion: None,
format_spec: Some(
FStringFormatSpec {
range: 181..191,
elements: [
Literal(
FStringLiteralElement {
range: 181..191,
value: " ",
},
),
],
},
),
},
),
],
flags: FStringFlags {
quote_style: Single,
prefix: Regular,
triple_quoted: false,
},
},
),
),
},
},
),
},
),
Expr(
StmtExpr {
range: 192..198,
value: Name(
ExprName {
range: 192..198,
id: "format",
ctx: Load,
},
),
},
),
Expr(
StmtExpr {
range: 199..203,
value: Name(
ExprName {
range: 199..203,
id: "spec",
ctx: Load,
},
),
},
),
Expr(
StmtExpr {
range: 207..228,
value: FString(
ExprFString {
range: 207..228,
value: FStringValue {
inner: Single(
FString(
FString {
range: 207..228,
elements: [
Literal(
FStringLiteralElement {
range: 209..216,
value: "middle ",
},
),
Expression(
FStringExpressionElement {
range: 216..228,
expression: StringLiteral(
ExprStringLiteral {
range: 217..225,
value: StringLiteralValue {
inner: Single(
StringLiteral {
range: 217..225,
value: "string",
flags: StringLiteralFlags {
quote_style: Single,
prefix: Empty,
triple_quoted: false,
},
},
),
},
},
),
debug_text: None,
conversion: None,
format_spec: Some(
FStringFormatSpec {
range: 226..228,
elements: [
Literal(
FStringLiteralElement {
range: 226..228,
value: "\\",
},
),
],
},
),
},
),
],
flags: FStringFlags {
quote_style: Single,
prefix: Regular,
triple_quoted: false,
},
},
),
),
},
},
),
},
),
Expr(
StmtExpr {
range: 237..250,
value: StringLiteral(
ExprStringLiteral {
range: 237..250,
value: StringLiteralValue {
inner: Single(
StringLiteral {
range: 237..250,
value: "format spec",
flags: StringLiteralFlags {
quote_style: Single,
prefix: Empty,
triple_quoted: false,
},
},
),
},
},
),
},
),
Expr(
StmtExpr {
range: 253..285,
value: FString(
ExprFString {
range: 253..285,
value: FStringValue {
inner: Single(
FString(
FString {
range: 253..285,
elements: [
Literal(
FStringLiteralElement {
range: 255..262,
value: "middle ",
},
),
Expression(
FStringExpressionElement {
range: 262..284,
expression: StringLiteral(
ExprStringLiteral {
range: 263..271,
value: StringLiteralValue {
inner: Single(
StringLiteral {
range: 263..271,
value: "string",
flags: StringLiteralFlags {
quote_style: Single,
prefix: Empty,
triple_quoted: false,
},
},
),
},
},
),
debug_text: None,
conversion: None,
format_spec: Some(
FStringFormatSpec {
range: 272..284,
elements: [
Literal(
FStringLiteralElement {
range: 272..284,
value: "\\ ",
},
),
],
},
),
},
),
],
flags: FStringFlags {
quote_style: Single,
prefix: Regular,
triple_quoted: false,
},
},
),
),
},
},
),
},
),
Expr(
StmtExpr {
range: 285..291,
value: Name(
ExprName {
range: 285..291,
id: "format",
ctx: Load,
},
),
},
),
Expr(
StmtExpr {
range: 292..296,
value: Name(
ExprName {
range: 292..296,
id: "spec",
ctx: Load,
},
),
},
),
],
},
)
```
## Errors
|
5 | f'middle {'string':\
6 | 'format spec'}
| ^ Syntax Error: f-string: expecting '}'
7 |
8 | f'middle {'string':\\
|
|
5 | f'middle {'string':\
6 | 'format spec'}
| ^^^^^^ Syntax Error: Simple statements must be separated by newlines or semicolons
7 |
8 | f'middle {'string':\\
|
|
5 | f'middle {'string':\
6 | 'format spec'}
| ^^^^ Syntax Error: Simple statements must be separated by newlines or semicolons
7 |
8 | f'middle {'string':\\
|
|
5 | f'middle {'string':\
6 | 'format spec'}
| _____________________^
7 | |
| |_^ Syntax Error: missing closing quote in string literal
8 | f'middle {'string':\\
9 | 'format spec'}
|
|
5 | f'middle {'string':\
6 | 'format spec'}
7 |
| ^ Syntax Error: Expected a statement
8 | f'middle {'string':\\
9 | 'format spec'}
|
|
6 | 'format spec'}
7 |
8 | f'middle {'string':\\
| Syntax Error: f-string: unterminated string
9 | 'format spec'}
|
|
8 | f'middle {'string':\\
9 | 'format spec'}
| ^^^^^^^^ Syntax Error: Unexpected indentation
10 |
11 | f'middle {'string':\\\
|
|
8 | f'middle {'string':\\
9 | 'format spec'}
| ^ Syntax Error: Expected a statement
10 |
11 | f'middle {'string':\\\
|
|
8 | f'middle {'string':\\
9 | 'format spec'}
| ^ Syntax Error: Expected a statement
10 |
11 | f'middle {'string':\\\
12 | 'format spec'}
|
|
9 | 'format spec'}
10 |
11 | f'middle {'string':\\\
| Syntax Error: Expected a statement
12 | 'format spec'}
|
|
11 | f'middle {'string':\\\
12 | 'format spec'}
| ^ Syntax Error: f-string: expecting '}'
|
|
11 | f'middle {'string':\\\
12 | 'format spec'}
| ^^^^^^ Syntax Error: Simple statements must be separated by newlines or semicolons
|
|
11 | f'middle {'string':\\\
12 | 'format spec'}
| ^^^^ Syntax Error: Simple statements must be separated by newlines or semicolons
|
|
11 | f'middle {'string':\\\
12 | 'format spec'}
| ^^ Syntax Error: Got unexpected string
|
|
11 | f'middle {'string':\\\
12 | 'format spec'}
| Syntax Error: Expected a statement
|

View file

@ -0,0 +1,105 @@
---
source: crates/ruff_python_parser/tests/fixtures.rs
input_file: crates/ruff_python_parser/resources/invalid/re_lexing/line_continuation_1.py
---
## AST
```
Module(
ModModule {
range: 0..36,
body: [
Expr(
StmtExpr {
range: 0..13,
value: Call(
ExprCall {
range: 0..13,
func: Name(
ExprName {
range: 0..4,
id: "call",
ctx: Load,
},
),
arguments: Arguments {
range: 4..13,
args: [
Name(
ExprName {
range: 5..6,
id: "a",
ctx: Load,
},
),
Name(
ExprName {
range: 8..9,
id: "b",
ctx: Load,
},
),
],
keywords: [],
},
},
),
},
),
FunctionDef(
StmtFunctionDef {
range: 16..35,
is_async: false,
decorator_list: [],
name: Identifier {
id: "bar",
range: 20..23,
},
type_params: None,
parameters: Parameters {
range: 23..25,
posonlyargs: [],
args: [],
vararg: None,
kwonlyargs: [],
kwarg: None,
},
returns: None,
body: [
Pass(
StmtPass {
range: 31..35,
},
),
],
},
),
],
},
)
```
## Errors
|
1 | call(a, b, \\\
| ^^ Syntax Error: unexpected character after line continuation character
2 |
3 | def bar():
|
|
1 | call(a, b, \\\
| ^ Syntax Error: unexpected character after line continuation character
2 |
3 | def bar():
|
|
1 | call(a, b, \\\
2 |
| ^ Syntax Error: Expected ')', found newline
3 | def bar():
4 | pass
|