From 68a8978454e2fa21f06e162fd783f44f556f4a56 Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Tue, 25 Jun 2024 07:43:54 +0530 Subject: [PATCH] Consider line continuation character for re-lexing (#12008) ## Summary This PR fixes a bug where the re-lexing logic didn't consider the line continuation character being present before the newline character. This meant that the lexer was being moved back to a newline character that is actually escaped by a preceding `\`. Consider the following code: ```py f'middle {'string':\ 'format spec'} ``` The old token stream is: ``` ... Colon 18..19 FStringMiddle 19..29 (flags = F_STRING) Newline 20..21 Indent 21..29 String 29..42 Rbrace 42..43 ... ``` Notice how the ranges are overlapping between the `FStringMiddle` token and the tokens emitted after moving the lexer backwards. After this fix, the lexer is not moved backwards in this scenario, and the new token stream is: ``` FStringStart 0..2 (flags = F_STRING) FStringMiddle 2..9 (flags = F_STRING) Lbrace 9..10 String 10..18 Colon 18..19 FStringMiddle 19..29 (flags = F_STRING) FStringEnd 29..30 (flags = F_STRING) Name 30..36 Name 37..41 Unknown 41..44 Newline 44..45 ``` fixes: #12004 ## Test Plan Add test cases and update the snapshots. 
--- .../re_lexing/fstring_format_spec_1.py | 12 + .../invalid/re_lexing/line_continuation_1.py | 4 + crates/ruff_python_parser/src/lexer.rs | 24 +- ...x@re_lexing__fstring_format_spec_1.py.snap | 425 ++++++++++++++++++ ...tax@re_lexing__line_continuation_1.py.snap | 105 +++++ 5 files changed, 567 insertions(+), 3 deletions(-) create mode 100644 crates/ruff_python_parser/resources/invalid/re_lexing/fstring_format_spec_1.py create mode 100644 crates/ruff_python_parser/resources/invalid/re_lexing/line_continuation_1.py create mode 100644 crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lexing__fstring_format_spec_1.py.snap create mode 100644 crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lexing__line_continuation_1.py.snap diff --git a/crates/ruff_python_parser/resources/invalid/re_lexing/fstring_format_spec_1.py b/crates/ruff_python_parser/resources/invalid/re_lexing/fstring_format_spec_1.py new file mode 100644 index 0000000000..271bd889d3 --- /dev/null +++ b/crates/ruff_python_parser/resources/invalid/re_lexing/fstring_format_spec_1.py @@ -0,0 +1,12 @@ +# The newline character is being escaped which means that the lexer shouldn't be moved +# back to that position. 
+# https://github.com/astral-sh/ruff/issues/12004 + +f'middle {'string':\ + 'format spec'} + +f'middle {'string':\\ + 'format spec'} + +f'middle {'string':\\\ + 'format spec'} \ No newline at end of file diff --git a/crates/ruff_python_parser/resources/invalid/re_lexing/line_continuation_1.py b/crates/ruff_python_parser/resources/invalid/re_lexing/line_continuation_1.py new file mode 100644 index 0000000000..1006e4fabe --- /dev/null +++ b/crates/ruff_python_parser/resources/invalid/re_lexing/line_continuation_1.py @@ -0,0 +1,4 @@ +call(a, b, \\\ + +def bar(): + pass diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs index 4384df0da9..47ca855ecc 100644 --- a/crates/ruff_python_parser/src/lexer.rs +++ b/crates/ruff_python_parser/src/lexer.rs @@ -1373,15 +1373,33 @@ impl<'src> Lexer<'src> { } let mut current_position = self.current_range().start(); - let reverse_chars = self.source[..current_position.to_usize()].chars().rev(); + let mut reverse_chars = self.source[..current_position.to_usize()] + .chars() + .rev() + .peekable(); let mut newline_position = None; - for ch in reverse_chars { + while let Some(ch) = reverse_chars.next() { if is_python_whitespace(ch) { current_position -= ch.text_len(); } else if matches!(ch, '\n' | '\r') { current_position -= ch.text_len(); - newline_position = Some(current_position); + // Count the number of backslashes before the newline character. + let mut backslash_count = 0; + while reverse_chars.next_if_eq(&'\\').is_some() { + backslash_count += 1; + } + if backslash_count == 0 { + // No escapes: `\n` + newline_position = Some(current_position); + } else { + if backslash_count % 2 == 0 { + // Even number of backslashes i.e., all backslashes cancel each other out + // which means the newline character is not being escaped. 
+ newline_position = Some(current_position); + } + current_position -= TextSize::new('\\'.text_len().to_u32() * backslash_count); + } } else { break; } diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lexing__fstring_format_spec_1.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lexing__fstring_format_spec_1.py.snap new file mode 100644 index 0000000000..7fe73227d2 --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lexing__fstring_format_spec_1.py.snap @@ -0,0 +1,425 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/invalid/re_lexing/fstring_format_spec_1.py +--- +## AST + +``` +Module( + ModModule { + range: 0..298, + body: [ + Expr( + StmtExpr { + range: 162..192, + value: FString( + ExprFString { + range: 162..192, + value: FStringValue { + inner: Single( + FString( + FString { + range: 162..192, + elements: [ + Literal( + FStringLiteralElement { + range: 164..171, + value: "middle ", + }, + ), + Expression( + FStringExpressionElement { + range: 171..191, + expression: StringLiteral( + ExprStringLiteral { + range: 172..180, + value: StringLiteralValue { + inner: Single( + StringLiteral { + range: 172..180, + value: "string", + flags: StringLiteralFlags { + quote_style: Single, + prefix: Empty, + triple_quoted: false, + }, + }, + ), + }, + }, + ), + debug_text: None, + conversion: None, + format_spec: Some( + FStringFormatSpec { + range: 181..191, + elements: [ + Literal( + FStringLiteralElement { + range: 181..191, + value: " ", + }, + ), + ], + }, + ), + }, + ), + ], + flags: FStringFlags { + quote_style: Single, + prefix: Regular, + triple_quoted: false, + }, + }, + ), + ), + }, + }, + ), + }, + ), + Expr( + StmtExpr { + range: 192..198, + value: Name( + ExprName { + range: 192..198, + id: "format", + ctx: Load, + }, + ), + }, + ), + Expr( + StmtExpr { + range: 199..203, + value: Name( + ExprName { + range: 199..203, + id: 
"spec", + ctx: Load, + }, + ), + }, + ), + Expr( + StmtExpr { + range: 207..228, + value: FString( + ExprFString { + range: 207..228, + value: FStringValue { + inner: Single( + FString( + FString { + range: 207..228, + elements: [ + Literal( + FStringLiteralElement { + range: 209..216, + value: "middle ", + }, + ), + Expression( + FStringExpressionElement { + range: 216..228, + expression: StringLiteral( + ExprStringLiteral { + range: 217..225, + value: StringLiteralValue { + inner: Single( + StringLiteral { + range: 217..225, + value: "string", + flags: StringLiteralFlags { + quote_style: Single, + prefix: Empty, + triple_quoted: false, + }, + }, + ), + }, + }, + ), + debug_text: None, + conversion: None, + format_spec: Some( + FStringFormatSpec { + range: 226..228, + elements: [ + Literal( + FStringLiteralElement { + range: 226..228, + value: "\\", + }, + ), + ], + }, + ), + }, + ), + ], + flags: FStringFlags { + quote_style: Single, + prefix: Regular, + triple_quoted: false, + }, + }, + ), + ), + }, + }, + ), + }, + ), + Expr( + StmtExpr { + range: 237..250, + value: StringLiteral( + ExprStringLiteral { + range: 237..250, + value: StringLiteralValue { + inner: Single( + StringLiteral { + range: 237..250, + value: "format spec", + flags: StringLiteralFlags { + quote_style: Single, + prefix: Empty, + triple_quoted: false, + }, + }, + ), + }, + }, + ), + }, + ), + Expr( + StmtExpr { + range: 253..285, + value: FString( + ExprFString { + range: 253..285, + value: FStringValue { + inner: Single( + FString( + FString { + range: 253..285, + elements: [ + Literal( + FStringLiteralElement { + range: 255..262, + value: "middle ", + }, + ), + Expression( + FStringExpressionElement { + range: 262..284, + expression: StringLiteral( + ExprStringLiteral { + range: 263..271, + value: StringLiteralValue { + inner: Single( + StringLiteral { + range: 263..271, + value: "string", + flags: StringLiteralFlags { + quote_style: Single, + prefix: Empty, + triple_quoted: false, + }, + }, 
+ ), + }, + }, + ), + debug_text: None, + conversion: None, + format_spec: Some( + FStringFormatSpec { + range: 272..284, + elements: [ + Literal( + FStringLiteralElement { + range: 272..284, + value: "\\ ", + }, + ), + ], + }, + ), + }, + ), + ], + flags: FStringFlags { + quote_style: Single, + prefix: Regular, + triple_quoted: false, + }, + }, + ), + ), + }, + }, + ), + }, + ), + Expr( + StmtExpr { + range: 285..291, + value: Name( + ExprName { + range: 285..291, + id: "format", + ctx: Load, + }, + ), + }, + ), + Expr( + StmtExpr { + range: 292..296, + value: Name( + ExprName { + range: 292..296, + id: "spec", + ctx: Load, + }, + ), + }, + ), + ], + }, +) +``` +## Errors + + | +5 | f'middle {'string':\ +6 | 'format spec'} + | ^ Syntax Error: f-string: expecting '}' +7 | +8 | f'middle {'string':\\ + | + + + | +5 | f'middle {'string':\ +6 | 'format spec'} + | ^^^^^^ Syntax Error: Simple statements must be separated by newlines or semicolons +7 | +8 | f'middle {'string':\\ + | + + + | +5 | f'middle {'string':\ +6 | 'format spec'} + | ^^^^ Syntax Error: Simple statements must be separated by newlines or semicolons +7 | +8 | f'middle {'string':\\ + | + + + | +5 | f'middle {'string':\ +6 | 'format spec'} + | _____________________^ +7 | | + | |_^ Syntax Error: missing closing quote in string literal +8 | f'middle {'string':\\ +9 | 'format spec'} + | + + + | +5 | f'middle {'string':\ +6 | 'format spec'} +7 | + | ^ Syntax Error: Expected a statement +8 | f'middle {'string':\\ +9 | 'format spec'} + | + + + | +6 | 'format spec'} +7 | +8 | f'middle {'string':\\ + | Syntax Error: f-string: unterminated string +9 | 'format spec'} + | + + + | + 8 | f'middle {'string':\\ + 9 | 'format spec'} + | ^^^^^^^^ Syntax Error: Unexpected indentation +10 | +11 | f'middle {'string':\\\ + | + + + | + 8 | f'middle {'string':\\ + 9 | 'format spec'} + | ^ Syntax Error: Expected a statement +10 | +11 | f'middle {'string':\\\ + | + + + | + 8 | f'middle {'string':\\ + 9 | 'format spec'} + | ^ 
Syntax Error: Expected a statement +10 | +11 | f'middle {'string':\\\ +12 | 'format spec'} + | + + + | + 9 | 'format spec'} +10 | +11 | f'middle {'string':\\\ + | Syntax Error: Expected a statement +12 | 'format spec'} + | + + + | +11 | f'middle {'string':\\\ +12 | 'format spec'} + | ^ Syntax Error: f-string: expecting '}' + | + + + | +11 | f'middle {'string':\\\ +12 | 'format spec'} + | ^^^^^^ Syntax Error: Simple statements must be separated by newlines or semicolons + | + + + | +11 | f'middle {'string':\\\ +12 | 'format spec'} + | ^^^^ Syntax Error: Simple statements must be separated by newlines or semicolons + | + + + | +11 | f'middle {'string':\\\ +12 | 'format spec'} + | ^^ Syntax Error: Got unexpected string + | + + + | +11 | f'middle {'string':\\\ +12 | 'format spec'} + | Syntax Error: Expected a statement + | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lexing__line_continuation_1.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lexing__line_continuation_1.py.snap new file mode 100644 index 0000000000..c00e557392 --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lexing__line_continuation_1.py.snap @@ -0,0 +1,105 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/invalid/re_lexing/line_continuation_1.py +--- +## AST + +``` +Module( + ModModule { + range: 0..36, + body: [ + Expr( + StmtExpr { + range: 0..13, + value: Call( + ExprCall { + range: 0..13, + func: Name( + ExprName { + range: 0..4, + id: "call", + ctx: Load, + }, + ), + arguments: Arguments { + range: 4..13, + args: [ + Name( + ExprName { + range: 5..6, + id: "a", + ctx: Load, + }, + ), + Name( + ExprName { + range: 8..9, + id: "b", + ctx: Load, + }, + ), + ], + keywords: [], + }, + }, + ), + }, + ), + FunctionDef( + StmtFunctionDef { + range: 16..35, + is_async: false, + decorator_list: [], + name: Identifier { + id: "bar", + range: 20..23, + }, + type_params: 
None, + parameters: Parameters { + range: 23..25, + posonlyargs: [], + args: [], + vararg: None, + kwonlyargs: [], + kwarg: None, + }, + returns: None, + body: [ + Pass( + StmtPass { + range: 31..35, + }, + ), + ], + }, + ), + ], + }, +) +``` +## Errors + + | +1 | call(a, b, \\\ + | ^^ Syntax Error: unexpected character after line continuation character +2 | +3 | def bar(): + | + + + | +1 | call(a, b, \\\ + | ^ Syntax Error: unexpected character after line continuation character +2 | +3 | def bar(): + | + + + | +1 | call(a, b, \\\ +2 | + | ^ Syntax Error: Expected ')', found newline +3 | def bar(): +4 | pass + |