From 8499abfa7feec5b496a438c4dc310ee9130d4745 Mon Sep 17 00:00:00 2001 From: Dhruv Manilawala Date: Mon, 17 Jun 2024 12:17:00 +0530 Subject: [PATCH] Implement re-lexing logic for better error recovery (#11845) ## Summary This PR implements the re-lexing logic in the parser. This logic is only applied when recovering from an error during list parsing. The logic is as follows: 1. During list parsing, if an unexpected token is encountered and it detects that an outer context can understand it and thus recover from it, it invokes the re-lexing logic in the lexer 2. This logic first checks if the lexer is in a parenthesized context and returns if it's not. Thus, the logic is a no-op if the lexer isn't in a parenthesized context 3. It then reduces the nesting level by 1. It shouldn't reset it to 0 because otherwise the recovery from nested list parsing will be incorrect 4. Then, it tries to find the last newline character going backwards from the current position of the lexer. This skips any whitespace, but if it encounters any character other than newline or whitespace, it aborts. 5. Now, if there's a newline character, then it needs to be re-lexed in a logical context which means that the lexer needs to emit it as a `Newline` token instead of `NonLogicalNewline`. 6. If the re-lexing gives a different token than the current one, the token source needs to update its token collection to remove all the tokens which come after the new current position. It turns out that the list parsing isn't that happy with the results so it requires some re-arranging such that the following two errors are raised correctly: 1. Expected comma 2. Recovery context error For (1), the following scenarios need to be considered: * Missing comma between two elements * Half parsed element because the grammar doesn't allow it (for example, named expressions) For (2), the following scenarios need to be considered: 1. 
If the parser is at a comma which means that there's a missing element otherwise the comma would've been consumed by the first `eat` call above. And, the parser doesn't take the re-lexing route on a comma token. 2. If it's the first element and the current token is not a comma which means that it's an invalid element. resolves: #11640 ## Test Plan - [x] Update existing test snapshots and validate them - [x] Add additional test cases specific to the re-lexing logic and validate the snapshots - [x] Run the fuzzer on 3000+ valid inputs - [x] Run the fuzzer on invalid inputs - [x] Run the parser on various open source projects - [x] Make sure the ecosystem changes are none --- .gitattributes | 3 + .../err/comma_separated_missing_comma.py | 1 + ...eparated_missing_comma_between_elements.py | 2 + ...eparated_missing_element_between_commas.py | 1 + .../comma_separated_missing_first_element.py | 1 + ...comma_separated_regular_list_terminator.py | 7 + .../resources/invalid/re_lex_logical_token.py | 46 ++ .../invalid/re_lex_logical_token_mac_eol.py | 1 + .../re_lex_logical_token_windows_eol.py | 3 + crates/ruff_python_parser/src/lexer.rs | 112 ++++ crates/ruff_python_parser/src/parser/mod.rs | 132 ++-- crates/ruff_python_parser/src/token_source.rs | 19 +- ...class_def_unclosed_type_param_list.py.snap | 56 +- ...ntax@comma_separated_missing_comma.py.snap | 70 ++ ...ted_missing_comma_between_elements.py.snap | 59 ++ ...ted_missing_element_between_commas.py.snap | 58 ++ ...ma_separated_missing_first_element.py.snap | 52 ++ ...d_syntax@dotted_name_multiple_dots.py.snap | 2 +- ...expressions__arguments__unclosed_0.py.snap | 9 +- ...expressions__arguments__unclosed_1.py.snap | 9 +- ...expressions__arguments__unclosed_2.py.snap | 9 +- ...ons__dict__missing_closing_brace_2.py.snap | 9 +- ...s__list__missing_closing_bracket_3.py.snap | 11 +- ...ressions__parenthesized__generator.py.snap | 2 +- ...nthesized__missing_closing_paren_3.py.snap | 11 +- 
...set__missing_closing_curly_brace_3.py.snap | 11 +- ...id_syntax@from_import_missing_rpar.py.snap | 18 +- ...nction_def_unclosed_parameter_list.py.snap | 13 +- ...ction_def_unclosed_type_param_list.py.snap | 56 +- ...alid_syntax@global_stmt_expression.py.snap | 2 +- ...ax@import_stmt_parenthesized_names.py.snap | 4 +- ...lid_syntax@import_stmt_star_import.py.snap | 9 +- ...id_syntax@nonlocal_stmt_expression.py.snap | 2 +- ...ax@params_var_keyword_with_default.py.snap | 2 +- ...params_var_positional_with_default.py.snap | 2 +- ...nvalid_syntax@re_lex_logical_token.py.snap | 602 ++++++++++++++++++ ...yntax@re_lex_logical_token_mac_eol.py.snap | 104 +++ ...x@re_lex_logical_token_windows_eol.py.snap | 107 ++++ ...atements__function_type_parameters.py.snap | 4 +- ...ax@statements__match__as_pattern_3.py.snap | 2 +- ...s__with__ambiguous_lpar_with_items.py.snap | 6 +- ..._items_parenthesized_missing_comma.py.snap | 8 +- ..._separated_regular_list_terminator.py.snap | 168 +++++ 43 files changed, 1593 insertions(+), 212 deletions(-) create mode 100644 crates/ruff_python_parser/resources/inline/err/comma_separated_missing_comma.py create mode 100644 crates/ruff_python_parser/resources/inline/err/comma_separated_missing_comma_between_elements.py create mode 100644 crates/ruff_python_parser/resources/inline/err/comma_separated_missing_element_between_commas.py create mode 100644 crates/ruff_python_parser/resources/inline/err/comma_separated_missing_first_element.py create mode 100644 crates/ruff_python_parser/resources/inline/ok/comma_separated_regular_list_terminator.py create mode 100644 crates/ruff_python_parser/resources/invalid/re_lex_logical_token.py create mode 100644 crates/ruff_python_parser/resources/invalid/re_lex_logical_token_mac_eol.py create mode 100644 crates/ruff_python_parser/resources/invalid/re_lex_logical_token_windows_eol.py create mode 100644 crates/ruff_python_parser/tests/snapshots/invalid_syntax@comma_separated_missing_comma.py.snap create mode 
100644 crates/ruff_python_parser/tests/snapshots/invalid_syntax@comma_separated_missing_comma_between_elements.py.snap create mode 100644 crates/ruff_python_parser/tests/snapshots/invalid_syntax@comma_separated_missing_element_between_commas.py.snap create mode 100644 crates/ruff_python_parser/tests/snapshots/invalid_syntax@comma_separated_missing_first_element.py.snap create mode 100644 crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lex_logical_token.py.snap create mode 100644 crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lex_logical_token_mac_eol.py.snap create mode 100644 crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lex_logical_token_windows_eol.py.snap create mode 100644 crates/ruff_python_parser/tests/snapshots/valid_syntax@comma_separated_regular_list_terminator.py.snap diff --git a/.gitattributes b/.gitattributes index 610c6b39ba..8f333acef6 100644 --- a/.gitattributes +++ b/.gitattributes @@ -8,6 +8,9 @@ crates/ruff_linter/resources/test/fixtures/pycodestyle/W391_3.py text eol=crlf crates/ruff_python_formatter/resources/test/fixtures/ruff/docstring_code_examples_crlf.py text eol=crlf crates/ruff_python_formatter/tests/snapshots/format@docstring_code_examples_crlf.py.snap text eol=crlf +crates/ruff_python_parser/resources/invalid/re_lex_logical_token_windows_eol.py text eol=crlf +crates/ruff_python_parser/resources/invalid/re_lex_logical_token_mac_eol.py text eol=cr + crates/ruff_python_parser/resources/inline linguist-generated=true ruff.schema.json linguist-generated=true text=auto eol=lf diff --git a/crates/ruff_python_parser/resources/inline/err/comma_separated_missing_comma.py b/crates/ruff_python_parser/resources/inline/err/comma_separated_missing_comma.py new file mode 100644 index 0000000000..45b3ef8f0a --- /dev/null +++ b/crates/ruff_python_parser/resources/inline/err/comma_separated_missing_comma.py @@ -0,0 +1 @@ +call(**x := 1) diff --git 
a/crates/ruff_python_parser/resources/inline/err/comma_separated_missing_comma_between_elements.py b/crates/ruff_python_parser/resources/inline/err/comma_separated_missing_comma_between_elements.py new file mode 100644 index 0000000000..588e466fef --- /dev/null +++ b/crates/ruff_python_parser/resources/inline/err/comma_separated_missing_comma_between_elements.py @@ -0,0 +1,2 @@ +# The comma between the first two elements is expected in `parse_list_expression`. +[0, 1 2] diff --git a/crates/ruff_python_parser/resources/inline/err/comma_separated_missing_element_between_commas.py b/crates/ruff_python_parser/resources/inline/err/comma_separated_missing_element_between_commas.py new file mode 100644 index 0000000000..0229737c4b --- /dev/null +++ b/crates/ruff_python_parser/resources/inline/err/comma_separated_missing_element_between_commas.py @@ -0,0 +1 @@ +[0, 1, , 2] diff --git a/crates/ruff_python_parser/resources/inline/err/comma_separated_missing_first_element.py b/crates/ruff_python_parser/resources/inline/err/comma_separated_missing_first_element.py new file mode 100644 index 0000000000..bc29ed8166 --- /dev/null +++ b/crates/ruff_python_parser/resources/inline/err/comma_separated_missing_first_element.py @@ -0,0 +1 @@ +call(= 1) diff --git a/crates/ruff_python_parser/resources/inline/ok/comma_separated_regular_list_terminator.py b/crates/ruff_python_parser/resources/inline/ok/comma_separated_regular_list_terminator.py new file mode 100644 index 0000000000..d1c5aa1fcd --- /dev/null +++ b/crates/ruff_python_parser/resources/inline/ok/comma_separated_regular_list_terminator.py @@ -0,0 +1,7 @@ +# The first element is parsed by `parse_list_like_expression` and the comma after +# the first element is expected by `parse_list_expression` +[0] +[0, 1] +[0, 1,] +[0, 1, 2] +[0, 1, 2,] diff --git a/crates/ruff_python_parser/resources/invalid/re_lex_logical_token.py b/crates/ruff_python_parser/resources/invalid/re_lex_logical_token.py new file mode 100644 index 
0000000000..cbcaa26e91 --- /dev/null +++ b/crates/ruff_python_parser/resources/invalid/re_lex_logical_token.py @@ -0,0 +1,46 @@ +# No indentation before the function definition +if call(foo +def bar(): + pass + + +# Indented function definition +if call(foo + def bar(): + pass + + +# There are multiple non-logical newlines (blank lines) in the `if` body +if call(foo + + + def bar(): + pass + + +# There are trailing whitespaces in the blank line inside the `if` body +if call(foo + + def bar(): + pass + + +# The lexer is nested with multiple levels of parentheses +if call(foo, [a, b + def bar(): + pass + + +# The outer parenthesis is closed but the inner bracket isn't +if call(foo, [a, b) + def bar(): + pass + + +# The parser tries to recover from an unclosed `]` when the current token is `)`. This +# test is to make sure it emits a `NonLogicalNewline` token after `b`. +if call(foo, [a, + b +) + def bar(): + pass \ No newline at end of file diff --git a/crates/ruff_python_parser/resources/invalid/re_lex_logical_token_mac_eol.py b/crates/ruff_python_parser/resources/invalid/re_lex_logical_token_mac_eol.py new file mode 100644 index 0000000000..0038f8b151 --- /dev/null +++ b/crates/ruff_python_parser/resources/invalid/re_lex_logical_token_mac_eol.py @@ -0,0 +1 @@ +if call(foo, [a, b def bar(): pass \ No newline at end of file diff --git a/crates/ruff_python_parser/resources/invalid/re_lex_logical_token_windows_eol.py b/crates/ruff_python_parser/resources/invalid/re_lex_logical_token_windows_eol.py new file mode 100644 index 0000000000..e59a3af014 --- /dev/null +++ b/crates/ruff_python_parser/resources/invalid/re_lex_logical_token_windows_eol.py @@ -0,0 +1,3 @@ +if call(foo, [a, b + def bar(): + pass diff --git a/crates/ruff_python_parser/src/lexer.rs b/crates/ruff_python_parser/src/lexer.rs index 5e6b5b3160..0decf4cb80 100644 --- a/crates/ruff_python_parser/src/lexer.rs +++ b/crates/ruff_python_parser/src/lexer.rs @@ -1307,6 +1307,118 @@ impl<'src> Lexer<'src> { } } + 
/// Re-lex the current token in the context of a logical line. + /// + /// Returns a boolean indicating whether the lexer's position has changed. This could result + /// into the new current token being different than the previous current token but is not + /// necessarily true. If the return value is `true` then the caller is responsible for updating + /// it's state accordingly. + /// + /// This method is a no-op if the lexer isn't in a parenthesized context. + /// + /// ## Explanation + /// + /// The lexer emits two different kinds of newline token based on the context. If it's in a + /// parenthesized context, it'll emit a [`NonLogicalNewline`] token otherwise it'll emit a + /// regular [`Newline`] token. Based on the type of newline token, the lexer will consume and + /// emit the indentation tokens appropriately which affects the structure of the code. + /// + /// For example: + /// ```py + /// if call(foo + /// def bar(): + /// pass + /// ``` + /// + /// Here, the lexer emits a [`NonLogicalNewline`] token after `foo` which means that the lexer + /// doesn't emit an `Indent` token before the `def` keyword. This leads to an AST which + /// considers the function `bar` as part of the module block and the `if` block remains empty. + /// + /// This method is to facilitate the parser if it recovers from these kind of scenarios so that + /// the lexer can then re-lex a [`NonLogicalNewline`] token to a [`Newline`] token which in + /// turn helps the parser to build the correct AST. + /// + /// In the above snippet, it would mean that this method would move the lexer back to the + /// newline character after the `foo` token and emit it as a [`Newline`] token instead of + /// [`NonLogicalNewline`]. This means that the next token emitted by the lexer would be an + /// `Indent` token. + /// + /// There are cases where the lexer's position will change but the re-lexed token will remain + /// the same. 
This is to help the parser to add the error message at an appropriate location. + /// Consider the following example: + /// + /// ```py + /// if call(foo, [a, b + /// def bar(): + /// pass + /// ``` + /// + /// Here, the parser recovers from two unclosed parenthesis. The inner unclosed `[` will call + /// into the re-lexing logic and reduce the nesting level from 2 to 1. And, the re-lexing logic + /// will move the lexer at the newline after `b` but still emit a [`NonLogicalNewline`] token. + /// Only after the parser recovers from the outer unclosed `(` does the re-lexing logic emit + /// the [`Newline`] token. + /// + /// [`Newline`]: TokenKind::Newline + /// [`NonLogicalNewline`]: TokenKind::NonLogicalNewline + pub(crate) fn re_lex_logical_token(&mut self) -> bool { + if self.nesting == 0 { + return false; + } + + // Reduce the nesting level because the parser recovered from an error inside list parsing + // i.e., it recovered from an unclosed parenthesis (`(`, `[`, or `{`). + self.nesting -= 1; + + let current_position = self.current_range().start(); + let reverse_chars = self.source[..current_position.to_usize()].chars().rev(); + let mut new_position = current_position; + let mut has_newline = false; + + for ch in reverse_chars { + if is_python_whitespace(ch) { + new_position -= ch.text_len(); + } else if matches!(ch, '\n' | '\r') { + has_newline |= true; + new_position -= ch.text_len(); + } else { + break; + } + } + + // The lexer should only be moved if there's a newline character which needs to be + // re-lexed. + if new_position != current_position && has_newline { + // Earlier we reduced the nesting level unconditionally. Now that we know the lexer's + // position is going to be moved back, the lexer needs to be put back into a + // parenthesized context if the current token is a closing parenthesis. 
+ // + // ```py + // (a, [b, + // c + // ) + // ``` + // + // Here, the parser would request to re-lex the token when it's at `)` and can recover + // from an unclosed `[`. This method will move the lexer back to the newline character + // after `c` which means it goes back into parenthesized context. + if matches!( + self.current_kind, + TokenKind::Rpar | TokenKind::Rsqb | TokenKind::Rbrace + ) { + self.nesting += 1; + } + + self.cursor = Cursor::new(self.source); + self.cursor.skip_bytes(new_position.to_usize()); + self.state = State::Other; + self.next_token(); + true + } else { + false + } + } + #[inline] fn token_range(&self) -> TextRange { let end = self.offset(); diff --git a/crates/ruff_python_parser/src/parser/mod.rs b/crates/ruff_python_parser/src/parser/mod.rs index d113ff992f..b58284e2a9 100644 --- a/crates/ruff_python_parser/src/parser/mod.rs +++ b/crates/ruff_python_parser/src/parser/mod.rs @@ -473,11 +473,6 @@ impl<'src> Parser<'src> { loop { progress.assert_progressing(self); - // The end of file marker ends all lists. - if self.at(TokenKind::EndOfFile) { - break; - } - if recovery_context_kind.is_list_element(self) { parse_element(self); } else if recovery_context_kind.is_list_terminator(self) { @@ -533,54 +528,96 @@ impl<'src> Parser<'src> { .recovery_context .union(RecoveryContext::from_kind(recovery_context_kind)); + let mut first_element = true; let mut trailing_comma_range: Option = None; loop { progress.assert_progressing(self); - // The end of file marker ends all lists. - if self.at(TokenKind::EndOfFile) { - break; - } - if recovery_context_kind.is_list_element(self) { parse_element(self); + // Only unset this when we've completely parsed a single element. This is mainly to + // raise the correct error in case the first element isn't valid and the current + // token isn't a comma. Without this knowledge, the parser would later expect a + // comma instead of raising the context error. 
+ first_element = false; + let maybe_comma_range = self.current_token_range(); if self.eat(TokenKind::Comma) { trailing_comma_range = Some(maybe_comma_range); continue; } trailing_comma_range = None; - - if recovery_context_kind.is_list_terminator(self) { - break; - } - - self.expect(TokenKind::Comma); - } else if recovery_context_kind.is_list_terminator(self) { - break; - } else { - // Not a recognised element. Add an error and either skip the token or break - // parsing the list if the token is recognised as an element or terminator of an - // enclosing list. - let error = recovery_context_kind.create_error(self); - self.add_error(error, self.current_token_range()); - - // Run the error recovery: This also handles the case when an element is missing - // between two commas: `a,,b` - if self.is_enclosing_list_element_or_terminator() { - break; - } - - if self.at(TokenKind::Comma) { - trailing_comma_range = Some(self.current_token_range()); - } else { - trailing_comma_range = None; - } - - self.bump_any(); } + + // test_ok comma_separated_regular_list_terminator + // # The first element is parsed by `parse_list_like_expression` and the comma after + // # the first element is expected by `parse_list_expression` + // [0] + // [0, 1] + // [0, 1,] + // [0, 1, 2] + // [0, 1, 2,] + if recovery_context_kind.is_regular_list_terminator(self) { + break; + } + + // test_err comma_separated_missing_comma_between_elements + // # The comma between the first two elements is expected in `parse_list_expression`. + // [0, 1 2] + if recovery_context_kind.is_list_element(self) { + // This is a special case to expect a comma between two elements and should be + // checked before running the error recovery. This is because the error recovery + // will always run as the parser is currently at a list element. 
+ self.expect(TokenKind::Comma); + continue; + } + + // Run the error recovery: If the token is recognised as an element or terminator of an + // enclosing list, then we try to re-lex in the context of a logical line and break out + // of list parsing. + if self.is_enclosing_list_element_or_terminator() { + self.tokens.re_lex_logical_token(); + break; + } + + if first_element || self.at(TokenKind::Comma) { + // There are two conditions when we need to add the recovery context error: + // + // 1. If the parser is at a comma which means that there's a missing element + // otherwise the comma would've been consumed by the first `eat` call above. + // And, the parser doesn't take the re-lexing route on a comma token. + // 2. If it's the first element and the current token is not a comma which means + // that it's an invalid element. + + // test_err comma_separated_missing_element_between_commas + // [0, 1, , 2] + + // test_err comma_separated_missing_first_element + // call(= 1) + self.add_error( + recovery_context_kind.create_error(self), + self.current_token_range(), + ); + + trailing_comma_range = if self.at(TokenKind::Comma) { + Some(self.current_token_range()) + } else { + None + }; + } else { + // Otherwise, there should've been a comma at this position. This could be because + // the element isn't consumed completely by `parse_element`. + + // test_err comma_separated_missing_comma + // call(**x := 1) + self.expect(TokenKind::Comma); + + trailing_comma_range = None; + } + + self.bump_any(); } if let Some(trailing_comma_range) = trailing_comma_range { @@ -885,13 +922,32 @@ impl RecoveryContextKind { } /// Returns `true` if the parser is at a token that terminates the list as per the context. + /// + /// This token could either end the list or is only present for better error recovery. Refer to + /// [`is_regular_list_terminator`] to only check against the former. 
+ /// + /// [`is_regular_list_terminator`]: RecoveryContextKind::is_regular_list_terminator fn is_list_terminator(self, p: &Parser) -> bool { self.list_terminator_kind(p).is_some() } + /// Returns `true` if the parser is at a token that terminates the list as per the context but + /// the token isn't part of the error recovery set. + fn is_regular_list_terminator(self, p: &Parser) -> bool { + matches!( + self.list_terminator_kind(p), + Some(ListTerminatorKind::Regular) + ) + } + /// Checks the current token the parser is at and returns the list terminator kind if the token /// terminates the list as per the context. fn list_terminator_kind(self, p: &Parser) -> Option { + // The end of file marker ends all lists. + if p.at(TokenKind::EndOfFile) { + return Some(ListTerminatorKind::Regular); + } + match self { // The parser must consume all tokens until the end RecoveryContextKind::ModuleStatements => None, diff --git a/crates/ruff_python_parser/src/token_source.rs b/crates/ruff_python_parser/src/token_source.rs index a8a54e68f0..7662999502 100644 --- a/crates/ruff_python_parser/src/token_source.rs +++ b/crates/ruff_python_parser/src/token_source.rs @@ -1,4 +1,4 @@ -use ruff_text_size::{TextRange, TextSize}; +use ruff_text_size::{Ranged, TextRange, TextSize}; use crate::lexer::{Lexer, LexerCheckpoint, LexicalError, Token, TokenFlags, TokenValue}; use crate::{Mode, TokenKind}; @@ -58,6 +58,23 @@ impl<'src> TokenSource<'src> { self.lexer.take_value() } + /// Calls the underlying [`re_lex_logical_token`] method on the lexer and updates the token + /// vector accordingly. + /// + /// [`re_lex_logical_token`]: Lexer::re_lex_logical_token + pub(crate) fn re_lex_logical_token(&mut self) { + if self.lexer.re_lex_logical_token() { + let current_start = self.current_range().start(); + while self + .tokens + .last() + .is_some_and(|last| last.start() >= current_start) + { + self.tokens.pop(); + } + } + } + /// Returns the next non-trivia token without consuming it. 
/// /// Use [`peek2`] to get the next two tokens. diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@class_def_unclosed_type_param_list.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@class_def_unclosed_type_param_list.py.snap index 0fa2471470..ce87aec207 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@class_def_unclosed_type_param_list.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@class_def_unclosed_type_param_list.py.snap @@ -11,7 +11,7 @@ Module( body: [ ClassDef( StmtClassDef { - range: 0..40, + range: 0..33, decorator_list: [], name: Identifier { id: "Foo", @@ -73,29 +73,29 @@ Module( range: 29..33, }, ), - Assign( - StmtAssign { - range: 34..40, - targets: [ - Name( - ExprName { - range: 34..35, - id: "x", - ctx: Store, - }, - ), - ], - value: NumberLiteral( - ExprNumberLiteral { - range: 38..40, - value: Int( - 10, - ), - }, - ), + ], + }, + ), + Assign( + StmtAssign { + range: 34..40, + targets: [ + Name( + ExprName { + range: 34..35, + id: "x", + ctx: Store, }, ), ], + value: NumberLiteral( + ExprNumberLiteral { + range: 38..40, + value: Int( + 10, + ), + }, + ), }, ), ], @@ -108,19 +108,5 @@ Module( 1 | class Foo[T1, *T2(a, b): | ^ Syntax Error: Expected ']', found '(' 2 | pass -3 | x = 10 - | - - - | -1 | class Foo[T1, *T2(a, b): -2 | pass -3 | x = 10 - | ^ Syntax Error: Simple statements must be separated by newlines or semicolons - | - - - | -2 | pass 3 | x = 10 | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@comma_separated_missing_comma.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@comma_separated_missing_comma.py.snap new file mode 100644 index 0000000000..a278a2155f --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@comma_separated_missing_comma.py.snap @@ -0,0 +1,70 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: 
crates/ruff_python_parser/resources/inline/err/comma_separated_missing_comma.py +--- +## AST + +``` +Module( + ModModule { + range: 0..15, + body: [ + Expr( + StmtExpr { + range: 0..14, + value: Call( + ExprCall { + range: 0..14, + func: Name( + ExprName { + range: 0..4, + id: "call", + ctx: Load, + }, + ), + arguments: Arguments { + range: 4..14, + args: [ + NumberLiteral( + ExprNumberLiteral { + range: 12..13, + value: Int( + 1, + ), + }, + ), + ], + keywords: [ + Keyword { + range: 5..8, + arg: None, + value: Name( + ExprName { + range: 7..8, + id: "x", + ctx: Load, + }, + ), + }, + ], + }, + }, + ), + }, + ), + ], + }, +) +``` +## Errors + + | +1 | call(**x := 1) + | ^^ Syntax Error: Expected ',', found ':=' + | + + + | +1 | call(**x := 1) + | ^ Syntax Error: Positional argument cannot follow keyword argument unpacking + | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@comma_separated_missing_comma_between_elements.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@comma_separated_missing_comma_between_elements.py.snap new file mode 100644 index 0000000000..78474e6cbb --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@comma_separated_missing_comma_between_elements.py.snap @@ -0,0 +1,59 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/inline/err/comma_separated_missing_comma_between_elements.py +--- +## AST + +``` +Module( + ModModule { + range: 0..92, + body: [ + Expr( + StmtExpr { + range: 83..91, + value: List( + ExprList { + range: 83..91, + elts: [ + NumberLiteral( + ExprNumberLiteral { + range: 84..85, + value: Int( + 0, + ), + }, + ), + NumberLiteral( + ExprNumberLiteral { + range: 87..88, + value: Int( + 1, + ), + }, + ), + NumberLiteral( + ExprNumberLiteral { + range: 89..90, + value: Int( + 2, + ), + }, + ), + ], + ctx: Load, + }, + ), + }, + ), + ], + }, +) +``` +## Errors + + | +1 | # The comma between the first two elements 
is expected in `parse_list_expression`. +2 | [0, 1 2] + | ^ Syntax Error: Expected ',', found int + | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@comma_separated_missing_element_between_commas.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@comma_separated_missing_element_between_commas.py.snap new file mode 100644 index 0000000000..c68307d59f --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@comma_separated_missing_element_between_commas.py.snap @@ -0,0 +1,58 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/inline/err/comma_separated_missing_element_between_commas.py +--- +## AST + +``` +Module( + ModModule { + range: 0..12, + body: [ + Expr( + StmtExpr { + range: 0..11, + value: List( + ExprList { + range: 0..11, + elts: [ + NumberLiteral( + ExprNumberLiteral { + range: 1..2, + value: Int( + 0, + ), + }, + ), + NumberLiteral( + ExprNumberLiteral { + range: 4..5, + value: Int( + 1, + ), + }, + ), + NumberLiteral( + ExprNumberLiteral { + range: 9..10, + value: Int( + 2, + ), + }, + ), + ], + ctx: Load, + }, + ), + }, + ), + ], + }, +) +``` +## Errors + + | +1 | [0, 1, , 2] + | ^ Syntax Error: Expected an expression or a ']' + | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@comma_separated_missing_first_element.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@comma_separated_missing_first_element.py.snap new file mode 100644 index 0000000000..8a98ab26f5 --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@comma_separated_missing_first_element.py.snap @@ -0,0 +1,52 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/inline/err/comma_separated_missing_first_element.py +--- +## AST + +``` +Module( + ModModule { + range: 0..10, + body: [ + Expr( + StmtExpr { + range: 0..9, + value: Call( + ExprCall { + range: 0..9, + 
func: Name( + ExprName { + range: 0..4, + id: "call", + ctx: Load, + }, + ), + arguments: Arguments { + range: 4..9, + args: [ + NumberLiteral( + ExprNumberLiteral { + range: 7..8, + value: Int( + 1, + ), + }, + ), + ], + keywords: [], + }, + }, + ), + }, + ), + ], + }, +) +``` +## Errors + + | +1 | call(= 1) + | ^ Syntax Error: Expected an expression or a ')' + | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@dotted_name_multiple_dots.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@dotted_name_multiple_dots.py.snap index 7420fb11ba..f876858cc1 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@dotted_name_multiple_dots.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@dotted_name_multiple_dots.py.snap @@ -77,7 +77,7 @@ Module( | 1 | import a..b 2 | import a...b - | ^^^ Syntax Error: Expected ',', found '...' + | ^^^ Syntax Error: Simple statements must be separated by newlines or semicolons | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@expressions__arguments__unclosed_0.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@expressions__arguments__unclosed_0.py.snap index 263334f753..d4b6d03e5d 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@expressions__arguments__unclosed_0.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@expressions__arguments__unclosed_0.py.snap @@ -67,15 +67,8 @@ Module( | 1 | call( + | ^ Syntax Error: Expected ')', found newline 2 | 3 | def foo(): - | ^^^ Syntax Error: Expected an expression or a ')' 4 | pass | - - - | -3 | def foo(): -4 | pass - | Syntax Error: unexpected EOF while parsing - | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@expressions__arguments__unclosed_1.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@expressions__arguments__unclosed_1.py.snap index e4e21a03bd..bcb536b75b 100644 --- 
a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@expressions__arguments__unclosed_1.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@expressions__arguments__unclosed_1.py.snap @@ -75,15 +75,8 @@ Module( | 1 | call(x + | ^ Syntax Error: Expected ')', found newline 2 | 3 | def foo(): - | ^^^ Syntax Error: Expected ',', found 'def' 4 | pass | - - - | -3 | def foo(): -4 | pass - | Syntax Error: unexpected EOF while parsing - | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@expressions__arguments__unclosed_2.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@expressions__arguments__unclosed_2.py.snap index f7ca1d97c7..131bfd6e2b 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@expressions__arguments__unclosed_2.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@expressions__arguments__unclosed_2.py.snap @@ -75,15 +75,8 @@ Module( | 1 | call(x, + | ^ Syntax Error: Expected ')', found newline 2 | 3 | def foo(): - | ^^^ Syntax Error: Expected an expression or a ')' 4 | pass | - - - | -3 | def foo(): -4 | pass - | Syntax Error: unexpected EOF while parsing - | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@expressions__dict__missing_closing_brace_2.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@expressions__dict__missing_closing_brace_2.py.snap index d60ca66d0a..e116b76e2d 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@expressions__dict__missing_closing_brace_2.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@expressions__dict__missing_closing_brace_2.py.snap @@ -76,15 +76,8 @@ Module( | 1 | {x: 1, + | ^ Syntax Error: Expected '}', found newline 2 | 3 | def foo(): - | ^^^ Syntax Error: Expected an expression or a '}' 4 | pass | - - - | -3 | def foo(): -4 | pass - | Syntax Error: unexpected EOF while parsing - | diff --git 
a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@expressions__list__missing_closing_bracket_3.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@expressions__list__missing_closing_bracket_3.py.snap index 3a0898a738..faeaa38a84 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@expressions__list__missing_closing_bracket_3.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@expressions__list__missing_closing_bracket_3.py.snap @@ -73,16 +73,11 @@ Module( ## Errors | +2 | # token starts a statement. +3 | 4 | [1, 2 + | ^ Syntax Error: Expected ']', found newline 5 | 6 | def foo(): - | ^^^ Syntax Error: Expected ',', found 'def' 7 | pass | - - - | -6 | def foo(): -7 | pass - | Syntax Error: unexpected EOF while parsing - | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@expressions__parenthesized__generator.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@expressions__parenthesized__generator.py.snap index 012bbd706f..776e7601ce 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@expressions__parenthesized__generator.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@expressions__parenthesized__generator.py.snap @@ -126,7 +126,7 @@ Module( | 1 | (*x for x in y) 2 | (x := 1, for x in y) - | ^^^ Syntax Error: Expected an expression or a ')' + | ^^^ Syntax Error: Expected ')', found 'for' | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@expressions__parenthesized__missing_closing_paren_3.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@expressions__parenthesized__missing_closing_paren_3.py.snap index 4cd851ef5c..e0708719b1 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@expressions__parenthesized__missing_closing_paren_3.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@expressions__parenthesized__missing_closing_paren_3.py.snap @@ -74,16 +74,11 @@ 
Module( ## Errors | +2 | # token starts a statement. +3 | 4 | (1, 2 + | ^ Syntax Error: Expected ')', found newline 5 | 6 | def foo(): - | ^^^ Syntax Error: Expected ',', found 'def' 7 | pass | - - - | -6 | def foo(): -7 | pass - | Syntax Error: unexpected EOF while parsing - | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@expressions__set__missing_closing_curly_brace_3.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@expressions__set__missing_closing_curly_brace_3.py.snap index 128d499a9a..5c7dcaa388 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@expressions__set__missing_closing_curly_brace_3.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@expressions__set__missing_closing_curly_brace_3.py.snap @@ -72,16 +72,11 @@ Module( ## Errors | +2 | # token starts a statement. +3 | 4 | {1, 2 + | ^ Syntax Error: Expected '}', found newline 5 | 6 | def foo(): - | ^^^ Syntax Error: Expected ',', found 'def' 7 | pass | - - - | -6 | def foo(): -7 | pass - | Syntax Error: unexpected EOF while parsing - | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@from_import_missing_rpar.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@from_import_missing_rpar.py.snap index 14302871b2..df0c2c6587 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@from_import_missing_rpar.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@from_import_missing_rpar.py.snap @@ -131,8 +131,8 @@ Module( | 1 | from x import (a, b + | ^ Syntax Error: Expected ')', found newline 2 | 1 + 1 - | ^ Syntax Error: Expected ',', found int 3 | from x import (a, b, 4 | 2 + 2 | @@ -142,20 +142,6 @@ Module( 1 | from x import (a, b 2 | 1 + 1 3 | from x import (a, b, - | ^^^^ Syntax Error: Simple statements must be separated by newlines or semicolons -4 | 2 + 2 - | - - - | -2 | 1 + 1 -3 | from x import (a, b, -4 | 2 + 2 - | ^ Syntax Error: Expected an import name or 
a ')' - | - - - | -3 | from x import (a, b, + | ^ Syntax Error: Expected ')', found newline 4 | 2 + 2 | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@function_def_unclosed_parameter_list.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@function_def_unclosed_parameter_list.py.snap index fb6c53e224..e37c632e7f 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@function_def_unclosed_parameter_list.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@function_def_unclosed_parameter_list.py.snap @@ -202,11 +202,20 @@ Module( | +1 | def foo(a: int, b: + | ^ Syntax Error: Expected ')', found newline 2 | def foo(): 3 | return 42 4 | def foo(a: int, b: str - | ^^^ Syntax Error: Compound statements are not allowed on the same line as simple statements -5 | x = 10 + | + + + | +1 | def foo(a: int, b: +2 | def foo(): + | ^^^ Syntax Error: Expected an indented block after function definition +3 | return 42 +4 | def foo(a: int, b: str | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@function_def_unclosed_type_param_list.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@function_def_unclosed_type_param_list.py.snap index be462835e0..c5cfe5b377 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@function_def_unclosed_type_param_list.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@function_def_unclosed_type_param_list.py.snap @@ -11,7 +11,7 @@ Module( body: [ FunctionDef( StmtFunctionDef { - range: 0..46, + range: 0..39, is_async: false, decorator_list: [], name: Identifier { @@ -108,29 +108,29 @@ Module( ), }, ), - Assign( - StmtAssign { - range: 40..46, - targets: [ - Name( - ExprName { - range: 40..41, - id: "x", - ctx: Store, - }, - ), - ], - value: NumberLiteral( - ExprNumberLiteral { - range: 44..46, - value: Int( - 10, - ), - }, - ), + ], + }, + ), + Assign( + StmtAssign { + range: 40..46, + targets: [ + Name( + 
ExprName { + range: 40..41, + id: "x", + ctx: Store, }, ), ], + value: NumberLiteral( + ExprNumberLiteral { + range: 44..46, + value: Int( + 10, + ), + }, + ), }, ), ], @@ -143,19 +143,5 @@ Module( 1 | def foo[T1, *T2(a, b): | ^ Syntax Error: Expected ']', found '(' 2 | return a + b -3 | x = 10 - | - - - | -1 | def foo[T1, *T2(a, b): -2 | return a + b -3 | x = 10 - | ^ Syntax Error: Simple statements must be separated by newlines or semicolons - | - - - | -2 | return a + b 3 | x = 10 | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@global_stmt_expression.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@global_stmt_expression.py.snap index 86352d88cc..dcb28456ec 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@global_stmt_expression.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@global_stmt_expression.py.snap @@ -47,5 +47,5 @@ Module( | 1 | global x + 1 - | ^ Syntax Error: Expected ',', found '+' + | ^ Syntax Error: Simple statements must be separated by newlines or semicolons | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@import_stmt_parenthesized_names.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@import_stmt_parenthesized_names.py.snap index d4254fd657..07706b4e06 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@import_stmt_parenthesized_names.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@import_stmt_parenthesized_names.py.snap @@ -69,7 +69,7 @@ Module( | 1 | import (a) - | ^ Syntax Error: Expected an import name + | ^ Syntax Error: Expected one or more symbol names after import 2 | import (a, b) | @@ -77,5 +77,5 @@ Module( | 1 | import (a) 2 | import (a, b) - | ^ Syntax Error: Expected an import name + | ^ Syntax Error: Expected one or more symbol names after import | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@import_stmt_star_import.py.snap 
b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@import_stmt_star_import.py.snap index 703cc005e3..d7b385d339 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@import_stmt_star_import.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@import_stmt_star_import.py.snap @@ -90,7 +90,7 @@ Module( | 1 | import * - | ^ Syntax Error: Expected an import name + | ^ Syntax Error: Expected one or more symbol names after import 2 | import x, *, y | @@ -102,13 +102,6 @@ Module( | - | -1 | import * -2 | import x, *, y - | ^ Syntax Error: Expected an import name - | - - | 1 | import * 2 | import x, *, y diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@nonlocal_stmt_expression.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@nonlocal_stmt_expression.py.snap index 1838963632..2becdd3352 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@nonlocal_stmt_expression.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@nonlocal_stmt_expression.py.snap @@ -47,5 +47,5 @@ Module( | 1 | nonlocal x + 1 - | ^ Syntax Error: Expected ',', found '+' + | ^ Syntax Error: Simple statements must be separated by newlines or semicolons | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@params_var_keyword_with_default.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@params_var_keyword_with_default.py.snap index dd1e0636d5..014b96b8e3 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@params_var_keyword_with_default.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@params_var_keyword_with_default.py.snap @@ -150,7 +150,7 @@ Module( | 1 | def foo(a, **kwargs={'b': 1, 'c': 2}): ... 
- | ^ Syntax Error: Expected a parameter or the end of the parameter list + | ^ Syntax Error: Expected ')', found '{' | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@params_var_positional_with_default.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@params_var_positional_with_default.py.snap index b8bd4bfecc..2e02269531 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@params_var_positional_with_default.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@params_var_positional_with_default.py.snap @@ -108,7 +108,7 @@ Module( | 1 | def foo(a, *args=(1, 2)): ... - | ^ Syntax Error: Expected a parameter or the end of the parameter list + | ^ Syntax Error: Expected ')', found '(' | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lex_logical_token.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lex_logical_token.py.snap new file mode 100644 index 0000000000..1c23c1e0cc --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lex_logical_token.py.snap @@ -0,0 +1,602 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/invalid/re_lex_logical_token.py +--- +## AST + +``` +Module( + ModModule { + range: 0..824, + body: [ + If( + StmtIf { + range: 48..59, + test: Call( + ExprCall { + range: 51..59, + func: Name( + ExprName { + range: 51..55, + id: "call", + ctx: Load, + }, + ), + arguments: Arguments { + range: 55..59, + args: [ + Name( + ExprName { + range: 56..59, + id: "foo", + ctx: Load, + }, + ), + ], + keywords: [], + }, + }, + ), + body: [], + elif_else_clauses: [], + }, + ), + FunctionDef( + StmtFunctionDef { + range: 60..79, + is_async: false, + decorator_list: [], + name: Identifier { + id: "bar", + range: 64..67, + }, + type_params: None, + parameters: Parameters { + range: 67..69, + posonlyargs: [], + args: [], + vararg: None, + kwonlyargs: [], + kwarg: None, 
+ }, + returns: None, + body: [ + Pass( + StmtPass { + range: 75..79, + }, + ), + ], + }, + ), + If( + StmtIf { + range: 113..152, + test: Call( + ExprCall { + range: 116..124, + func: Name( + ExprName { + range: 116..120, + id: "call", + ctx: Load, + }, + ), + arguments: Arguments { + range: 120..124, + args: [ + Name( + ExprName { + range: 121..124, + id: "foo", + ctx: Load, + }, + ), + ], + keywords: [], + }, + }, + ), + body: [ + FunctionDef( + StmtFunctionDef { + range: 129..152, + is_async: false, + decorator_list: [], + name: Identifier { + id: "bar", + range: 133..136, + }, + type_params: None, + parameters: Parameters { + range: 136..138, + posonlyargs: [], + args: [], + vararg: None, + kwonlyargs: [], + kwarg: None, + }, + returns: None, + body: [ + Pass( + StmtPass { + range: 148..152, + }, + ), + ], + }, + ), + ], + elif_else_clauses: [], + }, + ), + If( + StmtIf { + range: 228..269, + test: Call( + ExprCall { + range: 231..239, + func: Name( + ExprName { + range: 231..235, + id: "call", + ctx: Load, + }, + ), + arguments: Arguments { + range: 235..239, + args: [ + Name( + ExprName { + range: 236..239, + id: "foo", + ctx: Load, + }, + ), + ], + keywords: [], + }, + }, + ), + body: [ + FunctionDef( + StmtFunctionDef { + range: 246..269, + is_async: false, + decorator_list: [], + name: Identifier { + id: "bar", + range: 250..253, + }, + type_params: None, + parameters: Parameters { + range: 253..255, + posonlyargs: [], + args: [], + vararg: None, + kwonlyargs: [], + kwarg: None, + }, + returns: None, + body: [ + Pass( + StmtPass { + range: 265..269, + }, + ), + ], + }, + ), + ], + elif_else_clauses: [], + }, + ), + If( + StmtIf { + range: 344..392, + test: Call( + ExprCall { + range: 347..355, + func: Name( + ExprName { + range: 347..351, + id: "call", + ctx: Load, + }, + ), + arguments: Arguments { + range: 351..355, + args: [ + Name( + ExprName { + range: 352..355, + id: "foo", + ctx: Load, + }, + ), + ], + keywords: [], + }, + }, + ), + body: [ + 
FunctionDef( + StmtFunctionDef { + range: 369..392, + is_async: false, + decorator_list: [], + name: Identifier { + id: "bar", + range: 373..376, + }, + type_params: None, + parameters: Parameters { + range: 376..378, + posonlyargs: [], + args: [], + vararg: None, + kwonlyargs: [], + kwarg: None, + }, + returns: None, + body: [ + Pass( + StmtPass { + range: 388..392, + }, + ), + ], + }, + ), + ], + elif_else_clauses: [], + }, + ), + If( + StmtIf { + range: 453..499, + test: Call( + ExprCall { + range: 456..472, + func: Name( + ExprName { + range: 456..460, + id: "call", + ctx: Load, + }, + ), + arguments: Arguments { + range: 460..472, + args: [ + Name( + ExprName { + range: 461..464, + id: "foo", + ctx: Load, + }, + ), + List( + ExprList { + range: 466..471, + elts: [ + Name( + ExprName { + range: 467..468, + id: "a", + ctx: Load, + }, + ), + Name( + ExprName { + range: 470..471, + id: "b", + ctx: Load, + }, + ), + ], + ctx: Load, + }, + ), + ], + keywords: [], + }, + }, + ), + body: [ + FunctionDef( + StmtFunctionDef { + range: 476..499, + is_async: false, + decorator_list: [], + name: Identifier { + id: "bar", + range: 480..483, + }, + type_params: None, + parameters: Parameters { + range: 483..485, + posonlyargs: [], + args: [], + vararg: None, + kwonlyargs: [], + kwarg: None, + }, + returns: None, + body: [ + Pass( + StmtPass { + range: 495..499, + }, + ), + ], + }, + ), + ], + elif_else_clauses: [], + }, + ), + If( + StmtIf { + range: 564..611, + test: Call( + ExprCall { + range: 567..583, + func: Name( + ExprName { + range: 567..571, + id: "call", + ctx: Load, + }, + ), + arguments: Arguments { + range: 571..583, + args: [ + Name( + ExprName { + range: 572..575, + id: "foo", + ctx: Load, + }, + ), + List( + ExprList { + range: 577..582, + elts: [ + Name( + ExprName { + range: 578..579, + id: "a", + ctx: Load, + }, + ), + Name( + ExprName { + range: 581..582, + id: "b", + ctx: Load, + }, + ), + ], + ctx: Load, + }, + ), + ], + keywords: [], + }, + }, + ), + 
body: [ + FunctionDef( + StmtFunctionDef { + range: 588..611, + is_async: false, + decorator_list: [], + name: Identifier { + id: "bar", + range: 592..595, + }, + type_params: None, + parameters: Parameters { + range: 595..597, + posonlyargs: [], + args: [], + vararg: None, + kwonlyargs: [], + kwarg: None, + }, + returns: None, + body: [ + Pass( + StmtPass { + range: 607..611, + }, + ), + ], + }, + ), + ], + elif_else_clauses: [], + }, + ), + If( + StmtIf { + range: 772..824, + test: Call( + ExprCall { + range: 775..796, + func: Name( + ExprName { + range: 775..779, + id: "call", + ctx: Load, + }, + ), + arguments: Arguments { + range: 779..796, + args: [ + Name( + ExprName { + range: 780..783, + id: "foo", + ctx: Load, + }, + ), + List( + ExprList { + range: 785..794, + elts: [ + Name( + ExprName { + range: 786..787, + id: "a", + ctx: Load, + }, + ), + Name( + ExprName { + range: 793..794, + id: "b", + ctx: Load, + }, + ), + ], + ctx: Load, + }, + ), + ], + keywords: [], + }, + }, + ), + body: [ + FunctionDef( + StmtFunctionDef { + range: 801..824, + is_async: false, + decorator_list: [], + name: Identifier { + id: "bar", + range: 805..808, + }, + type_params: None, + parameters: Parameters { + range: 808..810, + posonlyargs: [], + args: [], + vararg: None, + kwonlyargs: [], + kwarg: None, + }, + returns: None, + body: [ + Pass( + StmtPass { + range: 820..824, + }, + ), + ], + }, + ), + ], + elif_else_clauses: [], + }, + ), + ], + }, +) +``` +## Errors + + | +1 | # No indentation before the function definition +2 | if call(foo + | ^ Syntax Error: Expected ')', found newline +3 | def bar(): +4 | pass + | + + + | +1 | # No indentation before the function definition +2 | if call(foo +3 | def bar(): + | ^^^ Syntax Error: Expected an indented block after `if` statement +4 | pass + | + + + | + 7 | # Indented function definition + 8 | if call(foo + | ^ Syntax Error: Expected ')', found newline + 9 | def bar(): +10 | pass + | + + + | +13 | # There are multiple non-logical 
newlines (blank lines) in the `if` body +14 | if call(foo + | ^ Syntax Error: Expected ')', found newline +15 | +16 | +17 | def bar(): + | + + + | +21 | # There are trailing whitespaces in the blank line inside the `if` body +22 | if call(foo + | ^ Syntax Error: Expected ')', found newline +23 | +24 | def bar(): +25 | pass + | + + + | +28 | # The lexer is nested with multiple levels of parentheses +29 | if call(foo, [a, b + | ^ Syntax Error: Expected ']', found NonLogicalNewline +30 | def bar(): +31 | pass + | + + + | +34 | # The outer parenthesis is closed but the inner bracket isn't +35 | if call(foo, [a, b) + | ^ Syntax Error: Expected ']', found ')' +36 | def bar(): +37 | pass + | + + + | +34 | # The outer parenthesis is closed but the inner bracket isn't +35 | if call(foo, [a, b) + | ^ Syntax Error: Expected ':', found newline +36 | def bar(): +37 | pass + | + + + | +41 | # test is to make sure it emits a `NonLogicalNewline` token after `b`. +42 | if call(foo, [a, +43 | b + | ^ Syntax Error: Expected ']', found NonLogicalNewline +44 | ) +45 | def bar(): +46 | pass + | + + + | +42 | if call(foo, [a, +43 | b +44 | ) + | ^ Syntax Error: Expected ':', found newline +45 | def bar(): +46 | pass + | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lex_logical_token_mac_eol.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lex_logical_token_mac_eol.py.snap new file mode 100644 index 0000000000..72eca32ab6 --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lex_logical_token_mac_eol.py.snap @@ -0,0 +1,104 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/invalid/re_lex_logical_token_mac_eol.py +--- +## AST + +``` +Module( + ModModule { + range: 0..46, + body: [ + If( + StmtIf { + range: 0..46, + test: Call( + ExprCall { + range: 3..19, + func: Name( + ExprName { + range: 3..7, + id: "call", + ctx: Load, + }, + ), + arguments: Arguments { + 
range: 7..19, + args: [ + Name( + ExprName { + range: 8..11, + id: "foo", + ctx: Load, + }, + ), + List( + ExprList { + range: 13..18, + elts: [ + Name( + ExprName { + range: 14..15, + id: "a", + ctx: Load, + }, + ), + Name( + ExprName { + range: 17..18, + id: "b", + ctx: Load, + }, + ), + ], + ctx: Load, + }, + ), + ], + keywords: [], + }, + }, + ), + body: [ + FunctionDef( + StmtFunctionDef { + range: 23..46, + is_async: false, + decorator_list: [], + name: Identifier { + id: "bar", + range: 27..30, + }, + type_params: None, + parameters: Parameters { + range: 30..32, + posonlyargs: [], + args: [], + vararg: None, + kwonlyargs: [], + kwarg: None, + }, + returns: None, + body: [ + Pass( + StmtPass { + range: 42..46, + }, + ), + ], + }, + ), + ], + elif_else_clauses: [], + }, + ), + ], + }, +) +``` +## Errors + + | +1 | if call(foo, [a, b def bar(): pass + | Syntax Error: Expected ']', found NonLogicalNewline + | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lex_logical_token_windows_eol.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lex_logical_token_windows_eol.py.snap new file mode 100644 index 0000000000..d11a5cf926 --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@re_lex_logical_token_windows_eol.py.snap @@ -0,0 +1,107 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/invalid/re_lex_logical_token_windows_eol.py +--- +## AST + +``` +Module( + ModModule { + range: 0..50, + body: [ + If( + StmtIf { + range: 0..48, + test: Call( + ExprCall { + range: 3..20, + func: Name( + ExprName { + range: 3..7, + id: "call", + ctx: Load, + }, + ), + arguments: Arguments { + range: 7..20, + args: [ + Name( + ExprName { + range: 8..11, + id: "foo", + ctx: Load, + }, + ), + List( + ExprList { + range: 13..18, + elts: [ + Name( + ExprName { + range: 14..15, + id: "a", + ctx: Load, + }, + ), + Name( + ExprName { + range: 17..18, + id: "b", + ctx: 
Load, + }, + ), + ], + ctx: Load, + }, + ), + ], + keywords: [], + }, + }, + ), + body: [ + FunctionDef( + StmtFunctionDef { + range: 24..48, + is_async: false, + decorator_list: [], + name: Identifier { + id: "bar", + range: 28..31, + }, + type_params: None, + parameters: Parameters { + range: 31..33, + posonlyargs: [], + args: [], + vararg: None, + kwonlyargs: [], + kwarg: None, + }, + returns: None, + body: [ + Pass( + StmtPass { + range: 44..48, + }, + ), + ], + }, + ), + ], + elif_else_clauses: [], + }, + ), + ], + }, +) +``` +## Errors + + | +1 | if call(foo, [a, b + | ___________________^ +2 | | def bar(): + | |_^ Syntax Error: Expected ']', found NonLogicalNewline +3 | pass + | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@statements__function_type_parameters.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@statements__function_type_parameters.py.snap index 58aa04440c..16efe8b16c 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@statements__function_type_parameters.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@statements__function_type_parameters.py.snap @@ -335,7 +335,7 @@ Module( 11 | def keyword[A, await](): ... 12 | 13 | def not_a_type_param[A, |, B](): ... - | ^ Syntax Error: Expected a type parameter or the end of the type parameter list + | ^ Syntax Error: Expected ',', found '|' 14 | 15 | def multiple_commas[A,,B](): ... | @@ -383,7 +383,7 @@ Module( 17 | def multiple_trailing_commas[A,,](): ... 18 | 19 | def multiple_commas_and_recovery[A,,100](): ... 
- | ^^^ Syntax Error: Expected a type parameter or the end of the type parameter list + | ^^^ Syntax Error: Expected ']', found int | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@statements__match__as_pattern_3.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@statements__match__as_pattern_3.py.snap index bd6b87ab9f..89f02bcde3 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@statements__match__as_pattern_3.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@statements__match__as_pattern_3.py.snap @@ -99,7 +99,7 @@ Module( 2 | # Not in the mapping start token set, so the list parsing bails 3 | # v 4 | case {(x as y): 1}: - | ^ Syntax Error: Expected a mapping pattern or the end of the mapping pattern + | ^ Syntax Error: Expected '}', found '(' 5 | pass | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@statements__with__ambiguous_lpar_with_items.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@statements__with__ambiguous_lpar_with_items.py.snap index 11746d587b..c37a03be19 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@statements__with__ambiguous_lpar_with_items.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@statements__with__ambiguous_lpar_with_items.py.snap @@ -1372,7 +1372,7 @@ Module( | 4 | with (item1, item2),: ... 5 | with (item1, item2), as f: ... - | ^^ Syntax Error: Expected an expression or the end of the with item list + | ^^ Syntax Error: Expected ',', found 'as' 6 | with (item1, item2), item3,: ... 7 | with (*item): ... | @@ -1450,7 +1450,7 @@ Module( 10 | with (item1, item2 := 10 as f): ... 11 | with (x for x in range(10), item): ... 12 | with (item, x for x in range(10)): ... 
- | ^^^ Syntax Error: Expected ',', found 'for' + | ^^^ Syntax Error: Expected ')', found 'for' 13 | 14 | # Make sure the parser doesn't report the same error twice | @@ -1518,7 +1518,7 @@ Module( | 17 | with (*x for x in iter, item): ... 18 | with (item1, *x for x in iter, item2): ... - | ^^^ Syntax Error: Expected ',', found 'for' + | ^^^ Syntax Error: Expected ')', found 'for' 19 | with (x as f, *y): ... 20 | with (*x, y as f): ... | diff --git a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@with_items_parenthesized_missing_comma.py.snap b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@with_items_parenthesized_missing_comma.py.snap index 68009deba0..d963f9c1b8 100644 --- a/crates/ruff_python_parser/tests/snapshots/invalid_syntax@with_items_parenthesized_missing_comma.py.snap +++ b/crates/ruff_python_parser/tests/snapshots/invalid_syntax@with_items_parenthesized_missing_comma.py.snap @@ -330,11 +330,5 @@ Module( 3 | with (item1, item2 item3, item4): ... 4 | with (item1, item2 as f1 item3, item4): ... 5 | with (item1, item2: ... - | ^ Syntax Error: Expected ',', found ':' - | - - - | -4 | with (item1, item2 as f1 item3, item4): ... -5 | with (item1, item2: ... 
+ | ^ Syntax Error: Expected ')', found ':' | diff --git a/crates/ruff_python_parser/tests/snapshots/valid_syntax@comma_separated_regular_list_terminator.py.snap b/crates/ruff_python_parser/tests/snapshots/valid_syntax@comma_separated_regular_list_terminator.py.snap new file mode 100644 index 0000000000..9561ce301b --- /dev/null +++ b/crates/ruff_python_parser/tests/snapshots/valid_syntax@comma_separated_regular_list_terminator.py.snap @@ -0,0 +1,168 @@ +--- +source: crates/ruff_python_parser/tests/fixtures.rs +input_file: crates/ruff_python_parser/resources/inline/ok/comma_separated_regular_list_terminator.py +--- +## AST + +``` +Module( + ModModule { + range: 0..181, + body: [ + Expr( + StmtExpr { + range: 141..144, + value: List( + ExprList { + range: 141..144, + elts: [ + NumberLiteral( + ExprNumberLiteral { + range: 142..143, + value: Int( + 0, + ), + }, + ), + ], + ctx: Load, + }, + ), + }, + ), + Expr( + StmtExpr { + range: 145..151, + value: List( + ExprList { + range: 145..151, + elts: [ + NumberLiteral( + ExprNumberLiteral { + range: 146..147, + value: Int( + 0, + ), + }, + ), + NumberLiteral( + ExprNumberLiteral { + range: 149..150, + value: Int( + 1, + ), + }, + ), + ], + ctx: Load, + }, + ), + }, + ), + Expr( + StmtExpr { + range: 152..159, + value: List( + ExprList { + range: 152..159, + elts: [ + NumberLiteral( + ExprNumberLiteral { + range: 153..154, + value: Int( + 0, + ), + }, + ), + NumberLiteral( + ExprNumberLiteral { + range: 156..157, + value: Int( + 1, + ), + }, + ), + ], + ctx: Load, + }, + ), + }, + ), + Expr( + StmtExpr { + range: 160..169, + value: List( + ExprList { + range: 160..169, + elts: [ + NumberLiteral( + ExprNumberLiteral { + range: 161..162, + value: Int( + 0, + ), + }, + ), + NumberLiteral( + ExprNumberLiteral { + range: 164..165, + value: Int( + 1, + ), + }, + ), + NumberLiteral( + ExprNumberLiteral { + range: 167..168, + value: Int( + 2, + ), + }, + ), + ], + ctx: Load, + }, + ), + }, + ), + Expr( + StmtExpr { + range: 
170..180, + value: List( + ExprList { + range: 170..180, + elts: [ + NumberLiteral( + ExprNumberLiteral { + range: 171..172, + value: Int( + 0, + ), + }, + ), + NumberLiteral( + ExprNumberLiteral { + range: 174..175, + value: Int( + 1, + ), + }, + ), + NumberLiteral( + ExprNumberLiteral { + range: 177..178, + value: Int( + 2, + ), + }, + ), + ], + ctx: Load, + }, + ), + }, + ), + ], + }, +) +```