Use empty range when there's "gap" in token source (#11032)

## Summary

This fixes a bug where the parser would panic when there is a "gap" in the token source.

What's a gap?

The reason it's `<=` instead of just `==` is that there could be whitespace between the two tokens. For example:

```python
#     last token end
#     | current token (newline) start
#     v v
def foo \n
#      ^
#      assume there's trailing whitespace here
```

Or, there could be tokens that are considered "trivia" and thus aren't emitted by the token source. These are comments and non-logical newlines. For example:

```python
#     last token end
#     v
def foo # comment\n
#                ^ current token (newline) start
```

In either of the above cases, there's a "gap" between the end of the last token and the start of the current token.

## Test Plan

Add test cases and update the snapshots.
2025-07-16 01:25:11 +00:00 · 2024-04-19 17:06:26 +05:30 · 2024-04-19 17:06:26 +05:30 · d3cd61f804
commit d3cd61f804
parent 9b80cc09ee
5 changed files with 195 additions and 26 deletions
--- a/crates/ruff_python_parser/src/parser/mod.rs
+++ b/crates/ruff_python_parser/src/parser/mod.rs
@ -261,12 +261,59 @@ impl<'src> Parser<'src> {
    }

    fn node_range(&self, start: TextSize) -> TextRange {
-        // It's possible during error recovery that the parsing didn't consume any tokens. In that case,
-        // `last_token_end` still points to the end of the previous token but `start` is the start of the current token.
-        // Calling `TextRange::new(start, self.last_token_end)` would panic in that case because `start > end`.
-        // This path "detects" this case and creates an empty range instead.
-        if self.node_start() == start {
-            TextRange::empty(start)
+        // It's possible during error recovery that the parsing didn't consume any tokens. In that
+        // case, `last_token_end` still points to the end of the previous token but `start` is the
+        // start of the current token. Calling `TextRange::new(start, self.last_token_end)` would
+        // panic in that case because `start > end`. This path "detects" this case and creates an
+        // empty range instead.
+        //
+        // The reason it's `<=` instead of just `==` is because there could be whitespace between
+        // the two tokens. For example:
+        //
+        // ```python
+        // #     last token end
+        // #     | current token (newline) start
+        // #     v v
+        // def foo \n
+        // #      ^
+        // #      assume there's trailing whitespace here
+        // ```
+        //
+        // Or, there could be tokens that are considered "trivia" and thus aren't emitted by the
+        // token source. These are comments and non-logical newlines. For example:
+        //
+        // ```python
+        // #     last token end
+        // #     v
+        // def foo # comment\n
+        // #                ^ current token (newline) start
+        // ```
+        //
+        // In either of the above cases, there's a "gap" between the end of the last token and start
+        // of the current token.
+        if self.last_token_end <= start {
+            // We need to create an empty range at the last token end instead of the start because
+            // otherwise this node range will fall outside the range of its parent node. Taking
+            // the above example:
+            //
+            // ```python
+            // if True:
+            // #   function start
+            // #   |     function end
+            // #   v     v
+            //     def foo # comment
+            // #                    ^ current token start
+            // ```
+            //
+            // Here, the current token start is the start of parameter range but the function ends
+            // at `foo`. Even if there's a function body, the range of parameters would still be
+            // before the comment.
+
+            // test_err node_range_with_gaps
+            // def foo # comment
+            // def bar(): ...
+            // def baz
+            TextRange::empty(self.last_token_end)
        } else {
            TextRange::new(start, self.last_token_end)
        }
--- a/crates/ruff_python_parser/src/parser/statement.rs
+++ b/crates/ruff_python_parser/src/parser/statement.rs
@ -1663,23 +1663,19 @@ impl<'src> Parser<'src> {
        // x = 10
        let type_params = self.try_parse_type_params();

+        // test_ok function_def_parameter_range
+        // def foo(
+        //     first: int,
+        //     second: int,
+        // ) -> int: ...
+
        // test_err function_def_unclosed_parameter_list
        // def foo(a: int, b:
        // def foo():
        //     return 42
        // def foo(a: int, b: str
        // x = 10
-        let parameters_start = self.node_start();
-        self.expect(TokenKind::Lpar);
-        let mut parameters = self.parse_parameters(FunctionKind::FunctionDef);
-        self.expect(TokenKind::Rpar);
-
-        // test_ok function_def_parameter_range
-        // def foo(
-        //     first: int,
-        //     second: int,
-        // ) -> int: ...
-        parameters.range = self.node_range(parameters_start);
+        let parameters = self.parse_parameters(FunctionKind::FunctionDef);

        let returns = if self.eat(TokenKind::Rarrow) {
            if self.at_expr() {
@ -2844,19 +2840,16 @@ impl<'src> Parser<'src> {
    pub(super) fn parse_parameters(&mut self, function_kind: FunctionKind) -> ast::Parameters {
        let start = self.node_start();

+        if matches!(function_kind, FunctionKind::FunctionDef) {
+            self.expect(TokenKind::Lpar);
+        }
+
        // TODO(dhruvmanila): This has the same problem as `parse_match_pattern_mapping`
        // has where if there are multiple kwarg or vararg, the last one will win and
        // the parser will drop the previous ones. Another thing is the vararg and kwarg
        // uses `Parameter` (not `ParameterWithDefault`) which means that the parser cannot
        // recover well from `*args=(1, 2)`.
-        let mut parameters = ast::Parameters {
-            range: TextRange::default(),
-            posonlyargs: vec![],
-            args: vec![],
-            kwonlyargs: vec![],
-            vararg: None,
-            kwarg: None,
-        };
+        let mut parameters = ast::Parameters::empty(TextRange::default());

        let mut seen_default_param = false; // `a=10`
        let mut seen_positional_only_separator = false; // `/`
@ -3094,6 +3087,10 @@ impl<'src> Parser<'src> {
            self.add_error(ParseErrorType::ExpectedKeywordParam, star_range);
        }

+        if matches!(function_kind, FunctionKind::FunctionDef) {
+            self.expect(TokenKind::Rpar);
+        }
+
        parameters.range = self.node_range(start);

        // test_err params_duplicate_names