mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-04 02:39:12 +00:00
Use empty range when there's "gap" in token source (#11032)
## Summary This fixes a bug where the parser would panic when there is a "gap" in the token source. What's a gap? The reason it's `<=` instead of just `==` is because there could be whitespaces between the two tokens. For example: ```python # last token end # | current token (newline) start # v v def foo \n # ^ # assume there's trailing whitespace here ``` Or, there could tokens that are considered "trivia" and thus aren't emitted by the token source. These are comments and non-logical newlines. For example: ```python # last token end # v def foo # comment\n # ^ current token (newline) start ``` In either of the above cases, there's a "gap" between the end of the last token and start of the current token. ## Test Plan Add test cases and update the snapshots.
This commit is contained in:
parent
9b80cc09ee
commit
d3cd61f804
5 changed files with 195 additions and 26 deletions
|
@ -0,0 +1,3 @@
|
|||
def foo # comment
|
||||
def bar(): ...
|
||||
def baz
|
|
@ -261,12 +261,59 @@ impl<'src> Parser<'src> {
|
|||
}
|
||||
|
||||
fn node_range(&self, start: TextSize) -> TextRange {
|
||||
// It's possible during error recovery that the parsing didn't consume any tokens. In that case,
|
||||
// `last_token_end` still points to the end of the previous token but `start` is the start of the current token.
|
||||
// Calling `TextRange::new(start, self.last_token_end)` would panic in that case because `start > end`.
|
||||
// This path "detects" this case and creates an empty range instead.
|
||||
if self.node_start() == start {
|
||||
TextRange::empty(start)
|
||||
// It's possible during error recovery that the parsing didn't consume any tokens. In that
|
||||
// case, `last_token_end` still points to the end of the previous token but `start` is the
|
||||
// start of the current token. Calling `TextRange::new(start, self.last_token_end)` would
|
||||
// panic in that case because `start > end`. This path "detects" this case and creates an
|
||||
// empty range instead.
|
||||
//
|
||||
// The reason it's `<=` instead of just `==` is because there could be whitespaces between
|
||||
// the two tokens. For example:
|
||||
//
|
||||
// ```python
|
||||
// # last token end
|
||||
// # | current token (newline) start
|
||||
// # v v
|
||||
// def foo \n
|
||||
// # ^
|
||||
// # assume there's trailing whitespace here
|
||||
// ```
|
||||
//
|
||||
// Or, there could tokens that are considered "trivia" and thus aren't emitted by the token
|
||||
// source. These are comments and non-logical newlines. For example:
|
||||
//
|
||||
// ```python
|
||||
// # last token end
|
||||
// # v
|
||||
// def foo # comment\n
|
||||
// # ^ current token (newline) start
|
||||
// ```
|
||||
//
|
||||
// In either of the above cases, there's a "gap" between the end of the last token and start
|
||||
// of the current token.
|
||||
if self.last_token_end <= start {
|
||||
// We need to create an empty range at the last token end instead of the start because
|
||||
// otherwise this node range will fall outside the range of it's parent node. Taking
|
||||
// the above example:
|
||||
//
|
||||
// ```python
|
||||
// if True:
|
||||
// # function start
|
||||
// # | function end
|
||||
// # v v
|
||||
// def foo # comment
|
||||
// # ^ current token start
|
||||
// ```
|
||||
//
|
||||
// Here, the current token start is the start of parameter range but the function ends
|
||||
// at `foo`. Even if there's a function body, the range of parameters would still be
|
||||
// before the comment.
|
||||
|
||||
// test_err node_range_with_gaps
|
||||
// def foo # comment
|
||||
// def bar(): ...
|
||||
// def baz
|
||||
TextRange::empty(self.last_token_end)
|
||||
} else {
|
||||
TextRange::new(start, self.last_token_end)
|
||||
}
|
||||
|
|
|
@ -1663,23 +1663,19 @@ impl<'src> Parser<'src> {
|
|||
// x = 10
|
||||
let type_params = self.try_parse_type_params();
|
||||
|
||||
// test_ok function_def_parameter_range
|
||||
// def foo(
|
||||
// first: int,
|
||||
// second: int,
|
||||
// ) -> int: ...
|
||||
|
||||
// test_err function_def_unclosed_parameter_list
|
||||
// def foo(a: int, b:
|
||||
// def foo():
|
||||
// return 42
|
||||
// def foo(a: int, b: str
|
||||
// x = 10
|
||||
let parameters_start = self.node_start();
|
||||
self.expect(TokenKind::Lpar);
|
||||
let mut parameters = self.parse_parameters(FunctionKind::FunctionDef);
|
||||
self.expect(TokenKind::Rpar);
|
||||
|
||||
// test_ok function_def_parameter_range
|
||||
// def foo(
|
||||
// first: int,
|
||||
// second: int,
|
||||
// ) -> int: ...
|
||||
parameters.range = self.node_range(parameters_start);
|
||||
let parameters = self.parse_parameters(FunctionKind::FunctionDef);
|
||||
|
||||
let returns = if self.eat(TokenKind::Rarrow) {
|
||||
if self.at_expr() {
|
||||
|
@ -2844,19 +2840,16 @@ impl<'src> Parser<'src> {
|
|||
pub(super) fn parse_parameters(&mut self, function_kind: FunctionKind) -> ast::Parameters {
|
||||
let start = self.node_start();
|
||||
|
||||
if matches!(function_kind, FunctionKind::FunctionDef) {
|
||||
self.expect(TokenKind::Lpar);
|
||||
}
|
||||
|
||||
// TODO(dhruvmanila): This has the same problem as `parse_match_pattern_mapping`
|
||||
// has where if there are multiple kwarg or vararg, the last one will win and
|
||||
// the parser will drop the previous ones. Another thing is the vararg and kwarg
|
||||
// uses `Parameter` (not `ParameterWithDefault`) which means that the parser cannot
|
||||
// recover well from `*args=(1, 2)`.
|
||||
let mut parameters = ast::Parameters {
|
||||
range: TextRange::default(),
|
||||
posonlyargs: vec![],
|
||||
args: vec![],
|
||||
kwonlyargs: vec![],
|
||||
vararg: None,
|
||||
kwarg: None,
|
||||
};
|
||||
let mut parameters = ast::Parameters::empty(TextRange::default());
|
||||
|
||||
let mut seen_default_param = false; // `a=10`
|
||||
let mut seen_positional_only_separator = false; // `/`
|
||||
|
@ -3094,6 +3087,10 @@ impl<'src> Parser<'src> {
|
|||
self.add_error(ParseErrorType::ExpectedKeywordParam, star_range);
|
||||
}
|
||||
|
||||
if matches!(function_kind, FunctionKind::FunctionDef) {
|
||||
self.expect(TokenKind::Rpar);
|
||||
}
|
||||
|
||||
parameters.range = self.node_range(start);
|
||||
|
||||
// test_err params_duplicate_names
|
||||
|
|
|
@ -0,0 +1,122 @@
|
|||
---
|
||||
source: crates/ruff_python_parser/tests/fixtures.rs
|
||||
input_file: crates/ruff_python_parser/resources/inline/err/node_range_with_gaps.py
|
||||
---
|
||||
## AST
|
||||
|
||||
```
|
||||
Module(
|
||||
ModModule {
|
||||
range: 0..41,
|
||||
body: [
|
||||
FunctionDef(
|
||||
StmtFunctionDef {
|
||||
range: 0..7,
|
||||
is_async: false,
|
||||
decorator_list: [],
|
||||
name: Identifier {
|
||||
id: "foo",
|
||||
range: 4..7,
|
||||
},
|
||||
type_params: None,
|
||||
parameters: Parameters {
|
||||
range: 7..7,
|
||||
posonlyargs: [],
|
||||
args: [],
|
||||
vararg: None,
|
||||
kwonlyargs: [],
|
||||
kwarg: None,
|
||||
},
|
||||
returns: None,
|
||||
body: [],
|
||||
},
|
||||
),
|
||||
FunctionDef(
|
||||
StmtFunctionDef {
|
||||
range: 18..32,
|
||||
is_async: false,
|
||||
decorator_list: [],
|
||||
name: Identifier {
|
||||
id: "bar",
|
||||
range: 22..25,
|
||||
},
|
||||
type_params: None,
|
||||
parameters: Parameters {
|
||||
range: 25..27,
|
||||
posonlyargs: [],
|
||||
args: [],
|
||||
vararg: None,
|
||||
kwonlyargs: [],
|
||||
kwarg: None,
|
||||
},
|
||||
returns: None,
|
||||
body: [
|
||||
Expr(
|
||||
StmtExpr {
|
||||
range: 29..32,
|
||||
value: EllipsisLiteral(
|
||||
ExprEllipsisLiteral {
|
||||
range: 29..32,
|
||||
},
|
||||
),
|
||||
},
|
||||
),
|
||||
],
|
||||
},
|
||||
),
|
||||
FunctionDef(
|
||||
StmtFunctionDef {
|
||||
range: 33..40,
|
||||
is_async: false,
|
||||
decorator_list: [],
|
||||
name: Identifier {
|
||||
id: "baz",
|
||||
range: 37..40,
|
||||
},
|
||||
type_params: None,
|
||||
parameters: Parameters {
|
||||
range: 40..40,
|
||||
posonlyargs: [],
|
||||
args: [],
|
||||
vararg: None,
|
||||
kwonlyargs: [],
|
||||
kwarg: None,
|
||||
},
|
||||
returns: None,
|
||||
body: [],
|
||||
},
|
||||
),
|
||||
],
|
||||
},
|
||||
)
|
||||
```
|
||||
## Errors
|
||||
|
||||
|
|
||||
1 | def foo # comment
|
||||
| ^ Syntax Error: Expected '(', found newline
|
||||
2 | def bar(): ...
|
||||
3 | def baz
|
||||
|
|
||||
|
||||
|
||||
|
|
||||
1 | def foo # comment
|
||||
2 | def bar(): ...
|
||||
| ^^^ Syntax Error: Expected ')', found 'def'
|
||||
3 | def baz
|
||||
|
|
||||
|
||||
|
||||
|
|
||||
1 | def foo # comment
|
||||
2 | def bar(): ...
|
||||
3 | def baz
|
||||
| ^ Syntax Error: Expected '(', found newline
|
||||
|
|
||||
|
||||
|
||||
|
|
||||
2 | def bar(): ...
|
||||
3 | def baz
|
||||
|
|
|
@ -167,7 +167,7 @@ Module(
|
|||
conversion: None,
|
||||
format_spec: Some(
|
||||
FStringFormatSpec {
|
||||
range: 43..43,
|
||||
range: 42..42,
|
||||
elements: [],
|
||||
},
|
||||
),
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue