mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-28 06:14:27 +00:00
Use empty range when there's "gap" in token source (#11032)
## Summary This fixes a bug where the parser would panic when there is a "gap" in the token source. What's a gap? The reason it's `<=` instead of just `==` is because there could be whitespaces between the two tokens. For example: ```python # last token end # | current token (newline) start # v v def foo \n # ^ # assume there's trailing whitespace here ``` Or, there could tokens that are considered "trivia" and thus aren't emitted by the token source. These are comments and non-logical newlines. For example: ```python # last token end # v def foo # comment\n # ^ current token (newline) start ``` In either of the above cases, there's a "gap" between the end of the last token and start of the current token. ## Test Plan Add test cases and update the snapshots.
This commit is contained in:
parent
9b80cc09ee
commit
d3cd61f804
5 changed files with 195 additions and 26 deletions
|
@ -0,0 +1,3 @@
|
||||||
|
def foo # comment
|
||||||
|
def bar(): ...
|
||||||
|
def baz
|
|
@ -261,12 +261,59 @@ impl<'src> Parser<'src> {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn node_range(&self, start: TextSize) -> TextRange {
|
fn node_range(&self, start: TextSize) -> TextRange {
|
||||||
// It's possible during error recovery that the parsing didn't consume any tokens. In that case,
|
// It's possible during error recovery that the parsing didn't consume any tokens. In that
|
||||||
// `last_token_end` still points to the end of the previous token but `start` is the start of the current token.
|
// case, `last_token_end` still points to the end of the previous token but `start` is the
|
||||||
// Calling `TextRange::new(start, self.last_token_end)` would panic in that case because `start > end`.
|
// start of the current token. Calling `TextRange::new(start, self.last_token_end)` would
|
||||||
// This path "detects" this case and creates an empty range instead.
|
// panic in that case because `start > end`. This path "detects" this case and creates an
|
||||||
if self.node_start() == start {
|
// empty range instead.
|
||||||
TextRange::empty(start)
|
//
|
||||||
|
// The reason it's `<=` instead of just `==` is because there could be whitespaces between
|
||||||
|
// the two tokens. For example:
|
||||||
|
//
|
||||||
|
// ```python
|
||||||
|
// # last token end
|
||||||
|
// # | current token (newline) start
|
||||||
|
// # v v
|
||||||
|
// def foo \n
|
||||||
|
// # ^
|
||||||
|
// # assume there's trailing whitespace here
|
||||||
|
// ```
|
||||||
|
//
|
||||||
|
// Or, there could tokens that are considered "trivia" and thus aren't emitted by the token
|
||||||
|
// source. These are comments and non-logical newlines. For example:
|
||||||
|
//
|
||||||
|
// ```python
|
||||||
|
// # last token end
|
||||||
|
// # v
|
||||||
|
// def foo # comment\n
|
||||||
|
// # ^ current token (newline) start
|
||||||
|
// ```
|
||||||
|
//
|
||||||
|
// In either of the above cases, there's a "gap" between the end of the last token and start
|
||||||
|
// of the current token.
|
||||||
|
if self.last_token_end <= start {
|
||||||
|
// We need to create an empty range at the last token end instead of the start because
|
||||||
|
// otherwise this node range will fall outside the range of it's parent node. Taking
|
||||||
|
// the above example:
|
||||||
|
//
|
||||||
|
// ```python
|
||||||
|
// if True:
|
||||||
|
// # function start
|
||||||
|
// # | function end
|
||||||
|
// # v v
|
||||||
|
// def foo # comment
|
||||||
|
// # ^ current token start
|
||||||
|
// ```
|
||||||
|
//
|
||||||
|
// Here, the current token start is the start of parameter range but the function ends
|
||||||
|
// at `foo`. Even if there's a function body, the range of parameters would still be
|
||||||
|
// before the comment.
|
||||||
|
|
||||||
|
// test_err node_range_with_gaps
|
||||||
|
// def foo # comment
|
||||||
|
// def bar(): ...
|
||||||
|
// def baz
|
||||||
|
TextRange::empty(self.last_token_end)
|
||||||
} else {
|
} else {
|
||||||
TextRange::new(start, self.last_token_end)
|
TextRange::new(start, self.last_token_end)
|
||||||
}
|
}
|
||||||
|
|
|
@ -1663,23 +1663,19 @@ impl<'src> Parser<'src> {
|
||||||
// x = 10
|
// x = 10
|
||||||
let type_params = self.try_parse_type_params();
|
let type_params = self.try_parse_type_params();
|
||||||
|
|
||||||
|
// test_ok function_def_parameter_range
|
||||||
|
// def foo(
|
||||||
|
// first: int,
|
||||||
|
// second: int,
|
||||||
|
// ) -> int: ...
|
||||||
|
|
||||||
// test_err function_def_unclosed_parameter_list
|
// test_err function_def_unclosed_parameter_list
|
||||||
// def foo(a: int, b:
|
// def foo(a: int, b:
|
||||||
// def foo():
|
// def foo():
|
||||||
// return 42
|
// return 42
|
||||||
// def foo(a: int, b: str
|
// def foo(a: int, b: str
|
||||||
// x = 10
|
// x = 10
|
||||||
let parameters_start = self.node_start();
|
let parameters = self.parse_parameters(FunctionKind::FunctionDef);
|
||||||
self.expect(TokenKind::Lpar);
|
|
||||||
let mut parameters = self.parse_parameters(FunctionKind::FunctionDef);
|
|
||||||
self.expect(TokenKind::Rpar);
|
|
||||||
|
|
||||||
// test_ok function_def_parameter_range
|
|
||||||
// def foo(
|
|
||||||
// first: int,
|
|
||||||
// second: int,
|
|
||||||
// ) -> int: ...
|
|
||||||
parameters.range = self.node_range(parameters_start);
|
|
||||||
|
|
||||||
let returns = if self.eat(TokenKind::Rarrow) {
|
let returns = if self.eat(TokenKind::Rarrow) {
|
||||||
if self.at_expr() {
|
if self.at_expr() {
|
||||||
|
@ -2844,19 +2840,16 @@ impl<'src> Parser<'src> {
|
||||||
pub(super) fn parse_parameters(&mut self, function_kind: FunctionKind) -> ast::Parameters {
|
pub(super) fn parse_parameters(&mut self, function_kind: FunctionKind) -> ast::Parameters {
|
||||||
let start = self.node_start();
|
let start = self.node_start();
|
||||||
|
|
||||||
|
if matches!(function_kind, FunctionKind::FunctionDef) {
|
||||||
|
self.expect(TokenKind::Lpar);
|
||||||
|
}
|
||||||
|
|
||||||
// TODO(dhruvmanila): This has the same problem as `parse_match_pattern_mapping`
|
// TODO(dhruvmanila): This has the same problem as `parse_match_pattern_mapping`
|
||||||
// has where if there are multiple kwarg or vararg, the last one will win and
|
// has where if there are multiple kwarg or vararg, the last one will win and
|
||||||
// the parser will drop the previous ones. Another thing is the vararg and kwarg
|
// the parser will drop the previous ones. Another thing is the vararg and kwarg
|
||||||
// uses `Parameter` (not `ParameterWithDefault`) which means that the parser cannot
|
// uses `Parameter` (not `ParameterWithDefault`) which means that the parser cannot
|
||||||
// recover well from `*args=(1, 2)`.
|
// recover well from `*args=(1, 2)`.
|
||||||
let mut parameters = ast::Parameters {
|
let mut parameters = ast::Parameters::empty(TextRange::default());
|
||||||
range: TextRange::default(),
|
|
||||||
posonlyargs: vec![],
|
|
||||||
args: vec![],
|
|
||||||
kwonlyargs: vec![],
|
|
||||||
vararg: None,
|
|
||||||
kwarg: None,
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut seen_default_param = false; // `a=10`
|
let mut seen_default_param = false; // `a=10`
|
||||||
let mut seen_positional_only_separator = false; // `/`
|
let mut seen_positional_only_separator = false; // `/`
|
||||||
|
@ -3094,6 +3087,10 @@ impl<'src> Parser<'src> {
|
||||||
self.add_error(ParseErrorType::ExpectedKeywordParam, star_range);
|
self.add_error(ParseErrorType::ExpectedKeywordParam, star_range);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if matches!(function_kind, FunctionKind::FunctionDef) {
|
||||||
|
self.expect(TokenKind::Rpar);
|
||||||
|
}
|
||||||
|
|
||||||
parameters.range = self.node_range(start);
|
parameters.range = self.node_range(start);
|
||||||
|
|
||||||
// test_err params_duplicate_names
|
// test_err params_duplicate_names
|
||||||
|
|
|
@ -0,0 +1,122 @@
|
||||||
|
---
|
||||||
|
source: crates/ruff_python_parser/tests/fixtures.rs
|
||||||
|
input_file: crates/ruff_python_parser/resources/inline/err/node_range_with_gaps.py
|
||||||
|
---
|
||||||
|
## AST
|
||||||
|
|
||||||
|
```
|
||||||
|
Module(
|
||||||
|
ModModule {
|
||||||
|
range: 0..41,
|
||||||
|
body: [
|
||||||
|
FunctionDef(
|
||||||
|
StmtFunctionDef {
|
||||||
|
range: 0..7,
|
||||||
|
is_async: false,
|
||||||
|
decorator_list: [],
|
||||||
|
name: Identifier {
|
||||||
|
id: "foo",
|
||||||
|
range: 4..7,
|
||||||
|
},
|
||||||
|
type_params: None,
|
||||||
|
parameters: Parameters {
|
||||||
|
range: 7..7,
|
||||||
|
posonlyargs: [],
|
||||||
|
args: [],
|
||||||
|
vararg: None,
|
||||||
|
kwonlyargs: [],
|
||||||
|
kwarg: None,
|
||||||
|
},
|
||||||
|
returns: None,
|
||||||
|
body: [],
|
||||||
|
},
|
||||||
|
),
|
||||||
|
FunctionDef(
|
||||||
|
StmtFunctionDef {
|
||||||
|
range: 18..32,
|
||||||
|
is_async: false,
|
||||||
|
decorator_list: [],
|
||||||
|
name: Identifier {
|
||||||
|
id: "bar",
|
||||||
|
range: 22..25,
|
||||||
|
},
|
||||||
|
type_params: None,
|
||||||
|
parameters: Parameters {
|
||||||
|
range: 25..27,
|
||||||
|
posonlyargs: [],
|
||||||
|
args: [],
|
||||||
|
vararg: None,
|
||||||
|
kwonlyargs: [],
|
||||||
|
kwarg: None,
|
||||||
|
},
|
||||||
|
returns: None,
|
||||||
|
body: [
|
||||||
|
Expr(
|
||||||
|
StmtExpr {
|
||||||
|
range: 29..32,
|
||||||
|
value: EllipsisLiteral(
|
||||||
|
ExprEllipsisLiteral {
|
||||||
|
range: 29..32,
|
||||||
|
},
|
||||||
|
),
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
},
|
||||||
|
),
|
||||||
|
FunctionDef(
|
||||||
|
StmtFunctionDef {
|
||||||
|
range: 33..40,
|
||||||
|
is_async: false,
|
||||||
|
decorator_list: [],
|
||||||
|
name: Identifier {
|
||||||
|
id: "baz",
|
||||||
|
range: 37..40,
|
||||||
|
},
|
||||||
|
type_params: None,
|
||||||
|
parameters: Parameters {
|
||||||
|
range: 40..40,
|
||||||
|
posonlyargs: [],
|
||||||
|
args: [],
|
||||||
|
vararg: None,
|
||||||
|
kwonlyargs: [],
|
||||||
|
kwarg: None,
|
||||||
|
},
|
||||||
|
returns: None,
|
||||||
|
body: [],
|
||||||
|
},
|
||||||
|
),
|
||||||
|
],
|
||||||
|
},
|
||||||
|
)
|
||||||
|
```
|
||||||
|
## Errors
|
||||||
|
|
||||||
|
|
|
||||||
|
1 | def foo # comment
|
||||||
|
| ^ Syntax Error: Expected '(', found newline
|
||||||
|
2 | def bar(): ...
|
||||||
|
3 | def baz
|
||||||
|
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
1 | def foo # comment
|
||||||
|
2 | def bar(): ...
|
||||||
|
| ^^^ Syntax Error: Expected ')', found 'def'
|
||||||
|
3 | def baz
|
||||||
|
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
1 | def foo # comment
|
||||||
|
2 | def bar(): ...
|
||||||
|
3 | def baz
|
||||||
|
| ^ Syntax Error: Expected '(', found newline
|
||||||
|
|
|
||||||
|
|
||||||
|
|
||||||
|
|
|
||||||
|
2 | def bar(): ...
|
||||||
|
3 | def baz
|
||||||
|
|
|
|
@ -167,7 +167,7 @@ Module(
|
||||||
conversion: None,
|
conversion: None,
|
||||||
format_spec: Some(
|
format_spec: Some(
|
||||||
FStringFormatSpec {
|
FStringFormatSpec {
|
||||||
range: 43..43,
|
range: 42..42,
|
||||||
elements: [],
|
elements: [],
|
||||||
},
|
},
|
||||||
),
|
),
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue