mirror of
https://github.com/astral-sh/ruff.git
synced 2025-07-16 01:25:11 +00:00
Use empty range when there's "gap" in token source (#11032)
## Summary This fixes a bug where the parser would panic when there is a "gap" in the token source. What's a gap? The reason it's `<=` instead of just `==` is because there could be whitespaces between the two tokens. For example: ```python # last token end # | current token (newline) start # v v def foo \n # ^ # assume there's trailing whitespace here ``` Or, there could tokens that are considered "trivia" and thus aren't emitted by the token source. These are comments and non-logical newlines. For example: ```python # last token end # v def foo # comment\n # ^ current token (newline) start ``` In either of the above cases, there's a "gap" between the end of the last token and start of the current token. ## Test Plan Add test cases and update the snapshots.
This commit is contained in:
parent
9b80cc09ee
commit
d3cd61f804
5 changed files with 195 additions and 26 deletions
|
@ -261,12 +261,59 @@ impl<'src> Parser<'src> {
|
|||
}
|
||||
|
||||
fn node_range(&self, start: TextSize) -> TextRange {
|
||||
// It's possible during error recovery that the parsing didn't consume any tokens. In that case,
|
||||
// `last_token_end` still points to the end of the previous token but `start` is the start of the current token.
|
||||
// Calling `TextRange::new(start, self.last_token_end)` would panic in that case because `start > end`.
|
||||
// This path "detects" this case and creates an empty range instead.
|
||||
if self.node_start() == start {
|
||||
TextRange::empty(start)
|
||||
// It's possible during error recovery that the parsing didn't consume any tokens. In that
|
||||
// case, `last_token_end` still points to the end of the previous token but `start` is the
|
||||
// start of the current token. Calling `TextRange::new(start, self.last_token_end)` would
|
||||
// panic in that case because `start > end`. This path "detects" this case and creates an
|
||||
// empty range instead.
|
||||
//
|
||||
// The reason it's `<=` instead of just `==` is because there could be whitespaces between
|
||||
// the two tokens. For example:
|
||||
//
|
||||
// ```python
|
||||
// # last token end
|
||||
// # | current token (newline) start
|
||||
// # v v
|
||||
// def foo \n
|
||||
// # ^
|
||||
// # assume there's trailing whitespace here
|
||||
// ```
|
||||
//
|
||||
// Or, there could tokens that are considered "trivia" and thus aren't emitted by the token
|
||||
// source. These are comments and non-logical newlines. For example:
|
||||
//
|
||||
// ```python
|
||||
// # last token end
|
||||
// # v
|
||||
// def foo # comment\n
|
||||
// # ^ current token (newline) start
|
||||
// ```
|
||||
//
|
||||
// In either of the above cases, there's a "gap" between the end of the last token and start
|
||||
// of the current token.
|
||||
if self.last_token_end <= start {
|
||||
// We need to create an empty range at the last token end instead of the start because
|
||||
// otherwise this node range will fall outside the range of it's parent node. Taking
|
||||
// the above example:
|
||||
//
|
||||
// ```python
|
||||
// if True:
|
||||
// # function start
|
||||
// # | function end
|
||||
// # v v
|
||||
// def foo # comment
|
||||
// # ^ current token start
|
||||
// ```
|
||||
//
|
||||
// Here, the current token start is the start of parameter range but the function ends
|
||||
// at `foo`. Even if there's a function body, the range of parameters would still be
|
||||
// before the comment.
|
||||
|
||||
// test_err node_range_with_gaps
|
||||
// def foo # comment
|
||||
// def bar(): ...
|
||||
// def baz
|
||||
TextRange::empty(self.last_token_end)
|
||||
} else {
|
||||
TextRange::new(start, self.last_token_end)
|
||||
}
|
||||
|
|
|
@ -1663,23 +1663,19 @@ impl<'src> Parser<'src> {
|
|||
// x = 10
|
||||
let type_params = self.try_parse_type_params();
|
||||
|
||||
// test_ok function_def_parameter_range
|
||||
// def foo(
|
||||
// first: int,
|
||||
// second: int,
|
||||
// ) -> int: ...
|
||||
|
||||
// test_err function_def_unclosed_parameter_list
|
||||
// def foo(a: int, b:
|
||||
// def foo():
|
||||
// return 42
|
||||
// def foo(a: int, b: str
|
||||
// x = 10
|
||||
let parameters_start = self.node_start();
|
||||
self.expect(TokenKind::Lpar);
|
||||
let mut parameters = self.parse_parameters(FunctionKind::FunctionDef);
|
||||
self.expect(TokenKind::Rpar);
|
||||
|
||||
// test_ok function_def_parameter_range
|
||||
// def foo(
|
||||
// first: int,
|
||||
// second: int,
|
||||
// ) -> int: ...
|
||||
parameters.range = self.node_range(parameters_start);
|
||||
let parameters = self.parse_parameters(FunctionKind::FunctionDef);
|
||||
|
||||
let returns = if self.eat(TokenKind::Rarrow) {
|
||||
if self.at_expr() {
|
||||
|
@ -2844,19 +2840,16 @@ impl<'src> Parser<'src> {
|
|||
pub(super) fn parse_parameters(&mut self, function_kind: FunctionKind) -> ast::Parameters {
|
||||
let start = self.node_start();
|
||||
|
||||
if matches!(function_kind, FunctionKind::FunctionDef) {
|
||||
self.expect(TokenKind::Lpar);
|
||||
}
|
||||
|
||||
// TODO(dhruvmanila): This has the same problem as `parse_match_pattern_mapping`
|
||||
// has where if there are multiple kwarg or vararg, the last one will win and
|
||||
// the parser will drop the previous ones. Another thing is the vararg and kwarg
|
||||
// uses `Parameter` (not `ParameterWithDefault`) which means that the parser cannot
|
||||
// recover well from `*args=(1, 2)`.
|
||||
let mut parameters = ast::Parameters {
|
||||
range: TextRange::default(),
|
||||
posonlyargs: vec![],
|
||||
args: vec![],
|
||||
kwonlyargs: vec![],
|
||||
vararg: None,
|
||||
kwarg: None,
|
||||
};
|
||||
let mut parameters = ast::Parameters::empty(TextRange::default());
|
||||
|
||||
let mut seen_default_param = false; // `a=10`
|
||||
let mut seen_positional_only_separator = false; // `/`
|
||||
|
@ -3094,6 +3087,10 @@ impl<'src> Parser<'src> {
|
|||
self.add_error(ParseErrorType::ExpectedKeywordParam, star_range);
|
||||
}
|
||||
|
||||
if matches!(function_kind, FunctionKind::FunctionDef) {
|
||||
self.expect(TokenKind::Rpar);
|
||||
}
|
||||
|
||||
parameters.range = self.node_range(start);
|
||||
|
||||
// test_err params_duplicate_names
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue