mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-03 18:28:56 +00:00
Replace LALRPOP parser with hand-written parser (#10036)
(Supersedes #9152, authored by @LaBatata101) ## Summary This PR replaces the current parser generated from LALRPOP with a hand-written recursive descent parser. It also updates the grammar for [PEP 646](https://peps.python.org/pep-0646/) so that the parser outputs the correct AST. For example, in `data[*x]`, the index expression is now a tuple with a single starred expression instead of just a starred expression. Beyond the performance improvements, the parser is also error resilient and can provide better error messages. The behavior as seen by any downstream tools isn't changed. That is, the linter and formatter can still assume that the parser will _stop_ at the first syntax error. This will be updated in the following months. For more details about the change here, refer to the PR corresponding to the individual commits and the release blog post. ## Test Plan Write _lots_ and _lots_ of tests for both valid and invalid syntax and verify the output. ## Acknowledgements - @MichaReiser for reviewing 100+ parser PRs and continuously providing guidance throughout the project - @LaBatata101 for initiating the transition to a hand-written parser in #9152 - @addisoncrump for implementing the fuzzer which helped [catch](https://github.com/astral-sh/ruff/pull/10903) [a](https://github.com/astral-sh/ruff/pull/10910) [lot](https://github.com/astral-sh/ruff/pull/10966) [of](https://github.com/astral-sh/ruff/pull/10896) [bugs](https://github.com/astral-sh/ruff/pull/10877) --------- Co-authored-by: Victor Hugo Gomes <labatata101@linuxmail.org> Co-authored-by: Micha Reiser <micha@reiser.io>
This commit is contained in:
parent
e09180b1df
commit
13ffb5bc19
852 changed files with 112948 additions and 103620 deletions
|
@ -0,0 +1,5 @@
|
|||
call(**yield x)
|
||||
call(** *x)
|
||||
call(***x)
|
||||
|
||||
call(**x := 1)
|
|
@ -0,0 +1 @@
|
|||
foo(a=1, b=2, c=3, b=4, a=5)
|
|
@ -0,0 +1,5 @@
|
|||
call(x + y = 1)
|
||||
call(x := 1 = 1)
|
||||
|
||||
call(yield x)
|
||||
call(yield from x)
|
|
@ -0,0 +1,4 @@
|
|||
call(x = yield y)
|
||||
call(x = yield from y)
|
||||
call(x = *y)
|
||||
call(x = (*y))
|
|
@ -0,0 +1,5 @@
|
|||
call(**kwargs, x)
|
||||
call(x=1, y)
|
||||
call(x=1, **kwargs, y)
|
||||
call(**kwargs, *args)
|
||||
call(**kwargs, (*args))
|
|
@ -0,0 +1 @@
|
|||
call(x,,y)
|
|
@ -0,0 +1 @@
|
|||
call(x y)
|
|
@ -0,0 +1,5 @@
|
|||
call( = 1)
|
||||
call(x = )
|
||||
call(*, y)
|
||||
|
||||
foo
|
|
@ -0,0 +1,3 @@
|
|||
call(*data for data in iter)
|
||||
call(*yield x)
|
||||
call(*yield from x)
|
|
@ -0,0 +1,4 @@
|
|||
call(
|
||||
|
||||
def foo():
|
||||
pass
|
|
@ -0,0 +1,4 @@
|
|||
call(x
|
||||
|
||||
def foo():
|
||||
pass
|
|
@ -0,0 +1,4 @@
|
|||
call(x,
|
||||
|
||||
def foo():
|
||||
pass
|
|
@ -0,0 +1,3 @@
|
|||
x.1
|
||||
x.1.0
|
||||
x.[0]
|
|
@ -0,0 +1,3 @@
|
|||
extra..dot
|
||||
multiple....dots
|
||||
multiple.....dots
|
|
@ -0,0 +1,6 @@
|
|||
# The `second` is a variable on another line and not part of the attribute expression.
|
||||
first.
|
||||
second
|
||||
|
||||
# No member access after the dot.
|
||||
last.
|
|
@ -0,0 +1,4 @@
|
|||
# No expression after `await`, an expression on another line
|
||||
await
|
||||
|
||||
x + y
|
|
@ -0,0 +1,5 @@
|
|||
# No expression after `await`, a statement on another line
|
||||
await
|
||||
|
||||
def foo():
|
||||
pass
|
|
@ -0,0 +1,17 @@
|
|||
# The parser parses all of the following expressions but reports an error for
|
||||
# invalid expressions.
|
||||
|
||||
# Nested await
|
||||
await await x
|
||||
|
||||
# Starred expressions
|
||||
await *x
|
||||
await (*x)
|
||||
|
||||
# Invalid expression as per precedence
|
||||
await yield x
|
||||
await lambda x: x
|
||||
await +x
|
||||
await -x
|
||||
await ~x
|
||||
await not x
|
|
@ -0,0 +1,3 @@
|
|||
x + lambda y: y
|
||||
|
||||
x - yield y
|
|
@ -0,0 +1,3 @@
|
|||
/ y
|
||||
|
||||
1 + 2
|
|
@ -0,0 +1,3 @@
|
|||
0 +
|
||||
|
||||
1 + 2
|
|
@ -0,0 +1,3 @@
|
|||
1 + 2 - 3 *
|
||||
|
||||
4 + 5
|
|
@ -0,0 +1,4 @@
|
|||
x++
|
||||
1 + 2
|
||||
x--
|
||||
1 - 2
|
|
@ -0,0 +1,2 @@
|
|||
x - y := (1, 2)
|
||||
x / y := 2
|
|
@ -0,0 +1,2 @@
|
|||
x + *y
|
||||
x ** *y
|
|
@ -0,0 +1,3 @@
|
|||
x and lambda y: y
|
||||
|
||||
x or yield y
|
|
@ -0,0 +1 @@
|
|||
and y
|
|
@ -0,0 +1,3 @@
|
|||
x and
|
||||
|
||||
1 + 2
|
|
@ -0,0 +1,2 @@
|
|||
x and a := b
|
||||
x or a := b
|
|
@ -0,0 +1,2 @@
|
|||
x and *y
|
||||
x or *y
|
|
@ -0,0 +1,7 @@
|
|||
x in not y
|
||||
|
||||
# `=>` instead of `>=`
|
||||
x => y
|
||||
|
||||
# Same here as well, `not` without `in` is considered to be a unary operator
|
||||
x not is y
|
|
@ -0,0 +1,3 @@
|
|||
x not in lambda y: y
|
||||
|
||||
x == yield y
|
|
@ -0,0 +1,3 @@
|
|||
> y
|
||||
|
||||
1 + 2
|
|
@ -0,0 +1,3 @@
|
|||
x >
|
||||
|
||||
1 + 2
|
|
@ -0,0 +1,4 @@
|
|||
# Without the `in`, this is considered to be a unary `not`
|
||||
x not
|
||||
|
||||
1 + 2
|
|
@ -0,0 +1,3 @@
|
|||
x is not
|
||||
|
||||
1 + 2
|
|
@ -0,0 +1,3 @@
|
|||
# This is not JavaScript
|
||||
x === y
|
||||
x !== y
|
|
@ -0,0 +1,2 @@
|
|||
x not in y := (1, 2)
|
||||
x > y := 2
|
|
@ -0,0 +1,5 @@
|
|||
x >= *y
|
||||
x not in *y
|
||||
|
||||
*x < y
|
||||
*x is not y
|
|
@ -0,0 +1,17 @@
|
|||
# Invalid target
|
||||
{x: y for 1 in y}
|
||||
{x: y for 'a' in y}
|
||||
{x: y for call() in y}
|
||||
{x: y for {a, b} in y}
|
||||
|
||||
# Invalid iter
|
||||
{x: y for x in *y}
|
||||
{x: y for x in yield y}
|
||||
{x: y for x in yield from y}
|
||||
{x: y for x in lambda y: y}
|
||||
|
||||
# Invalid if
|
||||
{x: y for x in data if *y}
|
||||
{x: y for x in data if yield y}
|
||||
{x: y for x in data if yield from y}
|
||||
{x: y for x in data if lambda y: y}
|
|
@ -0,0 +1,12 @@
|
|||
# Double star expression starts with bitwise OR precedence. Make sure we don't parse
|
||||
# the ones which are higher than that.
|
||||
|
||||
{**x := 1}
|
||||
{a: 1, **x if True else y}
|
||||
{**lambda x: x, b: 2}
|
||||
{a: 1, **x or y}
|
||||
{**x and y, b: 2}
|
||||
{a: 1, **not x, b: 2}
|
||||
{**x in y}
|
||||
{**x not in y}
|
||||
{**x < y}
|
|
@ -0,0 +1,9 @@
|
|||
# Double star means that the parser will try to parse it as a dictionary expression but
|
||||
# it's actually a comprehension.
|
||||
|
||||
{**x: y for x, y in data}
|
||||
|
||||
# TODO(dhruvmanila): This test case fails because there's no way to represent `**y`
|
||||
# in the AST. The parser tries to parse it as a binary expression but the range isn't
|
||||
# correct.
|
||||
# {x: **y for x, y in data}
|
|
@ -0,0 +1,4 @@
|
|||
{x:
|
||||
|
||||
def foo():
|
||||
pass
|
|
@ -0,0 +1,3 @@
|
|||
{x:
|
||||
|
||||
1 + 2
|
|
@ -0,0 +1,4 @@
|
|||
{x: 1,
|
||||
|
||||
def foo():
|
||||
pass
|
|
@ -0,0 +1,5 @@
|
|||
# Unparenthesized named expression not allowed in key
|
||||
|
||||
{x := 1: y, z := 2: a}
|
||||
|
||||
x + y
|
|
@ -0,0 +1,5 @@
|
|||
# Unparenthesized named expression not allowed in value
|
||||
|
||||
{x: y := 1, z: a := 2}
|
||||
|
||||
x + y
|
|
@ -0,0 +1,24 @@
|
|||
# Test cases for dictionary expressions where the parser recovers from a syntax error.
|
||||
|
||||
{,}
|
||||
|
||||
{1: 2,,3: 4}
|
||||
|
||||
{1: 2,,}
|
||||
|
||||
# Missing comma
|
||||
{1: 2 3: 4}
|
||||
|
||||
# No value
|
||||
{1: }
|
||||
|
||||
# No value for double star unpacking
|
||||
{**}
|
||||
{x: y, **, a: b}
|
||||
|
||||
# This is not a double star unpacking
|
||||
# {* *data}
|
||||
|
||||
# Star expression not allowed here
|
||||
{*x: y, z: a, *b: c}
|
||||
{x: *y, z: *a}
|
|
@ -0,0 +1,7 @@
|
|||
a = (🐶
|
||||
# comment 🐶
|
||||
)
|
||||
|
||||
a = (🐶 +
|
||||
# comment
|
||||
🐶)
|
|
@ -0,0 +1 @@
|
|||
👍
|
|
@ -0,0 +1,5 @@
|
|||
# Missing orelse expression, followed by a statement
|
||||
x if expr else
|
||||
|
||||
def foo():
|
||||
pass
|
|
@ -0,0 +1,4 @@
|
|||
# Missing orelse expression, followed by an expression
|
||||
x if expr else
|
||||
|
||||
1 + 1
|
|
@ -0,0 +1,5 @@
|
|||
# Missing test expression, followed by a statement
|
||||
x if
|
||||
|
||||
def foo():
|
||||
pass
|
|
@ -0,0 +1,4 @@
|
|||
# Missing test expression, followed by an expression
|
||||
x if
|
||||
|
||||
1 + 1
|
|
@ -0,0 +1,10 @@
|
|||
# Invalid test expression
|
||||
x if *expr else y
|
||||
x if lambda x: x else y
|
||||
x if yield x else y
|
||||
x if yield from x else y
|
||||
|
||||
# Invalid orelse expression
|
||||
x if expr else *orelse
|
||||
x if expr else yield y
|
||||
x if expr else yield from y
|
|
@ -0,0 +1 @@
|
|||
lambda a, b=20, c: 1
|
|
@ -0,0 +1,9 @@
|
|||
lambda a, a: 1
|
||||
|
||||
lambda a, *, a: 1
|
||||
|
||||
lambda a, a=20: 1
|
||||
|
||||
lambda a, *a: 1
|
||||
|
||||
lambda a, *, **a: 1
|
|
@ -0,0 +1,20 @@
|
|||
# Iterable unpacking not allowed
|
||||
[*x for x in y]
|
||||
|
||||
# Invalid target
|
||||
[x for 1 in y]
|
||||
[x for 'a' in y]
|
||||
[x for call() in y]
|
||||
[x for {a, b} in y]
|
||||
|
||||
# Invalid iter
|
||||
[x for x in *y]
|
||||
[x for x in yield y]
|
||||
[x for x in yield from y]
|
||||
[x for x in lambda y: y]
|
||||
|
||||
# Invalid if
|
||||
[x for x in data if *y]
|
||||
[x for x in data if yield y]
|
||||
[x for x in data if yield from y]
|
||||
[x for x in data if lambda y: y]
|
|
@ -0,0 +1,3 @@
|
|||
# Missing closing bracket 0: No elements
|
||||
|
||||
[
|
|
@ -0,0 +1,6 @@
|
|||
# Missing closing bracket 1: No elements on the same line, the one on the next line
|
||||
# is considered to be part of the list.
|
||||
|
||||
[
|
||||
|
||||
x + y
|
|
@ -0,0 +1,6 @@
|
|||
# Missing closing bracket 2: One element on the line, the other one on the next line
|
||||
# will be considered to be part of the list.
|
||||
|
||||
[1,
|
||||
|
||||
x + y
|
|
@ -0,0 +1,7 @@
|
|||
# Missing closing bracket 3: Multiple elements without a trailing comma and the next
|
||||
# token starts a statement.
|
||||
|
||||
[1, 2
|
||||
|
||||
def foo():
|
||||
pass
|
|
@ -0,0 +1,20 @@
|
|||
# Test cases for list expressions where the parser recovers from a syntax error.
|
||||
|
||||
[,]
|
||||
|
||||
[1,,2]
|
||||
|
||||
[1,,]
|
||||
|
||||
# Missing comma
|
||||
[1 2]
|
||||
|
||||
# Dictionary element in a list
|
||||
[1: 2]
|
||||
|
||||
# Missing expression
|
||||
[1, x + ]
|
||||
|
||||
[1; 2]
|
||||
|
||||
[*]
|
|
@ -0,0 +1,10 @@
|
|||
# For list expression, the minimum binding power of star expression is bitwise or.
|
||||
|
||||
[(*x), y]
|
||||
[*x in y, z]
|
||||
[*not x, z]
|
||||
[*x and y, z]
|
||||
[*x or y, z]
|
||||
[*x if True else y, z]
|
||||
[*lambda x: x, z]
|
||||
[*x := 2, z]
|
|
@ -0,0 +1,6 @@
|
|||
# Assignment expression target can only be an identifier
|
||||
|
||||
(x.y := 1)
|
||||
(x[y] := 1)
|
||||
(*x := 1)
|
||||
([x, y] := [1, 2])
|
|
@ -0,0 +1,3 @@
|
|||
# There are no parentheses, so this isn't parsed as named expression.
|
||||
|
||||
x :=
|
|
@ -0,0 +1,3 @@
|
|||
# EOF after the `:=` token
|
||||
|
||||
(x :=
|
|
@ -0,0 +1,6 @@
|
|||
# Missing expression, instead there's a function definition
|
||||
|
||||
(x :=
|
||||
|
||||
def foo():
|
||||
pass
|
|
@ -0,0 +1,6 @@
|
|||
# An expression is present, but it's most likely not what was intended for the
|
||||
# assignment value.
|
||||
|
||||
(x :=
|
||||
|
||||
x + y
|
|
@ -0,0 +1,5 @@
|
|||
# No expression on the right side of the assignment expression
|
||||
|
||||
(x := )
|
||||
|
||||
x + y
|
|
@ -0,0 +1,2 @@
|
|||
(*x for x in y)
|
||||
(x := 1, for x in y)
|
|
@ -0,0 +1,3 @@
|
|||
# Missing closing parentheses 0: No elements
|
||||
|
||||
(
|
|
@ -0,0 +1,6 @@
|
|||
# Missing closing parentheses 1: No elements on the same line, the one on the next line
|
||||
# is considered to be part of the parenthesized expression.
|
||||
|
||||
(
|
||||
|
||||
x + y
|
|
@ -0,0 +1,6 @@
|
|||
# Missing closing parentheses 2: One element on the line, the other one on the next line
|
||||
# will be considered to be part of the tuple.
|
||||
|
||||
(1,
|
||||
|
||||
x + y
|
|
@ -0,0 +1,7 @@
|
|||
# Missing closing parentheses 3: Multiple elements without a trailing comma and the next
|
||||
# token starts a statement.
|
||||
|
||||
(1, 2
|
||||
|
||||
def foo():
|
||||
pass
|
|
@ -0,0 +1,5 @@
|
|||
# Starred expression isn't allowed in a parenthesized expression.
|
||||
(*x)
|
||||
|
||||
# Unparenthesized named expression is allowed.
|
||||
x := 1
|
|
@ -0,0 +1,21 @@
|
|||
# Test cases for tuple expressions where the parser recovers from a syntax error.
|
||||
|
||||
(,)
|
||||
|
||||
(1,,2)
|
||||
|
||||
(1,,)
|
||||
|
||||
# Missing comma
|
||||
(1 2)
|
||||
|
||||
# Dictionary element in a tuple
|
||||
(1: 2)
|
||||
|
||||
# Missing expression
|
||||
(1, x + )
|
||||
|
||||
(1; 2)
|
||||
|
||||
# Unparenthesized named expression is not allowed
|
||||
x, y := 2, z
|
|
@ -0,0 +1,20 @@
|
|||
# For tuple expression, the minimum binding power of star expression is bitwise or.
|
||||
# Test the first and any other element as there are two separate calls.
|
||||
|
||||
(*x in y, z, *x in y)
|
||||
(*not x, z, *not x)
|
||||
(*x and y, z, *x and y)
|
||||
(*x or y, z, *x or y)
|
||||
(*x if True else y, z, *x if True else y)
|
||||
(*lambda x: x, z, *lambda x: x)
|
||||
(*x := 2, z, *x := 2)
|
||||
|
||||
|
||||
# Non-parenthesized
|
||||
*x in y, z, *x in y
|
||||
*not x, z, *not x
|
||||
*x and y, z, *x and y
|
||||
*x or y, z, *x or y
|
||||
*x if True else y, z, *x if True else y
|
||||
*lambda x: x, z, *lambda x: x
|
||||
*x := 2, z, *x := 2
|
|
@ -0,0 +1,20 @@
|
|||
# Iterable unpacking not allowed
|
||||
{*x for x in y}
|
||||
|
||||
# Invalid target
|
||||
{x for 1 in y}
|
||||
{x for 'a' in y}
|
||||
{x for call() in y}
|
||||
{x for {a, b} in y}
|
||||
|
||||
# Invalid iter
|
||||
{x for x in *y}
|
||||
{x for x in yield y}
|
||||
{x for x in yield from y}
|
||||
{x for x in lambda y: y}
|
||||
|
||||
# Invalid if
|
||||
{x for x in data if *y}
|
||||
{x for x in data if yield y}
|
||||
{x for x in data if yield from y}
|
||||
{x for x in data if lambda y: y}
|
|
@ -0,0 +1,3 @@
|
|||
# Missing closing curly brace 0: No elements
|
||||
|
||||
{
|
|
@ -0,0 +1,6 @@
|
|||
# Missing closing curly brace 1: No elements on the same line, the one on the next line
|
||||
# is considered to be part of the set.
|
||||
|
||||
{
|
||||
|
||||
x + y
|
|
@ -0,0 +1,6 @@
|
|||
# Missing closing curly brace 2: One element on the line, the other one on the next line
|
||||
# will be considered to be part of the set.
|
||||
|
||||
{1,
|
||||
|
||||
x + y
|
|
@ -0,0 +1,7 @@
|
|||
# Missing closing curly brace 3: Multiple elements without a trailing comma and the next
|
||||
# token starts a statement.
|
||||
|
||||
{1, 2
|
||||
|
||||
def foo():
|
||||
pass
|
|
@ -0,0 +1,22 @@
|
|||
# Test cases for set expressions where the parser recovers from a syntax error.
|
||||
# There are valid expressions in between invalid ones to verify that.
|
||||
# These are the same as for the list expressions.
|
||||
|
||||
{,}
|
||||
|
||||
{1,,2}
|
||||
|
||||
{1,,}
|
||||
|
||||
# Missing comma
|
||||
{1 2}
|
||||
|
||||
# Key-value pair inside braces (this is a valid dictionary, not a set)
|
||||
{1: 2}
|
||||
|
||||
# Missing expression
|
||||
{1, x + }
|
||||
|
||||
{1; 2}
|
||||
|
||||
[*]
|
|
@ -0,0 +1,10 @@
|
|||
# For set expression, the minimum binding power of star expression is bitwise or.
|
||||
|
||||
{(*x), y}
|
||||
{*x in y, z}
|
||||
{*not x, z}
|
||||
{*x and y, z}
|
||||
{*x or y, z}
|
||||
{*x if True else y, z}
|
||||
{*lambda x: x, z}
|
||||
{*x := 2, z}
|
|
@ -0,0 +1,12 @@
|
|||
x[x := 1:]
|
||||
|
||||
# Starred expression
|
||||
x[*x:]
|
||||
x[:*x]
|
||||
x[::*x]
|
||||
|
||||
# Empty slice
|
||||
x[]
|
||||
|
||||
# Mixed starred expression and named expression
|
||||
x[*x := 1]
|
|
@ -0,0 +1,3 @@
|
|||
x[:
|
||||
|
||||
x + y
|
|
@ -0,0 +1,4 @@
|
|||
x[::
|
||||
|
||||
def foo():
|
||||
pass
|
|
@ -0,0 +1 @@
|
|||
not x := 1
|
|
@ -0,0 +1,2 @@
|
|||
-x := 1
|
||||
not x := 1
|
|
@ -0,0 +1,3 @@
|
|||
not
|
||||
|
||||
x + y
|
|
@ -0,0 +1,3 @@
|
|||
+
|
||||
|
||||
x + y
|
|
@ -0,0 +1,4 @@
|
|||
# Unparenthesized named expressions are not allowed
|
||||
yield x := 1
|
||||
|
||||
yield 1, x := 2, 3
|
|
@ -0,0 +1,4 @@
|
|||
# Cannot use starred expression here
|
||||
yield (*x)
|
||||
|
||||
yield *x and y, z
|
|
@ -0,0 +1,4 @@
|
|||
# Yield from doesn't allow top-level starred expression unlike yield
|
||||
|
||||
yield from *x
|
||||
yield from *x, y
|
|
@ -0,0 +1,9 @@
|
|||
# Unparenthesized named expression
|
||||
yield from x := 1
|
||||
|
||||
# Unparenthesized tuple expression
|
||||
yield from x, y
|
||||
|
||||
# This is parsed as a tuple expression
|
||||
# vvvvvvvvvvvvv
|
||||
yield from (x, *x and y)
|
Loading…
Add table
Add a link
Reference in a new issue