Replace LALRPOP parser with hand-written parser (#10036)

(Supersedes #9152, authored by @LaBatata101)

## Summary

This PR replaces the current parser generated from LALRPOP with a
hand-written recursive descent parser.

It also updates the grammar for [PEP
646](https://peps.python.org/pep-0646/) so that the parser outputs the
correct AST. For example, in `data[*x]`, the index expression is now a
tuple with a single starred expression instead of just a starred
expression.

Beyond the performance improvements, the parser is also error-resilient
and can provide better error messages. The behavior as seen by any
downstream tools isn't changed. That is, the linter and formatter can
still assume that the parser will _stop_ at the first syntax error. This
will be updated in the following months.

For more details about the change here, refer to the PRs corresponding to
the individual commits and the release blog post.

## Test Plan

Write _lots_ and _lots_ of tests for both valid and invalid syntax and
verify the output.

## Acknowledgements

- @MichaReiser for reviewing 100+ parser PRs and continuously providing
guidance throughout the project
- @LaBatata101 for initiating the transition to a hand-written parser in
#9152
- @addisoncrump for implementing the fuzzer which helped
[catch](https://github.com/astral-sh/ruff/pull/10903)
[a](https://github.com/astral-sh/ruff/pull/10910)
[lot](https://github.com/astral-sh/ruff/pull/10966)
[of](https://github.com/astral-sh/ruff/pull/10896)
[bugs](https://github.com/astral-sh/ruff/pull/10877)

---------

Co-authored-by: Victor Hugo Gomes <labatata101@linuxmail.org>
Co-authored-by: Micha Reiser <micha@reiser.io>
This commit is contained in:
Dhruv Manilawala 2024-04-18 17:57:39 +05:30 committed by GitHub
parent e09180b1df
commit 13ffb5bc19
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
852 changed files with 112948 additions and 103620 deletions

View file

@ -0,0 +1,50 @@
# This only tests the call arguments and not the expression before the opening parenthesis.
# Simple
call()
call(x, y)
call(x, y,) # Trailing comma
call(x=1, y=2)
call(*x)
call(**x)
# Order
call(x, y=1)
call(x, *y)
call(x, **y)
call(x=1, *y)
call(x=1, **y)
call(*x, **y)
call(*x, y, z)
call(**x, y=1, z=2)
call(*x1, *x2, **y1, **y2)
call(x=1, **y, z=1)
# Keyword expression
call(x=1 if True else 2)
call(x=await y)
call(x=lambda y: y)
call(x=(y := 1))
# Yield expression
call((yield x))
call((yield from x))
# Named expression
call(x := 1)
call(x := 1 for x in iter)
# Starred expressions
call(*x and y)
call(*x | y)
call(*await x)
call(*lambda x: x)
call(*x if True else y)
# Double starred
call(**x)
call(**x and y)
call(**await x)
call(**x if True else y)
call(**(yield x))
call(**lambda x: x)

View file

@ -0,0 +1,6 @@
value.attr
value.attr()
value().attr
value().attr().foo
value.attr.foo
(value).attr().foo

View file

@ -0,0 +1,15 @@
await x
await x + 1
await a and b
await f()
await [1, 2]
await {3, 4}
await {i: 5}
await 7, 8
await (9, 10)
await 1 == 1
await x if True else None
await (*x,)
await (lambda x: x)
await x ** -x
await x ** await y

View file

@ -0,0 +1,33 @@
# Simple
1 + 2
1 - 2
1 * 2
1 / 2
1 // 2
1 % 2
1 ** 2
1 | 2
1 ^ 2
1 & 2
1 >> 2
1 << 2
1 @ 2
# Same precedence
1 + 2 - 3 + 4
1 * 2 / 3 // 4 @ 5 % 6
1 << 2 >> 3 >> 4 << 5
# Different precedence
1 + 2 * 3
1 * 2 + 3
1 ** 2 * 3 - 4 @ 5 + 6 - 7 // 8
# With bitwise operators
1 | 2 & 3 ^ 4 + 5 @ 6 << 7 // 8 >> 9
# Associativity
1 + (2 + 3) + 4
1 + 2 + (3 + 4 + 5)
# Addition with a unary plus
x ++ y

View file

@ -0,0 +1,9 @@
a and b
a and b and c
a or b
a or b or c
a and b or c
a and b and c or d or e and f or g
a and not b or c
yield a and b or c
not a and b or c

View file

@ -0,0 +1,21 @@
# This only tests the expression before the opening parenthesis for the call expression
# and not the arguments.
call()
attr.expr()
subscript[1, 2]()
slice[:1]()
[1, 2, 3]()
(1, 2, 3)()
(x for x in iter)()
{1, 2, 3}()
{1: 2, 3: 4}()
(yield x)()
# These raise `TypeError` at runtime but are syntactically valid, so make sure they parse correctly.
True()
False()
None()
"string"()
1()
1.0()

View file

@ -0,0 +1,30 @@
# Simple
a == b
b < a
b > a
a >= b
a <= b
a != b
a is c
a in b
a not in c
a is not b
# Double operator mixed
a not in b is not c not in d not in e is not f
# Precedence check
a | b < c | d not in e & f
#     ^       ^^^^^^
# Lower precedence than bitwise operators: parsed as `(a | b) < (c | d) not in (e & f)`
# Unary `not` has lower precedence than the comparison operators, so it is only allowed
# at the start of the expression (where it applies to the whole comparison) and not anywhere else
not x not in y
x or y not in z and a
x == await y
x is not await y
# All operators have the same precedence
a < b == c > d is e not in f is not g <= h >= i != j

View file

@ -0,0 +1,41 @@
# Simple
{}
{1: 2}
{1: 2, a: 1, b: 'hello'}
# Mixed indentations
{
}
{
1:
2,
3
:4
}
# Nested
{{1: 2}: {3: {4: 5}}}
# Lambda expressions
{lambda x: x: 1}
{'A': lambda p: None, 'B': C,}
# Named expressions
{(x := 1): y}
{(x := 1): (y := 2)}
# Double star unpacking
{**d}
{a: b, **d}
{**a, **b}
{"a": "b", **c, "d": "e"}
{1: 2, **{'nested': 'dict'}}
{x * 1: y ** 2, **call()}
# Here, `not` isn't allowed, but parentheses reset the precedence
{**(not x)}
# Random expressions
{1: x if True else y}
{x if True else y: y for x in range(10) for y in range(10)}
{{1, 2}: 3, x: {1: 2,},}
{(x): (y), (z): (a)}

View file

@ -0,0 +1,16 @@
{y for y in (1, 2, 3)}
{x1: x2 for y in z}
{x + 1: 'x' for i in range(5)}
{b: c * 2 for c in d if x in w if y and yy if z}
{a: a ** 2 for b in c if d and e for f in j if k > h}
{a: b for b in c if d and e async for f in j if k > h}
{a: a for b, c in d}
# Non-parenthesized iter/if for the following expressions aren't allowed, so make sure
# it parses correctly for the parenthesized cases
{x: y for x in (yield y)}
{x: y for x in (yield from y)}
{x: y for x in (lambda y: y)}
{x: y for x in data if (yield y)}
{x: y for x in data if (yield from y)}
{x: y for x in data if (lambda y: y)}

View file

@ -0,0 +1,64 @@
# Empty f-strings
f""
F""
f''
f""""""
f''''''
f"{" f"}"
f"{foo!s}"
f"{3,}"
f"{3!=4:}"
f'{3:{"}"}>10}'
f'{3:{"{"}>10}'
f"{ foo = }"
f"{ foo = :.3f }"
f"{ foo = !s }"
f"{ 1, 2 = }"
f'{f"{3.1415=:.1f}":*^20}'
{"foo " f"bar {x + y} " "baz": 10}
match foo:
case "one":
pass
case "implicitly " "concatenated":
pass
f"\{foo}\{bar:\}"
f"\\{{foo\\}}"
f"""{
foo:x
y
z
}"""
f"{ ( foo ) = }"
f"normal {foo} {{another}} {bar} {{{three}}}"
f"normal {foo!a} {bar!s} {baz!r} {foobar}"
f"normal {x:y + 2}"
f"{x:{{1}.pop()}}"
f"{(lambda x:{x})}"
f"{x =}"
f"{ x = }"
f"{x=!a}"
f"{x:.3f!r =}"
f"{x = !r :.3f}"
f"{x:.3f=!r}"
"hello" f"{x}"
f"{x}" f"{y}"
f"{x}" "world"
f"Invalid args in command: {command, *args}"
"foo" f"{x}" "bar"
(
f"a"
F"b"
"c"
rf"d"
fr"e"
)
# With unicode strings
u"foo" f"{bar}" "baz" " some"
"foo" f"{bar}" u"baz" " some"
"foo" f"{bar}" "baz" u" some"
u"foo" f"bar {baz} really" u"bar" "no"

View file

@ -0,0 +1,20 @@
(x for target in iter)
(x async for target in iter)
(x for target in iter if x in y if a and b if c)
(x for target1 in iter1 if x and y for target2 in iter2 if a > b)
(x for target1 in iter1 if x and y async for target2 in iter2 if a > b)
# Named expression
(x := y + 1 for y in z)
# If expression
(x if y else y for y in z)
# Arguments
" ".join(
sql
for sql in (
"LIMIT %d" % limit if limit else None,
("OFFSET %d" % offset) if offset else None,
)
)

View file

@ -0,0 +1,21 @@
a if True else b
f() if x else None
a if b else c if d else e
1 + x if 1 < 0 else -1
a and b if x else False
x <= y if y else x
True if a and b else False
1, 1 if a else c
# Lambda is allowed in orelse expression
x if True else lambda y: y
# These test expressions are only allowed when parenthesized
x if (yield x) else y
x if (yield from x) else y
x if (lambda x: x) else y
# Split across multiple lines
(x
if y
else z)

View file

@ -0,0 +1,24 @@
lambda: a
lambda: 1
lambda x: 1
lambda x, y: ...
lambda a, b, c: 1
lambda a, b=20, c=30: 1
lambda x, y: x * y
lambda y, z=1: z * y
lambda *a: a
lambda *a, z, x=0: ...
lambda *, a, b, c: 1
lambda *, a, b=20, c=30: 1
lambda a, b, c, *, d, e: 0
lambda **kwargs: f()
lambda *args, **kwargs: f() + 1
lambda *args, a, b=1, **kwargs: f() + 1
lambda a, /: ...
lambda a, /, b: ...
lambda a=1, /,: ...
lambda a, b, /, *, c: ...
lambda kw=1, *, a: ...
lambda a, b=20, /, c=30: 1
lambda a, b, /, c, *, d, e: 0
lambda a, b, /, c, *d, e, **f: 0

View file

@ -0,0 +1,34 @@
# Simple lists
[]
[1]
[1,]
[1, 2, 3]
[1, 2, 3,]
# Mixed with indentations
[
]
[
1
]
[
1,
2,
]
# Nested
[[[1]]]
[[1, 2], [3, 4]]
# Named expression
[x := 2]
[x := 2,]
[1, x := 2, 3]
# Star expression
[1, *x, 3]
[1, *x | y, 3]
# Random expressions
[1 + 2, [1, 2, 3, 4], (a, b + c, d), {a, b, c}, {a: 1}, x := 2]
[call1(call2(value.attr()) for element in iter)]

View file

@ -0,0 +1,25 @@
x = [y for y in (1, 2, 3)]
[x for i in range(5)]
[b for c in d if x in w if y and yy if z]
[a for b in c if d and e for f in j if k > h]
[a for b in c if d and e async for f in j if k > h]
[1 for i in x in a]
[a for a, b in G]
[
await x for a, b in C
]
[i for i in await x if entity is not None]
[x for x in (l if True else L) if T]
[i for i in (await x if True else X) if F]
[i for i in await (x if True else X) if F]
[f for f in c(x if True else [])]
# Non-parenthesized iter/if for the following expressions aren't allowed, so make sure
# it parses correctly for the parenthesized cases
[x for x in (yield y)]
[x for x in (yield from y)]
[x for x in (lambda y: y)]
[x for x in data if (yield y)]
[x for x in data if (yield from y)]
[x for x in data if (lambda y: y)]

View file

@ -0,0 +1,11 @@
_
(_)
__
__init__
name
(name)
# Soft keywords used as name
match
case
type

View file

@ -0,0 +1,8 @@
(name := 0)
(name := (x * y))
(name := 1 + 1)
(name := (*x, y))
(name := x if True else y)
(name := lambda x: x)
(name := (yield x))
(name := (yield from x))

View file

@ -0,0 +1,40 @@
x = 123456789
x = 123456
x = .1
x = 1.
x = 1E+1
x = 1E-1
x = 1.000_000_01
x = 123456789.123456789
x = 123456789.123456789E123456789
x = 123456789E123456789
x = 123456789J
x = 123456789.123456789J
x = 0XB1ACC
x = 0B1011
x = 0O777
x = 0.000000006
x = 10000
x = 133333
# Attribute access
x = 1. .imag
x = 1E+1.imag
x = 1E-1.real
x = 123456789.123456789.hex()
x = 123456789.123456789E123456789 .real
x = 123456789E123456789 .conjugate()
x = 123456789J.real
x = 123456789.123456789J.__add__(0b1011.bit_length())
x = 0XB1ACC.conjugate()
x = 0B1011 .conjugate()
x = 0O777 .real
x = 0.000000006 .hex()
x = -100.0000J
if 10 .real:
...
# This is a type error, not a syntax error
y = 100[no]
y = 100(no)

View file

@ -0,0 +1,9 @@
(expr)
(expr)()
(expr)()()()
(a and b or c)
(lambda x: x)
(x := 2)
(yield x)
(yield from x)

View file

@ -0,0 +1,33 @@
# Simple sets (note that `{}` is an empty dict, not a set)
{}
{1}
{1,}
{1, 2, 3}
{1, 2, 3,}
# Mixed with indentations
{
}
{
1
}
{
1,
2,
}
# Nested
{{1}}
{{1, 2}, {3, 4}}
# Named expression
{x := 2}
{1, x := 2, 3}
{1, (x := 2),}
# Star expression
{1, *x, 3}
{1, *x | y, 3}
# Random expressions
{1 + 2, (a, b), {1, 2, 3}, {a: b, **d}}

View file

@ -0,0 +1,14 @@
{x for i in ll}
{b for c in d if x in w if y and yy if z}
{a for b in c if d and e for f in j if k > h}
{a for b in c if d and e async for f in j if k > h}
{a for a, b in G}
# Non-parenthesized iter/if for the following expressions aren't allowed, so make sure
# it parses correctly for the parenthesized cases
{x for x in (yield y)}
{x for x in (yield from y)}
{x for x in (lambda y: y)}
{x for x in data if (yield y)}
{x for x in data if (yield from y)}
{x for x in data if (lambda y: y)}

View file

@ -0,0 +1,21 @@
# Various combinations
x[:]
x[1:]
x[:2]
x[1:2]
x[::]
x[1::]
x[:2:]
x[1:2:]
x[::3]
x[1::3]
x[:2:3]
x[1:2:3]
# Named expression
x[y := 2]
x[(y := 2):]
x[y := 2,]
# These are two separate slice elements
x[1,:2,]

View file

@ -0,0 +1,8 @@
*a
*(a + 1)
*x.attr
array_slice = array[0, *indexes, -1]
array[0, *indexes, -1] = array_slice
array[*indexes_to_select, *indexes_to_select]
array[3:5, *indexes_to_select]

View file

@ -0,0 +1,17 @@
'Hello World'
"😎"
'Foo' 'Bar'
(
'A'
'B'
'C'
)
'''Olá, Mundo!'''
"""ABCDE"""
(
'''aB'''
'''cD'''
)
b'hello world'
b'bytes' b'concatenated'

View file

@ -0,0 +1,17 @@
data[0][0]
data[0, 1]
data[0:,]
data[0:, 1]
data[0:1, 2]
data[0:1:2, 3, a:b + 1]
data[a := b]
data[:, :11]
data[1, 2, 3]
data[~flag]
data[(a := 0):]
data[(a := 0):y]
# This is a single element tuple with a starred expression
data[*x]
data[*x and y]
data[*(x := y)]

View file

@ -0,0 +1,27 @@
# With parentheses
()
(())
((()), ())
(a,)
(a, b)
(a, b,)
((a, b))
# Without parentheses
a,
a, b
a, b,
# Starred expression
*a,
a, *b
*a | b, *await x, (), *()
(*a,)
(a, *b)
(*a | b, *await x, (), *())
# Named expression
(x := 1,)
(x, y := 2)
(x, y := 2, z)
x, (y := 2), z

View file

@ -0,0 +1,22 @@
# Simple
-1
+1
~1
not x
# Multiple
---1
-+~1
not-+~1
not not x
# Precedence check
- await 1
+ await 1 ** -2
~(1, 2)
-1 + 2
# Precedence check for `not` operator because it has lower precedence than the other unary operators
not a and b or not c | d and not e
not (x := 1)
not a | (not b)

View file

@ -0,0 +1,16 @@
yield
yield x
yield x + 1
yield x and y
yield call()
yield [1, 2]
yield {3, 4}
yield {x: 5}
yield x, y
yield (x, y)
yield x == y
yield (x := 1)
yield *y
yield x, *y
yield *x,
yield *x | y

View file

@ -0,0 +1,11 @@
yield from x
yield from x + 1
yield from x and y
yield from call()
yield from [1, 2]
yield from {3, 4}
yield from {x: 5}
yield from (x, y)
yield from x == y
yield from (x := 1)
yield from (x, *x | y)