Replace LALRPOP parser with hand-written parser (#10036)

(Supersedes #9152, authored by @LaBatata101)

## Summary

This PR replaces the current LALRPOP-generated parser with a
hand-written recursive descent parser.

It also updates the grammar for [PEP
646](https://peps.python.org/pep-0646/) so that the parser outputs the
correct AST. For example, in `data[*x]`, the index expression is now a
tuple with a single starred expression instead of just a starred
expression.

Beyond the performance improvements, the parser is also error resilient
and can provide better error messages. The behavior as seen by any
downstream tools isn't changed. That is, the linter and formatter can
still assume that the parser will _stop_ at the first syntax error. This
will be updated in the following months.

For more details about the changes here, refer to the PRs corresponding
to the individual commits and the release blog post.

## Test Plan

Write _lots_ and _lots_ of tests for both valid and invalid syntax and
verify the output.

## Acknowledgements

- @MichaReiser for reviewing 100+ parser PRs and continuously providing
guidance throughout the project
- @LaBatata101 for initiating the transition to a hand-written parser in
#9152
- @addisoncrump for implementing the fuzzer which helped
[catch](https://github.com/astral-sh/ruff/pull/10903)
[a](https://github.com/astral-sh/ruff/pull/10910)
[lot](https://github.com/astral-sh/ruff/pull/10966)
[of](https://github.com/astral-sh/ruff/pull/10896)
[bugs](https://github.com/astral-sh/ruff/pull/10877)

---------

Co-authored-by: Victor Hugo Gomes <labatata101@linuxmail.org>
Co-authored-by: Micha Reiser <micha@reiser.io>
This commit is contained in:
Dhruv Manilawala 2024-04-18 17:57:39 +05:30 committed by GitHub
parent e09180b1df
commit 13ffb5bc19
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
852 changed files with 112948 additions and 103620 deletions

View file

@ -0,0 +1,6 @@
# Check http://editorconfig.org for more information
# This is the main config file for this project:
root = true
[*.py]
insert_final_newline = false

View file

@ -0,0 +1,4 @@
x: *int = 1
x: yield a = 1
x: yield from b = 1
x: y := int = 1

View file

@ -0,0 +1,10 @@
"abc": str = "def"
call(): str = "no"
*x: int = 1, 2
# Tuple assignment
x,: int = 1
x, y: int = 1, 2
(x, y): int = 1, 2
# List assignment
[x]: int = 1
[x, y]: int = 1, 2

View file

@ -0,0 +1,3 @@
x: Any = *a and b
x: Any = x := 1
x: list = [x, *a | b, *a or b]

View file

@ -0,0 +1,2 @@
a: type X = int
lambda: type X = int

View file

@ -0,0 +1 @@
assert x,

View file

@ -0,0 +1 @@
assert

View file

@ -0,0 +1,4 @@
assert False, *x
assert False, assert x
assert False, yield x
assert False, x := 1

View file

@ -0,0 +1,4 @@
assert *x
assert assert x
assert yield x
assert x := 1

View file

@ -0,0 +1,4 @@
1 = 1
x = 1 = 2
x = 1 = y = 2 = z
["a", "b"] = ["a", "b"]

View file

@ -0,0 +1,5 @@
x = *a and b
x = *yield x
x = *yield from x
x = *lambda x: x
x = x := 1

View file

@ -0,0 +1,4 @@
a = pass = c
a + b
a = b = pass = c
a + b

View file

@ -0,0 +1,6 @@
x =
1 + 1
x = y =
2 + 2
x = = y
3 + 3

View file

@ -0,0 +1,8 @@
async class Foo: ...
async while test: ...
async x = 1
async async def foo(): ...
# TODO(dhruvmanila): Here, `match` is actually a Name token because
# of the soft keyword transformer
async match test:
case _: ...

View file

@ -0,0 +1,6 @@
1 += 1
"a" += "b"
*x += 1
pass += 1
x += pass
(x + y) += 1

View file

@ -0,0 +1,5 @@
x += *a and b
x += *yield x
x += *yield from x
x += *lambda x: x
x += y := 1

View file

@ -0,0 +1,4 @@
x +=
1 + 1
x += y +=
2 + 2

View file

@ -0,0 +1,3 @@
class Foo:
class Foo():
x = 42

View file

@ -0,0 +1,3 @@
class : ...
class (): ...
class (metaclass=ABC): ...

View file

@ -0,0 +1,3 @@
class Foo[T1, *T2(a, b):
pass
x = 10

View file

@ -0,0 +1,6 @@
# Here, the error is highlighted at the `pass` token
if True:
pass
# The parser is at the end of the program, so let's highlight
# at the newline token after `:`
if True:

View file

@ -0,0 +1 @@
if True: if True: pass

View file

@ -0,0 +1,2 @@
(async)
(x async x in iter)

View file

@ -0,0 +1,6 @@
@*x
@(*x)
@((*x))
@yield x
@yield from x
def foo(): ...

View file

@ -0,0 +1,5 @@
@def foo(): ...
@
def foo(): ...
@@
def foo(): ...

View file

@ -0,0 +1,3 @@
@x def foo(): ...
@x async def foo(): ...
@x class Foo: ...

View file

@ -0,0 +1,4 @@
@foo
async with x: ...
@foo
x = 1

View file

@ -0,0 +1,4 @@
del x, y.
z
del x, y[
z

View file

@ -0,0 +1 @@
del

View file

@ -0,0 +1,2 @@
import a..b
import a...b

View file

@ -0,0 +1,8 @@
try:
pass
except yield x:
pass
try:
pass
except* *x:
pass

View file

@ -0,0 +1,6 @@
try:
pass
except Exception as:
pass
except Exception as
pass

View file

@ -0,0 +1,13 @@
try:
pass
except as exc:
pass
# If a '*' is present then exception type is required
try:
pass
except*:
pass
except*
pass
except* as exc:
pass

View file

@ -0,0 +1,4 @@
try:
pass
except as:
pass

View file

@ -0,0 +1,12 @@
try:
pass
except x, y:
pass
except x, y as exc:
pass
try:
pass
except* x, y:
pass
except* x, y as eg:
pass

View file

@ -0,0 +1,2 @@
f"{}"
f"{ }"

View file

@ -0,0 +1,2 @@
f"{x!123}"
f"{x!'a'}"

View file

@ -0,0 +1,4 @@
# Starred expression inside f-string has a minimum precedence of bitwise or.
f"{*}"
f"{*x and y}"
f"{*yield x}"

View file

@ -0,0 +1 @@
f"{lambda x: x}"

View file

@ -0,0 +1,5 @@
f"{"
f"{foo!r"
f"{foo="
f"{"
f"""{"""

View file

@ -0,0 +1,2 @@
f"hello {x:"
f"hello {x:.3f"

View file

@ -0,0 +1,3 @@
for x in *a and b: ...
for x in yield a: ...
for target in x := 1: ...

View file

@ -0,0 +1,6 @@
for 1 in x: ...
for "a" in x: ...
for *x and y in z: ...
for *x | y in z: ...
for await x in z: ...
for [x, 1, y, *["a"]] in z: ...

View file

@ -0,0 +1,2 @@
for a b: ...
for a: ...

View file

@ -0,0 +1,2 @@
for x in:
a = 1

View file

@ -0,0 +1 @@
for in x: ...

View file

@ -0,0 +1,3 @@
from x import a.
from x import a.b
from x import a, b.c, d, e.f, g

View file

@ -0,0 +1,3 @@
from x import
from x import ()
from x import ,,

View file

@ -0,0 +1,2 @@
from
from import x

View file

@ -0,0 +1,4 @@
from x import (a, b
1 + 1
from x import (a, b,
2 + 2

View file

@ -0,0 +1,4 @@
from x import *, a
from x import a, *, b
from x import *, a as b
from x import *, *, a

View file

@ -0,0 +1,3 @@
from a import b,
from a import b as c,
from a import b, c,

View file

@ -0,0 +1,3 @@
def foo():
def foo() -> int:
x = 42

View file

@ -0,0 +1,3 @@
def foo() -> *int: ...
def foo() -> (*int): ...
def foo() -> yield x: ...

View file

@ -0,0 +1,2 @@
def (): ...
def () -> int: ...

View file

@ -0,0 +1 @@
def foo() -> : ...

View file

@ -0,0 +1,5 @@
def foo(a: int, b:
def foo():
return 42
def foo(a: int, b: str
x = 10

View file

@ -0,0 +1,3 @@
def foo[T1, *T2(a, b):
return a + b
x = 10

View file

@ -0,0 +1,2 @@
def foo() -> int,: ...
def foo() -> int, str: ...

View file

@ -0,0 +1 @@
global

View file

@ -0,0 +1 @@
global x + 1

View file

@ -0,0 +1,3 @@
global ,
global x,
global x, y,

View file

@ -0,0 +1,6 @@
if x:
pass
elif y
pass
else:
pass

View file

@ -0,0 +1,2 @@
if True:
1 + 1

View file

@ -0,0 +1,6 @@
if x:
pass
elif *x:
pass
elif yield x:
pass

View file

@ -0,0 +1,3 @@
if *x: ...
if yield x: ...
if yield from x: ...

View file

@ -0,0 +1,4 @@
if x
if x
pass
a = 1

View file

@ -0,0 +1 @@
if : ...

View file

@ -0,0 +1,6 @@
if True:
pass
elf:
pass
else:
pass

View file

@ -0,0 +1,4 @@
'hello' 'world
1 + 1
'hello' f'world {x}
2 + 2

View file

@ -0,0 +1,11 @@
(
'hello'
f'world {x}
)
1 + 1
(
'first'
'second
f'third'
)
2 + 2

View file

@ -0,0 +1 @@
import x as

View file

@ -0,0 +1 @@
import

View file

@ -0,0 +1,2 @@
import (a)
import (a, b)

View file

@ -0,0 +1,2 @@
import *
import x, *, y

View file

@ -0,0 +1,2 @@
import ,
import x, y,

View file

@ -0,0 +1,3 @@
b'123a𝐁c'
rb"a𝐁c123"
b"""123a𝐁c"""

View file

@ -0,0 +1,4 @@
del x + 1
del {'x': 1}
del {'x', 'y'}
del None, True, False, 1, 1.0, "abc"

View file

@ -0,0 +1,2 @@
f'hello \N{INVALID} world'
f"""hello \N{INVALID} world"""

View file

@ -0,0 +1,2 @@
'hello \N{INVALID} world'
"""hello \N{INVALID} world"""

View file

@ -0,0 +1,4 @@
lambda x: *y
lambda x: *y,
lambda x: *y, z
lambda x: *y and z

View file

@ -0,0 +1,2 @@
lambda x: yield y
lambda x: yield from y

View file

@ -0,0 +1,2 @@
match foo:
case _: ...

View file

@ -0,0 +1,5 @@
match x:
x = 1
match x:
match y:
case _: ...

View file

@ -0,0 +1,6 @@
match x:
case y if *a: ...
match x:
case y if (*a): ...
match x:
case y if yield x: ...

View file

@ -0,0 +1,7 @@
match (*x):
case _: ...
# Starred expression precedence test
match *x and y, z:
case _: ...
match yield x:
case _: ...

View file

@ -0,0 +1,2 @@
match x:
case y if: ...

View file

@ -0,0 +1,3 @@
# TODO(dhruvmanila): Here, `case` is a name token because of soft keyword transformer
match x:
case : ...

View file

@ -0,0 +1 @@
match foo: case _: ...

View file

@ -0,0 +1,2 @@
match *foo:
case _: ...

View file

@ -0,0 +1,3 @@
'first' b'second'
f'first' b'second'
'first' f'second' b'third'

View file

@ -0,0 +1,6 @@
if True: pass elif False: pass else: pass
if True: pass; elif False: pass; else: pass
for x in iter: break else: pass
for x in iter: break; else: pass
try: pass except exc: pass else: pass finally: pass
try: pass; except exc: pass; else: pass; finally: pass

View file

@ -0,0 +1 @@
nonlocal

View file

@ -0,0 +1 @@
nonlocal x + 1

View file

@ -0,0 +1,3 @@
nonlocal ,
nonlocal x,
nonlocal x, y,

View file

@ -0,0 +1,2 @@
def foo(x:): ...
def foo(x:,): ...

View file

@ -0,0 +1,2 @@
def foo(x=): ...
def foo(x: int = ): ...

View file

@ -0,0 +1,3 @@
def foo(arg: *int): ...
def foo(arg: yield int): ...
def foo(arg: x := int): ...

Some files were not shown because too many files have changed in this diff Show more