Replace LALRPOP parser with hand-written parser (#10036)

(Supersedes #9152, authored by @LaBatata101)

## Summary

This PR replaces the current parser generated from LALRPOP with a
hand-written recursive descent parser.

It also updates the grammar for [PEP
646](https://peps.python.org/pep-0646/) so that the parser outputs the
correct AST. For example, in `data[*x]`, the index expression is now a
tuple with a single starred expression instead of just a starred
expression.

Beyond the performance improvements, the parser is also error resilient
and can provide better error messages. The behavior as seen by any
downstream tools isn't changed. That is, the linter and formatter can
still assume that the parser will _stop_ at the first syntax error. This
will be updated in the following months.

For more details about the change here, refer to the PR corresponding to
the individual commits and the release blog post.

## Test Plan

Write _lots_ and _lots_ of tests for both valid and invalid syntax and
verify the output.

## Acknowledgements

- @MichaReiser for reviewing 100+ parser PRs and continuously providing
guidance throughout the project
- @LaBatata101 for initiating the transition to a hand-written parser in
#9152
- @addisoncrump for implementing the fuzzer which helped
[catch](https://github.com/astral-sh/ruff/pull/10903)
[a](https://github.com/astral-sh/ruff/pull/10910)
[lot](https://github.com/astral-sh/ruff/pull/10966)
[of](https://github.com/astral-sh/ruff/pull/10896)
[bugs](https://github.com/astral-sh/ruff/pull/10877)

---------

Co-authored-by: Victor Hugo Gomes <labatata101@linuxmail.org>
Co-authored-by: Micha Reiser <micha@reiser.io>
This commit is contained in:
Dhruv Manilawala 2024-04-18 17:57:39 +05:30 committed by GitHub
parent e09180b1df
commit 13ffb5bc19
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
852 changed files with 112948 additions and 103620 deletions

View file

@ -0,0 +1,105 @@
# These test cases specifically test parsing a list of with items that start with a
# left parenthesis. This makes parsing ambiguous as to whether the left parenthesis is to
# parenthesize the with items or part of a parenthesized expression. It's not to test the
# with statement itself.
# The following sections are separated by which node the
# opening parenthesis belongs to.
# Parenthesized with items
# ------------------------
#
# - The opening parenthesis belongs to the with statement.
# - The range of the first with item shouldn't include the parenthesis.
with (item): ...
with (item,): ... # with a trailing comma
with (((item))): ...
with (item1, item2): ...
with (item1, item2,): ... # with a trailing comma
with ((item1), (item2), item3 as f, (item4)): ...
with ((item1, item2), item3): ...
with ((x, y) as f): ...
with (item1 as f1, item2 as f2): ...
with (item1 as f1, item2 as f2,): ... # with a trailing comma
with (item == 10,): ...
with ((item := 10)): ...
with ((item := 10,)): ...
with ((*item,)): ...
with ((item1 := 10), item2): ...
with (item1 as f, (item2 := 10)): ...
with (foo()): ...
with (foo(),): ...
with (foo() as f): ...
with (f"{item := 42}"): ...
with (f"{(item := 42)}"): ...
with ((x for x in range(10)), item): ...
with (item, (x for x in range(10))): ...
with (item, (x for x in range(10)), item): ...
with (data[1:2]): ...
with (data[1:2] as f): ...
with ((x for x in iter) as y): ...
# Parenthesized expression
# ------------------------
#
# - The opening parenthesis belongs to the context expression of the first with item.
# - The range of the first with item should include the parenthesis.
with (item) as f: ...
with (item := 10): ...
with (item := 10) as f: ...
with ( item := 1 ): ...
with (item1 := 42), item2: ...
with (root + filename).read(): ... # Postfix expression
with (root + filename).read() as f: ... # Postfix expression
with (foo)(): ... # Postfix expression
with (foo)() as f: ... # Postfix expression
with (foo()) as f: ...
with (data[1:2]) as f: ...
with (1, 2, 3)[0]: ... # Postfix expression
with (1, 2, 3)[0] as f: ... # Postfix expression
with (item1), (item2): ...
with (open('a.py')), (open('b.py')): ...
with (yield x): ...
with ((yield x)): ...
with (yield from x): ...
with ((yield from x)): ...
with (yield x) as f: ...
with (yield x,) as f: ...
# Tuple expression
# ----------------
#
# - This is a sub-case of the parenthesized expression and requires transforming the list of
# with items from the speculative parsing to a single with item containing a tuple expression.
# - The opening parenthesis belongs to the tuple expression of the first with item.
# - The range of the first with item should include the parenthesis.
with (): ...
with () as f: ...
with (item := 42,): ...
with (1, item := 2): ...
with (item1 := 10, item2): ...
with (item1, item2 := 2, item3) as f: ...
with (item,) as f: ...
with (*item,): ...
with (*item,) as f: ...
with (item1, item2) as f: ...
with (item1, item2,) as f: ...
with (item1, item2), item3: ...
with ((item1, item2), item3) as f: ...
with (item1,), item2, (item3, item4) as f: ...
with (item1, item2) as f1, item3 as f2: ...
with (item1, *item2): ...
with (item1, *item2) as f: ...
with (item1 := 10, *item2): ...
with ((item1 := 10), *item2): ...
# Parenthesized generator expression
# ----------------------------------
#
# - The opening parenthesis belongs to the generator expression
# - The range of the with item should include the parenthesis
with (x for x in range(10)): ...
with (x async for x in range(10)): ...
with (x for x in range(10)), item: ...

View file

@ -0,0 +1,6 @@
x: int
x: int = 1
(x): 1 + 2
x: tuple[int] | int = (1,)
x: int if True else str = 1
x: lambda x: y = 1

View file

@ -0,0 +1,11 @@
assert 1 < 2
assert call()
assert a and b
assert lambda x: y
assert await x
assert x if True else y
assert x, "error"
assert x, lambda x: y
assert x, await x
assert x, x if True else y

View file

@ -0,0 +1,45 @@
x = (1, 2, 3)
(x, y) = (1, 2, 3)
[x, y] = (1, 2, 3)
x.y = (1, 2, 3)
x[y] = (1, 2, 3)
(x, *y) = (1, 2, 3)
# This last group of tests checks that assignments we expect to be parsed
# (including some interesting ones) continue to be parsed successfully.
*foo = 42
[x, y, z] = [1, 2, 3]
(x, y, z) = (1, 2, 3)
x[0] = 42
# This is actually a type error, not a syntax error. So check that it
# doesn't fail parsing.
5[0] = 42
x[1:2] = [42]
# This is actually a type error, not a syntax error. So check that it
# doesn't fail parsing.
5[1:2] = [42]
foo.bar = 42
# This is actually an attribute error, not a syntax error. So check that
# it doesn't fail parsing.
"foo".y = 42
foo = 42
[] = *data
() = *data
a, b = ab
a = b = c

View file

@ -0,0 +1,21 @@
x += 1
x.y += (1, 2, 3)
x[y] += (1, 2, 3)
# All possible augmented assignment tokens
x += 1
x -= 1
x *= 1
x /= 1
x //= 1
x %= 1
x **= 1
x &= 1
x |= 1
x ^= 1
x <<= 1
x >>= 1
x @= 1
# Mixed
a //= (a + b) - c ** 2

View file

@ -0,0 +1,52 @@
class Test:
...
class Test():
def __init__(self):
pass
class Test(a=1, *A, **k):
...
class Test:
def method():
a, b = data
class Test(A, B):
def __init__(self):
pass
def method_with_default(self, arg='default'):
pass
# Class with generic types:
# TypeVar
class Test[T](): ...
# TypeVar with bound
class Test[T: str](): ...
# TypeVar with tuple bound
class Test[T: (str, bytes)](): ...
# Multiple TypeVar
class Test[T, U](): ...
# Trailing comma
class Test[T, U,](): ...
# TypeVarTuple
class Test[*Ts](): ...
# ParamSpec
class Test[**P](): ...
# Mixed types
class Test[X, Y: str, *U, **P]():
pass

View file

@ -0,0 +1,13 @@
del x
del (x)
del a, b,
del a, (b, c), d
del [a, b]
del [a, [b, c], d]
del x.y
del x[y]
del (
x,
x.y,
x[y],
)

View file

@ -0,0 +1,47 @@
for target in iter:
pass
for target in (1, 2, 3):
pass
for target.attr in call():
pass
for target[0] in x.attr:
pass
for target in x <= y:
pass
for target in a and b:
pass
for a, b, c, in iter:
pass
for (a, b) in iter:
pass
for target in *x.attr:
pass
for target in [1, 2]:
pass
for *target in a, b, c,:
pass
else:
pass
for target in *x | y: ...
for target in *await x: ...
for target in await x: ...
for target in lambda x: x: ...
for target in x if True else y: ...
if x:
for target in iter:
pass
# This `else` is not part of the `for` statement, so don't raise an error
else:
pass

View file

@ -0,0 +1,9 @@
from a import b # comment
from . import a
from foo.bar import baz as b, FooBar as fb
from .a import b
from ... import c
from .......................... import d
from ..........................a.b.c import d
from module import (a, b as B, c,)
from a import *

View file

@ -0,0 +1,162 @@
def no_parameters():
pass
def positional_parameters(a, b, c):
pass
def positional_parameters_with_default_values(a, b=20, c=30):
pass
def positional_parameters_with_default_values2(a, b=20, /, c=30):
pass
def positional_only_and_positional_parameters(a, /, b, c):
pass
def pos_args_with_defaults_and_varargs_and_kwargs(a, b=20, /, c=30, *args, **kwargs):
pass
def keyword_only_parameters(*, a, b, c):
pass
def keyword_only_parameters_with_defaults(*, a, b=20, c=30):
pass
def kw_only_args_with_defaults_and_varargs(*args, a, b=20, c=30):
pass
def kw_only_args_with_defaults_and_kwargs(*, a, b=20, c=30, **kwargs):
pass
def kw_only_args_with_defaults_and_varargs_and_kwargs(*args, a, b=20, c=30, **kwargs):
pass
def pos_and_kw_only_args(a, b, /, c, *, d, e, f):
pass
def pos_and_kw_only_args_with_defaults(a, b, /, c, *, d, e=20, f=30):
pass
def pos_and_kw_only_args_with_defaults_and_varargs(a, b, /, c, *args, d, e=20, f=30):
pass
def pos_and_kw_only_args_with_defaults_and_kwargs(
a, b, /, c, *, d, e=20, f=30, **kwargs
):
pass
def pos_and_kw_only_args_with_defaults_and_varargs_and_kwargs(
a, b, /, c, *args, d, e=20, f=30, **kwargs
):
pass
def positional_and_keyword_parameters(a, b, c, *, d, e, f):
pass
def positional_and_keyword_parameters_with_defaults(a, b, c, *, d, e=20, f=30):
pass
def positional_and_keyword_parameters_with_defaults_and_varargs(
a, b, c, *args, d, e=20, f=30
):
pass
def positional_and_keyword_parameters_with_defaults_and_varargs_and_kwargs(
a, b, c, *args, d, e=20, f=30, **kwargs
):
pass
# Function definitions with type parameters
def func[T](a: T) -> T:
pass
def func[T: str](a: T) -> T:
pass
def func[T: (str, bytes)](a: T) -> T:
pass
def func[*Ts](*a: *Ts) -> Tuple[*Ts]:
pass
def func[**P](*args: P.args, **kwargs: P.kwargs):
pass
def func[T, U: str, *Ts, **P]():
pass
def ellipsis(): ...
def multiple_statements() -> int:
call()
pass
...
def foo(*args):
pass
def foo(**kwargs):
pass
def foo(*args, **kwargs):
pass
def foo(a, /):
pass
def foo(a, /, b):
pass
def foo(a=1, /,):
pass
def foo(a, b, /, *, c):
pass
def foo(kw=1, *, a):
pass
def foo(x: int, y: "str", z: 1 + 2):
pass
def foo(self, a=1, b=2, c=3):
pass

View file

@ -0,0 +1,37 @@
if 1: 10
elif 2: 20
else: 30
if True:
1
...
if x < 1:
...
else:
pass
if a:
pass
elif b:
...
if a and b:
...
elif True:
...
elif c:
...
elif d:
...
else:
f()
# Valid test expression
if a := b: ...
elif a := b: ...
if lambda x: x: ...
elif lambda x: x: ...
if await x: ...
elif await x: ...
if (yield x): ...
elif (yield x): ...

View file

@ -0,0 +1,5 @@
import a
import a.b.c
import a.b.c as d
import a, b, c
import foo.bar as a, a.b.c.d as abcd

View file

@ -0,0 +1,337 @@
# Cases sampled from Lib/test/test_patma.py
# case test_patma_098
match x:
case -0j:
y = 0
# case test_patma_142
match x:
case bytes(z):
y = 0
# case test_patma_073
match x:
case 0 if 0:
y = 0
case 0 if 1:
y = 1
# case test_patma_006
match 3:
case 0 | 1 | 2 | 3:
x = True
# case test_patma_049
match x:
case [0, 1] | [1, 0]:
y = 0
# case black_check_sequence_then_mapping
match x:
case [*_]:
return "seq"
case {}:
return "map"
# case test_patma_035
match x:
case {0: [1, 2, {}]}:
y = 0
case {0: [1, 2, {}] | True} | {1: [[]]} | {0: [1, 2, {}]} | [] | "X" | {}:
y = 1
case []:
y = 2
# case test_patma_107
match x:
case 0.25 + 1.75j:
y = 0
# case test_patma_097
match x:
case -0j:
y = 0
# case test_patma_007
match 4:
case 0 | 1 | 2 | 3:
x = True
# case test_patma_154
match x:
case 0 if x:
y = 0
# case test_patma_134
match x:
case {1: 0}:
y = 0
case {0: 0}:
y = 1
case {**z}:
y = 2
# case test_patma_185
match Seq():
case [*_]:
y = 0
# case test_patma_063
match x:
case 1:
y = 0
case 1:
y = 1
# case test_patma_248
match x:
case {"foo": bar}:
y = bar
# case test_patma_019
match (0, 1, 2):
case [0, 1, *x, 2]:
y = 0
# case test_patma_052
match x:
case [0]:
y = 0
case [1, 0] if (x := x[:0]):
y = 1
case [1, 0]:
y = 2
# case test_patma_191
match w:
case [x, y, *_]:
z = 0
# case test_patma_110
match x:
case -0.25 - 1.75j:
y = 0
# case test_patma_151
match (x,):
case [y]:
z = 0
# case test_patma_114
match x:
case A.B.C.D:
y = 0
# case test_patma_232
match x:
case None:
y = 0
# case test_patma_058
match x:
case 0:
y = 0
# case test_patma_233
match x:
case False:
y = 0
# case test_patma_078
match x:
case []:
y = 0
case [""]:
y = 1
case "":
y = 2
# case test_patma_156
match x:
case z:
y = 0
# case test_patma_189
match w:
case [x, y, *rest]:
z = 0
# case test_patma_042
match x:
case (0 as z) | (1 as z) | (2 as z) if z == x % 2:
y = 0
# case test_patma_034
match x:
case {0: [1, 2, {}]}:
y = 0
case {0: [1, 2, {}] | False} | {1: [[]]} | {0: [1, 2, {}]} | [] | "X" | {}:
y = 1
case []:
y = 2
# case test_patma_123
match (0, 1, 2):
case 0, *x:
y = 0
# case test_patma_126
match (0, 1, 2):
case *x, 2,:
y = 0
# case test_patma_151
match x,:
case y,:
z = 0
# case test_patma_152
match w, x:
case y, z:
v = 0
# case test_patma_153
match w := x,:
case y as v,:
z = 0
match x:
# F-strings aren't allowed as patterns but it's a soft syntax error in Python.
case f"{y}":
pass
match {"test": 1}:
case {
**rest,
}:
print(rest)
match {"label": "test"}:
case {
"label": str() | None as label,
}:
print(label)
match x:
case [0, 1,]:
y = 0
match x:
case (0, 1,):
y = 0
match x:
case (0,):
y = 0
match x,:
case z:
pass
match x, y:
case z:
pass
match x, y,:
case z:
pass
# PatternMatchSingleton
match x:
case None:
...
case True:
...
case False:
...
# PatternMatchValue
match x:
case a.b:
...
case a.b.c:
...
case '':
...
case b'':
...
case 1:
...
case 1.0:
...
case 1.0J:
...
case 1 + 1j:
...
case -1:
...
case -1.:
...
case -0b01:
...
case (1):
...
# PatternMatchOr
match x:
case 1 | 2:
...
case '' | 1.1 | -1 | 1 + 1j | a.b:
...
# PatternMatchAs
match x:
case a:
...
case a as b:
...
case 1 | 2 as two:
...
case 1 + 3j as sum:
...
case a.b as ab:
...
case _:
...
case _ as x:
...
# PatternMatchSequence
match x:
case 1, 2, 3:
...
case (1, 2, 3,):
...
case (1 + 2j, a, None, a.b):
...
case (1 as X, b) as S:
...
case [1, 2, 3 + 1j]:
...
case ([1,2], 3):
...
case [1]:
...
# PatternMatchStar
match x:
case *a,:
...
case *_,:
...
case [1, 2, *rest]:
...
case (*_, 1, 2):
...
# PatternMatchClass
match x:
case Point():
...
case a.b.Point():
...
case Point2D(x=0):
...
case Point2D(x=0, y=0,):
...
case Point2D(0, 1):
...
case Point2D([0, 1], y=1):
...
case Point2D(x=[0, 1], y=1):
...
# PatternMatchMapping
match x := b:
case {1: _}:
...
case {'': a, None: (1, 2), **rest}:
...
# Pattern guard
match y:
case a if b := c: ...
case e if 1 < 2: ...
# `match` as an identifier
match *a + b, c # ((match * a) + b), c
match *(a + b), c # (match * (a + b)), c
match (*a + b, c) # match ((*(a + b)), c)
match -a * b + c # (match - (a * b)) + c
match -(a * b) + c # (match - (a * b)) + c
match (-a) * b + c # ((match(-a)) * b) + c
match ().a # (match()).a
match (()).a # (match(())).a
match ((),).a # (match(())).a
match [a].b # (match[a]).b
match [a,].b # (match[(a,)]).b (not (match[a]).b)
match [(a,)].b # (match[(a,)]).b
match()[a:
b] # (match())[a: b]
if match := 1: pass
match match:
case 1: pass
case 2:
pass
match = lambda query: query == event
print(match(12))

View file

@ -0,0 +1,18 @@
# raise
raise
raise a
raise (a, b)
raise 1 < 2
raise a and b
raise lambda x: y
raise await x
raise x if True else y
# raise ... from ...
raise x from a
raise x from (a, b)
raise x from 1 < 2
raise x from a and b
raise x from lambda x: y
raise x from await x
raise x from x if True else y

View file

@ -0,0 +1,14 @@
return
return x
return *x
return *x | y
return *x, *y
return (x := 1)
return None
return x and y
return 1 < 2
return 1, 2,
return call()
return attr.value()
return await x
return lambda x: y

View file

@ -0,0 +1,13 @@
# Other simple statements are contained in their own files.
continue
break
if x: ...
if True: pass
1; 2; pass
1; ...; a if b else c
if c: B; del A
else: C
if x: yield x;

View file

@ -0,0 +1,99 @@
try:
...
except:
...
try:
...
except Exception1 as e:
...
except Exception2 as e:
...
try:
...
except Exception as e:
...
except:
...
finally:
...
try:
...
except:
...
else:
...
try:
...
except:
...
else:
...
finally:
...
try:
...
finally:
...
try:
...
else:
...
finally:
...
try:
...
except* GroupA as eg:
...
except* ExceptionGroup:
...
try:
raise ValueError(1)
except TypeError as e:
print(f"caught {type(e)}")
except OSError as e:
print(f"caught {type(e)}")
try:
raise ExceptionGroup("eg", [ValueError(1), TypeError(2), OSError(3), OSError(4)])
except* TypeError as e:
print(f"caught {type(e)} with nested {e.exceptions}")
except* OSError as e:
print(f"caught {type(e)} with nested {e.exceptions}")
try:
pass
except "exception":
pass
except 1:
pass
except True:
pass
except 1 + 1:
pass
except a | b:
pass
except x and y:
pass
except await x:
pass
except lambda x: x:
pass
except x if True else y:
pass
if True:
try:
pass
finally:
pass
# This `else` is not part of the `try` statement, so don't raise an error
else:
pass

View file

@ -0,0 +1,80 @@
type X = int
type X = int | str
type X = int | "ForwardRefY"
type X[T] = T | list[X[T]] # recursive
type X[T] = int
type X[T] = list[T] | set[T]
type X[T, *Ts, **P] = (T, Ts, P)
type X[T: int, *Ts, **P] = (T, Ts, P)
type X[T: (int, str), *Ts, **P] = (T, Ts, P)
# Soft keyword as alias name
type type = int
type match = int
type case = int
# Soft keyword as value
type foo = type
type foo = match
type foo = case
# Multiline definitions
type \
X = int
type X \
= int
type X = \
int
type X = (
int
)
type \
X[T] = T
type X \
[T] = T
type X[T] \
= T
# Simple statements
type X = int; type X = str; type X = type
class X: type X = int
type Point = tuple[float, float]
type Point[T] = tuple[T, T]
type IntFunc[**P] = Callable[P, int] # ParamSpec
type LabeledTuple[*Ts] = tuple[str, *Ts] # TypeVarTuple
type HashableSequence[T: Hashable] = Sequence[T] # TypeVar with bound
type IntOrStrSequence[T: (int, str)] = Sequence[T] # TypeVar with constraints
# Type as an identifier
type *a + b, c # ((type * a) + b), c
type *(a + b), c # (type * (a + b)), c
type (*a + b, c) # type ((*(a + b)), c)
type -a * b + c # (type - (a * b)) + c
type -(a * b) + c # (type - (a * b)) + c
type (-a) * b + c # ((type(-a)) * b) + c
type ().a # (type()).a
type (()).a # (type(())).a
type ((),).a # (type(())).a
type [a].b # (type[a]).b
type [a,].b # (type[(a,)]).b (not (type[a]).b)
type [(a,)].b # (type[(a,)]).b
type()[a:
b] # (type())[a: b]
if type := 1: pass
type = lambda query: query == event
print(type(12))
type(type)
a = (
type in C
)
a = (
type(b)
)
type (
X = int
)
type = 1
type = x = 1
x = type = 1
lambda x: type

View file

@ -0,0 +1,28 @@
while x:
...
while (x > 1) and y:
pass
else:
...
while x and y:
...
print('Hello World!')
else:
print('Olá, Mundo!')
...
while a := b: ...
while (a := b) and c: ...
while lambda x: x: ...
while await x: ...
if True:
while x:
pass
else:
pass
else:
pass

View file

@ -0,0 +1,14 @@
# This file only contains unparenthesized with items. Refer to ./ambiguous_lpar_with_items.py
# for parenthesized with items test cases
with item: ...
with item as f: ...
with item1, item2: ...
with item1 as f1, item2 as f2: ...
with x if True else y: ...
with x if True else y as f: ...
# Postfix expressions
with open() as f: ...
with open() as f.attr: ...