mirror of
https://github.com/astral-sh/ruff.git
synced 2025-07-23 21:17:53 +00:00
Replace LALRPOP parser with hand-written parser (#10036)
(Supersedes #9152, authored by @LaBatata101)

## Summary

This PR replaces the current LALRPOP-generated parser with a hand-written recursive descent parser.

It also updates the grammar for [PEP 646](https://peps.python.org/pep-0646/) so that the parser outputs the correct AST. For example, in `data[*x]`, the index expression is now a tuple with a single starred expression instead of just a starred expression.

Beyond the performance improvements, the parser is also error resilient and can provide better error messages. The behavior as seen by any downstream tools isn't changed. That is, the linter and formatter can still assume that the parser will _stop_ at the first syntax error. This will be updated in the following months.

For more details about the change here, refer to the PRs corresponding to the individual commits and the release blog post.

## Test Plan

Write _lots_ and _lots_ of tests for both valid and invalid syntax and verify the output.

## Acknowledgements

- @MichaReiser for reviewing 100+ parser PRs and continuously providing guidance throughout the project
- @LaBatata101 for initiating the transition to a hand-written parser in #9152
- @addisoncrump for implementing the fuzzer which helped [catch](https://github.com/astral-sh/ruff/pull/10903) [a](https://github.com/astral-sh/ruff/pull/10910) [lot](https://github.com/astral-sh/ruff/pull/10966) [of](https://github.com/astral-sh/ruff/pull/10896) [bugs](https://github.com/astral-sh/ruff/pull/10877)

---------

Co-authored-by: Victor Hugo Gomes <labatata101@linuxmail.org>
Co-authored-by: Micha Reiser <micha@reiser.io>
This commit is contained in:
parent
e09180b1df
commit
13ffb5bc19
852 changed files with 112948 additions and 103620 deletions
|
@ -71,8 +71,6 @@ for x in (y := [1, 2, 3]):
|
|||
async for x in (y := [1, 2, 3]):
|
||||
pass
|
||||
|
||||
del (x := 1)
|
||||
|
||||
try:
|
||||
pass
|
||||
except (e := Exception):
|
||||
|
|
|
@ -110,3 +110,14 @@ self.assertEqual(
|
|||
suite._tests[0].id().split(".")[0],
|
||||
os.path.basename(os.getcwd()),
|
||||
)
|
||||
|
||||
# PEP 646 introduced starred expression in indexes
|
||||
# https://peps.python.org/pep-0646/#change-1-star-expressions-in-indexes
|
||||
data[*x]
|
||||
data[*x,]
|
||||
data[
|
||||
*x,
|
||||
]
|
||||
data[ # comment 1
|
||||
*x, # comment 2
|
||||
] # comment 3
|
||||
|
|
|
@ -427,3 +427,9 @@ def function_with_one_argument_and_a_keyword_separator(
|
|||
*, argument: str
|
||||
) -> ReallyReallyReallyReallyReallyReallyReallyReallyLongName:
|
||||
pass
|
||||
|
||||
|
||||
# PEP 646 introduced type var tuple in parameter annotation
|
||||
# https://peps.python.org/pep-0646/#change-2-args-as-a-typevartuple
|
||||
def function_with_variadic_generics(*args: *tuple[int]): ...
|
||||
def function_with_variadic_generics(*args: *tuple[int],): ...
|
||||
|
|
|
@ -5,8 +5,8 @@ expression: comments.debug(test_case.source_code)
|
|||
{
|
||||
Node {
|
||||
kind: ModModule,
|
||||
range: 0..0,
|
||||
source: ``,
|
||||
range: 0..35,
|
||||
source: `⏎`,
|
||||
}: {
|
||||
"leading": [
|
||||
SourceComment {
|
||||
|
|
|
@ -1,13 +1,14 @@
|
|||
use ruff_formatter::{format_args, write, FormatRuleWithOptions};
|
||||
use ruff_formatter::{format_args, FormatRuleWithOptions};
|
||||
use ruff_python_ast::AnyNodeRef;
|
||||
use ruff_python_ast::ExprTuple;
|
||||
use ruff_text_size::Ranged;
|
||||
use ruff_text_size::{Ranged, TextRange};
|
||||
|
||||
use crate::builders::parenthesize_if_expands;
|
||||
use crate::comments::SourceComment;
|
||||
use crate::expression::parentheses::{
|
||||
empty_parenthesized, optional_parentheses, parenthesized, NeedsParentheses, OptionalParentheses,
|
||||
};
|
||||
use crate::other::commas::has_trailing_comma;
|
||||
use crate::prelude::*;
|
||||
|
||||
#[derive(Debug, Clone, Copy, Eq, PartialEq, Default)]
|
||||
|
@ -138,7 +139,27 @@ impl FormatNodeRule<ExprTuple> for FormatExprTuple {
|
|||
}
|
||||
[single] => match self.parentheses {
|
||||
TupleParentheses::Preserve if !is_parenthesized => {
|
||||
write!(f, [single.format(), token(",")])
|
||||
single.format().fmt(f)?;
|
||||
// The `TupleParentheses::Preserve` is only set by subscript expression
|
||||
// formatting. With PEP 646, a single element starred expression in the slice
|
||||
// position of a subscript expression is actually a tuple expression. For
|
||||
// example:
|
||||
//
|
||||
// ```python
|
||||
// data[*x]
|
||||
// # ^^ single element tuple expression without a trailing comma
|
||||
//
|
||||
// data[*x,]
|
||||
// # ^^^ single element tuple expression with a trailing comma
|
||||
// ```
|
||||
//
|
||||
//
|
||||
// This means that the formatter should only add a trailing comma if there is
|
||||
// one already.
|
||||
if has_trailing_comma(TextRange::new(single.end(), item.end()), f.context()) {
|
||||
token(",").fmt(f)?;
|
||||
}
|
||||
Ok(())
|
||||
}
|
||||
_ =>
|
||||
// A single element tuple always needs parentheses and a trailing comma, except when inside of a subscript
|
||||
|
|
|
@ -135,7 +135,7 @@ pub fn format_module_source(
|
|||
let source_type = options.source_type();
|
||||
let (tokens, comment_ranges) =
|
||||
tokens_and_ranges(source, source_type).map_err(|err| ParseError {
|
||||
offset: err.location(),
|
||||
location: err.location(),
|
||||
error: ParseErrorType::Lexical(err.into_error()),
|
||||
})?;
|
||||
let module = parse_tokens(tokens, source, source_type.as_mode())?;
|
||||
|
|
|
@ -17,7 +17,7 @@ impl FormatNodeRule<ModModule> for FormatModModule {
|
|||
if body.is_empty() {
|
||||
// Only preserve an empty line if the source contains an empty line too.
|
||||
if !f.context().comments().has_leading(item)
|
||||
&& lines_after(range.end(), f.context().source()) != 0
|
||||
&& lines_after(range.start(), f.context().source()) != 0
|
||||
{
|
||||
empty_line().fmt(f)
|
||||
} else {
|
||||
|
|
|
@ -9,20 +9,23 @@ use crate::MagicTrailingComma;
|
|||
/// should be respected).
|
||||
pub(crate) fn has_magic_trailing_comma(range: TextRange, context: &PyFormatContext) -> bool {
|
||||
match context.options().magic_trailing_comma() {
|
||||
MagicTrailingComma::Respect => {
|
||||
let first_token = SimpleTokenizer::new(context.source(), range)
|
||||
.skip_trivia()
|
||||
// Skip over any closing parentheses belonging to the expression
|
||||
.find(|token| token.kind() != SimpleTokenKind::RParen);
|
||||
|
||||
matches!(
|
||||
first_token,
|
||||
Some(SimpleToken {
|
||||
kind: SimpleTokenKind::Comma,
|
||||
..
|
||||
})
|
||||
)
|
||||
}
|
||||
MagicTrailingComma::Respect => has_trailing_comma(range, context),
|
||||
MagicTrailingComma::Ignore => false,
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns `true` if the first significant token in `range` is a comma.
///
/// The source text from `context` is tokenized within `range`. Trivia is skipped via
/// `skip_trivia()` and any `)` tokens are passed over; the result is `true` only when the
/// first remaining token is a comma. If no token remains, or the token is of any other
/// kind, the result is `false`.
///
/// This check does not read any formatter option from `context`; only the source text is used.
///
/// NOTE(review): `range` presumably starts right after the last element of the sequence
/// being inspected, so that a leading comma in `range` is that element's trailing comma —
/// confirm against callers.
|
||||
pub(crate) fn has_trailing_comma(range: TextRange, context: &PyFormatContext) -> bool {
|
||||
let first_token = SimpleTokenizer::new(context.source(), range)
|
||||
.skip_trivia()
|
||||
// Skip over any closing parentheses belonging to the expression
|
||||
.find(|token| token.kind() != SimpleTokenKind::RParen);
|
||||
|
||||
// Only a comma counts; `None` (nothing left in the range) or any other token kind is `false`.
matches!(
|
||||
first_token,
|
||||
Some(SimpleToken {
|
||||
kind: SimpleTokenKind::Comma,
|
||||
..
|
||||
})
|
||||
)
|
||||
}
|
||||
|
|
|
@ -73,7 +73,7 @@ pub fn format_range(
|
|||
|
||||
let (tokens, comment_ranges) =
|
||||
tokens_and_ranges(source, options.source_type()).map_err(|err| ParseError {
|
||||
offset: err.location(),
|
||||
location: err.location(),
|
||||
error: ParseErrorType::Lexical(err.into_error()),
|
||||
})?;
|
||||
|
||||
|
|
|
@ -13,6 +13,3 @@ input_file: crates/ruff_python_formatter/resources/test/fixtures/ruff/empty_mult
|
|||
```python
|
||||
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -11,6 +11,3 @@ input_file: crates/ruff_python_formatter/resources/test/fixtures/ruff/empty_trai
|
|||
```python
|
||||
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -77,8 +77,6 @@ for x in (y := [1, 2, 3]):
|
|||
async for x in (y := [1, 2, 3]):
|
||||
pass
|
||||
|
||||
del (x := 1)
|
||||
|
||||
try:
|
||||
pass
|
||||
except (e := Exception):
|
||||
|
@ -180,8 +178,6 @@ for x in (y := [1, 2, 3]):
|
|||
async for x in (y := [1, 2, 3]):
|
||||
pass
|
||||
|
||||
del (x := 1)
|
||||
|
||||
try:
|
||||
pass
|
||||
except (e := Exception):
|
||||
|
@ -207,6 +203,3 @@ def f():
|
|||
async def f():
|
||||
await (x := 1)
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -116,6 +116,17 @@ self.assertEqual(
|
|||
suite._tests[0].id().split(".")[0],
|
||||
os.path.basename(os.getcwd()),
|
||||
)
|
||||
|
||||
# PEP 646 introduced starred expression in indexes
|
||||
# https://peps.python.org/pep-0646/#change-1-star-expressions-in-indexes
|
||||
data[*x]
|
||||
data[*x,]
|
||||
data[
|
||||
*x,
|
||||
]
|
||||
data[ # comment 1
|
||||
*x, # comment 2
|
||||
] # comment 3
|
||||
```
|
||||
|
||||
## Output
|
||||
|
@ -231,7 +242,13 @@ self.assertEqual(
|
|||
suite._tests[0].id().split(".")[0],
|
||||
os.path.basename(os.getcwd()),
|
||||
)
|
||||
|
||||
# PEP 646 introduced starred expression in indexes
|
||||
# https://peps.python.org/pep-0646/#change-1-star-expressions-in-indexes
|
||||
data[*x]
|
||||
data[*x,]
|
||||
data[*x,]
|
||||
data[ # comment 1
|
||||
*x, # comment 2
|
||||
] # comment 3
|
||||
```
|
||||
|
||||
|
||||
|
||||
|
|
|
@ -433,6 +433,12 @@ def function_with_one_argument_and_a_keyword_separator(
|
|||
*, argument: str
|
||||
) -> ReallyReallyReallyReallyReallyReallyReallyReallyLongName:
|
||||
pass
|
||||
|
||||
|
||||
# PEP 646 introduced type var tuple in parameter annotation
|
||||
# https://peps.python.org/pep-0646/#change-2-args-as-a-typevartuple
|
||||
def function_with_variadic_generics(*args: *tuple[int]): ...
|
||||
def function_with_variadic_generics(*args: *tuple[int],): ...
|
||||
```
|
||||
|
||||
## Output
|
||||
|
@ -1014,4 +1020,12 @@ def function_with_one_argument_and_a_keyword_separator(
|
|||
*, argument: str
|
||||
) -> ReallyReallyReallyReallyReallyReallyReallyReallyLongName:
|
||||
pass
|
||||
|
||||
|
||||
# PEP 646 introduced type var tuple in parameter annotation
|
||||
# https://peps.python.org/pep-0646/#change-2-args-as-a-typevartuple
|
||||
def function_with_variadic_generics(*args: *tuple[int]): ...
|
||||
def function_with_variadic_generics(
|
||||
*args: *tuple[int],
|
||||
): ...
|
||||
```
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue