Replace LALRPOP parser with hand-written parser (#10036)

(Supersedes #9152, authored by @LaBatata101)

## Summary

This PR replaces the current parser generated from LALRPOP with a
hand-written recursive descent parser.

It also updates the grammar for [PEP
646](https://peps.python.org/pep-0646/) so that the parser outputs the
correct AST. For example, in `data[*x]`, the index expression is now a
tuple with a single starred expression instead of just a starred
expression.

Beyond the performance improvements, the parser is also error-resilient
and can provide better error messages. The behavior as seen by any
downstream tools isn't changed. That is, the linter and formatter can
still assume that the parser will _stop_ at the first syntax error. This
behavior will be updated in the following months.

For more details about the changes here, refer to the PRs corresponding to
the individual commits and the release blog post.

## Test Plan

Write _lots_ and _lots_ of tests for both valid and invalid syntax and
verify the output.

## Acknowledgements

- @MichaReiser for reviewing 100+ parser PRs and continuously providing
guidance throughout the project
- @LaBatata101 for initiating the transition to a hand-written parser in
#9152
- @addisoncrump for implementing the fuzzer which helped
[catch](https://github.com/astral-sh/ruff/pull/10903)
[a](https://github.com/astral-sh/ruff/pull/10910)
[lot](https://github.com/astral-sh/ruff/pull/10966)
[of](https://github.com/astral-sh/ruff/pull/10896)
[bugs](https://github.com/astral-sh/ruff/pull/10877)

---------

Co-authored-by: Victor Hugo Gomes <labatata101@linuxmail.org>
Co-authored-by: Micha Reiser <micha@reiser.io>
This commit is contained in:
Dhruv Manilawala 2024-04-18 17:57:39 +05:30 committed by GitHub
parent e09180b1df
commit 13ffb5bc19
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
852 changed files with 112948 additions and 103620 deletions

2
.gitattributes vendored
View file

@ -8,5 +8,7 @@ crates/ruff_linter/resources/test/fixtures/pycodestyle/W391_3.py text eol=crlf
crates/ruff_python_formatter/resources/test/fixtures/ruff/docstring_code_examples_crlf.py text eol=crlf
crates/ruff_python_formatter/tests/snapshots/format@docstring_code_examples_crlf.py.snap text eol=crlf
crates/ruff_python_parser/resources/inline linguist-generated=true
ruff.schema.json linguist-generated=true text=auto eol=lf
*.md.snap linguist-language=Markdown

219
Cargo.lock generated
View file

@ -143,15 +143,6 @@ version = "0.7.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "96d30a06541fbafbc7f82ed10c06164cfbd2c401138f6addd8404629c4b16711"
[[package]]
name = "ascii-canvas"
version = "3.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8824ecca2e851cec16968d54a01dd372ef8f95b244fb84b84e70128be347c3c6"
dependencies = [
"term",
]
[[package]]
name = "autocfg"
version = "1.1.0"
@ -173,21 +164,6 @@ dependencies = [
"serde",
]
[[package]]
name = "bit-set"
version = "0.5.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0700ddab506f33b20a03b13996eccd309a48e5ff77d0d95926aa0210fb4e95f1"
dependencies = [
"bit-vec",
]
[[package]]
name = "bit-vec"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "349f9b6a179ed607305526ca489b34ad0a41aed5f7980fa90eb03160b69598fb"
[[package]]
name = "bitflags"
version = "1.3.2"
@ -640,16 +616,6 @@ dependencies = [
"dirs-sys 0.4.1",
]
[[package]]
name = "dirs-next"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1"
dependencies = [
"cfg-if",
"dirs-sys-next",
]
[[package]]
name = "dirs-sys"
version = "0.3.7"
@ -673,17 +639,6 @@ dependencies = [
"windows-sys 0.48.0",
]
[[package]]
name = "dirs-sys-next"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d"
dependencies = [
"libc",
"redox_users",
"winapi",
]
[[package]]
name = "drop_bomb"
version = "0.1.5"
@ -702,15 +657,6 @@ version = "1.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "11157ac094ffbdde99aa67b23417ebdd801842852b500e395a45a9c0aac03e4a"
[[package]]
name = "ena"
version = "0.14.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c533630cf40e9caa44bd91aadc88a75d75a4c3a12b4cfde353cbed41daa1e1f1"
dependencies = [
"log",
]
[[package]]
name = "encode_unicode"
version = "0.3.6"
@ -783,12 +729,6 @@ dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "fixedbitset"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80"
[[package]]
name = "flate2"
version = "1.0.28"
@ -1147,15 +1087,6 @@ dependencies = [
"either",
]
[[package]]
name = "itertools"
version = "0.11.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57"
dependencies = [
"either",
]
[[package]]
name = "itertools"
version = "0.12.1"
@ -1206,33 +1137,6 @@ dependencies = [
"libc",
]
[[package]]
name = "lalrpop"
version = "0.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "55cb077ad656299f160924eb2912aa147d7339ea7d69e1b5517326fdcec3c1ca"
dependencies = [
"ascii-canvas",
"bit-set",
"ena",
"itertools 0.11.0",
"lalrpop-util",
"petgraph",
"regex",
"regex-syntax 0.8.2",
"string_cache",
"term",
"tiny-keccak",
"unicode-xid",
"walkdir",
]
[[package]]
name = "lalrpop-util"
version = "0.20.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "507460a910eb7b32ee961886ff48539633b788a36b65692b95f225b844c82553"
[[package]]
name = "lazy_static"
version = "1.4.0"
@ -1333,16 +1237,6 @@ version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "01cda141df6706de531b6c46c3a33ecca755538219bd484262fa09410c13539c"
[[package]]
name = "lock_api"
version = "0.4.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3c168f8615b12bc01f9c17e2eb0cc07dcae1940121185446edc3744920e8ef45"
dependencies = [
"autocfg",
"scopeguard",
]
[[package]]
name = "log"
version = "0.4.21"
@ -1437,12 +1331,6 @@ version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "308d96db8debc727c3fd9744aac51751243420e46edf401010908da7f8d5e57c"
[[package]]
name = "new_debug_unreachable"
version = "1.0.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e4a24736216ec316047a1fc4252e27dabb04218aa4a3f37c6e7ddbf1f9782b54"
[[package]]
name = "nix"
version = "0.28.0"
@ -1561,29 +1449,6 @@ version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39"
[[package]]
name = "parking_lot"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f"
dependencies = [
"lock_api",
"parking_lot_core",
]
[[package]]
name = "parking_lot_core"
version = "0.9.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4c42a9226546d68acdd9c0a280d17ce19bfe27a46bf68784e4066115788d008e"
dependencies = [
"cfg-if",
"libc",
"redox_syscall",
"smallvec",
"windows-targets 0.48.5",
]
[[package]]
name = "paste"
version = "1.0.14"
@ -1687,23 +1552,13 @@ version = "2.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e"
[[package]]
name = "petgraph"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1d3afd2628e69da2be385eb6f2fd57c8ac7977ceeff6dc166ff1657b0e386a9"
dependencies = [
"fixedbitset",
"indexmap",
]
[[package]]
name = "phf"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ade2d8b8f33c7333b51bcf0428d37e217e9f32192ae4772156f65063b8ce03dc"
dependencies = [
"phf_shared 0.11.2",
"phf_shared",
]
[[package]]
@ -1713,7 +1568,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e8d39688d359e6b34654d328e262234662d16cc0f60ec8dcbe5e718709342a5a"
dependencies = [
"phf_generator",
"phf_shared 0.11.2",
"phf_shared",
]
[[package]]
@ -1722,19 +1577,10 @@ version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48e4cc64c2ad9ebe670cb8fd69dd50ae301650392e81c05f9bfcb2d5bdbc24b0"
dependencies = [
"phf_shared 0.11.2",
"phf_shared",
"rand",
]
[[package]]
name = "phf_shared"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096"
dependencies = [
"siphasher",
]
[[package]]
name = "phf_shared"
version = "0.11.2"
@ -1773,12 +1619,6 @@ version = "0.2.17"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
[[package]]
name = "precomputed-hash"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c"
[[package]]
name = "pretty_assertions"
version = "1.4.0"
@ -2336,22 +2176,24 @@ dependencies = [
name = "ruff_python_parser"
version = "0.0.0"
dependencies = [
"annotate-snippets 0.9.2",
"anyhow",
"bitflags 2.5.0",
"bstr",
"drop_bomb",
"insta",
"is-macro",
"itertools 0.12.1",
"lalrpop",
"lalrpop-util",
"memchr",
"ruff_python_ast",
"ruff_source_file",
"ruff_text_size",
"rustc-hash",
"static_assertions",
"tiny-keccak",
"unicode-ident",
"unicode-normalization",
"unicode_names2",
"walkdir",
]
[[package]]
@ -2632,12 +2474,6 @@ version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294"
[[package]]
name = "scopeguard"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
[[package]]
name = "seahash"
version = "4.1.0"
@ -2803,19 +2639,6 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
name = "string_cache"
version = "0.8.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f91138e76242f575eb1d3b38b4f1362f10d3a43f47d182a5b359af488a02293b"
dependencies = [
"new_debug_unreachable",
"once_cell",
"parking_lot",
"phf_shared 0.10.0",
"precomputed-hash",
]
[[package]]
name = "strip-ansi-escapes"
version = "0.2.0"
@ -2899,17 +2722,6 @@ dependencies = [
"windows-sys 0.52.0",
]
[[package]]
name = "term"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c59df8ac95d96ff9bede18eb7300b0fda5e5d8d90960e76f8e14ae765eedbf1f"
dependencies = [
"dirs-next",
"rustversion",
"winapi",
]
[[package]]
name = "terminal_size"
version = "0.3.0"
@ -3016,15 +2828,6 @@ dependencies = [
"tikv-jemalloc-sys",
]
[[package]]
name = "tiny-keccak"
version = "2.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237"
dependencies = [
"crunchy",
]
[[package]]
name = "tinytemplate"
version = "1.2.1"
@ -3245,12 +3048,6 @@ version = "0.1.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e51733f11c9c4f72aa0c160008246859e340b00807569a0da0e7a1079b27ba85"
[[package]]
name = "unicode-xid"
version = "0.2.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f962df74c8c05a667b5ee8bcf162993134c104e96440b663c8daa176dc772d8c"
[[package]]
name = "unicode_names2"
version = "1.2.2"

View file

@ -52,7 +52,6 @@ is-wsl = { version = "0.4.0" }
itertools = { version = "0.12.1" }
js-sys = { version = "0.3.69" }
jod-thread = { version = "0.1.2" }
lalrpop-util = { version = "0.20.0", default-features = false }
lexical-parse-float = { version = "0.8.0", features = ["format"] }
libc = { version = "0.2.153" }
libcst = { version = "1.1.0", default-features = false }

View file

@ -11,3 +11,9 @@ ned = "ned"
pn = "pn" # `import panel as pd` is a thing
poit = "poit"
BA = "BA" # acronym for "Bad Allowed", used in testing.
[default]
extend-ignore-re = [
# Line ignore with trailing "spellchecker:disable-line"
"(?Rm)^.*#\\s*spellchecker:disable-line$"
]

View file

@ -523,7 +523,7 @@ from module import =
----- stdout -----
----- stderr -----
error: Failed to parse main.py:2:20: Unexpected token '='
error: Failed to parse main.py:2:20: Expected an import name
"###);
Ok(())

View file

@ -731,11 +731,11 @@ fn stdin_parse_error() {
success: false
exit_code: 1
----- stdout -----
-:1:17: E999 SyntaxError: Unexpected token '='
-:1:17: E999 SyntaxError: Expected an import name
Found 1 error.
----- stderr -----
error: Failed to parse at 1:17: Unexpected token '='
error: Failed to parse at 1:17: Expected an import name
"###);
}

View file

@ -53,6 +53,7 @@ file_resolver.extend_exclude = [
"crates/ruff/resources/",
"crates/ruff_linter/resources/",
"crates/ruff_python_formatter/resources/",
"crates/ruff_python_parser/resources/",
]
file_resolver.force_exclude = false
file_resolver.include = [

View file

@ -247,7 +247,7 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) {
}
}
}
ExprContext::Del => {}
_ => {}
}
if checker.enabled(Rule::SixPY3) {
flake8_2020::rules::name_or_attribute(checker, expr);

View file

@ -998,6 +998,7 @@ impl<'a> Visitor<'a> for Checker<'a> {
ExprContext::Load => self.handle_node_load(expr),
ExprContext::Store => self.handle_node_store(id, expr),
ExprContext::Del => self.handle_node_delete(expr),
ExprContext::Invalid => {}
},
_ => {}
}

View file

@ -194,7 +194,7 @@ impl DisplayParseError {
// Translate the byte offset to a location in the originating source.
let location =
if let Some(jupyter_index) = source_kind.as_ipy_notebook().map(Notebook::index) {
let source_location = source_code.source_location(error.offset);
let source_location = source_code.source_location(error.location.start());
ErrorLocation::Cell(
jupyter_index
@ -208,7 +208,7 @@ impl DisplayParseError {
},
)
} else {
ErrorLocation::File(source_code.source_location(error.offset))
ErrorLocation::File(source_code.source_location(error.location.start()))
};
Self {
@ -275,27 +275,7 @@ impl<'a> DisplayParseErrorType<'a> {
impl Display for DisplayParseErrorType<'_> {
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
match self.0 {
ParseErrorType::Eof => write!(f, "Expected token but reached end of file."),
ParseErrorType::ExtraToken(ref tok) => write!(
f,
"Got extraneous token: {tok}",
tok = TruncateAtNewline(&tok)
),
ParseErrorType::InvalidToken => write!(f, "Got invalid token"),
ParseErrorType::UnrecognizedToken(ref tok, ref expected) => {
if let Some(expected) = expected.as_ref() {
write!(
f,
"Expected '{expected}', but got {tok}",
tok = TruncateAtNewline(&tok)
)
} else {
write!(f, "Unexpected token {tok}", tok = TruncateAtNewline(&tok))
}
}
ParseErrorType::Lexical(ref error) => write!(f, "{error}"),
}
write!(f, "{}", TruncateAtNewline(&self.0))
}
}

View file

@ -67,7 +67,7 @@ impl<'a> Visitor<'a> for LoadedNamesVisitor<'a> {
Expr::Name(name) => match &name.ctx {
ExprContext::Load => self.loaded.push(name),
ExprContext::Store => self.stored.push(name),
ExprContext::Del => {}
_ => {}
},
_ => visitor::walk_expr(self, expr),
}

View file

@ -14,5 +14,3 @@ bom_unsorted.py:1:1: I001 [*] Import block is un-sorted or un-formatted
2 |-import bar
1 |+import bar
2 |+import foo

View file

@ -81,7 +81,7 @@ pub(crate) fn syntax_error(
parse_error: &ParseError,
locator: &Locator,
) {
let rest = locator.after(parse_error.offset);
let rest = locator.after(parse_error.location.start());
// Try to create a non-empty range so that the diagnostic can print a caret at the
// right position. This requires that we retrieve the next character, if any, and take its length
@ -95,6 +95,6 @@ pub(crate) fn syntax_error(
SyntaxError {
message: format!("{}", DisplayParseErrorType::new(&parse_error.error)),
},
TextRange::at(parse_error.offset, len),
TextRange::at(parse_error.location.start(), len),
));
}

View file

@ -1,12 +1,9 @@
---
source: crates/ruff_linter/src/rules/pycodestyle/mod.rs
---
E999.py:3:1: E999 SyntaxError: unindent does not match any outer indentation level
E999.py:2:9: E999 SyntaxError: Expected an indented block after function definition
|
2 | def x():
3 |
| ^ E999
4 |
3 |
|

View file

@ -110,5 +110,3 @@ UP027.py:10:17: UP027 [*] Replace unpacked list comprehension with a generator e
14 14 |
15 15 | # Should not change
16 16 | foo = [fn(x) for x in items]

View file

@ -1600,7 +1600,7 @@ mod tests {
fn any_over_stmt_type_alias() {
let seen = RefCell::new(Vec::new());
let name = Expr::Name(ExprName {
id: "x".to_string(),
id: "x".into(),
range: TextRange::default(),
ctx: ExprContext::Load,
});

View file

@ -1,6 +1,7 @@
#![allow(clippy::derive_partial_eq_without_eq)]
use std::cell::OnceCell;
use std::fmt;
use std::fmt::Debug;
use std::ops::Deref;
@ -947,12 +948,19 @@ impl Ranged for FStringExpressionElement {
}
}
/// An `FStringLiteralElement` with an empty `value` is an invalid f-string element.
#[derive(Clone, Debug, PartialEq)]
pub struct FStringLiteralElement {
pub range: TextRange,
pub value: Box<str>,
}
impl FStringLiteralElement {
pub fn is_valid(&self) -> bool {
!self.value.is_empty()
}
}
impl Ranged for FStringLiteralElement {
fn range(&self) -> TextRange {
self.range
@ -1571,6 +1579,9 @@ bitflags! {
/// for why we track the casing of the `r` prefix,
/// but not for any other prefix
const R_PREFIX_UPPER = 1 << 4;
/// The string was deemed invalid by the parser.
const INVALID = 1 << 5;
}
}
@ -1621,6 +1632,12 @@ impl StringLiteralFlags {
}
}
#[must_use]
pub fn with_invalid(mut self) -> Self {
self.0 |= StringLiteralFlagsInner::INVALID;
self
}
pub const fn prefix(self) -> StringLiteralPrefix {
if self.0.contains(StringLiteralFlagsInner::U_PREFIX) {
debug_assert!(!self.0.intersects(
@ -1737,6 +1754,15 @@ impl StringLiteral {
pub fn as_str(&self) -> &str {
self
}
/// Creates an invalid string literal with the given range.
pub fn invalid(range: TextRange) -> Self {
Self {
range,
value: "".into(),
flags: StringLiteralFlags::default().with_invalid(),
}
}
}
impl From<StringLiteral> for Expr {
@ -1952,6 +1978,9 @@ bitflags! {
/// See https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html#r-strings-and-r-strings
/// for why we track the casing of the `r` prefix, but not for any other prefix
const R_PREFIX_UPPER = 1 << 3;
/// The bytestring was deemed invalid by the parser.
const INVALID = 1 << 4;
}
}
@ -2025,6 +2054,12 @@ impl BytesLiteralFlags {
self
}
#[must_use]
pub fn with_invalid(mut self) -> Self {
self.0 |= BytesLiteralFlagsInner::INVALID;
self
}
pub const fn prefix(self) -> ByteStringPrefix {
if self.0.contains(BytesLiteralFlagsInner::R_PREFIX_LOWER) {
debug_assert!(!self.0.contains(BytesLiteralFlagsInner::R_PREFIX_UPPER));
@ -2094,6 +2129,15 @@ impl BytesLiteral {
pub fn as_slice(&self) -> &[u8] {
self
}
/// Creates a new invalid bytes literal with the given range.
pub fn invalid(range: TextRange) -> Self {
Self {
range,
value: Box::new([]),
flags: BytesLiteralFlags::default().with_invalid(),
}
}
}
impl From<BytesLiteral> for Expr {
@ -2726,6 +2770,7 @@ pub enum ExprContext {
Load,
Store,
Del,
Invalid,
}
/// See also [boolop](https://docs.python.org/3/library/ast.html#ast.BoolOp)
@ -3506,10 +3551,17 @@ impl IpyEscapeKind {
}
}
/// An `Identifier` with an empty `id` is invalid.
///
/// For example, in the following code `id` will be empty.
/// ```python
/// def 1():
/// ...
/// ```
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
pub struct Identifier {
id: String,
range: TextRange,
pub id: String,
pub range: TextRange,
}
impl Identifier {
@ -3520,6 +3572,10 @@ impl Identifier {
range,
}
}
pub fn is_valid(&self) -> bool {
!self.id.is_empty()
}
}
impl Identifier {

View file

@ -71,8 +71,6 @@ for x in (y := [1, 2, 3]):
async for x in (y := [1, 2, 3]):
pass
del (x := 1)
try:
pass
except (e := Exception):

View file

@ -110,3 +110,14 @@ self.assertEqual(
suite._tests[0].id().split(".")[0],
os.path.basename(os.getcwd()),
)
# PEP 646 introduced starred expression in indexes
# https://peps.python.org/pep-0646/#change-1-star-expressions-in-indexes
data[*x]
data[*x,]
data[
*x,
]
data[ # comment 1
*x, # comment 2
] # comment 3

View file

@ -427,3 +427,9 @@ def function_with_one_argument_and_a_keyword_separator(
*, argument: str
) -> ReallyReallyReallyReallyReallyReallyReallyReallyLongName:
pass
# PEP 646 introduced type var tuple in parameter annotation
# https://peps.python.org/pep-0646/#change-2-args-as-a-typevartuple
def function_with_variadic_generics(*args: *tuple[int]): ...
def function_with_variadic_generics(*args: *tuple[int],): ...

View file

@ -5,8 +5,8 @@ expression: comments.debug(test_case.source_code)
{
Node {
kind: ModModule,
range: 0..0,
source: ``,
range: 0..35,
source: ``,
}: {
"leading": [
SourceComment {

View file

@ -1,13 +1,14 @@
use ruff_formatter::{format_args, write, FormatRuleWithOptions};
use ruff_formatter::{format_args, FormatRuleWithOptions};
use ruff_python_ast::AnyNodeRef;
use ruff_python_ast::ExprTuple;
use ruff_text_size::Ranged;
use ruff_text_size::{Ranged, TextRange};
use crate::builders::parenthesize_if_expands;
use crate::comments::SourceComment;
use crate::expression::parentheses::{
empty_parenthesized, optional_parentheses, parenthesized, NeedsParentheses, OptionalParentheses,
};
use crate::other::commas::has_trailing_comma;
use crate::prelude::*;
#[derive(Debug, Clone, Copy, Eq, PartialEq, Default)]
@ -138,7 +139,27 @@ impl FormatNodeRule<ExprTuple> for FormatExprTuple {
}
[single] => match self.parentheses {
TupleParentheses::Preserve if !is_parenthesized => {
write!(f, [single.format(), token(",")])
single.format().fmt(f)?;
// The `TupleParentheses::Preserve` is only set by subscript expression
// formatting. With PEP 646, a single element starred expression in the slice
// position of a subscript expression is actually a tuple expression. For
// example:
//
// ```python
// data[*x]
// # ^^ single element tuple expression without a trailing comma
//
// data[*x,]
// # ^^^ single element tuple expression with a trailing comma
// ```
//
//
// This means that the formatter should only add a trailing comma if there is
// one already.
if has_trailing_comma(TextRange::new(single.end(), item.end()), f.context()) {
token(",").fmt(f)?;
}
Ok(())
}
_ =>
// A single element tuple always needs parentheses and a trailing comma, except when inside of a subscript

View file

@ -135,7 +135,7 @@ pub fn format_module_source(
let source_type = options.source_type();
let (tokens, comment_ranges) =
tokens_and_ranges(source, source_type).map_err(|err| ParseError {
offset: err.location(),
location: err.location(),
error: ParseErrorType::Lexical(err.into_error()),
})?;
let module = parse_tokens(tokens, source, source_type.as_mode())?;

View file

@ -17,7 +17,7 @@ impl FormatNodeRule<ModModule> for FormatModModule {
if body.is_empty() {
// Only preserve an empty line if the source contains an empty line too.
if !f.context().comments().has_leading(item)
&& lines_after(range.end(), f.context().source()) != 0
&& lines_after(range.start(), f.context().source()) != 0
{
empty_line().fmt(f)
} else {

View file

@ -9,7 +9,13 @@ use crate::MagicTrailingComma;
/// should be respected).
pub(crate) fn has_magic_trailing_comma(range: TextRange, context: &PyFormatContext) -> bool {
match context.options().magic_trailing_comma() {
MagicTrailingComma::Respect => {
MagicTrailingComma::Respect => has_trailing_comma(range, context),
MagicTrailingComma::Ignore => false,
}
}
/// Returns `true` if the range ends with a trailing comma.
pub(crate) fn has_trailing_comma(range: TextRange, context: &PyFormatContext) -> bool {
let first_token = SimpleTokenizer::new(context.source(), range)
.skip_trivia()
// Skip over any closing parentheses belonging to the expression
@ -22,7 +28,4 @@ pub(crate) fn has_magic_trailing_comma(range: TextRange, context: &PyFormatConte
..
})
)
}
MagicTrailingComma::Ignore => false,
}
}

View file

@ -73,7 +73,7 @@ pub fn format_range(
let (tokens, comment_ranges) =
tokens_and_ranges(source, options.source_type()).map_err(|err| ParseError {
offset: err.location(),
location: err.location(),
error: ParseErrorType::Lexical(err.into_error()),
})?;

View file

@ -13,6 +13,3 @@ input_file: crates/ruff_python_formatter/resources/test/fixtures/ruff/empty_mult
```python
```

View file

@ -11,6 +11,3 @@ input_file: crates/ruff_python_formatter/resources/test/fixtures/ruff/empty_trai
```python
```

View file

@ -77,8 +77,6 @@ for x in (y := [1, 2, 3]):
async for x in (y := [1, 2, 3]):
pass
del (x := 1)
try:
pass
except (e := Exception):
@ -180,8 +178,6 @@ for x in (y := [1, 2, 3]):
async for x in (y := [1, 2, 3]):
pass
del (x := 1)
try:
pass
except (e := Exception):
@ -207,6 +203,3 @@ def f():
async def f():
await (x := 1)
```

View file

@ -116,6 +116,17 @@ self.assertEqual(
suite._tests[0].id().split(".")[0],
os.path.basename(os.getcwd()),
)
# PEP 646 introduced starred expression in indexes
# https://peps.python.org/pep-0646/#change-1-star-expressions-in-indexes
data[*x]
data[*x,]
data[
*x,
]
data[ # comment 1
*x, # comment 2
] # comment 3
```
## Output
@ -231,7 +242,13 @@ self.assertEqual(
suite._tests[0].id().split(".")[0],
os.path.basename(os.getcwd()),
)
# PEP 646 introduced starred expression in indexes
# https://peps.python.org/pep-0646/#change-1-star-expressions-in-indexes
data[*x]
data[*x,]
data[*x,]
data[ # comment 1
*x, # comment 2
] # comment 3
```

View file

@ -433,6 +433,12 @@ def function_with_one_argument_and_a_keyword_separator(
*, argument: str
) -> ReallyReallyReallyReallyReallyReallyReallyReallyLongName:
pass
# PEP 646 introduced type var tuple in parameter annotation
# https://peps.python.org/pep-0646/#change-2-args-as-a-typevartuple
def function_with_variadic_generics(*args: *tuple[int]): ...
def function_with_variadic_generics(*args: *tuple[int],): ...
```
## Output
@ -1014,4 +1020,12 @@ def function_with_one_argument_and_a_keyword_separator(
*, argument: str
) -> ReallyReallyReallyReallyReallyReallyReallyReallyLongName:
pass
# PEP 646 introduced type var tuple in parameter annotation
# https://peps.python.org/pep-0646/#change-2-args-as-a-typevartuple
def function_with_variadic_generics(*args: *tuple[int]): ...
def function_with_variadic_generics(
*args: *tuple[int],
): ...
```

View file

@ -0,0 +1,54 @@
# Contributing to the Python Parser
## Development
### Inline tests
The parser crate supports writing inline tests. These are tests that are written
in the source code itself, and are extracted to a separate file and run with the
test suite. They are written in the form of comments with a specific format. There
are two forms of inline tests:
Test that the parser successfully parses the input with no syntax errors. They're
written in the following format:
```rs
// test_ok this_is_the_test_name
// def foo():
// pass
println!("some rust code");
```
Test that the parser fails to parse the input with a syntax error. They're written
in the following format:
```rs
// test_err this_is_the_test_name
// [1, 2
println!("some rust code");
```
Note that the only difference between the two forms is the keyword used: `test_ok` or `test_err`.
The comment block must be independent of any other comment blocks. For example, the
following is not extracted:
```rs
// Some random comment
//
// test_ok this_is_the_test_name
// def foo():
// pass
println!("some rust code");
```
To generate the corresponding Python files for the inline tests, run the following command:
```sh
cargo test --package ruff_python_parser --test generate_inline_tests
```
Then, run the parser test suite with the following command:
```sh
cargo test --package ruff_python_parser
```

View file

@ -9,7 +9,6 @@ homepage = { workspace = true }
documentation = { workspace = true }
repository = { workspace = true }
license = { workspace = true }
build = "build.rs"
[lib]
@ -18,10 +17,11 @@ ruff_python_ast = { path = "../ruff_python_ast" }
ruff_text_size = { path = "../ruff_text_size" }
anyhow = { workspace = true }
bitflags = { workspace = true }
drop_bomb = { workspace = true }
bstr = { workspace = true }
is-macro = { workspace = true }
itertools = { workspace = true }
lalrpop-util = { workspace = true, default-features = false }
memchr = { workspace = true }
rustc-hash = { workspace = true }
static_assertions = { workspace = true }
@ -30,12 +30,11 @@ unicode_names2 = { workspace = true }
unicode-normalization = { workspace = true }
[dev-dependencies]
insta = { workspace = true }
ruff_source_file = { path = "../ruff_source_file" }
[build-dependencies]
anyhow = { workspace = true }
lalrpop = { version = "0.20.0", default-features = false, optional = true }
tiny-keccak = { version = "2", features = ["sha3"] }
annotate-snippets = { workspace = true }
insta = { workspace = true, features = ["glob"] }
walkdir = { workspace = true }
[lints]
workspace = true

View file

@ -1,112 +0,0 @@
use std::fmt::Write as _;
use std::fs::File;
use std::io::{BufRead, BufReader};
use std::path::{Path, PathBuf};
use tiny_keccak::{Hasher, Sha3};
fn main() {
const SOURCE: &str = "src/python.lalrpop";
println!("cargo:rerun-if-changed={SOURCE}");
let target;
let error;
#[cfg(feature = "lalrpop")]
{
let out_dir = PathBuf::from(std::env::var_os("OUT_DIR").unwrap());
target = out_dir.join("src/python.rs");
}
#[cfg(not(feature = "lalrpop"))]
{
target = PathBuf::from("src/python.rs");
error = "python.lalrpop and src/python.rs doesn't match. This is a ruff_python_parser bug. Please report it unless you are editing ruff_python_parser. Run `lalrpop src/python.lalrpop` to build ruff_python_parser again.";
}
let Some(message) = requires_lalrpop(SOURCE, &target) else {
return;
};
#[cfg(feature = "lalrpop")]
{
let Err(e) = try_lalrpop() else {
return;
};
error = e;
}
println!("cargo:warning={message}");
panic!("running lalrpop failed. {error:?}");
}
fn requires_lalrpop(source: &str, target: &Path) -> Option<String> {
let Ok(target) = File::open(target) else {
return Some("python.rs doesn't exist. regenerate.".to_owned());
};
let sha_prefix = "// sha3: ";
let Some(sha3_line) = BufReader::with_capacity(128, target)
.lines()
.find_map(|line| {
let line = line.unwrap();
line.starts_with(sha_prefix).then_some(line)
})
else {
// no sha3 line - maybe old version of lalrpop installed
return Some("python.rs doesn't include sha3 hash. regenerate.".to_owned());
};
let expected_sha3_str = sha3_line.strip_prefix(sha_prefix).unwrap();
let actual_sha3 = {
let mut hasher = Sha3::v256();
let mut f = BufReader::new(File::open(source).unwrap());
let mut line = String::new();
while f.read_line(&mut line).unwrap() != 0 {
if line.ends_with('\n') {
line.pop();
if line.ends_with('\r') {
line.pop();
}
}
hasher.update(line.as_bytes());
hasher.update(b"\n");
line.clear();
}
let mut hash = [0u8; 32];
hasher.finalize(&mut hash);
hash
};
let eq = sha_equal(expected_sha3_str, &actual_sha3);
if !eq {
let mut actual_sha3_str = String::new();
for byte in actual_sha3 {
write!(actual_sha3_str, "{byte:02x}").unwrap();
}
return Some(format!(
"python.rs hash expected: {expected_sha3_str} but actual: {actual_sha3_str}"
));
}
None
}
#[cfg(feature = "lalrpop")]
fn try_lalrpop() -> Result<(), Box<dyn std::error::Error>> {
// We are not using lalrpop::process_root() or Configuration::process_current_dir()
// because of https://github.com/lalrpop/lalrpop/issues/699.
lalrpop::Configuration::new()
.use_cargo_dir_conventions()
.set_in_dir(Path::new("."))
.process()
}
/// Compares a hex-encoded SHA3-256 digest with a raw 32-byte digest.
///
/// `expected_sha3_str` must be exactly 64 bytes long; each pair of characters
/// is decoded as one byte (upper- and lowercase hex digits are both accepted)
/// and compared with the corresponding byte of `actual_sha3`.
///
/// Returns `true` only when every pair is valid hex and matches. A string
/// containing anything other than hex digits (including a `+` sign or
/// non-ASCII characters) is treated as a mismatch rather than causing a panic.
///
/// # Panics
///
/// Panics if `expected_sha3_str` is not 64 bytes long.
// NOTE(review): the panic message suggests older lalrpop versions wrote a
// digest of a different length - confirm before relaxing this check.
fn sha_equal(expected_sha3_str: &str, actual_sha3: &[u8; 32]) -> bool {
    assert!(
        expected_sha3_str.len() == 64,
        "lalrpop version? hash bug is fixed in 0.19.8"
    );

    // Strict single-digit decoder: unlike `u8::from_str_radix`, this never
    // accepts a leading `+` and never slices the string on a char boundary.
    fn hex_value(digit: u8) -> Option<u8> {
        char::from(digit).to_digit(16).map(|value| value as u8)
    }

    expected_sha3_str
        .as_bytes()
        .chunks_exact(2)
        .zip(actual_sha3)
        .all(|(pair, &actual)| match (hex_value(pair[0]), hex_value(pair[1])) {
            (Some(high), Some(low)) => (high << 4 | low) == actual,
            _ => false,
        })
}

View file

@ -0,0 +1,6 @@
# Check http://editorconfig.org for more information
# This is the main config file for this project:
root = true
[*.py]
insert_final_newline = false

View file

@ -0,0 +1,4 @@
x: *int = 1
x: yield a = 1
x: yield from b = 1
x: y := int = 1

View file

@ -0,0 +1,10 @@
"abc": str = "def"
call(): str = "no"
*x: int = 1, 2
# Tuple assignment
x,: int = 1
x, y: int = 1, 2
(x, y): int = 1, 2
# List assignment
[x]: int = 1
[x, y]: int = 1, 2

View file

@ -0,0 +1,3 @@
x: Any = *a and b
x: Any = x := 1
x: list = [x, *a | b, *a or b]

View file

@ -0,0 +1,2 @@
a: type X = int
lambda: type X = int

View file

@ -0,0 +1 @@
assert x,

View file

@ -0,0 +1 @@
assert

View file

@ -0,0 +1,4 @@
assert False, *x
assert False, assert x
assert False, yield x
assert False, x := 1

View file

@ -0,0 +1,4 @@
assert *x
assert assert x
assert yield x
assert x := 1

View file

@ -0,0 +1,4 @@
1 = 1
x = 1 = 2
x = 1 = y = 2 = z
["a", "b"] = ["a", "b"]

View file

@ -0,0 +1,5 @@
x = *a and b
x = *yield x
x = *yield from x
x = *lambda x: x
x = x := 1

View file

@ -0,0 +1,4 @@
a = pass = c
a + b
a = b = pass = c
a + b

View file

@ -0,0 +1,6 @@
x =
1 + 1
x = y =
2 + 2
x = = y
3 + 3

View file

@ -0,0 +1,8 @@
async class Foo: ...
async while test: ...
async x = 1
async async def foo(): ...
# TODO(dhruvmanila): Here, `match` is actually a Name token because
# of the soft keyword transformer
async match test:
case _: ...

View file

@ -0,0 +1,6 @@
1 += 1
"a" += "b"
*x += 1
pass += 1
x += pass
(x + y) += 1

View file

@ -0,0 +1,5 @@
x += *a and b
x += *yield x
x += *yield from x
x += *lambda x: x
x += y := 1

View file

@ -0,0 +1,4 @@
x +=
1 + 1
x += y +=
2 + 2

View file

@ -0,0 +1,3 @@
class Foo:
class Foo():
x = 42

View file

@ -0,0 +1,3 @@
class : ...
class (): ...
class (metaclass=ABC): ...

View file

@ -0,0 +1,3 @@
class Foo[T1, *T2(a, b):
pass
x = 10

View file

@ -0,0 +1,6 @@
# Here, the error is highlighted at the `pass` token
if True:
pass
# The parser is at the end of the program, so let's highlight
# at the newline token after `:`
if True:

View file

@ -0,0 +1 @@
if True: if True: pass

View file

@ -0,0 +1,2 @@
(async)
(x async x in iter)

View file

@ -0,0 +1,6 @@
@*x
@(*x)
@((*x))
@yield x
@yield from x
def foo(): ...

View file

@ -0,0 +1,5 @@
@def foo(): ...
@
def foo(): ...
@@
def foo(): ...

View file

@ -0,0 +1,3 @@
@x def foo(): ...
@x async def foo(): ...
@x class Foo: ...

View file

@ -0,0 +1,4 @@
@foo
async with x: ...
@foo
x = 1

View file

@ -0,0 +1,4 @@
del x, y.
z
del x, y[
z

View file

@ -0,0 +1 @@
del

View file

@ -0,0 +1,2 @@
import a..b
import a...b

View file

@ -0,0 +1,8 @@
try:
pass
except yield x:
pass
try:
pass
except* *x:
pass

View file

@ -0,0 +1,6 @@
try:
pass
except Exception as:
pass
except Exception as
pass

View file

@ -0,0 +1,13 @@
try:
pass
except as exc:
pass
# If a '*' is present then exception type is required
try:
pass
except*:
pass
except*
pass
except* as exc:
pass

View file

@ -0,0 +1,4 @@
try:
pass
except as:
pass

View file

@ -0,0 +1,12 @@
try:
pass
except x, y:
pass
except x, y as exc:
pass
try:
pass
except* x, y:
pass
except* x, y as eg:
pass

View file

@ -0,0 +1,2 @@
f"{}"
f"{ }"

View file

@ -0,0 +1,2 @@
f"{x!123}"
f"{x!'a'}"

View file

@ -0,0 +1,4 @@
# Starred expression inside f-string has a minimum precedence of bitwise or.
f"{*}"
f"{*x and y}"
f"{*yield x}"

View file

@ -0,0 +1 @@
f"{lambda x: x}"

View file

@ -0,0 +1,5 @@
f"{"
f"{foo!r"
f"{foo="
f"{"
f"""{"""

View file

@ -0,0 +1,2 @@
f"hello {x:"
f"hello {x:.3f"

View file

@ -0,0 +1,3 @@
for x in *a and b: ...
for x in yield a: ...
for target in x := 1: ...

View file

@ -0,0 +1,6 @@
for 1 in x: ...
for "a" in x: ...
for *x and y in z: ...
for *x | y in z: ...
for await x in z: ...
for [x, 1, y, *["a"]] in z: ...

View file

@ -0,0 +1,2 @@
for a b: ...
for a: ...

View file

@ -0,0 +1,2 @@
for x in:
a = 1

View file

@ -0,0 +1 @@
for in x: ...

View file

@ -0,0 +1,3 @@
from x import a.
from x import a.b
from x import a, b.c, d, e.f, g

View file

@ -0,0 +1,3 @@
from x import
from x import ()
from x import ,,

View file

@ -0,0 +1,2 @@
from
from import x

View file

@ -0,0 +1,4 @@
from x import (a, b
1 + 1
from x import (a, b,
2 + 2

View file

@ -0,0 +1,4 @@
from x import *, a
from x import a, *, b
from x import *, a as b
from x import *, *, a

View file

@ -0,0 +1,3 @@
from a import b,
from a import b as c,
from a import b, c,

View file

@ -0,0 +1,3 @@
def foo():
def foo() -> int:
x = 42

View file

@ -0,0 +1,3 @@
def foo() -> *int: ...
def foo() -> (*int): ...
def foo() -> yield x: ...

View file

@ -0,0 +1,2 @@
def (): ...
def () -> int: ...

View file

@ -0,0 +1 @@
def foo() -> : ...

View file

@ -0,0 +1,5 @@
def foo(a: int, b:
def foo():
return 42
def foo(a: int, b: str
x = 10

View file

@ -0,0 +1,3 @@
def foo[T1, *T2(a, b):
return a + b
x = 10

View file

@ -0,0 +1,2 @@
def foo() -> int,: ...
def foo() -> int, str: ...

View file

@ -0,0 +1 @@
global

View file

@ -0,0 +1 @@
global x + 1

View file

@ -0,0 +1,3 @@
global ,
global x,
global x, y,

View file

@ -0,0 +1,6 @@
if x:
pass
elif y
pass
else:
pass

View file

@ -0,0 +1,2 @@
if True:
1 + 1

Some files were not shown because too many files have changed in this diff Show more