mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-03 18:28:24 +00:00
Replace LALRPOP parser with hand-written parser (#10036)
(Supersedes #9152, authored by @LaBatata101) ## Summary This PR replaces the current LALRPOP-generated parser with a hand-written recursive descent parser. It also updates the grammar for [PEP 646](https://peps.python.org/pep-0646/) so that the parser outputs the correct AST. For example, in `data[*x]`, the index expression is now a tuple with a single starred expression instead of just a starred expression. Beyond the performance improvements, the parser is also error-resilient and can provide better error messages. The behavior as seen by any downstream tools isn't changed. That is, the linter and formatter can still assume that the parser will _stop_ at the first syntax error. This will be updated in the following months. For more details about the change here, refer to the PRs corresponding to the individual commits and the release blog post. ## Test Plan Write _lots_ and _lots_ of tests for both valid and invalid syntax and verify the output. ## Acknowledgements - @MichaReiser for reviewing 100+ parser PRs and continuously providing guidance throughout the project - @LaBatata101 for initiating the transition to a hand-written parser in #9152 - @addisoncrump for implementing the fuzzer which helped [catch](https://github.com/astral-sh/ruff/pull/10903) [a](https://github.com/astral-sh/ruff/pull/10910) [lot](https://github.com/astral-sh/ruff/pull/10966) [of](https://github.com/astral-sh/ruff/pull/10896) [bugs](https://github.com/astral-sh/ruff/pull/10877) --------- Co-authored-by: Victor Hugo Gomes <labatata101@linuxmail.org> Co-authored-by: Micha Reiser <micha@reiser.io>
This commit is contained in:
parent
e09180b1df
commit
13ffb5bc19
852 changed files with 112948 additions and 103620 deletions
|
@ -1600,7 +1600,7 @@ mod tests {
|
|||
fn any_over_stmt_type_alias() {
|
||||
let seen = RefCell::new(Vec::new());
|
||||
let name = Expr::Name(ExprName {
|
||||
id: "x".to_string(),
|
||||
id: "x".into(),
|
||||
range: TextRange::default(),
|
||||
ctx: ExprContext::Load,
|
||||
});
|
||||
|
|
|
@ -1,6 +1,7 @@
|
|||
#![allow(clippy::derive_partial_eq_without_eq)]
|
||||
|
||||
use std::cell::OnceCell;
|
||||
|
||||
use std::fmt;
|
||||
use std::fmt::Debug;
|
||||
use std::ops::Deref;
|
||||
|
@ -947,12 +948,19 @@ impl Ranged for FStringExpressionElement {
|
|||
}
|
||||
}
|
||||
|
||||
/// An `FStringLiteralElement` with an empty `value` is an invalid f-string element.
|
||||
#[derive(Clone, Debug, PartialEq)]
|
||||
pub struct FStringLiteralElement {
|
||||
pub range: TextRange,
|
||||
pub value: Box<str>,
|
||||
}
|
||||
|
||||
impl FStringLiteralElement {
|
||||
pub fn is_valid(&self) -> bool {
|
||||
!self.value.is_empty()
|
||||
}
|
||||
}
|
||||
|
||||
impl Ranged for FStringLiteralElement {
|
||||
fn range(&self) -> TextRange {
|
||||
self.range
|
||||
|
@ -1571,6 +1579,9 @@ bitflags! {
|
|||
/// for why we track the casing of the `r` prefix,
|
||||
/// but not for any other prefix
|
||||
const R_PREFIX_UPPER = 1 << 4;
|
||||
|
||||
/// The string was deemed invalid by the parser.
|
||||
const INVALID = 1 << 5;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1621,6 +1632,12 @@ impl StringLiteralFlags {
|
|||
}
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn with_invalid(mut self) -> Self {
|
||||
self.0 |= StringLiteralFlagsInner::INVALID;
|
||||
self
|
||||
}
|
||||
|
||||
pub const fn prefix(self) -> StringLiteralPrefix {
|
||||
if self.0.contains(StringLiteralFlagsInner::U_PREFIX) {
|
||||
debug_assert!(!self.0.intersects(
|
||||
|
@ -1737,6 +1754,15 @@ impl StringLiteral {
|
|||
pub fn as_str(&self) -> &str {
|
||||
// Returning `self` where `&str` is expected relies on a coercion from
// `&Self` to `&str`.
// NOTE(review): presumably backed by a `Deref<Target = str>` impl defined
// outside this view — confirm.
self
|
||||
}
|
||||
|
||||
/// Creates an invalid string literal with the given range.
|
||||
pub fn invalid(range: TextRange) -> Self {
|
||||
Self {
|
||||
range,
|
||||
value: "".into(),
|
||||
flags: StringLiteralFlags::default().with_invalid(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<StringLiteral> for Expr {
|
||||
|
@ -1952,6 +1978,9 @@ bitflags! {
|
|||
/// See https://black.readthedocs.io/en/stable/the_black_code_style/current_style.html#r-strings-and-r-strings
|
||||
/// for why we track the casing of the `r` prefix, but not for any other prefix
|
||||
const R_PREFIX_UPPER = 1 << 3;
|
||||
|
||||
/// The bytestring was deemed invalid by the parser.
|
||||
const INVALID = 1 << 4;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -2025,6 +2054,12 @@ impl BytesLiteralFlags {
|
|||
self
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
pub fn with_invalid(mut self) -> Self {
|
||||
self.0 |= BytesLiteralFlagsInner::INVALID;
|
||||
self
|
||||
}
|
||||
|
||||
pub const fn prefix(self) -> ByteStringPrefix {
|
||||
if self.0.contains(BytesLiteralFlagsInner::R_PREFIX_LOWER) {
|
||||
debug_assert!(!self.0.contains(BytesLiteralFlagsInner::R_PREFIX_UPPER));
|
||||
|
@ -2094,6 +2129,15 @@ impl BytesLiteral {
|
|||
pub fn as_slice(&self) -> &[u8] {
|
||||
// Returning `self` where `&[u8]` is expected relies on a coercion from
// `&Self` to `&[u8]`.
// NOTE(review): presumably backed by a `Deref<Target = [u8]>` impl defined
// outside this view — confirm.
self
|
||||
}
|
||||
|
||||
/// Creates a new invalid bytes literal with the given range.
|
||||
pub fn invalid(range: TextRange) -> Self {
|
||||
Self {
|
||||
range,
|
||||
value: Box::new([]),
|
||||
flags: BytesLiteralFlags::default().with_invalid(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<BytesLiteral> for Expr {
|
||||
|
@ -2726,6 +2770,7 @@ pub enum ExprContext {
|
|||
Load,
|
||||
Store,
|
||||
Del,
|
||||
Invalid,
|
||||
}
|
||||
|
||||
/// See also [boolop](https://docs.python.org/3/library/ast.html#ast.BoolOp)
|
||||
|
@ -3506,10 +3551,17 @@ impl IpyEscapeKind {
|
|||
}
|
||||
}
|
||||
|
||||
/// An identifier: a name (`id`) paired with a `range`.
///
/// An `Identifier` with an empty `id` is invalid.
|
||||
///
|
||||
/// For example, in the following code `id` will be empty, because `1` is not
/// a valid function name.
|
||||
/// ```python
|
||||
/// def 1():
|
||||
/// ...
|
||||
/// ```
|
||||
#[derive(Clone, Debug, PartialEq, Eq, Hash)]
|
||||
pub struct Identifier {
|
||||
// NOTE(review): `id` and `range` each appear twice below, once private and
// once `pub`. This looks like the removed and added sides of a diff hunk
// rendered without +/- markers; presumably only the `pub` fields remain
// after the change — confirm against the real file.
id: String,
|
||||
range: TextRange,
|
||||
pub id: String,
|
||||
pub range: TextRange,
|
||||
}
|
||||
|
||||
impl Identifier {
|
||||
|
@ -3520,6 +3572,10 @@ impl Identifier {
|
|||
range,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn is_valid(&self) -> bool {
|
||||
!self.id.is_empty()
|
||||
}
|
||||
}
|
||||
|
||||
impl Identifier {
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue