mirror of
https://github.com/astral-sh/ruff.git
synced 2025-08-03 18:28:24 +00:00
Make associativity a property of operator precedence (#11065)
## Summary This PR does a few things but the main change is that is makes associativity a property of operator precedence. 1. Rename `Precedence` -> `OperatorPrecedence` 2. Rename `parse_expression_with_precedence` -> `parse_binary_expression_or_higher` 3. Move `current_binding_power` to `OperatorPrecedence::try_from_tokens` [^1] 4. Add a `OperatorPrecedence::is_right_associative` method 5. Move from `increment_precedence` to using `<=` / `<` to check if the parsing loop needs to stop [^2] [^1]: Another alternative would be to have two separate methods to avoid lookahead as it's required only for once case (`not in`). So, `try_from_current_token(current).or_else(|| try_from_next_token(current, peek))` [^2]: This will allow us to easily make the refactors mentioned in #10752 ## Test Plan Make sure the precedence parsing algorithm is still correct by running the test suite, fuzz testing it and running it against a dozen or so open-source repositories.
This commit is contained in:
parent
c30735d4a7
commit
5b81b8368d
2 changed files with 100 additions and 134 deletions
|
@ -210,80 +210,26 @@ impl<'src> Parser<'src> {
|
|||
///
|
||||
/// [Python grammar]: https://docs.python.org/3/reference/grammar.html
|
||||
fn parse_simple_expression(&mut self, context: ExpressionContext) -> ParsedExpr {
|
||||
self.parse_expression_with_precedence(Precedence::Initial, context)
|
||||
self.parse_binary_expression_or_higher(OperatorPrecedence::Initial, context)
|
||||
}
|
||||
|
||||
/// Returns the binding power of the current token for a Pratt parser.
|
||||
///
|
||||
/// This includes the precedence and associativity of the current token.
|
||||
/// If the current token is not an operator, it returns [`Precedence::Unknown`],
|
||||
/// [`TokenKind::Unknown`], and [`Associativity::Left`] respectively.
|
||||
///
|
||||
/// See: <https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html>
|
||||
fn current_binding_power(&mut self) -> (Precedence, TokenKind, Associativity) {
|
||||
const NOT_AN_OPERATOR: (Precedence, TokenKind, Associativity) =
|
||||
(Precedence::Unknown, TokenKind::Unknown, Associativity::Left);
|
||||
|
||||
let kind = self.current_token_kind();
|
||||
|
||||
match kind {
|
||||
TokenKind::Or => (Precedence::Or, kind, Associativity::Left),
|
||||
TokenKind::And => (Precedence::And, kind, Associativity::Left),
|
||||
TokenKind::Not if self.peek() == TokenKind::In => (
|
||||
Precedence::ComparisonsMembershipIdentity,
|
||||
kind,
|
||||
Associativity::Left,
|
||||
),
|
||||
TokenKind::Is
|
||||
| TokenKind::In
|
||||
| TokenKind::EqEqual
|
||||
| TokenKind::NotEqual
|
||||
| TokenKind::Less
|
||||
| TokenKind::LessEqual
|
||||
| TokenKind::Greater
|
||||
| TokenKind::GreaterEqual => (
|
||||
Precedence::ComparisonsMembershipIdentity,
|
||||
kind,
|
||||
Associativity::Left,
|
||||
),
|
||||
TokenKind::Vbar => (Precedence::BitOr, kind, Associativity::Left),
|
||||
TokenKind::CircumFlex => (Precedence::BitXor, kind, Associativity::Left),
|
||||
TokenKind::Amper => (Precedence::BitAnd, kind, Associativity::Left),
|
||||
TokenKind::LeftShift | TokenKind::RightShift => {
|
||||
(Precedence::LeftRightShift, kind, Associativity::Left)
|
||||
}
|
||||
TokenKind::Plus | TokenKind::Minus => (Precedence::AddSub, kind, Associativity::Left),
|
||||
TokenKind::Star
|
||||
| TokenKind::Slash
|
||||
| TokenKind::DoubleSlash
|
||||
| TokenKind::Percent
|
||||
| TokenKind::At => (Precedence::MulDivRemain, kind, Associativity::Left),
|
||||
TokenKind::DoubleStar => (Precedence::Exponent, kind, Associativity::Right),
|
||||
_ => NOT_AN_OPERATOR,
|
||||
}
|
||||
}
|
||||
|
||||
/// Parses an expression with binding power of at least `previous_precedence`.
|
||||
///
|
||||
/// This method uses the [Pratt parsing algorithm].
|
||||
/// Parses a binary expression using the [Pratt parsing algorithm].
|
||||
///
|
||||
/// [Pratt parsing algorithm]: https://matklad.github.io/2020/04/13/simple-but-powerful-pratt-parsing.html
|
||||
fn parse_expression_with_precedence(
|
||||
fn parse_binary_expression_or_higher(
|
||||
&mut self,
|
||||
previous_precedence: Precedence,
|
||||
left_precedence: OperatorPrecedence,
|
||||
context: ExpressionContext,
|
||||
) -> ParsedExpr {
|
||||
let start = self.node_start();
|
||||
let lhs = self.parse_lhs_expression(previous_precedence, context);
|
||||
self.parse_expression_with_precedence_recursive(lhs, previous_precedence, context, start)
|
||||
let lhs = self.parse_lhs_expression(left_precedence, context);
|
||||
self.parse_binary_expression_or_higher_recursive(lhs, left_precedence, context, start)
|
||||
}
|
||||
|
||||
/// Parses an expression with binding power of at least `previous_precedence` given the
|
||||
/// left-hand side expression.
|
||||
pub(super) fn parse_expression_with_precedence_recursive(
|
||||
pub(super) fn parse_binary_expression_or_higher_recursive(
|
||||
&mut self,
|
||||
mut lhs: ParsedExpr,
|
||||
previous_precedence: Precedence,
|
||||
mut left: ParsedExpr,
|
||||
left_precedence: OperatorPrecedence,
|
||||
context: ExpressionContext,
|
||||
start: TextSize,
|
||||
) -> ParsedExpr {
|
||||
|
@ -292,30 +238,42 @@ impl<'src> Parser<'src> {
|
|||
loop {
|
||||
progress.assert_progressing(self);
|
||||
|
||||
let (current_precedence, token, associativity) = self.current_binding_power();
|
||||
if current_precedence < previous_precedence {
|
||||
break;
|
||||
}
|
||||
let token = self.current_token_kind();
|
||||
|
||||
if matches!(token, TokenKind::In) && context.is_in_excluded() {
|
||||
// Omit the `in` keyword when parsing the target expression in a comprehension or
|
||||
// a `for` statement.
|
||||
break;
|
||||
}
|
||||
|
||||
let operator_binding_power = match associativity {
|
||||
Associativity::Left => current_precedence.increment_precedence(),
|
||||
Associativity::Right => current_precedence,
|
||||
// We need to peek the next token because of `not in` operator.
|
||||
let Some(new_precedence) = OperatorPrecedence::try_from_tokens(token, self.peek())
|
||||
else {
|
||||
// Not an operator token.
|
||||
break;
|
||||
};
|
||||
|
||||
let stop_at_current_operator = if new_precedence.is_right_associative() {
|
||||
new_precedence < left_precedence
|
||||
} else {
|
||||
new_precedence <= left_precedence
|
||||
};
|
||||
|
||||
if stop_at_current_operator {
|
||||
break;
|
||||
}
|
||||
|
||||
// Operator token.
|
||||
self.bump(token);
|
||||
|
||||
// We need to create a dedicated node for boolean operations and comparison operations
|
||||
// even though they are infix operators.
|
||||
if token.is_bool_operator() {
|
||||
lhs = Expr::BoolOp(self.parse_bool_operation_expression(
|
||||
lhs.expr,
|
||||
left = Expr::BoolOp(self.parse_bool_operation_expression(
|
||||
left.expr,
|
||||
start,
|
||||
token,
|
||||
operator_binding_power,
|
||||
new_precedence,
|
||||
context,
|
||||
))
|
||||
.into();
|
||||
|
@ -323,28 +281,28 @@ impl<'src> Parser<'src> {
|
|||
}
|
||||
|
||||
if token.is_compare_operator() {
|
||||
lhs = Expr::Compare(self.parse_compare_expression(
|
||||
lhs.expr,
|
||||
left = Expr::Compare(self.parse_compare_expression(
|
||||
left.expr,
|
||||
start,
|
||||
token,
|
||||
operator_binding_power,
|
||||
new_precedence,
|
||||
context,
|
||||
))
|
||||
.into();
|
||||
continue;
|
||||
}
|
||||
|
||||
let rhs = self.parse_expression_with_precedence(operator_binding_power, context);
|
||||
let right = self.parse_binary_expression_or_higher(new_precedence, context);
|
||||
|
||||
lhs.expr = Expr::BinOp(ast::ExprBinOp {
|
||||
left: Box::new(lhs.expr),
|
||||
left.expr = Expr::BinOp(ast::ExprBinOp {
|
||||
left: Box::new(left.expr),
|
||||
op: Operator::try_from(token).unwrap(),
|
||||
right: Box::new(rhs.expr),
|
||||
right: Box::new(right.expr),
|
||||
range: self.node_range(start),
|
||||
});
|
||||
}
|
||||
|
||||
lhs
|
||||
left
|
||||
}
|
||||
|
||||
/// Parses the left-hand side of an expression.
|
||||
|
@ -352,13 +310,13 @@ impl<'src> Parser<'src> {
|
|||
/// This includes prefix expressions such as unary operators, boolean `not`,
|
||||
/// `await`, `lambda`. It also parses atoms and postfix expressions.
|
||||
///
|
||||
/// The given [`Precedence`] is used to determine if the parsed expression
|
||||
/// The given [`OperatorPrecedence`] is used to determine if the parsed expression
|
||||
/// is valid in that context. For example, a unary operator is not valid
|
||||
/// in an `await` expression in which case the `previous_precedence` would
|
||||
/// be [`Precedence::Await`].
|
||||
/// in an `await` expression in which case the `left_precedence` would
|
||||
/// be [`OperatorPrecedence::Await`].
|
||||
fn parse_lhs_expression(
|
||||
&mut self,
|
||||
previous_precedence: Precedence,
|
||||
left_precedence: OperatorPrecedence,
|
||||
context: ExpressionContext,
|
||||
) -> ParsedExpr {
|
||||
let start = self.node_start();
|
||||
|
@ -366,12 +324,12 @@ impl<'src> Parser<'src> {
|
|||
let lhs = match self.current_token_kind() {
|
||||
unary_tok @ (TokenKind::Plus | TokenKind::Minus | TokenKind::Tilde) => {
|
||||
let unary_expr = self.parse_unary_expression(context);
|
||||
if previous_precedence > Precedence::PosNegBitNot
|
||||
if left_precedence > OperatorPrecedence::PosNegBitNot
|
||||
// > The power operator `**` binds less tightly than an arithmetic
|
||||
// > or bitwise unary operator on its right, that is, 2**-1 is 0.5.
|
||||
//
|
||||
// Reference: https://docs.python.org/3/reference/expressions.html#id21
|
||||
&& previous_precedence != Precedence::Exponent
|
||||
&& left_precedence != OperatorPrecedence::Exponent
|
||||
{
|
||||
self.add_error(
|
||||
ParseErrorType::OtherError(format!(
|
||||
|
@ -384,7 +342,7 @@ impl<'src> Parser<'src> {
|
|||
}
|
||||
TokenKind::Not => {
|
||||
let unary_expr = self.parse_unary_expression(context);
|
||||
if previous_precedence > Precedence::Not {
|
||||
if left_precedence > OperatorPrecedence::Not {
|
||||
self.add_error(
|
||||
ParseErrorType::OtherError(
|
||||
"Boolean 'not' expression cannot be used here".to_string(),
|
||||
|
@ -396,7 +354,7 @@ impl<'src> Parser<'src> {
|
|||
}
|
||||
TokenKind::Star => {
|
||||
let starred_expr = self.parse_starred_expression(context);
|
||||
if previous_precedence > Precedence::Initial
|
||||
if left_precedence > OperatorPrecedence::Initial
|
||||
|| !context.is_starred_expression_allowed()
|
||||
{
|
||||
self.add_error(ParseErrorType::InvalidStarredExpressionUsage, &starred_expr);
|
||||
|
@ -406,7 +364,7 @@ impl<'src> Parser<'src> {
|
|||
TokenKind::Await => {
|
||||
let await_expr = self.parse_await_expression();
|
||||
// `await` expressions cannot be nested
|
||||
if previous_precedence >= Precedence::Await {
|
||||
if left_precedence >= OperatorPrecedence::Await {
|
||||
self.add_error(
|
||||
ParseErrorType::OtherError(
|
||||
"Await expression cannot be used here".to_string(),
|
||||
|
@ -425,7 +383,7 @@ impl<'src> Parser<'src> {
|
|||
}
|
||||
TokenKind::Yield => {
|
||||
let expr = self.parse_yield_expression();
|
||||
if previous_precedence > Precedence::Initial
|
||||
if left_precedence > OperatorPrecedence::Initial
|
||||
|| !context.is_yield_expression_allowed()
|
||||
{
|
||||
self.add_error(ParseErrorType::InvalidYieldExpressionUsage, &expr);
|
||||
|
@ -949,10 +907,10 @@ impl<'src> Parser<'src> {
|
|||
self.bump(self.current_token_kind());
|
||||
|
||||
let operand = if op.is_not() {
|
||||
self.parse_expression_with_precedence(Precedence::Not, context)
|
||||
self.parse_binary_expression_or_higher(OperatorPrecedence::Not, context)
|
||||
} else {
|
||||
// plus, minus and tilde
|
||||
self.parse_expression_with_precedence(Precedence::PosNegBitNot, context)
|
||||
self.parse_binary_expression_or_higher(OperatorPrecedence::PosNegBitNot, context)
|
||||
};
|
||||
|
||||
ast::ExprUnaryOp {
|
||||
|
@ -1001,7 +959,7 @@ impl<'src> Parser<'src> {
|
|||
lhs: Expr,
|
||||
start: TextSize,
|
||||
operator_token: TokenKind,
|
||||
operator_binding_power: Precedence,
|
||||
operator_binding_power: OperatorPrecedence,
|
||||
context: ExpressionContext,
|
||||
) -> ast::ExprBoolOp {
|
||||
let mut values = vec![lhs];
|
||||
|
@ -1013,7 +971,7 @@ impl<'src> Parser<'src> {
|
|||
progress.assert_progressing(self);
|
||||
|
||||
let parsed_expr =
|
||||
self.parse_expression_with_precedence(operator_binding_power, context);
|
||||
self.parse_binary_expression_or_higher(operator_binding_power, context);
|
||||
values.push(parsed_expr.expr);
|
||||
|
||||
if !self.eat(operator_token) {
|
||||
|
@ -1045,7 +1003,7 @@ impl<'src> Parser<'src> {
|
|||
lhs: Expr,
|
||||
start: TextSize,
|
||||
operator: TokenKind,
|
||||
operator_binding_power: Precedence,
|
||||
operator_binding_power: OperatorPrecedence,
|
||||
context: ExpressionContext,
|
||||
) -> ast::ExprCompare {
|
||||
let compare_operator = token_kind_to_cmp_op([operator, self.current_token_kind()]).unwrap();
|
||||
|
@ -1071,7 +1029,7 @@ impl<'src> Parser<'src> {
|
|||
progress.assert_progressing(self);
|
||||
|
||||
let parsed_expr =
|
||||
self.parse_expression_with_precedence(operator_binding_power, context);
|
||||
self.parse_binary_expression_or_higher(operator_binding_power, context);
|
||||
comparators.push(parsed_expr.expr);
|
||||
|
||||
let next_operator = self.current_token_kind();
|
||||
|
@ -2100,8 +2058,10 @@ impl<'src> Parser<'src> {
|
|||
let start = self.node_start();
|
||||
self.bump(TokenKind::Await);
|
||||
|
||||
let parsed_expr =
|
||||
self.parse_expression_with_precedence(Precedence::Await, ExpressionContext::default());
|
||||
let parsed_expr = self.parse_binary_expression_or_higher(
|
||||
OperatorPrecedence::Await,
|
||||
ExpressionContext::default(),
|
||||
);
|
||||
|
||||
ast::ExprAwait {
|
||||
value: Box::new(parsed_expr.expr),
|
||||
|
@ -2370,12 +2330,6 @@ impl Ranged for ParsedExpr {
|
|||
}
|
||||
}
|
||||
|
||||
/// Binding power associativity
|
||||
enum Associativity {
|
||||
Left,
|
||||
Right,
|
||||
}
|
||||
|
||||
/// Represents the precedence levels for various operators and expressions of Python.
|
||||
/// Variants at the top have lower precedence and variants at the bottom have
|
||||
/// higher precedence.
|
||||
|
@ -2386,9 +2340,7 @@ enum Associativity {
|
|||
///
|
||||
/// See: <https://docs.python.org/3/reference/expressions.html#operator-precedence>
|
||||
#[derive(Debug, Ord, Eq, PartialEq, PartialOrd, Copy, Clone)]
|
||||
pub(super) enum Precedence {
|
||||
/// Precedence for an unknown operator.
|
||||
Unknown,
|
||||
pub(super) enum OperatorPrecedence {
|
||||
/// The initital precedence when parsing an expression.
|
||||
Initial,
|
||||
/// Precedence of boolean `or` operator.
|
||||
|
@ -2421,29 +2373,43 @@ pub(super) enum Precedence {
|
|||
Await,
|
||||
}
|
||||
|
||||
impl Precedence {
|
||||
fn increment_precedence(self) -> Precedence {
|
||||
match self {
|
||||
Precedence::Or => Precedence::And,
|
||||
Precedence::And => Precedence::Not,
|
||||
Precedence::Not => Precedence::ComparisonsMembershipIdentity,
|
||||
Precedence::ComparisonsMembershipIdentity => Precedence::BitOr,
|
||||
Precedence::BitOr => Precedence::BitXor,
|
||||
Precedence::BitXor => Precedence::BitAnd,
|
||||
Precedence::BitAnd => Precedence::LeftRightShift,
|
||||
Precedence::LeftRightShift => Precedence::AddSub,
|
||||
Precedence::AddSub => Precedence::MulDivRemain,
|
||||
Precedence::MulDivRemain => Precedence::PosNegBitNot,
|
||||
Precedence::PosNegBitNot => Precedence::Exponent,
|
||||
Precedence::Exponent => Precedence::Await,
|
||||
// We've reached the highest precedence, we can't increment anymore,
|
||||
// so return the same precedence.
|
||||
Precedence::Await => Precedence::Await,
|
||||
// When this function is invoked, the precedence will never be 'Unknown' or 'Initial'.
|
||||
// This is due to their lower precedence values, causing them to exit the loop in the
|
||||
// `parse_expression_with_precedence` function before the execution of this function.
|
||||
Precedence::Unknown | Precedence::Initial => unreachable!(),
|
||||
}
|
||||
impl OperatorPrecedence {
|
||||
/// Returns the precedence for the given operator token or [None] if the token isn't an
|
||||
/// operator token.
|
||||
fn try_from_tokens(token: TokenKind, next: TokenKind) -> Option<OperatorPrecedence> {
|
||||
Some(match token {
|
||||
TokenKind::Or => OperatorPrecedence::Or,
|
||||
TokenKind::And => OperatorPrecedence::And,
|
||||
TokenKind::Not if next == TokenKind::In => {
|
||||
OperatorPrecedence::ComparisonsMembershipIdentity
|
||||
}
|
||||
TokenKind::Is
|
||||
| TokenKind::In
|
||||
| TokenKind::EqEqual
|
||||
| TokenKind::NotEqual
|
||||
| TokenKind::Less
|
||||
| TokenKind::LessEqual
|
||||
| TokenKind::Greater
|
||||
| TokenKind::GreaterEqual => OperatorPrecedence::ComparisonsMembershipIdentity,
|
||||
TokenKind::Vbar => OperatorPrecedence::BitOr,
|
||||
TokenKind::CircumFlex => OperatorPrecedence::BitXor,
|
||||
TokenKind::Amper => OperatorPrecedence::BitAnd,
|
||||
TokenKind::LeftShift | TokenKind::RightShift => OperatorPrecedence::LeftRightShift,
|
||||
TokenKind::Plus | TokenKind::Minus => OperatorPrecedence::AddSub,
|
||||
TokenKind::Star
|
||||
| TokenKind::Slash
|
||||
| TokenKind::DoubleSlash
|
||||
| TokenKind::Percent
|
||||
| TokenKind::At => OperatorPrecedence::MulDivRemain,
|
||||
TokenKind::DoubleStar => OperatorPrecedence::Exponent,
|
||||
_ => return None,
|
||||
})
|
||||
}
|
||||
|
||||
/// Returns `true` if the precedence is right-associative i.e., the operations are evaluated
|
||||
/// from right to left.
|
||||
fn is_right_associative(self) -> bool {
|
||||
matches!(self, OperatorPrecedence::Exponent)
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -16,7 +16,7 @@ use crate::parser::{
|
|||
use crate::token_set::TokenSet;
|
||||
use crate::{Mode, ParseErrorType, Tok, TokenKind};
|
||||
|
||||
use super::expression::{ExpressionContext, Precedence};
|
||||
use super::expression::{ExpressionContext, OperatorPrecedence};
|
||||
use super::Parenthesized;
|
||||
|
||||
/// Tokens that represent compound statements.
|
||||
|
@ -2173,9 +2173,9 @@ impl<'src> Parser<'src> {
|
|||
// with (a | b) << c | d: ...
|
||||
// # Postfix should still be parsed first
|
||||
// with (a)[0] + b * c: ...
|
||||
self.parse_expression_with_precedence_recursive(
|
||||
self.parse_binary_expression_or_higher_recursive(
|
||||
lhs.into(),
|
||||
Precedence::Initial,
|
||||
OperatorPrecedence::Initial,
|
||||
ExpressionContext::default(),
|
||||
start,
|
||||
)
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue