Refactor binary expression parsing (#11073)

## Summary

This PR refactors the binary expression parsing in a way to make it
readable and easy to understand. It draws inspiration from the suggested
edits in the linked messages in #10752.

### Changes

* Ability to get the precedence of an operator
	* From a boolean operator (`BinOp`) to `OperatorPrecedence`
	* From a binary operator (`Operator`) to `OperatorPrecedence`
	* No comparison operator because all of them have the same precedence
* Implement methods on `TokenKind` to convert it to an appropriate
operator enum
	* Add `as_boolean_operator` which returns an `Option<BoolOp>`
	* Add `as_binary_operator` which returns an `Option<Operator>`
* No `as_comparison_operator` because it requires lookahead and I'm not
sure if `token.as_comparison_operator(peek)` is a good way to implement
it
* Introduce `BinaryLikeOperator`
	* Constructed from two tokens using the methods from the second point
* Add `precedence` method using the conversion methods mentioned in the
first point
* Make most of the functions in `TokenKind` private to the module
* Use `self` instead of `&self` for `TokenKind` 

fixes: #11072

## Test Plan

Refer #11088
This commit is contained in:
Dhruv Manilawala 2024-04-23 10:12:40 +05:30 committed by GitHub
parent 5b81b8368d
commit 7eba967e16
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 222 additions and 204 deletions

View file

@ -11,7 +11,6 @@ use ruff_python_ast::{
};
use ruff_text_size::{Ranged, TextLen, TextRange, TextSize};
use crate::parser::helpers::token_kind_to_cmp_op;
use crate::parser::progress::ParserProgress;
use crate::parser::{helpers, FunctionKind, Parser};
use crate::string::{parse_fstring_literal_element, parse_string_literal, StringType};
@ -238,21 +237,22 @@ impl<'src> Parser<'src> {
loop {
progress.assert_progressing(self);
let token = self.current_token_kind();
let current_token = self.current_token_kind();
if matches!(token, TokenKind::In) && context.is_in_excluded() {
if matches!(current_token, TokenKind::In) && context.is_in_excluded() {
// Omit the `in` keyword when parsing the target expression in a comprehension or
// a `for` statement.
break;
}
// We need to peek the next token because of `not in` operator.
let Some(new_precedence) = OperatorPrecedence::try_from_tokens(token, self.peek())
let Some(operator) = BinaryLikeOperator::try_from_tokens(current_token, self.peek())
else {
// Not an operator token.
// Not an operator.
break;
};
let new_precedence = operator.precedence();
let stop_at_current_operator = if new_precedence.is_right_associative() {
new_precedence < left_precedence
} else {
@ -263,43 +263,26 @@ impl<'src> Parser<'src> {
break;
}
// Operator token.
self.bump(token);
left.expr = match operator {
BinaryLikeOperator::Boolean(bool_op) => {
Expr::BoolOp(self.parse_boolean_expression(left.expr, start, bool_op, context))
}
BinaryLikeOperator::Comparison(cmp_op) => Expr::Compare(
self.parse_comparison_expression(left.expr, start, cmp_op, context),
),
BinaryLikeOperator::Binary(bin_op) => {
self.bump(TokenKind::from(bin_op));
// We need to create a dedicated node for boolean operations and comparison operations
// even though they are infix operators.
if token.is_bool_operator() {
left = Expr::BoolOp(self.parse_bool_operation_expression(
left.expr,
start,
token,
new_precedence,
context,
))
.into();
continue;
}
let right = self.parse_binary_expression_or_higher(new_precedence, context);
if token.is_compare_operator() {
left = Expr::Compare(self.parse_compare_expression(
left.expr,
start,
token,
new_precedence,
context,
))
.into();
continue;
}
let right = self.parse_binary_expression_or_higher(new_precedence, context);
left.expr = Expr::BinOp(ast::ExprBinOp {
left: Box::new(left.expr),
op: Operator::try_from(token).unwrap(),
right: Box::new(right.expr),
range: self.node_range(start),
});
Expr::BinOp(ast::ExprBinOp {
left: Box::new(left.expr),
op: bin_op,
right: Box::new(right.expr),
range: self.node_range(start),
})
}
};
}
left
@ -946,22 +929,23 @@ impl<'src> Parser<'src> {
/// Parses a boolean operation expression.
///
/// Note that the boolean `not` operator is parsed as a unary operator and
/// not as a boolean operation.
/// Note that the boolean `not` operator is parsed as a unary expression and
/// not as a boolean expression.
///
/// # Panics
///
/// If the parser isn't positioned at a `or` or `and` token.
///
/// See: <https://docs.python.org/3/reference/expressions.html#boolean-operations>
fn parse_bool_operation_expression(
fn parse_boolean_expression(
&mut self,
lhs: Expr,
start: TextSize,
operator_token: TokenKind,
operator_binding_power: OperatorPrecedence,
op: BoolOp,
context: ExpressionContext,
) -> ast::ExprBoolOp {
self.bump(TokenKind::from(op));
let mut values = vec![lhs];
let mut progress = ParserProgress::default();
@ -971,21 +955,42 @@ impl<'src> Parser<'src> {
progress.assert_progressing(self);
let parsed_expr =
self.parse_binary_expression_or_higher(operator_binding_power, context);
self.parse_binary_expression_or_higher(OperatorPrecedence::from(op), context);
values.push(parsed_expr.expr);
if !self.eat(operator_token) {
if !self.eat(TokenKind::from(op)) {
break;
}
}
ast::ExprBoolOp {
values,
op: BoolOp::try_from(operator_token).unwrap(),
op,
range: self.node_range(start),
}
}
/// Bump the appropriate token(s) for the given comparison operator.
fn bump_cmp_op(&mut self, op: CmpOp) {
let (first, second) = match op {
CmpOp::Eq => (TokenKind::EqEqual, None),
CmpOp::NotEq => (TokenKind::NotEqual, None),
CmpOp::Lt => (TokenKind::Less, None),
CmpOp::LtE => (TokenKind::LessEqual, None),
CmpOp::Gt => (TokenKind::Greater, None),
CmpOp::GtE => (TokenKind::GreaterEqual, None),
CmpOp::Is => (TokenKind::Is, None),
CmpOp::IsNot => (TokenKind::Is, Some(TokenKind::Not)),
CmpOp::In => (TokenKind::In, None),
CmpOp::NotIn => (TokenKind::Not, Some(TokenKind::In)),
};
self.bump(first);
if let Some(second) = second {
self.bump(second);
}
}
/// Parse a comparison expression.
///
/// This includes the following operators:
@ -998,72 +1003,47 @@ impl<'src> Parser<'src> {
/// If the parser isn't positioned at any of the comparison operators.
///
/// See: <https://docs.python.org/3/reference/expressions.html#comparisons>
fn parse_compare_expression(
fn parse_comparison_expression(
&mut self,
lhs: Expr,
start: TextSize,
operator: TokenKind,
operator_binding_power: OperatorPrecedence,
op: CmpOp,
context: ExpressionContext,
) -> ast::ExprCompare {
let compare_operator = token_kind_to_cmp_op([operator, self.current_token_kind()]).unwrap();
// Bump the appropriate token when the compare operator is made up of
// two separate tokens.
match compare_operator {
CmpOp::IsNot => {
self.bump(TokenKind::Not);
}
CmpOp::NotIn => {
self.bump(TokenKind::In);
}
_ => {}
}
self.bump_cmp_op(op);
let mut comparators = vec![];
let mut compare_operators = vec![compare_operator];
let mut operators = vec![op];
let mut progress = ParserProgress::default();
loop {
progress.assert_progressing(self);
let parsed_expr =
self.parse_binary_expression_or_higher(operator_binding_power, context);
comparators.push(parsed_expr.expr);
comparators.push(
self.parse_binary_expression_or_higher(
OperatorPrecedence::ComparisonsMembershipIdentity,
context,
)
.expr,
);
let next_operator = self.current_token_kind();
if !next_operator.is_compare_operator()
|| (matches!(next_operator, TokenKind::In) && context.is_in_excluded())
{
let next_token = self.current_token_kind();
if matches!(next_token, TokenKind::In) && context.is_in_excluded() {
break;
}
self.bump(next_operator); // compare operator
if let Ok(compare_operator) =
token_kind_to_cmp_op([next_operator, self.current_token_kind()])
{
// Bump the appropriate token when the compare operator is made up of
// two separate tokens.
match compare_operator {
CmpOp::IsNot => {
self.bump(TokenKind::Not);
}
CmpOp::NotIn => {
self.bump(TokenKind::In);
}
_ => {}
}
compare_operators.push(compare_operator);
} else {
let Some(next_op) = helpers::token_kind_to_cmp_op([next_token, self.peek()]) else {
break;
}
};
self.bump_cmp_op(next_op);
operators.push(next_op);
}
ast::ExprCompare {
left: Box::new(lhs),
ops: compare_operators.into_boxed_slice(),
ops: operators.into_boxed_slice(),
comparators: comparators.into_boxed_slice(),
range: self.node_range(start),
}
@ -2374,38 +2354,6 @@ pub(super) enum OperatorPrecedence {
}
impl OperatorPrecedence {
/// Returns the precedence for the given operator token or [None] if the token isn't an
/// operator token.
fn try_from_tokens(token: TokenKind, next: TokenKind) -> Option<OperatorPrecedence> {
Some(match token {
TokenKind::Or => OperatorPrecedence::Or,
TokenKind::And => OperatorPrecedence::And,
TokenKind::Not if next == TokenKind::In => {
OperatorPrecedence::ComparisonsMembershipIdentity
}
TokenKind::Is
| TokenKind::In
| TokenKind::EqEqual
| TokenKind::NotEqual
| TokenKind::Less
| TokenKind::LessEqual
| TokenKind::Greater
| TokenKind::GreaterEqual => OperatorPrecedence::ComparisonsMembershipIdentity,
TokenKind::Vbar => OperatorPrecedence::BitOr,
TokenKind::CircumFlex => OperatorPrecedence::BitXor,
TokenKind::Amper => OperatorPrecedence::BitAnd,
TokenKind::LeftShift | TokenKind::RightShift => OperatorPrecedence::LeftRightShift,
TokenKind::Plus | TokenKind::Minus => OperatorPrecedence::AddSub,
TokenKind::Star
| TokenKind::Slash
| TokenKind::DoubleSlash
| TokenKind::Percent
| TokenKind::At => OperatorPrecedence::MulDivRemain,
TokenKind::DoubleStar => OperatorPrecedence::Exponent,
_ => return None,
})
}
/// Returns `true` if the precedence is right-associative i.e., the operations are evaluated
/// from right to left.
fn is_right_associative(self) -> bool {
@ -2413,6 +2361,66 @@ impl OperatorPrecedence {
}
}
#[derive(Debug)]
enum BinaryLikeOperator {
Boolean(BoolOp),
Comparison(CmpOp),
Binary(Operator),
}
impl BinaryLikeOperator {
/// Attempts to convert the two tokens into the corresponding binary-like operator. Returns
/// [None] if it's not a binary-like operator.
fn try_from_tokens(current: TokenKind, next: TokenKind) -> Option<BinaryLikeOperator> {
if let Some(bool_op) = current.as_bool_operator() {
Some(BinaryLikeOperator::Boolean(bool_op))
} else if let Some(bin_op) = current.as_binary_operator() {
Some(BinaryLikeOperator::Binary(bin_op))
} else {
helpers::token_kind_to_cmp_op([current, next]).map(BinaryLikeOperator::Comparison)
}
}
/// Returns the [`OperatorPrecedence`] for the given operator token or [None] if the token
/// isn't an operator token.
fn precedence(&self) -> OperatorPrecedence {
match self {
BinaryLikeOperator::Boolean(bool_op) => OperatorPrecedence::from(*bool_op),
BinaryLikeOperator::Comparison(_) => OperatorPrecedence::ComparisonsMembershipIdentity,
BinaryLikeOperator::Binary(bin_op) => OperatorPrecedence::from(*bin_op),
}
}
}
impl From<BoolOp> for OperatorPrecedence {
#[inline]
fn from(op: BoolOp) -> Self {
match op {
BoolOp::And => OperatorPrecedence::And,
BoolOp::Or => OperatorPrecedence::Or,
}
}
}
impl From<Operator> for OperatorPrecedence {
#[inline]
fn from(op: Operator) -> Self {
match op {
Operator::Add | Operator::Sub => OperatorPrecedence::AddSub,
Operator::Mult
| Operator::Div
| Operator::FloorDiv
| Operator::Mod
| Operator::MatMult => OperatorPrecedence::MulDivRemain,
Operator::BitAnd => OperatorPrecedence::BitAnd,
Operator::BitOr => OperatorPrecedence::BitOr,
Operator::BitXor => OperatorPrecedence::BitXor,
Operator::LShift | Operator::RShift => OperatorPrecedence::LeftRightShift,
Operator::Pow => OperatorPrecedence::Exponent,
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub(super) enum GeneratorExpressionInParentheses {
/// The generator expression is in parentheses. The given [`TextSize`] is the

View file

@ -28,8 +28,8 @@ pub(super) fn set_expr_ctx(expr: &mut Expr, new_ctx: ExprContext) {
}
/// Converts a [`TokenKind`] array of size 2 to its correspondent [`CmpOp`].
pub(super) fn token_kind_to_cmp_op(kind: [TokenKind; 2]) -> Result<CmpOp, ()> {
Ok(match kind {
pub(super) const fn token_kind_to_cmp_op(tokens: [TokenKind; 2]) -> Option<CmpOp> {
Some(match tokens {
[TokenKind::Is, TokenKind::Not] => CmpOp::IsNot,
[TokenKind::Is, _] => CmpOp::Is,
[TokenKind::Not, TokenKind::In] => CmpOp::NotIn,
@ -40,6 +40,6 @@ pub(super) fn token_kind_to_cmp_op(kind: [TokenKind; 2]) -> Result<CmpOp, ()> {
[TokenKind::LessEqual, _] => CmpOp::LtE,
[TokenKind::Greater, _] => CmpOp::Gt,
[TokenKind::GreaterEqual, _] => CmpOp::GtE,
_ => return Err(()),
_ => return None,
})
}

View file

@ -536,17 +536,17 @@ pub enum TokenKind {
impl TokenKind {
#[inline]
pub const fn is_newline(&self) -> bool {
pub const fn is_newline(self) -> bool {
matches!(self, TokenKind::Newline | TokenKind::NonLogicalNewline)
}
#[inline]
pub const fn is_unary(&self) -> bool {
pub const fn is_unary(self) -> bool {
matches!(self, TokenKind::Plus | TokenKind::Minus)
}
#[inline]
pub const fn is_keyword(&self) -> bool {
pub const fn is_keyword(self) -> bool {
matches!(
self,
TokenKind::False
@ -587,7 +587,7 @@ impl TokenKind {
}
#[inline]
pub const fn is_operator(&self) -> bool {
pub const fn is_operator(self) -> bool {
matches!(
self,
TokenKind::Lpar
@ -646,7 +646,7 @@ impl TokenKind {
}
#[inline]
pub const fn is_singleton(&self) -> bool {
pub const fn is_singleton(self) -> bool {
matches!(self, TokenKind::False | TokenKind::True | TokenKind::None)
}
@ -663,7 +663,7 @@ impl TokenKind {
}
#[inline]
pub const fn is_arithmetic(&self) -> bool {
pub const fn is_arithmetic(self) -> bool {
matches!(
self,
TokenKind::DoubleStar
@ -677,7 +677,7 @@ impl TokenKind {
}
#[inline]
pub const fn is_bitwise_or_shift(&self) -> bool {
pub const fn is_bitwise_or_shift(self) -> bool {
matches!(
self,
TokenKind::LeftShift
@ -695,51 +695,65 @@ impl TokenKind {
}
#[inline]
pub const fn is_soft_keyword(&self) -> bool {
pub const fn is_soft_keyword(self) -> bool {
matches!(self, TokenKind::Match | TokenKind::Case)
}
/// Returns the [`BoolOp`] that corresponds to this token kind, if it is a boolean operator,
/// otherwise return [None].
#[inline]
pub const fn is_compare_operator(&self) -> bool {
matches!(
self,
TokenKind::Not
| TokenKind::In
| TokenKind::Is
| TokenKind::EqEqual
| TokenKind::NotEqual
| TokenKind::Less
| TokenKind::LessEqual
| TokenKind::Greater
| TokenKind::GreaterEqual
)
pub(crate) const fn as_bool_operator(self) -> Option<BoolOp> {
Some(match self {
TokenKind::And => BoolOp::And,
TokenKind::Or => BoolOp::Or,
_ => return None,
})
}
#[inline]
pub const fn is_bool_operator(&self) -> bool {
matches!(self, TokenKind::And | TokenKind::Or)
/// Returns the binary [`Operator`] that corresponds to the current token, if it's a binary
/// operator, otherwise return [None].
///
/// Use [`TokenKind::as_augmented_assign_operator`] to match against an augmented assignment
/// token.
pub(crate) const fn as_binary_operator(self) -> Option<Operator> {
Some(match self {
TokenKind::Plus => Operator::Add,
TokenKind::Minus => Operator::Sub,
TokenKind::Star => Operator::Mult,
TokenKind::At => Operator::MatMult,
TokenKind::DoubleStar => Operator::Pow,
TokenKind::Slash => Operator::Div,
TokenKind::DoubleSlash => Operator::FloorDiv,
TokenKind::Percent => Operator::Mod,
TokenKind::Amper => Operator::BitAnd,
TokenKind::Vbar => Operator::BitOr,
TokenKind::CircumFlex => Operator::BitXor,
TokenKind::LeftShift => Operator::LShift,
TokenKind::RightShift => Operator::RShift,
_ => return None,
})
}
/// Returns the [`Operator`] that corresponds to this token kind, if it is
/// an augmented assignment operator, or [`None`] otherwise.
#[inline]
pub const fn as_augmented_assign_operator(&self) -> Option<Operator> {
match self {
TokenKind::PlusEqual => Some(Operator::Add),
TokenKind::MinusEqual => Some(Operator::Sub),
TokenKind::StarEqual => Some(Operator::Mult),
TokenKind::AtEqual => Some(Operator::MatMult),
TokenKind::DoubleStarEqual => Some(Operator::Pow),
TokenKind::SlashEqual => Some(Operator::Div),
TokenKind::DoubleSlashEqual => Some(Operator::FloorDiv),
TokenKind::PercentEqual => Some(Operator::Mod),
TokenKind::AmperEqual => Some(Operator::BitAnd),
TokenKind::VbarEqual => Some(Operator::BitOr),
TokenKind::CircumflexEqual => Some(Operator::BitXor),
TokenKind::LeftShiftEqual => Some(Operator::LShift),
TokenKind::RightShiftEqual => Some(Operator::RShift),
_ => None,
}
pub(crate) const fn as_augmented_assign_operator(self) -> Option<Operator> {
Some(match self {
TokenKind::PlusEqual => Operator::Add,
TokenKind::MinusEqual => Operator::Sub,
TokenKind::StarEqual => Operator::Mult,
TokenKind::AtEqual => Operator::MatMult,
TokenKind::DoubleStarEqual => Operator::Pow,
TokenKind::SlashEqual => Operator::Div,
TokenKind::DoubleSlashEqual => Operator::FloorDiv,
TokenKind::PercentEqual => Operator::Mod,
TokenKind::AmperEqual => Operator::BitAnd,
TokenKind::VbarEqual => Operator::BitOr,
TokenKind::CircumflexEqual => Operator::BitXor,
TokenKind::LeftShiftEqual => Operator::LShift,
TokenKind::RightShiftEqual => Operator::RShift,
_ => return None,
})
}
pub const fn from_token(token: &Tok) -> Self {
@ -865,41 +879,6 @@ impl From<Tok> for TokenKind {
}
}
impl TryFrom<TokenKind> for Operator {
type Error = ();
fn try_from(value: TokenKind) -> Result<Self, Self::Error> {
Ok(match value {
TokenKind::At | TokenKind::AtEqual => Operator::MatMult,
TokenKind::Plus | TokenKind::PlusEqual => Operator::Add,
TokenKind::Star | TokenKind::StarEqual => Operator::Mult,
TokenKind::Vbar | TokenKind::VbarEqual => Operator::BitOr,
TokenKind::Minus | TokenKind::MinusEqual => Operator::Sub,
TokenKind::Slash | TokenKind::SlashEqual => Operator::Div,
TokenKind::Amper | TokenKind::AmperEqual => Operator::BitAnd,
TokenKind::Percent | TokenKind::PercentEqual => Operator::Mod,
TokenKind::DoubleStar | TokenKind::DoubleStarEqual => Operator::Pow,
TokenKind::LeftShift | TokenKind::LeftShiftEqual => Operator::LShift,
TokenKind::CircumFlex | TokenKind::CircumflexEqual => Operator::BitXor,
TokenKind::RightShift | TokenKind::RightShiftEqual => Operator::RShift,
TokenKind::DoubleSlash | TokenKind::DoubleSlashEqual => Operator::FloorDiv,
_ => return Err(()),
})
}
}
impl TryFrom<TokenKind> for BoolOp {
type Error = ();
fn try_from(value: TokenKind) -> Result<Self, Self::Error> {
Ok(match value {
TokenKind::And => BoolOp::And,
TokenKind::Or => BoolOp::Or,
_ => return Err(()),
})
}
}
impl TryFrom<&Tok> for UnaryOp {
type Error = String;
@ -922,6 +901,37 @@ impl TryFrom<TokenKind> for UnaryOp {
}
}
impl From<BoolOp> for TokenKind {
#[inline]
fn from(op: BoolOp) -> Self {
match op {
BoolOp::And => TokenKind::And,
BoolOp::Or => TokenKind::Or,
}
}
}
impl From<Operator> for TokenKind {
#[inline]
fn from(op: Operator) -> Self {
match op {
Operator::Add => TokenKind::Plus,
Operator::Sub => TokenKind::Minus,
Operator::Mult => TokenKind::Star,
Operator::MatMult => TokenKind::At,
Operator::Div => TokenKind::Slash,
Operator::Mod => TokenKind::Percent,
Operator::Pow => TokenKind::DoubleStar,
Operator::LShift => TokenKind::LeftShift,
Operator::RShift => TokenKind::RightShift,
Operator::BitOr => TokenKind::Vbar,
Operator::BitXor => TokenKind::CircumFlex,
Operator::BitAnd => TokenKind::Amper,
Operator::FloorDiv => TokenKind::DoubleSlash,
}
}
}
impl fmt::Display for TokenKind {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let value = match self {