Refactor unary expression parsing (#11088)

## Summary

This PR refactors unary expression parsing with the following changes:
* Ability to get `OperatorPrecedence` from a unary operator (`UnaryOp`)
* Implement methods on `TokenKind`
	* Add `as_unary_operator` which returns an `Option<UnaryOp>`
* Add `as_unary_arithmetic_operator` which returns an `Option<UnaryOp>`
(used for pattern parsing)
* Rename `is_unary` to `is_unary_arithmetic_operator` (used in the
linter)

resolves: #10752 

## Test Plan

Verify that the existing test cases pass, no ecosystem changes, run the
Python based fuzzer on 3000 random inputs and run it on dozens of
open-source repositories.
This commit is contained in:
Dhruv Manilawala 2024-04-23 10:25:02 +05:30 committed by GitHub
parent 7eba967e16
commit 38d2562f41
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 125 additions and 85 deletions

View file

@ -303,10 +303,21 @@ impl<'src> Parser<'src> {
context: ExpressionContext,
) -> ParsedExpr {
let start = self.node_start();
let token = self.current_token_kind();
let lhs = match self.current_token_kind() {
unary_tok @ (TokenKind::Plus | TokenKind::Minus | TokenKind::Tilde) => {
let unary_expr = self.parse_unary_expression(context);
if let Some(unary_op) = token.as_unary_operator() {
let expr = self.parse_unary_expression(unary_op, context);
if matches!(unary_op, UnaryOp::Not) {
if left_precedence > OperatorPrecedence::Not {
self.add_error(
ParseErrorType::OtherError(
"Boolean 'not' expression cannot be used here".to_string(),
),
&expr,
);
}
} else {
if left_precedence > OperatorPrecedence::PosNegBitNot
// > The power operator `**` binds less tightly than an arithmetic
// > or bitwise unary operator on its right, that is, 2**-1 is 0.5.
@ -316,36 +327,31 @@ impl<'src> Parser<'src> {
{
self.add_error(
ParseErrorType::OtherError(format!(
"Unary {unary_tok} expression cannot be used here",
"Unary '{unary_op}' expression cannot be used here",
)),
&unary_expr,
&expr,
);
}
Expr::UnaryOp(unary_expr).into()
}
TokenKind::Not => {
let unary_expr = self.parse_unary_expression(context);
if left_precedence > OperatorPrecedence::Not {
self.add_error(
ParseErrorType::OtherError(
"Boolean 'not' expression cannot be used here".to_string(),
),
&unary_expr,
);
}
Expr::UnaryOp(unary_expr).into()
}
return Expr::UnaryOp(expr).into();
}
match self.current_token_kind() {
TokenKind::Star => {
let starred_expr = self.parse_starred_expression(context);
if left_precedence > OperatorPrecedence::Initial
|| !context.is_starred_expression_allowed()
{
self.add_error(ParseErrorType::InvalidStarredExpressionUsage, &starred_expr);
}
Expr::Starred(starred_expr).into()
return Expr::Starred(starred_expr).into();
}
TokenKind::Await => {
let await_expr = self.parse_await_expression();
// `await` expressions cannot be nested
if left_precedence >= OperatorPrecedence::Await {
self.add_error(
@ -355,26 +361,31 @@ impl<'src> Parser<'src> {
&await_expr,
);
}
Expr::Await(await_expr).into()
return Expr::Await(await_expr).into();
}
TokenKind::Lambda => {
// Lambda expression isn't allowed in this context but we'll still
// parse it and report an error for better recovery.
// Lambda expression isn't allowed in this context but we'll still parse it and
// report an error for better recovery.
let lambda_expr = self.parse_lambda_expr();
self.add_error(ParseErrorType::InvalidLambdaExpressionUsage, &lambda_expr);
Expr::Lambda(lambda_expr).into()
return Expr::Lambda(lambda_expr).into();
}
TokenKind::Yield => {
let expr = self.parse_yield_expression();
if left_precedence > OperatorPrecedence::Initial
|| !context.is_yield_expression_allowed()
{
self.add_error(ParseErrorType::InvalidYieldExpressionUsage, &expr);
}
expr.into()
return expr.into();
}
_ => self.parse_atom(),
};
_ => {}
}
let lhs = self.parse_atom();
ParsedExpr {
expr: self.parse_postfix_expression(lhs.expr, start),
@ -881,20 +892,13 @@ impl<'src> Parser<'src> {
/// See: <https://docs.python.org/3/reference/expressions.html#unary-arithmetic-and-bitwise-operations>
pub(super) fn parse_unary_expression(
&mut self,
op: UnaryOp,
context: ExpressionContext,
) -> ast::ExprUnaryOp {
let start = self.node_start();
self.bump(TokenKind::from(op));
let op = UnaryOp::try_from(self.current_token_kind())
.expect("current token should be a unary operator");
self.bump(self.current_token_kind());
let operand = if op.is_not() {
self.parse_binary_expression_or_higher(OperatorPrecedence::Not, context)
} else {
// plus, minus and tilde
self.parse_binary_expression_or_higher(OperatorPrecedence::PosNegBitNot, context)
};
let operand = self.parse_binary_expression_or_higher(OperatorPrecedence::from(op), context);
ast::ExprUnaryOp {
op,
@ -2402,6 +2406,16 @@ impl From<BoolOp> for OperatorPrecedence {
}
}
impl From<UnaryOp> for OperatorPrecedence {
#[inline]
fn from(op: UnaryOp) -> Self {
match op {
UnaryOp::Not => OperatorPrecedence::Not,
_ => OperatorPrecedence::PosNegBitNot,
}
}
}
impl From<Operator> for OperatorPrecedence {
#[inline]
fn from(op: Operator) -> Self {

View file

@ -478,30 +478,34 @@ impl<'src> Parser<'src> {
},
})
}
// The `+` is only for better error recovery.
TokenKind::Minus | TokenKind::Plus
if matches!(
self.peek(),
TokenKind::Int | TokenKind::Float | TokenKind::Complex
) =>
{
let unary_expr = self.parse_unary_expression(ExpressionContext::default());
kind => {
// The `+` is only for better error recovery.
if let Some(unary_arithmetic_op) = kind.as_unary_arithmetic_operator() {
if matches!(
self.peek(),
TokenKind::Int | TokenKind::Float | TokenKind::Complex
) {
let unary_expr = self.parse_unary_expression(
unary_arithmetic_op,
ExpressionContext::default(),
);
if unary_expr.op.is_u_add() {
self.add_error(
ParseErrorType::OtherError(
"Unary '+' is not allowed as a literal pattern".to_string(),
),
&unary_expr,
);
if unary_expr.op.is_u_add() {
self.add_error(
ParseErrorType::OtherError(
"Unary '+' is not allowed as a literal pattern".to_string(),
),
&unary_expr,
);
}
return Pattern::MatchValue(ast::PatternMatchValue {
value: Box::new(Expr::UnaryOp(unary_expr)),
range: self.node_range(start),
});
}
}
Pattern::MatchValue(ast::PatternMatchValue {
value: Box::new(Expr::UnaryOp(unary_expr)),
range: self.node_range(start),
})
}
kind => {
// Upon encountering an unexpected token, return a `Pattern::MatchValue` containing
// an empty `Expr::Name`.
let invalid_node = if kind.is_keyword() {

View file

@ -540,11 +540,6 @@ impl TokenKind {
matches!(self, TokenKind::Newline | TokenKind::NonLogicalNewline)
}
#[inline]
pub const fn is_unary(self) -> bool {
matches!(self, TokenKind::Plus | TokenKind::Minus)
}
#[inline]
pub const fn is_keyword(self) -> bool {
matches!(
@ -699,6 +694,41 @@ impl TokenKind {
matches!(self, TokenKind::Match | TokenKind::Case)
}
/// Returns `true` if the current token is a unary arithmetic operator.
#[inline]
pub const fn is_unary_arithmetic_operator(self) -> bool {
matches!(self, TokenKind::Plus | TokenKind::Minus)
}
/// Returns the [`UnaryOp`] that corresponds to this token kind, if it is an arithmetic unary
/// operator, otherwise return [None].
///
/// Use [`TokenKind::as_unary_operator`] to match against any unary operator.
#[inline]
pub(crate) const fn as_unary_arithmetic_operator(self) -> Option<UnaryOp> {
Some(match self {
TokenKind::Plus => UnaryOp::UAdd,
TokenKind::Minus => UnaryOp::USub,
_ => return None,
})
}
/// Returns the [`UnaryOp`] that corresponds to this token kind, if it is a unary operator,
/// otherwise return [None].
///
/// Use [`TokenKind::as_unary_arithmetic_operator`] to match against only an arithmetic unary
/// operator.
#[inline]
pub(crate) const fn as_unary_operator(self) -> Option<UnaryOp> {
Some(match self {
TokenKind::Plus => UnaryOp::UAdd,
TokenKind::Minus => UnaryOp::USub,
TokenKind::Tilde => UnaryOp::Invert,
TokenKind::Not => UnaryOp::Not,
_ => return None,
})
}
/// Returns the [`BoolOp`] that corresponds to this token kind, if it is a boolean operator,
/// otherwise return [None].
#[inline]
@ -879,28 +909,6 @@ impl From<Tok> for TokenKind {
}
}
impl TryFrom<&Tok> for UnaryOp {
type Error = String;
fn try_from(value: &Tok) -> Result<Self, Self::Error> {
TokenKind::from_token(value).try_into()
}
}
impl TryFrom<TokenKind> for UnaryOp {
type Error = String;
fn try_from(value: TokenKind) -> Result<Self, Self::Error> {
Ok(match value {
TokenKind::Plus => UnaryOp::UAdd,
TokenKind::Minus => UnaryOp::USub,
TokenKind::Tilde => UnaryOp::Invert,
TokenKind::Not => UnaryOp::Not,
_ => return Err(format!("unexpected token: {value:?}")),
})
}
}
impl From<BoolOp> for TokenKind {
#[inline]
fn from(op: BoolOp) -> Self {
@ -911,6 +919,18 @@ impl From<BoolOp> for TokenKind {
}
}
impl From<UnaryOp> for TokenKind {
#[inline]
fn from(op: UnaryOp) -> Self {
match op {
UnaryOp::Invert => TokenKind::Tilde,
UnaryOp::Not => TokenKind::Not,
UnaryOp::UAdd => TokenKind::Plus,
UnaryOp::USub => TokenKind::Minus,
}
}
}
impl From<Operator> for TokenKind {
#[inline]
fn from(op: Operator) -> Self {