Parse Python 3.9+ parenthesized context managers

Since the upstream grammar for this is not LR(1), we abuse LALRPOP
macros and the Into/TryInto traits to build a cover grammar that
converts to either tuples or `with` items after additional validation.
It’s annoying and ugly, but something like this is basically our only
option short of switching to a more powerful parser algorithm.

Fixes #4145.

Signed-off-by: Anders Kaseorg <andersk@mit.edu>
This commit is contained in:
Anders Kaseorg 2022-12-11 02:55:56 -08:00
parent dec0bf571f
commit bfd847d04c
4 changed files with 2412 additions and 85 deletions

View file

@ -10,7 +10,8 @@ use crate::{
lexer,
context::set_context,
string::parse_strings,
token::StringKind
token::StringKind,
with::{ExprOrWithitems, TupleOrWithitems},
};
use num_bigint::BigInt;
@ -159,10 +160,6 @@ TestOrStarExprList: ast::Expr = {
TestList
};
TestOrStarNamedExprList: ast::Expr = {
GenericList<TestOrStarNamedExpr>
};
TestOrStarExpr: ast::Expr = {
Test,
StarExpr,
@ -173,6 +170,12 @@ TestOrStarNamedExpr: ast::Expr = {
StarExpr,
};
// One element of a parenthesized collection that may later turn out to be
// either a tuple or a list of `with` items: the expression itself plus an
// optional `as` target.
TestOrStarNamedExprOrWithitem: (ast::Expr, Option<Box<ast::Expr>>) = {
    <e:NamedExpressionTest> => (e, None),
    <e:StarExpr> => (e, None),
    // An `as` target is only ever kept as the binding of a `with` item (the
    // tuple conversion rejects it), so mark it as an assignment target here,
    // the same way the non-parenthesized `with` item rules do.
    <e:Test> "as" <v:Expression> => {
        let optional_vars = Some(Box::new(set_context(v, ast::ExprContext::Store)));
        (e, optional_vars)
    },
}
AugAssign: ast::Operator = {
"+=" => ast::Operator::Add,
"-=" => ast::Operator::Sub,
@ -472,7 +475,7 @@ ExceptClause: ast::Excepthandler = {
};
WithStatement: ast::Stmt = {
<location:@L> <is_async:"async"?> "with" <items:OneOrMore<WithItem>> ":" <body:Suite> <end_location:@R> => {
<location:@L> <is_async:"async"?> "with" <items:WithItems> ":" <body:Suite> <end_location:@R> => {
let type_comment = None;
let node = if is_async.is_some() {
ast::StmtKind::AsyncWith { items, body, type_comment }
@ -483,6 +486,25 @@ WithStatement: ast::Stmt = {
},
};
// These are only used for their types as macro parameters
ExprGoal: ast::Expr = {};
ExprOrWithitemsGoal: ExprOrWithitems = {};
// The items of a `with` statement.
//
// The first item is parsed through the `ExprOrWithitems` cover grammar
// (`TestAs<ExprOrWithitemsGoal>`), because a leading "(" may open either an
// ordinary parenthesized expression/tuple or a parenthesized list of `with`
// items. Each alternative then converts that first result to the type it
// needs; the conversions are fallible, hence the `=>?` actions.
WithItems: Vec<ast::Withitem> = {
// A lone first element with nothing after it: convert it directly to the
// list of `with` items.
<items:TestAs<ExprOrWithitemsGoal>> =>? items.try_into(),
// A single `<expr> as <target>` item: the first element must be a plain
// expression, and the target is switched to `Store` context.
<first:TestAs<ExprOrWithitemsGoal>> "as" <vars:Expression> =>? {
let optional_vars = Some(Box::new(set_context(vars, ast::ExprContext::Store)));
let context_expr = Box::new(first.try_into()?);
Ok(vec![ast::Withitem { context_expr, optional_vars }])
},
// Two or more comma-separated items: the first element (with its optional
// `as` target) must be a plain expression and is prepended to the remaining
// ordinary `WithItem`s.
<first:TestAs<ExprOrWithitemsGoal>> <n:("as" Expression)?> "," <mut items:OneOrMore<WithItem>> =>? {
let optional_vars = n.map(|val| Box::new(set_context(val.1, ast::ExprContext::Store)));
let context_expr = Box::new(first.try_into()?);
items.insert(0, ast::Withitem { context_expr, optional_vars });
Ok(items)
}
};
WithItem: ast::Withitem = {
<context_expr:Test> <n:("as" Expression)?> => {
let optional_vars = n.map(|val| Box::new(set_context(val.1, ast::ExprContext::Store)));
@ -688,7 +710,8 @@ YieldExpr: ast::Expr = {
},
};
Test: ast::Expr = {
Test = TestAs<ExprGoal>;
TestAs<Goal>: Goal = {
<body:OrTest> <location:@L> "if" <test:OrTest> "else" <orelse:Test> <end_location:@R> => ast::Expr {
location,
end_location: Some(end_location),
@ -698,9 +721,9 @@ Test: ast::Expr = {
body: Box::new(body),
orelse: Box::new(orelse),
}
},
OrTest,
LambdaDef,
}.into(),
OrTestAs<Goal>,
<e:LambdaDef> => e.into(),
};
NamedExpressionTest: ast::Expr = {
@ -750,7 +773,8 @@ LambdaDef: ast::Expr = {
}
}
OrTest: ast::Expr = {
OrTest = OrTestAs<ExprGoal>;
OrTestAs<Goal>: Goal = {
<location:@L> <e1:AndTest> <e2:("or" AndTest)+> <end_location:@R> => {
let mut values = vec![e1];
values.extend(e2.into_iter().map(|e| e.1));
@ -759,12 +783,13 @@ OrTest: ast::Expr = {
end_location: Some(end_location),
custom: (),
node: ast::ExprKind::BoolOp { op: ast::Boolop::Or, values }
}
}.into()
},
AndTest,
AndTestAs<Goal>,
};
AndTest: ast::Expr = {
AndTest = AndTestAs<ExprGoal>;
AndTestAs<Goal>: Goal = {
<location:@L> <e1:NotTest> <e2:("and" NotTest)+> <end_location:@R> => {
let mut values = vec![e1];
values.extend(e2.into_iter().map(|e| e.1));
@ -773,22 +798,24 @@ AndTest: ast::Expr = {
end_location: Some(end_location),
custom: (),
node: ast::ExprKind::BoolOp { op: ast::Boolop::And, values }
}
}.into()
},
NotTest,
NotTestAs<Goal>,
};
NotTest: ast::Expr = {
NotTest = NotTestAs<ExprGoal>;
NotTestAs<Goal>: Goal = {
<location:@L> "not" <e:NotTest> <end_location:@R> => ast::Expr {
location,
end_location: Some(end_location),
custom: (),
node: ast::ExprKind::UnaryOp { operand: Box::new(e), op: ast::Unaryop::Not }
},
Comparison,
}.into(),
ComparisonAs<Goal>,
};
Comparison: ast::Expr = {
Comparison = ComparisonAs<ExprGoal>;
ComparisonAs<Goal>: Goal = {
<location:@L> <left:Expression> <comparisons:(CompOp Expression)+> <end_location:@R> => {
let (ops, comparators) = comparisons.into_iter().unzip();
ast::Expr {
@ -796,9 +823,9 @@ Comparison: ast::Expr = {
end_location: Some(end_location),
custom: (),
node: ast::ExprKind::Compare { left: Box::new(left), ops, comparators }
}
}.into()
},
Expression,
ExpressionAs<Goal>,
};
CompOp: ast::Cmpop = {
@ -814,44 +841,48 @@ CompOp: ast::Cmpop = {
"is" "not" => ast::Cmpop::IsNot,
};
Expression: ast::Expr = {
Expression = ExpressionAs<ExprGoal>;
ExpressionAs<Goal>: Goal = {
<e1:Expression> <location:@L> "|" <e2:XorExpression> <end_location:@R> => ast::Expr {
location,
end_location: Some(end_location),
custom: (),
node: ast::ExprKind::BinOp { left: Box::new(e1), op: ast::Operator::BitOr, right: Box::new(e2) }
},
XorExpression,
}.into(),
XorExpressionAs<Goal>,
};
XorExpression: ast::Expr = {
XorExpression = XorExpressionAs<ExprGoal>;
XorExpressionAs<Goal>: Goal = {
<e1:XorExpression> <location:@L> "^" <e2:AndExpression> <end_location:@R> => ast::Expr {
location,
end_location: Some(end_location),
custom: (),
node: ast::ExprKind::BinOp { left: Box::new(e1), op: ast::Operator::BitXor, right: Box::new(e2) }
},
AndExpression,
}.into(),
AndExpressionAs<Goal>,
};
AndExpression: ast::Expr = {
AndExpression = AndExpressionAs<ExprGoal>;
AndExpressionAs<Goal>: Goal = {
<e1:AndExpression> <location:@L> "&" <e2:ShiftExpression> <end_location:@R> => ast::Expr {
location,
end_location: Some(end_location),
custom: (),
node: ast::ExprKind::BinOp { left: Box::new(e1), op: ast::Operator::BitAnd, right: Box::new(e2) }
},
ShiftExpression,
}.into(),
ShiftExpressionAs<Goal>,
};
ShiftExpression: ast::Expr = {
ShiftExpression = ShiftExpressionAs<ExprGoal>;
ShiftExpressionAs<Goal>: Goal = {
<e1:ShiftExpression> <location:@L> <op:ShiftOp> <e2:ArithmeticExpression> <end_location:@R> => ast::Expr {
location,
end_location: Some(end_location),
custom: (),
node: ast::ExprKind::BinOp { left: Box::new(e1), op, right: Box::new(e2) }
},
ArithmeticExpression,
}.into(),
ArithmeticExpressionAs<Goal>,
};
ShiftOp: ast::Operator = {
@ -859,14 +890,15 @@ ShiftOp: ast::Operator = {
">>" => ast::Operator::RShift,
};
ArithmeticExpression: ast::Expr = {
ArithmeticExpression = ArithmeticExpressionAs<ExprGoal>;
ArithmeticExpressionAs<Goal>: Goal = {
<location:@L> <a:ArithmeticExpression> <op:AddOp> <b:Term> <end_location:@R> => ast::Expr {
location,
end_location: Some(end_location),
custom: (),
node: ast::ExprKind::BinOp { left: Box::new(a), op, right: Box::new(b) }
},
Term,
}.into(),
TermAs<Goal>,
};
AddOp: ast::Operator = {
@ -874,14 +906,15 @@ AddOp: ast::Operator = {
"-" => ast::Operator::Sub,
};
Term: ast::Expr = {
Term = TermAs<ExprGoal>;
TermAs<Goal>: Goal = {
<a:Term> <location:@L> <op:MulOp> <b:Factor> <end_location:@R> => ast::Expr {
location,
end_location: Some(end_location),
custom: (),
node: ast::ExprKind::BinOp { left: Box::new(a), op, right: Box::new(b) }
},
Factor,
}.into(),
FactorAs<Goal>,
};
MulOp: ast::Operator = {
@ -892,14 +925,15 @@ MulOp: ast::Operator = {
"@" => ast::Operator::MatMult,
};
Factor: ast::Expr = {
Factor = FactorAs<ExprGoal>;
FactorAs<Goal>: Goal = {
<location:@L> <op:UnaryOp> <e:Factor> <end_location:@R> => ast::Expr {
location,
end_location: Some(end_location),
custom: (),
node: ast::ExprKind::UnaryOp { operand: Box::new(e), op }
},
Power,
}.into(),
PowerAs<Goal>,
};
UnaryOp: ast::Unaryop = {
@ -908,48 +942,53 @@ UnaryOp: ast::Unaryop = {
"~" => ast::Unaryop::Invert,
};
Power: ast::Expr = {
Power = PowerAs<ExprGoal>;
PowerAs<Goal>: Goal = {
<e:AtomExpr> <location:@L> "**" <b:Factor> <end_location:@R> => ast::Expr {
location,
end_location: Some(end_location),
custom: (),
node: ast::ExprKind::BinOp { left: Box::new(e), op: ast::Operator::Pow, right: Box::new(b) }
},
AtomExpr,
}.into(),
AtomExprAs<Goal>,
};
AtomExpr: ast::Expr = {
<location:@L> "await" <atom:AtomExpr2> <end_location:@R> => ast::Expr {
location,
end_location: Some(end_location),
custom: (),
node: ast::ExprKind::Await { value: Box::new(atom) }
AtomExpr = AtomExprAs<ExprGoal>;
AtomExprAs<Goal>: Goal = {
<location:@L> "await" <atom:AtomExpr2> <end_location:@R> => {
ast::Expr {
location,
end_location: Some(end_location),
custom: (),
node: ast::ExprKind::Await { value: Box::new(atom) }
}.into()
},
AtomExpr2,
AtomExpr2As<Goal>,
}
AtomExpr2: ast::Expr = {
Atom,
AtomExpr2 = AtomExpr2As<ExprGoal>;
AtomExpr2As<Goal>: Goal = {
AtomAs<Goal>,
<location:@L> <f:AtomExpr2> "(" <a:ArgumentList> ")" <end_location:@R> => {
ast::Expr {
location,
end_location: Some(end_location),
custom: (),
node: ast::ExprKind::Call { func: Box::new(f), args: a.args, keywords: a.keywords }
}
}.into()
},
<location:@L> <e:AtomExpr2> "[" <s:SubscriptList> "]" <end_location:@R> => ast::Expr {
location,
end_location: Some(end_location),
custom: (),
node: ast::ExprKind::Subscript { value: Box::new(e), slice: Box::new(s), ctx: ast::ExprContext::Load }
},
}.into(),
<location:@L> <e:AtomExpr2> "." <attr:Identifier> <end_location:@R> => ast::Expr {
location,
end_location: Some(end_location),
custom: (),
node: ast::ExprKind::Attribute { value: Box::new(e), attr, ctx: ast::ExprContext::Load }
},
}.into(),
};
SubscriptList: ast::Expr = {
@ -991,20 +1030,21 @@ SliceOp: Option<ast::Expr> = {
<location:@L> ":" <e:Test?> => e,
}
Atom: ast::Expr = {
<location:@L> <s:(@L string @R)+> =>? parse_strings(s).map_err(|e| e.into()),
Atom = AtomAs<ExprGoal>;
AtomAs<Goal>: Goal = {
<location:@L> <s:(@L string @R)+> =>? Ok(parse_strings(s)?.into()),
<location:@L> <value:Constant> <end_location:@R> => ast::Expr {
location,
end_location: Some(end_location),
custom: (),
node: ast::ExprKind::Constant { value, kind: None }
},
}.into(),
<location:@L> <name:Identifier> <end_location:@R> => ast::Expr {
location,
end_location: Some(end_location),
custom: (),
node: ast::ExprKind::Name { id: name, ctx: ast::ExprContext::Load }
},
}.into(),
<location:@L> "[" <e:ListLiteralValues?> "]"<end_location:@R> => {
let elts = e.unwrap_or_default();
ast::Expr {
@ -1012,7 +1052,7 @@ Atom: ast::Expr = {
end_location: Some(end_location),
custom: (),
node: ast::ExprKind::List { elts, ctx: ast::ExprContext::Load }
}
}.into()
},
<location:@L> "[" <elt:TestOrStarNamedExpr> <generators:CompFor> "]" <end_location:@R> => {
ast::Expr {
@ -1020,34 +1060,38 @@ Atom: ast::Expr = {
end_location: Some(end_location),
custom: (),
node: ast::ExprKind::ListComp { elt: Box::new(elt), generators }
}
}.into()
},
"(" <elt:TestOrStarNamedExprList> ")" =>? {
match elt.node {
ast::ExprKind::Starred { .. } => {
Err(LexicalError{
error: LexicalErrorType::OtherError("cannot use starred expression here".to_string()),
location: elt.location,
}.into())
}
_ => {
Ok(elt)
"(" <location:@L> <items:OneOrMore<TestOrStarNamedExprOrWithitem>> <trailing_comma:","?> <end_location:@R> ")" =>? {
if items.len() == 1 && items[0].1.is_none() && trailing_comma.is_none() {
match items[0].0.node {
ast::ExprKind::Starred { .. } => {
Err(LexicalError{
error: LexicalErrorType::OtherError("cannot use starred expression here".to_string()),
location: items[0].0.location,
}.into())
}
_ => {
Ok(items.into_iter().next().unwrap().0.into())
}
}
} else {
TupleOrWithitems { location, end_location, items }.try_into()
}
},
<location:@L> "(" ")" <end_location:@R> => ast::Expr::new(
location,
end_location,
ast::ExprKind::Tuple { elts: Vec::new(), ctx: ast::ExprContext::Load }
),
"(" <e:YieldExpr> ")" => e,
).into(),
"(" <e:YieldExpr> ")" => e.into(),
<location:@L> "(" <elt:NamedExpressionTest> <generators:CompFor> ")" <end_location:@R> => {
ast::Expr {
location,
end_location: Some(end_location),
custom: (),
node: ast::ExprKind::GeneratorExp { elt: Box::new(elt), generators }
}
}.into()
},
"(" <location:@L> "**" <e:Expression> ")" <end_location:@R> =>? {
Err(LexicalError{
@ -1099,7 +1143,7 @@ Atom: ast::Expr = {
end_location: Some(end_location),
custom: (),
node: ast::ExprKind::Dict { keys, values }
}
}.into()
},
<location:@L> "{" <e1:DictEntry> <generators:CompFor> "}" <end_location:@R> => {
ast::Expr {
@ -1111,26 +1155,26 @@ Atom: ast::Expr = {
value: Box::new(e1.1),
generators,
}
}
}.into()
},
<location:@L> "{" <elts:SetLiteralValues> "}" <end_location:@R> => ast::Expr {
location,
end_location: Some(end_location),
custom: (),
node: ast::ExprKind::Set { elts }
},
}.into(),
<location:@L> "{" <elt:Test> <generators:CompFor> "}" <end_location:@R> => {
ast::Expr {
location,
end_location: Some(end_location),
custom: (),
node: ast::ExprKind::SetComp { elt: Box::new(elt), generators }
}
}.into()
},
<location:@L> "True" <end_location:@R> => ast::Expr::new(location, end_location, ast::ExprKind::Constant { value: true.into(), kind: None }),
<location:@L> "False" <end_location:@R> => ast::Expr::new(location, end_location, ast::ExprKind::Constant { value: false.into(), kind: None }),
<location:@L> "None" <end_location:@R> => ast::Expr::new(location, end_location, ast::ExprKind::Constant { value: ast::Constant::None, kind: None }),
<location:@L> "..." <end_location:@R> => ast::Expr::new(location, end_location, ast::ExprKind::Constant { value: ast::Constant::Ellipsis, kind: None }),
<location:@L> "True" <end_location:@R> => ast::Expr::new(location, end_location, ast::ExprKind::Constant { value: true.into(), kind: None }).into(),
<location:@L> "False" <end_location:@R> => ast::Expr::new(location, end_location, ast::ExprKind::Constant { value: false.into(), kind: None }).into(),
<location:@L> "None" <end_location:@R> => ast::Expr::new(location, end_location, ast::ExprKind::Constant { value: ast::Constant::None, kind: None }).into(),
<location:@L> "..." <end_location:@R> => ast::Expr::new(location, end_location, ast::ExprKind::Constant { value: ast::Constant::Ellipsis, kind: None }).into(),
};
ListLiteralValues: Vec<ast::Expr> = {

View file

@ -33,3 +33,4 @@ mod python;
mod context;
mod string;
pub mod token;
mod with;

File diff suppressed because it is too large Load diff

188
parser/src/with.rs Normal file
View file

@ -0,0 +1,188 @@
//! Intermediate types for `with` statement cover grammar.
//!
//! When we start parsing a `with` statement, we don't initially know
//! whether we're looking at a tuple or a Python 3.9+ parenthesized
//! collection of contexts:
//!
//! ```python
//! with (a, b, c) as t: # tuple
//! with (a, b, c): # withitems
//! ```
//!
//! Since LALRPOP requires us to commit to an output type before we
//! have enough information to decide, we build a cover grammar that's
//! convertible either way. This module contains the necessary
//! intermediate data types.
use crate::ast::{self, Location};
use crate::error::{LexicalError, LexicalErrorType};
use crate::token::Tok;
use lalrpop_util::ParseError as LalrpopError;
/// Cover-grammar node for a parenthesized, comma-separated collection
/// whose final meaning (tuple expression or `with` items) is not yet
/// known at the point where it is parsed.
///
/// Two `.try_into()` conversions are available:
///
/// * into `Expr`, which is rejected as soon as an element carries an
///   `as` variable;
/// * into `ExprOrWithitems`, which always succeeds. It is nevertheless
///   expressed as a fallible conversion so that both targets share a
///   single interface, letting the LALRPOP macros that declare the
///   cover grammar be written once instead of being duplicated.
pub struct TupleOrWithitems {
    /// Start location supplied by the grammar for the collection.
    pub location: Location,
    /// End location supplied by the grammar for the collection.
    pub end_location: Location,
    /// The elements in source order, each paired with its optional
    /// `as` variable.
    pub items: Vec<(ast::Expr, Option<Box<ast::Expr>>)>,
}
impl TryFrom<TupleOrWithitems> for ast::Expr {
type Error = LalrpopError<Location, Tok, LexicalError>;
fn try_from(tuple_or_withitems: TupleOrWithitems) -> Result<ast::Expr, Self::Error> {
Ok(ast::Expr {
location: tuple_or_withitems.location,
end_location: Some(tuple_or_withitems.end_location),
custom: (),
node: ast::ExprKind::Tuple {
elts: tuple_or_withitems
.items
.into_iter()
.map(|(expr, optional_vars)| {
if let Some(vars) = optional_vars {
Err(LexicalError {
error: LexicalErrorType::OtherError(
"cannot use 'as' here".to_string(),
),
location: vars.location,
})?
}
Ok(expr)
})
.collect::<Result<Vec<ast::Expr>, Self::Error>>()?,
ctx: ast::ExprContext::Load,
},
})
}
}
impl TryFrom<TupleOrWithitems> for ExprOrWithitems {
type Error = LalrpopError<Location, Tok, LexicalError>;
fn try_from(items: TupleOrWithitems) -> Result<ExprOrWithitems, Self::Error> {
Ok(ExprOrWithitems::TupleOrWithitems(items))
}
}
/// Result type of the cover grammar: either an ordinary (non-tuple)
/// expression, or a parenthesized collection whose interpretation as a
/// tuple or as `with` items is still open.
///
/// Construction: `.into()` on an `Expr`. (Because `.into()` also turns an
/// `Expr` into itself, the grammar's LALRPOP macros can use the same call
/// for both goal types.)
///
/// Consumption, via `.try_into()`:
///
/// * to `Expr` — fails when any `as` clause is present;
/// * to `Vec<Withitem>` — fails when `as` clauses and starred expressions
///   are both present.
pub enum ExprOrWithitems {
    /// An expression that is already fully determined.
    Expr(ast::Expr),
    /// A parenthesized collection awaiting interpretation.
    TupleOrWithitems(TupleOrWithitems),
}
impl From<ast::Expr> for ExprOrWithitems {
    /// Lifts a finished expression into the cover type without altering it.
    fn from(expr: ast::Expr) -> ExprOrWithitems {
        Self::Expr(expr)
    }
}
impl TryFrom<ExprOrWithitems> for ast::Expr {
    type Error = LalrpopError<Location, Tok, LexicalError>;

    /// Resolves the cover type to a plain expression.
    ///
    /// An already-determined expression is returned as is; an undecided
    /// collection is interpreted as a tuple.
    ///
    /// # Errors
    ///
    /// Propagates the error of the tuple conversion, which rejects
    /// collections containing an `as` variable.
    fn try_from(expr_or_withitems: ExprOrWithitems) -> Result<ast::Expr, Self::Error> {
        match expr_or_withitems {
            ExprOrWithitems::TupleOrWithitems(collection) => {
                // Dispatches to the `TupleOrWithitems` conversion (the
                // argument type differs), so this is not a recursive call.
                Self::try_from(collection)
            }
            ExprOrWithitems::Expr(plain) => Ok(plain),
        }
    }
}
impl TryFrom<ExprOrWithitems> for Vec<ast::Withitem> {
    type Error = LalrpopError<Location, Tok, LexicalError>;

    /// Resolves the cover type to the items of a `with` statement.
    ///
    /// A parenthesized collection with no starred element becomes one
    /// `with` item per element, each `as` variable being carried over
    /// as given. Anything else (a plain expression, or a collection that
    /// contains a starred element) is first converted to a single
    /// expression and becomes the only item, without an `as` variable.
    ///
    /// # Errors
    ///
    /// Propagates the error of the expression conversion, i.e. a
    /// collection that mixes starred elements with `as` variables is
    /// rejected.
    fn try_from(expr_or_withitems: ExprOrWithitems) -> Result<Vec<ast::Withitem>, Self::Error> {
        match expr_or_withitems {
            ExprOrWithitems::TupleOrWithitems(collection)
                if collection
                    .items
                    .iter()
                    .all(|(expr, _)| !matches!(expr.node, ast::ExprKind::Starred { .. })) =>
            {
                let mut withitems = Vec::with_capacity(collection.items.len());
                for (context_expr, optional_vars) in collection.items {
                    withitems.push(ast::Withitem {
                        context_expr: Box::new(context_expr),
                        optional_vars,
                    });
                }
                Ok(withitems)
            }
            // Either a plain expression, or a collection with a starred
            // element, which can only be meant as a tuple.
            single => {
                let context_expr = Box::new(single.try_into()?);
                Ok(vec![ast::Withitem {
                    context_expr,
                    optional_vars: None,
                }])
            }
        }
    }
}
#[cfg(test)]
mod tests {
use crate::parser::parse_program;
#[test]
fn test_with_statement() {
let source = "\
with 0: pass
with 0 as x: pass
with 0, 1: pass
with 0 as x, 1 as y: pass
with 0 if 1 else 2: pass
with 0 if 1 else 2 as x: pass
with (): pass
with () as x: pass
with (0): pass
with (0) as x: pass
with (0,): pass
with (0,) as x: pass
with (0, 1): pass
with (0, 1) as x: pass
with (*a,): pass
with (*a,) as x: pass
with (0, *a): pass
with (0, *a) as x: pass
with (a := 0): pass
with (a := 0) as x: pass
with (a := 0, b := 1): pass
with (a := 0, b := 1) as x: pass
";
insta::assert_debug_snapshot!(parse_program(source, "<test>").unwrap());
}
#[test]
fn test_with_statement_invalid() {
for source in [
"with 0,: pass",
"with 0 as x,: pass",
"with 0 as *x: pass",
"with *a: pass",
"with *a as x: pass",
"with (*a): pass",
"with (*a) as x: pass",
"with *a, 0 as x: pass",
"with (*a, 0 as x): pass",
"with 0 as x, *a: pass",
"with (0 as x, *a): pass",
"with (0 as x) as y: pass",
"with (0 as x), 1: pass",
"with ((0 as x)): pass",
"with a := 0 as x: pass",
"with (a := 0 as x): pass",
] {
assert!(parse_program(source, "<test>").is_err());
}
}
}