LibCST/libcst/_parser/conversions/expression.py
zaicruvoir1rominet ca1f81f049
Avoid raising bare Exception (#1168)
* Keep old exception messages (avoid breaking-changes for users relying on exception messages)

* Move ``get_expected_str`` out of _exceptions.py, where it does not belong, to its own file in _parser/_parsing_check.py
2025-06-07 01:53:44 -07:00

1630 lines
53 KiB
Python

# Copyright (c) Meta Platforms, Inc. and affiliates.
#
# This source code is licensed under the MIT license found in the
# LICENSE file in the root directory of this source tree.
# pyre-unsafe
import re
import typing
from tokenize import (
Floatnumber as FLOATNUMBER_RE,
Imagnumber as IMAGNUMBER_RE,
Intnumber as INTNUMBER_RE,
)
from libcst import CSTLogicError
from libcst._exceptions import ParserSyntaxError, PartialParserSyntaxError
from libcst._maybe_sentinel import MaybeSentinel
from libcst._nodes.expression import (
Arg,
Asynchronous,
Attribute,
Await,
BinaryOperation,
BooleanOperation,
Call,
Comparison,
ComparisonTarget,
CompFor,
CompIf,
ConcatenatedString,
Dict,
DictComp,
DictElement,
Element,
Ellipsis,
Float,
FormattedString,
FormattedStringExpression,
FormattedStringText,
From,
GeneratorExp,
IfExp,
Imaginary,
Index,
Integer,
Lambda,
LeftCurlyBrace,
LeftParen,
LeftSquareBracket,
List,
ListComp,
Name,
NamedExpr,
Param,
Parameters,
RightCurlyBrace,
RightParen,
RightSquareBracket,
Set,
SetComp,
Slice,
StarredDictElement,
StarredElement,
Subscript,
SubscriptElement,
Tuple,
UnaryOperation,
Yield,
)
from libcst._nodes.op import (
Add,
And,
AssignEqual,
BaseBinaryOp,
BaseBooleanOp,
BaseCompOp,
BitAnd,
BitInvert,
BitOr,
BitXor,
Colon,
Comma,
Divide,
Dot,
Equal,
FloorDivide,
GreaterThan,
GreaterThanEqual,
In,
Is,
IsNot,
LeftShift,
LessThan,
LessThanEqual,
MatrixMultiply,
Minus,
Modulo,
Multiply,
Not,
NotEqual,
NotIn,
Or,
Plus,
Power,
RightShift,
Subtract,
)
from libcst._nodes.whitespace import SimpleWhitespace
from libcst._parser.custom_itertools import grouper
from libcst._parser.production_decorator import with_production
from libcst._parser.types.config import ParserConfig
from libcst._parser.types.partials import (
ArglistPartial,
AttributePartial,
CallPartial,
FormattedStringConversionPartial,
FormattedStringFormatSpecPartial,
SlicePartial,
SubscriptPartial,
WithLeadingWhitespace,
)
from libcst._parser.types.token import Token
from libcst._parser.whitespace_parser import parse_parenthesizable_whitespace
BINOP_TOKEN_LUT: typing.Dict[str, typing.Type[BaseBinaryOp]] = {
"*": Multiply,
"@": MatrixMultiply,
"/": Divide,
"%": Modulo,
"//": FloorDivide,
"+": Add,
"-": Subtract,
"<<": LeftShift,
">>": RightShift,
"&": BitAnd,
"^": BitXor,
"|": BitOr,
}
BOOLOP_TOKEN_LUT: typing.Dict[str, typing.Type[BaseBooleanOp]] = {"and": And, "or": Or}
COMPOP_TOKEN_LUT: typing.Dict[str, typing.Type[BaseCompOp]] = {
"<": LessThan,
">": GreaterThan,
"==": Equal,
"<=": LessThanEqual,
">=": GreaterThanEqual,
"in": In,
"is": Is,
}
# N.B. This uses a `testlist | star_expr`, not a `testlist_star_expr` because
# `testlist_star_expr` may not always be representable by a non-partial node, since it's
# only used as part of `expr_stmt`.
@with_production("expression_input", "(testlist | star_expr) ENDMARKER")
def convert_expression_input(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
(child, endmarker) = children
# HACK: UGLY! REMOVE THIS SOON!
# Unwrap WithLeadingWhitespace if it exists. It shouldn't exist by this point, but
# testlist isn't fully implemented, and we currently leak these partial objects.
if isinstance(child, WithLeadingWhitespace):
child = child.value
return child
@with_production("namedexpr_test", "test [':=' test]", version=">=3.8")
def convert_namedexpr_test(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
test, *assignment = children
if len(assignment) == 0:
return test
# Convert all of the operations that have no precedence in a loop
(walrus, value) = assignment
return WithLeadingWhitespace(
NamedExpr(
target=test.value,
whitespace_before_walrus=parse_parenthesizable_whitespace(
config, walrus.whitespace_before
),
whitespace_after_walrus=parse_parenthesizable_whitespace(
config, walrus.whitespace_after
),
value=value.value,
),
test.whitespace_before,
)
@with_production("test", "or_test ['if' or_test 'else' test] | lambdef")
def convert_test(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
if len(children) == 1:
(child,) = children
return child
else:
(body, if_token, test, else_token, orelse) = children
return WithLeadingWhitespace(
IfExp(
body=body.value,
test=test.value,
orelse=orelse.value,
whitespace_before_if=parse_parenthesizable_whitespace(
config, if_token.whitespace_before
),
whitespace_after_if=parse_parenthesizable_whitespace(
config, if_token.whitespace_after
),
whitespace_before_else=parse_parenthesizable_whitespace(
config, else_token.whitespace_before
),
whitespace_after_else=parse_parenthesizable_whitespace(
config, else_token.whitespace_after
),
),
body.whitespace_before,
)
@with_production("test_nocond", "or_test | lambdef_nocond")
def convert_test_nocond(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
(child,) = children
return child
@with_production("lambdef", "'lambda' [varargslist] ':' test")
@with_production("lambdef_nocond", "'lambda' [varargslist] ':' test_nocond")
def convert_lambda(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
lambdatoken, *params, colontoken, test = children
# Grab the whitespace around the colon. If there are no params, then
# the colon owns the whitespace before and after it. If there are
# any params, then the last param owns the whitespace before the colon.
# We handle the parameter movement below.
colon = Colon(
whitespace_before=parse_parenthesizable_whitespace(
config, colontoken.whitespace_before
),
whitespace_after=parse_parenthesizable_whitespace(
config, colontoken.whitespace_after
),
)
# Unpack optional parameters
if len(params) == 0:
parameters = Parameters()
whitespace_after_lambda = MaybeSentinel.DEFAULT
else:
(parameters,) = params
whitespace_after_lambda = parse_parenthesizable_whitespace(
config, lambdatoken.whitespace_after
)
# Handle pre-colon whitespace
if parameters.star_kwarg is not None:
if parameters.star_kwarg.comma == MaybeSentinel.DEFAULT:
parameters = parameters.with_changes(
star_kwarg=parameters.star_kwarg.with_changes(
whitespace_after_param=colon.whitespace_before
)
)
elif parameters.kwonly_params:
if parameters.kwonly_params[-1].comma == MaybeSentinel.DEFAULT:
parameters = parameters.with_changes(
kwonly_params=(
*parameters.kwonly_params[:-1],
parameters.kwonly_params[-1].with_changes(
whitespace_after_param=colon.whitespace_before
),
)
)
elif isinstance(parameters.star_arg, Param):
if parameters.star_arg.comma == MaybeSentinel.DEFAULT:
parameters = parameters.with_changes(
star_arg=parameters.star_arg.with_changes(
whitespace_after_param=colon.whitespace_before
)
)
elif parameters.params:
if parameters.params[-1].comma == MaybeSentinel.DEFAULT:
parameters = parameters.with_changes(
params=(
*parameters.params[:-1],
parameters.params[-1].with_changes(
whitespace_after_param=colon.whitespace_before
),
)
)
# Colon doesn't own its own pre-whitespace now.
colon = colon.with_changes(whitespace_before=SimpleWhitespace(""))
# Return a lambda
return WithLeadingWhitespace(
Lambda(
whitespace_after_lambda=whitespace_after_lambda,
params=parameters,
body=test.value,
colon=colon,
),
lambdatoken.whitespace_before,
)
@with_production("or_test", "and_test ('or' and_test)*")
@with_production("and_test", "not_test ('and' not_test)*")
def convert_boolop(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
leftexpr, *rightexprs = children
if len(rightexprs) == 0:
return leftexpr
whitespace_before = leftexpr.whitespace_before
leftexpr = leftexpr.value
# Convert all of the operations that have no precedence in a loop
for op, rightexpr in grouper(rightexprs, 2):
if op.string not in BOOLOP_TOKEN_LUT:
raise ParserSyntaxError(
f"Unexpected token '{op.string}'!",
lines=config.lines,
raw_line=0,
raw_column=0,
)
leftexpr = BooleanOperation(
left=leftexpr,
# pyre-ignore Pyre thinks that the type of the LUT is CSTNode.
operator=BOOLOP_TOKEN_LUT[op.string](
whitespace_before=parse_parenthesizable_whitespace(
config, op.whitespace_before
),
whitespace_after=parse_parenthesizable_whitespace(
config, op.whitespace_after
),
),
right=rightexpr.value,
)
return WithLeadingWhitespace(leftexpr, whitespace_before)
@with_production("not_test", "'not' not_test | comparison")
def convert_not_test(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
if len(children) == 1:
(child,) = children
return child
else:
nottoken, nottest = children
return WithLeadingWhitespace(
UnaryOperation(
operator=Not(
whitespace_after=parse_parenthesizable_whitespace(
config, nottoken.whitespace_after
)
),
expression=nottest.value,
),
nottoken.whitespace_before,
)
@with_production("comparison", "expr (comp_op expr)*")
def convert_comparison(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
if len(children) == 1:
(child,) = children
return child
lhs, *rest = children
comparisons: typing.List[ComparisonTarget] = []
for operator, comparator in grouper(rest, 2):
comparisons.append(
ComparisonTarget(operator=operator, comparator=comparator.value)
)
return WithLeadingWhitespace(
Comparison(left=lhs.value, comparisons=tuple(comparisons)),
lhs.whitespace_before,
)
@with_production(
"comp_op", "('<'|'>'|'=='|'>='|'<='|'<>'|'!='|'in'|'not' 'in'|'is'|'is' 'not')"
)
def convert_comp_op(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
if len(children) == 1:
(op,) = children
if op.string in COMPOP_TOKEN_LUT:
# A regular comparison containing one token
# pyre-ignore Pyre thinks that the type of the LUT is CSTNode.
return COMPOP_TOKEN_LUT[op.string](
whitespace_before=parse_parenthesizable_whitespace(
config, op.whitespace_before
),
whitespace_after=parse_parenthesizable_whitespace(
config, op.whitespace_after
),
)
elif op.string in ["!=", "<>"]:
# Not equal, which can take two forms in some cases
return NotEqual(
whitespace_before=parse_parenthesizable_whitespace(
config, op.whitespace_before
),
value=op.string,
whitespace_after=parse_parenthesizable_whitespace(
config, op.whitespace_after
),
)
else:
# this should be unreachable
raise ParserSyntaxError(
f"Unexpected token '{op.string}'!",
lines=config.lines,
raw_line=0,
raw_column=0,
)
else:
# A two-token comparison
leftcomp, rightcomp = children
if leftcomp.string == "not" and rightcomp.string == "in":
return NotIn(
whitespace_before=parse_parenthesizable_whitespace(
config, leftcomp.whitespace_before
),
whitespace_between=parse_parenthesizable_whitespace(
config, leftcomp.whitespace_after
),
whitespace_after=parse_parenthesizable_whitespace(
config, rightcomp.whitespace_after
),
)
elif leftcomp.string == "is" and rightcomp.string == "not":
return IsNot(
whitespace_before=parse_parenthesizable_whitespace(
config, leftcomp.whitespace_before
),
whitespace_between=parse_parenthesizable_whitespace(
config, leftcomp.whitespace_after
),
whitespace_after=parse_parenthesizable_whitespace(
config, rightcomp.whitespace_after
),
)
else:
# this should be unreachable
raise ParserSyntaxError(
f"Unexpected token '{leftcomp.string} {rightcomp.string}'!",
lines=config.lines,
raw_line=0,
raw_column=0,
)
@with_production("star_expr", "'*' expr")
def convert_star_expr(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
star, expr = children
return WithLeadingWhitespace(
StarredElement(
expr.value,
whitespace_before_value=parse_parenthesizable_whitespace(
config, expr.whitespace_before
),
# atom is responsible for parenthesis and trailing_whitespace if they exist
# testlist_comp, exprlist, dictorsetmaker, etc are responsible for the comma
# if it exists.
),
whitespace_before=star.whitespace_before,
)
@with_production("expr", "xor_expr ('|' xor_expr)*")
@with_production("xor_expr", "and_expr ('^' and_expr)*")
@with_production("and_expr", "shift_expr ('&' shift_expr)*")
@with_production("shift_expr", "arith_expr (('<<'|'>>') arith_expr)*")
@with_production("arith_expr", "term (('+'|'-') term)*")
@with_production("term", "factor (('*'|'@'|'/'|'%'|'//') factor)*", version=">=3.5")
@with_production("term", "factor (('*'|'/'|'%'|'//') factor)*", version="<3.5")
def convert_binop(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
leftexpr, *rightexprs = children
if len(rightexprs) == 0:
return leftexpr
whitespace_before = leftexpr.whitespace_before
leftexpr = leftexpr.value
# Convert all of the operations that have no precedence in a loop
for op, rightexpr in grouper(rightexprs, 2):
if op.string not in BINOP_TOKEN_LUT:
raise ParserSyntaxError(
f"Unexpected token '{op.string}'!",
lines=config.lines,
raw_line=0,
raw_column=0,
)
leftexpr = BinaryOperation(
left=leftexpr,
# pyre-ignore Pyre thinks that the type of the LUT is CSTNode.
operator=BINOP_TOKEN_LUT[op.string](
whitespace_before=parse_parenthesizable_whitespace(
config, op.whitespace_before
),
whitespace_after=parse_parenthesizable_whitespace(
config, op.whitespace_after
),
),
right=rightexpr.value,
)
return WithLeadingWhitespace(leftexpr, whitespace_before)
@with_production("factor", "('+'|'-'|'~') factor | power")
def convert_factor(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
if len(children) == 1:
(child,) = children
return child
op, factor = children
# First, tokenize the unary operator
if op.string == "+":
opnode = Plus(
whitespace_after=parse_parenthesizable_whitespace(
config, op.whitespace_after
)
)
elif op.string == "-":
opnode = Minus(
whitespace_after=parse_parenthesizable_whitespace(
config, op.whitespace_after
)
)
elif op.string == "~":
opnode = BitInvert(
whitespace_after=parse_parenthesizable_whitespace(
config, op.whitespace_after
)
)
else:
raise ParserSyntaxError(
f"Unexpected token '{op.string}'!",
lines=config.lines,
raw_line=0,
raw_column=0,
)
return WithLeadingWhitespace(
UnaryOperation(operator=opnode, expression=factor.value), op.whitespace_before
)
@with_production("power", "atom_expr ['**' factor]")
def convert_power(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
if len(children) == 1:
(child,) = children
return child
left, power, right = children
return WithLeadingWhitespace(
BinaryOperation(
left=left.value,
operator=Power(
whitespace_before=parse_parenthesizable_whitespace(
config, power.whitespace_before
),
whitespace_after=parse_parenthesizable_whitespace(
config, power.whitespace_after
),
),
right=right.value,
),
left.whitespace_before,
)
@with_production("atom_expr", "atom_expr_await | atom_expr_trailer")
def convert_atom_expr(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
(child,) = children
return child
@with_production("atom_expr_await", "AWAIT atom_expr_trailer")
def convert_atom_expr_await(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
keyword, expr = children
return WithLeadingWhitespace(
Await(
whitespace_after_await=parse_parenthesizable_whitespace(
config, keyword.whitespace_after
),
expression=expr.value,
),
keyword.whitespace_before,
)
@with_production("atom_expr_trailer", "atom trailer*")
def convert_atom_expr_trailer(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
atom, *trailers = children
whitespace_before = atom.whitespace_before
atom = atom.value
# Need to walk through all trailers from left to right and construct
# a series of nodes based on each partial type. We can't do this with
# left recursion due to limits in the parser.
for trailer in trailers:
if isinstance(trailer, SubscriptPartial):
atom = Subscript(
value=atom,
whitespace_after_value=parse_parenthesizable_whitespace(
config, trailer.whitespace_before
),
lbracket=trailer.lbracket,
# pyre-fixme[6]: Expected `Sequence[SubscriptElement]` for 4th param
# but got `Union[typing.Sequence[SubscriptElement], Index, Slice]`.
slice=trailer.slice,
rbracket=trailer.rbracket,
)
elif isinstance(trailer, AttributePartial):
atom = Attribute(value=atom, dot=trailer.dot, attr=trailer.attr)
elif isinstance(trailer, CallPartial):
# If the trailing argument doesn't have a comma, then it owns the
# trailing whitespace before the rpar. Otherwise, the comma owns
# it.
if (
len(trailer.args) > 0
and trailer.args[-1].comma == MaybeSentinel.DEFAULT
):
args = (
*trailer.args[:-1],
trailer.args[-1].with_changes(
whitespace_after_arg=trailer.rpar.whitespace_before
),
)
else:
args = trailer.args
atom = Call(
func=atom,
whitespace_after_func=parse_parenthesizable_whitespace(
config, trailer.lpar.whitespace_before
),
whitespace_before_args=trailer.lpar.value.whitespace_after,
# pyre-fixme[6]: Expected `Sequence[Arg]` for 4th param but got
# `Tuple[object, ...]`.
args=tuple(args),
)
else:
# This is an invalid trailer, so lets give up
raise CSTLogicError()
return WithLeadingWhitespace(atom, whitespace_before)
@with_production(
"trailer", "trailer_arglist | trailer_subscriptlist | trailer_attribute"
)
def convert_trailer(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
(child,) = children
return child
@with_production("trailer_arglist", "'(' [arglist] ')'")
def convert_trailer_arglist(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
lpar, *arglist, rpar = children
return CallPartial(
lpar=WithLeadingWhitespace(
LeftParen(
whitespace_after=parse_parenthesizable_whitespace(
config, lpar.whitespace_after
)
),
lpar.whitespace_before,
),
args=() if not arglist else arglist[0].args,
rpar=RightParen(
whitespace_before=parse_parenthesizable_whitespace(
config, rpar.whitespace_before
)
),
)
@with_production("trailer_subscriptlist", "'[' subscriptlist ']'")
def convert_trailer_subscriptlist(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
(lbracket, subscriptlist, rbracket) = children
return SubscriptPartial(
lbracket=LeftSquareBracket(
whitespace_after=parse_parenthesizable_whitespace(
config, lbracket.whitespace_after
)
),
slice=subscriptlist.value,
rbracket=RightSquareBracket(
whitespace_before=parse_parenthesizable_whitespace(
config, rbracket.whitespace_before
)
),
whitespace_before=lbracket.whitespace_before,
)
@with_production("subscriptlist", "subscript (',' subscript)* [',']")
def convert_subscriptlist(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
# This is a list of SubscriptElement, so construct as such by grouping every
# subscript with an optional comma and adding to a list.
elements = []
for slice, comma in grouper(children, 2):
if comma is None:
elements.append(SubscriptElement(slice=slice.value))
else:
elements.append(
SubscriptElement(
slice=slice.value,
comma=Comma(
whitespace_before=parse_parenthesizable_whitespace(
config, comma.whitespace_before
),
whitespace_after=parse_parenthesizable_whitespace(
config, comma.whitespace_after
),
),
)
)
return WithLeadingWhitespace(elements, children[0].whitespace_before)
@with_production("subscript", "test | [test] ':' [test] [sliceop]")
def convert_subscript(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
if len(children) == 1 and not isinstance(children[0], Token):
# This is just an index node
(test,) = children
return WithLeadingWhitespace(Index(test.value), test.whitespace_before)
if isinstance(children[-1], SlicePartial):
# We got a partial slice as the final param. Extract the final
# bits of the full subscript.
*others, sliceop = children
whitespace_before = others[0].whitespace_before
second_colon = sliceop.second_colon
step = sliceop.step
else:
# We can just parse this below, without taking extras from the
# partial child.
others = children
whitespace_before = others[0].whitespace_before
second_colon = MaybeSentinel.DEFAULT
step = None
# We need to create a partial slice to pass up. So, align so we have
# a list that's always [Optional[Test], Colon, Optional[Test]].
if isinstance(others[0], Token):
# First token is a colon, so insert an empty test on the LHS. We
# know the RHS is a test since it's not a sliceop.
slicechildren = [None, *others]
else:
# First token is non-colon, so its a test.
slicechildren = [*others]
if len(slicechildren) < 3:
# Now, we have to fill in the RHS. We know its two long
# at this point if its not already 3.
slicechildren = [*slicechildren, None]
lower, first_colon, upper = slicechildren
return WithLeadingWhitespace(
Slice(
lower=lower.value if lower is not None else None,
first_colon=Colon(
whitespace_before=parse_parenthesizable_whitespace(
config,
first_colon.whitespace_before,
),
whitespace_after=parse_parenthesizable_whitespace(
config,
first_colon.whitespace_after,
),
),
upper=upper.value if upper is not None else None,
second_colon=second_colon,
step=step,
),
whitespace_before=whitespace_before,
)
@with_production("sliceop", "':' [test]")
def convert_sliceop(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
if len(children) == 2:
colon, test = children
step = test.value
else:
(colon,) = children
step = None
return SlicePartial(
second_colon=Colon(
whitespace_before=parse_parenthesizable_whitespace(
config, colon.whitespace_before
),
whitespace_after=parse_parenthesizable_whitespace(
config, colon.whitespace_after
),
),
step=step,
)
@with_production("trailer_attribute", "'.' NAME")
def convert_trailer_attribute(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
dot, name = children
return AttributePartial(
dot=Dot(
whitespace_before=parse_parenthesizable_whitespace(
config, dot.whitespace_before
),
whitespace_after=parse_parenthesizable_whitespace(
config, dot.whitespace_after
),
),
attr=Name(name.string),
)
@with_production(
"atom",
"atom_parens | atom_squarebrackets | atom_curlybraces | atom_string | atom_basic | atom_ellipses",
)
def convert_atom(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
(child,) = children
return child
@with_production("atom_basic", "NAME | NUMBER | 'None' | 'True' | 'False'")
def convert_atom_basic(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
(child,) = children
if child.type.name == "NAME":
# This also handles 'None', 'True', and 'False' directly, but we
# keep it in the grammar to be more correct.
return WithLeadingWhitespace(Name(child.string), child.whitespace_before)
elif child.type.name == "NUMBER":
# We must determine what type of number it is since we split node
# types up this way.
if re.fullmatch(INTNUMBER_RE, child.string):
return WithLeadingWhitespace(Integer(child.string), child.whitespace_before)
elif re.fullmatch(FLOATNUMBER_RE, child.string):
return WithLeadingWhitespace(Float(child.string), child.whitespace_before)
elif re.fullmatch(IMAGNUMBER_RE, child.string):
return WithLeadingWhitespace(
Imaginary(child.string), child.whitespace_before
)
else:
raise ParserSyntaxError(
f"Unparseable number {child.string}",
lines=config.lines,
raw_line=0,
raw_column=0,
)
else:
raise ParserSyntaxError(
f"Logic error, unexpected token {child.type.name}",
lines=config.lines,
raw_line=0,
raw_column=0,
)
@with_production("atom_squarebrackets", "'[' [testlist_comp_list] ']'")
def convert_atom_squarebrackets(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
lbracket_tok, *body, rbracket_tok = children
lbracket = LeftSquareBracket(
whitespace_after=parse_parenthesizable_whitespace(
config, lbracket_tok.whitespace_after
)
)
rbracket = RightSquareBracket(
whitespace_before=parse_parenthesizable_whitespace(
config, rbracket_tok.whitespace_before
)
)
if len(body) == 0:
list_node = List((), lbracket=lbracket, rbracket=rbracket)
else: # len(body) == 1
# body[0] is a List or ListComp
list_node = body[0].value.with_changes(lbracket=lbracket, rbracket=rbracket)
return WithLeadingWhitespace(list_node, lbracket_tok.whitespace_before)
@with_production("atom_curlybraces", "'{' [dictorsetmaker] '}'")
def convert_atom_curlybraces(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
lbrace_tok, *body, rbrace_tok = children
lbrace = LeftCurlyBrace(
whitespace_after=parse_parenthesizable_whitespace(
config, lbrace_tok.whitespace_after
)
)
rbrace = RightCurlyBrace(
whitespace_before=parse_parenthesizable_whitespace(
config, rbrace_tok.whitespace_before
)
)
if len(body) == 0:
dict_or_set_node = Dict((), lbrace=lbrace, rbrace=rbrace)
else: # len(body) == 1
dict_or_set_node = body[0].value.with_changes(lbrace=lbrace, rbrace=rbrace)
return WithLeadingWhitespace(dict_or_set_node, lbrace_tok.whitespace_before)
@with_production("atom_parens", "'(' [yield_expr|testlist_comp_tuple] ')'")
def convert_atom_parens(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
lpar_tok, *atoms, rpar_tok = children
lpar = LeftParen(
whitespace_after=parse_parenthesizable_whitespace(
config, lpar_tok.whitespace_after
)
)
rpar = RightParen(
whitespace_before=parse_parenthesizable_whitespace(
config, rpar_tok.whitespace_before
)
)
if len(atoms) == 1:
# inner_atom is a _BaseParenthesizedNode
inner_atom = atoms[0].value
return WithLeadingWhitespace(
inner_atom.with_changes(
# pyre-fixme[60]: Expected to unpack an iterable, but got `unknown`.
lpar=(lpar, *inner_atom.lpar),
# pyre-fixme[60]: Expected to unpack an iterable, but got `unknown`.
rpar=(*inner_atom.rpar, rpar),
),
lpar_tok.whitespace_before,
)
else:
return WithLeadingWhitespace(
Tuple((), lpar=(lpar,), rpar=(rpar,)), lpar_tok.whitespace_before
)
@with_production("atom_ellipses", "'...'")
def convert_atom_ellipses(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
(token,) = children
return WithLeadingWhitespace(Ellipsis(), token.whitespace_before)
@with_production("atom_string", "(STRING | fstring) [atom_string]")
def convert_atom_string(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
if len(children) == 1:
return children[0]
else:
left, right = children
return WithLeadingWhitespace(
ConcatenatedString(
left=left.value,
whitespace_between=parse_parenthesizable_whitespace(
config, right.whitespace_before
),
right=right.value,
),
left.whitespace_before,
)
@with_production("fstring", "FSTRING_START fstring_content* FSTRING_END")
def convert_fstring(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
start, *content, end = children
return WithLeadingWhitespace(
FormattedString(start=start.string, parts=tuple(content), end=end.string),
start.whitespace_before,
)
@with_production("fstring_content", "FSTRING_STRING | fstring_expr")
def convert_fstring_content(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
(child,) = children
if isinstance(child, Token):
# Construct and return a raw string portion.
return FormattedStringText(child.string)
else:
# Pass the expression up one production.
return child
@with_production("fstring_conversion", "'!' NAME")
def convert_fstring_conversion(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
exclaim, name = children
# There cannot be a space between the two tokens, so no need to preserve this.
return FormattedStringConversionPartial(name.string, exclaim.whitespace_before)
@with_production("fstring_equality", "'='", version=">=3.8")
def convert_fstring_equality(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
(equal,) = children
return AssignEqual(
whitespace_before=parse_parenthesizable_whitespace(
config, equal.whitespace_before
),
whitespace_after=parse_parenthesizable_whitespace(
config, equal.whitespace_after
),
)
@with_production(
"fstring_expr",
"'{' (testlist_comp_tuple | yield_expr) [ fstring_equality ] [ fstring_conversion ] [ fstring_format_spec ] '}'",
version=">=3.8",
)
@with_production(
"fstring_expr",
"'{' (testlist_comp_tuple | yield_expr) [ fstring_conversion ] [ fstring_format_spec ] '}'",
version="<3.8",
)
def convert_fstring_expr(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
openbrkt, testlist, *conversions, closebrkt = children
# Extract any optional equality (self-debugging expressions)
if len(conversions) > 0 and isinstance(conversions[0], AssignEqual):
equal = conversions[0]
conversions = conversions[1:]
else:
equal = None
# Extract any optional conversion
if len(conversions) > 0 and isinstance(
conversions[0], FormattedStringConversionPartial
):
conversion = conversions[0].value
conversions = conversions[1:]
else:
conversion = None
# Extract any optional format spec
if len(conversions) > 0:
format_spec = conversions[0].values
else:
format_spec = None
# Fix up any spacing issue we find due to the fact that the equal can
# have whitespace and is also at the end of the expression.
if equal is not None:
whitespace_after_expression = SimpleWhitespace("")
else:
whitespace_after_expression = parse_parenthesizable_whitespace(
config, children[2].whitespace_before
)
return FormattedStringExpression(
whitespace_before_expression=parse_parenthesizable_whitespace(
config, testlist.whitespace_before
),
expression=testlist.value,
equal=equal,
whitespace_after_expression=whitespace_after_expression,
conversion=conversion,
format_spec=format_spec,
)
@with_production("fstring_format_spec", "':' fstring_content*")
def convert_fstring_format_spec(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
colon, *content = children
return FormattedStringFormatSpecPartial(tuple(content), colon.whitespace_before)
@with_production(
"testlist_comp_tuple",
"(namedexpr_test|star_expr) ( comp_for | (',' (namedexpr_test|star_expr))* [','] )",
version=">=3.8",
)
@with_production(
"testlist_comp_tuple",
"(test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )",
version=">=3.5,<3.8",
)
@with_production(
"testlist_comp_tuple",
"(test) ( comp_for | (',' (test))* [','] )",
version="<3.5",
)
def convert_testlist_comp_tuple(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
return _convert_testlist_comp(
config,
children,
single_child_is_sequence=False,
sequence_type=Tuple,
comprehension_type=GeneratorExp,
)
@with_production(
"testlist_comp_list",
"(namedexpr_test|star_expr) ( comp_for | (',' (namedexpr_test|star_expr))* [','] )",
version=">=3.8",
)
@with_production(
"testlist_comp_list",
"(test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )",
version=">=3.5,<3.8",
)
@with_production(
"testlist_comp_list",
"(test) ( comp_for | (',' (test))* [','] )",
version="<3.5",
)
def convert_testlist_comp_list(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
return _convert_testlist_comp(
config,
children,
single_child_is_sequence=True,
sequence_type=List,
comprehension_type=ListComp,
)
def _convert_testlist_comp(
config: ParserConfig,
children: typing.Sequence[typing.Any],
single_child_is_sequence: bool,
sequence_type: typing.Union[
typing.Type[Tuple], typing.Type[List], typing.Type[Set]
],
comprehension_type: typing.Union[
typing.Type[GeneratorExp], typing.Type[ListComp], typing.Type[SetComp]
],
) -> typing.Any:
# This is either a single-element list, or the second token is a comma, so we're not
# in a generator.
if len(children) == 1 or isinstance(children[1], Token):
return _convert_sequencelike(
config, children, single_child_is_sequence, sequence_type
)
else:
# N.B. The parent node (e.g. atom) is responsible for computing and attaching
# whitespace information on any parenthesis, square brackets, or curly braces
elt, for_in = children
return WithLeadingWhitespace(
comprehension_type(elt=elt.value, for_in=for_in, lpar=(), rpar=()),
elt.whitespace_before,
)
@with_production("testlist_star_expr", "(test|star_expr) (',' (test|star_expr))* [',']")
@with_production("testlist", "test (',' test)* [',']")
@with_production("exprlist", "(expr|star_expr) (',' (expr|star_expr))* [',']")
def convert_test_or_expr_list(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
# Used by expression statements and assignments. Neither of these cases want to
# treat a single child as a sequence.
return _convert_sequencelike(
config, children, single_child_is_sequence=False, sequence_type=Tuple
)
def _convert_sequencelike(
config: ParserConfig,
children: typing.Sequence[typing.Any],
single_child_is_sequence: bool,
sequence_type: typing.Union[
typing.Type[Tuple], typing.Type[List], typing.Type[Set]
],
) -> typing.Any:
if not single_child_is_sequence and len(children) == 1:
return children[0]
# N.B. The parent node (e.g. atom) is responsible for computing and attaching
# whitespace information on any parenthesis, square brackets, or curly braces
elements = []
for wrapped_expr_or_starred_element, comma_token in grouper(children, 2):
expr_or_starred_element = wrapped_expr_or_starred_element.value
if comma_token is None:
comma = MaybeSentinel.DEFAULT
else:
comma = Comma(
whitespace_before=parse_parenthesizable_whitespace(
config, comma_token.whitespace_before
),
# Only compute whitespace_after if we're not a trailing comma.
# If we're a trailing comma, that whitespace should be consumed by the
# TrailingWhitespace, parenthesis, etc.
whitespace_after=(
parse_parenthesizable_whitespace(
config, comma_token.whitespace_after
)
if comma_token is not children[-1]
else SimpleWhitespace("")
),
)
if isinstance(expr_or_starred_element, StarredElement):
starred_element = expr_or_starred_element
elements.append(starred_element.with_changes(comma=comma))
else:
expr = expr_or_starred_element
elements.append(Element(value=expr, comma=comma))
# lpar/rpar are the responsibility of our parent
return WithLeadingWhitespace(
sequence_type(elements, lpar=(), rpar=()),
children[0].whitespace_before,
)
@with_production(
"dictorsetmaker",
(
"( ((test ':' test | '**' expr)"
+ " (comp_for | (',' (test ':' test | '**' expr))* [','])) |"
+ "((test | star_expr) "
+ " (comp_for | (',' (test | star_expr))* [','])) )"
),
version=">=3.5",
)
@with_production(
"dictorsetmaker",
(
"( ((test ':' test)"
+ " (comp_for | (',' (test ':' test))* [','])) |"
+ "((test) "
+ " (comp_for | (',' (test))* [','])) )"
),
version="<3.5",
)
def convert_dictorsetmaker(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
# We'll always have at least one child. `atom_curlybraces` handles empty
# dicts.
if len(children) > 1 and (
(isinstance(children[1], Token) and children[1].string == ":")
or (isinstance(children[0], Token) and children[0].string == "**")
):
return _convert_dict(config, children)
else:
return _convert_set(config, children)
def _convert_dict_element(
config: ParserConfig,
children_iter: typing.Iterator[typing.Any],
last_child: typing.Any,
) -> typing.Union[DictElement, StarredDictElement]:
first = next(children_iter)
if isinstance(first, Token) and first.string == "**":
expr = next(children_iter)
element = StarredDictElement(
expr.value,
whitespace_before_value=parse_parenthesizable_whitespace(
config, expr.whitespace_before
),
)
else:
key = first
colon_tok = next(children_iter)
value = next(children_iter)
element = DictElement(
key.value,
value.value,
whitespace_before_colon=parse_parenthesizable_whitespace(
config, colon_tok.whitespace_before
),
whitespace_after_colon=parse_parenthesizable_whitespace(
config, colon_tok.whitespace_after
),
)
# Handle the trailing comma (if there is one)
try:
comma_token = next(children_iter)
element = element.with_changes(
comma=Comma(
whitespace_before=parse_parenthesizable_whitespace(
config, comma_token.whitespace_before
),
# Only compute whitespace_after if we're not a trailing comma.
# If we're a trailing comma, that whitespace should be consumed by the
# RightBracket.
whitespace_after=(
parse_parenthesizable_whitespace(
config, comma_token.whitespace_after
)
if comma_token is not last_child
else SimpleWhitespace("")
),
)
)
except StopIteration:
pass
return element
def _convert_dict(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
is_first_starred = isinstance(children[0], Token) and children[0].string == "**"
if is_first_starred:
possible_comp_for = None if len(children) < 3 else children[2]
else:
possible_comp_for = None if len(children) < 4 else children[3]
if isinstance(possible_comp_for, CompFor):
if is_first_starred:
raise PartialParserSyntaxError(
"dict unpacking cannot be used in dict comprehension"
)
return _convert_dict_comp(config, children)
children_iter = iter(children)
last_child = children[-1]
elements = []
while True:
try:
elements.append(_convert_dict_element(config, children_iter, last_child))
except StopIteration:
break
# lbrace, rbrace, lpar, and rpar will be attached as-needed by the atom grammar
return WithLeadingWhitespace(Dict(tuple(elements)), children[0].whitespace_before)
def _convert_dict_comp(config, children: typing.Sequence[typing.Any]) -> typing.Any:
key, colon_token, value, comp_for = children
return WithLeadingWhitespace(
DictComp(
key.value,
value.value,
comp_for,
# lbrace, rbrace, lpar, and rpar will be attached as-needed by the atom grammar
whitespace_before_colon=parse_parenthesizable_whitespace(
config, colon_token.whitespace_before
),
whitespace_after_colon=parse_parenthesizable_whitespace(
config, colon_token.whitespace_after
),
),
key.whitespace_before,
)
def _convert_set(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
return _convert_testlist_comp(
config,
children,
single_child_is_sequence=True,
sequence_type=Set,
comprehension_type=SetComp,
)
@with_production("arglist", "argument (',' argument)* [',']")
def convert_arglist(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
args = []
for argument, comma in grouper(children, 2):
if comma is None:
args.append(argument)
else:
args.append(
argument.with_changes(
comma=Comma(
whitespace_before=parse_parenthesizable_whitespace(
config, comma.whitespace_before
),
whitespace_after=parse_parenthesizable_whitespace(
config, comma.whitespace_after
),
)
)
)
return ArglistPartial(args)
@with_production("argument", "arg_assign_comp_for | star_arg")
def convert_argument(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
(child,) = children
return child
@with_production(
"arg_assign_comp_for", "test [comp_for] | test '=' test", version="<=3.7"
)
@with_production(
"arg_assign_comp_for",
"test [comp_for] | test ':=' test | test '=' test",
version=">=3.8",
)
def convert_arg_assign_comp_for(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
if len(children) == 1:
# Simple test
(child,) = children
return Arg(value=child.value)
elif len(children) == 2:
elt, for_in = children
return Arg(value=GeneratorExp(elt.value, for_in, lpar=(), rpar=()))
else:
lhs, equal, rhs = children
# "key := value" assignment; positional
if equal.string == ":=":
val = convert_namedexpr_test(config, children)
if not isinstance(val, WithLeadingWhitespace):
raise TypeError(
f"convert_namedexpr_test returned {val!r}, not WithLeadingWhitespace"
)
return Arg(value=val.value)
# "key = value" assignment; keyword argument
return Arg(
keyword=lhs.value,
equal=AssignEqual(
whitespace_before=parse_parenthesizable_whitespace(
config, equal.whitespace_before
),
whitespace_after=parse_parenthesizable_whitespace(
config, equal.whitespace_after
),
),
value=rhs.value,
)
@with_production("star_arg", "'**' test | '*' test")
def convert_star_arg(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
star, test = children
return Arg(
star=star.string,
whitespace_after_star=parse_parenthesizable_whitespace(
config, star.whitespace_after
),
value=test.value,
)
@with_production("sync_comp_for", "'for' exprlist 'in' or_test comp_if* [comp_for]")
def convert_sync_comp_for(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
# unpack
for_tok, target, in_tok, iter, *trailing = children
if len(trailing) and isinstance(trailing[-1], CompFor):
*ifs, inner_for_in = trailing
else:
ifs, inner_for_in = trailing, None
return CompFor(
target=target.value,
iter=iter.value,
ifs=ifs,
inner_for_in=inner_for_in,
whitespace_before=parse_parenthesizable_whitespace(
config, for_tok.whitespace_before
),
whitespace_after_for=parse_parenthesizable_whitespace(
config, for_tok.whitespace_after
),
whitespace_before_in=parse_parenthesizable_whitespace(
config, in_tok.whitespace_before
),
whitespace_after_in=parse_parenthesizable_whitespace(
config, in_tok.whitespace_after
),
)
@with_production("comp_for", "[ASYNC] sync_comp_for", version=">=3.6")
@with_production("comp_for", "sync_comp_for", version="<=3.5")
def convert_comp_for(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
if len(children) == 1:
(sync_comp_for,) = children
return sync_comp_for
else:
(async_tok, sync_comp_for) = children
return sync_comp_for.with_changes(
# asynchronous steals the `CompFor`'s `whitespace_before`.
asynchronous=Asynchronous(whitespace_after=sync_comp_for.whitespace_before),
# But, in exchange, `CompFor` gets to keep `async_tok`'s leading
# whitespace, because that's now the beginning of the `CompFor`.
whitespace_before=parse_parenthesizable_whitespace(
config, async_tok.whitespace_before
),
)
@with_production("comp_if", "'if' test_nocond")
def convert_comp_if(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
if_tok, test = children
return CompIf(
test.value,
whitespace_before=parse_parenthesizable_whitespace(
config, if_tok.whitespace_before
),
whitespace_before_test=parse_parenthesizable_whitespace(
config, test.whitespace_before
),
)
@with_production("yield_expr", "'yield' [yield_arg]")
def convert_yield_expr(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
if len(children) == 1:
# Yielding implicit none
(yield_token,) = children
yield_node = Yield(value=None)
else:
# Yielding explicit value
(yield_token, yield_arg) = children
yield_node = Yield(
value=yield_arg.value,
whitespace_after_yield=parse_parenthesizable_whitespace(
config, yield_arg.whitespace_before
),
)
return WithLeadingWhitespace(yield_node, yield_token.whitespace_before)
@with_production("yield_arg", "testlist", version="<3.3")
@with_production("yield_arg", "'from' test | testlist", version=">=3.3,<3.8")
@with_production("yield_arg", "'from' test | testlist_star_expr", version=">=3.8")
def convert_yield_arg(
config: ParserConfig, children: typing.Sequence[typing.Any]
) -> typing.Any:
if len(children) == 1:
# Just a regular testlist, pass it up
(child,) = children
return child
else:
# Its a yield from
(from_token, test) = children
return WithLeadingWhitespace(
From(
item=test.value,
whitespace_after_from=parse_parenthesizable_whitespace(
config, test.whitespace_before
),
),
from_token.whitespace_before,
)