Add parser logic for cst.Tuple

There's a lot of different places tuples can get used, and
`Element`/`StarredElement` require some tricky handling of commas, so
this took a while.

Hopefully, this work should make `List` easy to implement.
`dictsetmaker` and comprehensions are still going to be a pain though...

Along the way, I found out that `Element`/`StarredElement`'s
`whitespace_after` wasn't needed, because the tuple's parenthesis (if
they exist) or parent node would be a better owner anyways, so that's
removed in this commit.
This commit is contained in:
Benjamin Woodruff 2019-07-03 17:20:08 -07:00
parent 39e4e433d8
commit edd8496f19
4 changed files with 247 additions and 187 deletions

View file

@ -2055,9 +2055,6 @@ class BaseElement(CSTNode, ABC):
# pyre-fixme[13]: Attribute `value` is never initialized.
value: BaseExpression
comma: Union[Comma, MaybeSentinel] = MaybeSentinel.DEFAULT
# Used if we don't have a comma, otherwise the parser will attach the whitespace to
# the comma.
whitespace_after: BaseParenthesizableWhitespace = SimpleWhitespace("")
@abstractmethod
def _codegen_impl(
@ -2077,16 +2074,10 @@ class Element(BaseElement):
# Any trailing comma
comma: Union[Comma, MaybeSentinel] = MaybeSentinel.DEFAULT
# Whitespace
whitespace_after: BaseParenthesizableWhitespace = SimpleWhitespace("")
def _visit_and_replace_children(self, visitor: CSTVisitor) -> "Element":
return Element(
value=visit_required("value", self.value, visitor),
comma=visit_sentinel("comma", self.comma, visitor),
whitespace_after=visit_required(
"whitespace_after", self.whitespace_after, visitor
),
)
def _codegen_impl(
@ -2097,8 +2088,6 @@ class Element(BaseElement):
) -> None:
with state.record_syntactic_position(self):
self.value._codegen(state)
self.whitespace_after._codegen(state)
comma = self.comma
if comma is MaybeSentinel.DEFAULT and default_comma:
if default_comma_whitespace:
@ -2124,7 +2113,6 @@ class StarredElement(BaseElement, _BaseParenthesizedNode):
# Whitespace
whitespace_before_value: BaseParenthesizableWhitespace = SimpleWhitespace("")
whitespace_after: BaseParenthesizableWhitespace = SimpleWhitespace("")
def _visit_and_replace_children(self, visitor: CSTVisitor) -> "StarredElement":
return StarredElement(
@ -2133,9 +2121,6 @@ class StarredElement(BaseElement, _BaseParenthesizedNode):
"whitespace_before_value", self.whitespace_before_value, visitor
),
value=visit_required("value", self.value, visitor),
whitespace_after=visit_required(
"whitespace_after", self.whitespace_after, visitor
),
rpar=visit_sequence("rpar", self.rpar, visitor),
comma=visit_sentinel("comma", self.comma, visitor),
)
@ -2159,12 +2144,11 @@ class StarredElement(BaseElement, _BaseParenthesizedNode):
state.add_token(",")
elif isinstance(comma, Comma):
comma._codegen(state)
self.whitespace_after._codegen(state)
@add_slots
@dataclass(frozen=True)
class Tuple(BaseExpression):
class Tuple(BaseAtom, BaseAssignTargetExpression, BaseDelTargetExpression):
elements: Sequence[Union[Element, StarredElement]]
# Sequence of open parenthesis for precedence dictation.
@ -2183,8 +2167,7 @@ class Tuple(BaseExpression):
if position == ExpressionPosition.LEFT:
last_element = elements[-1]
return (
not last_element.whitespace_after.empty
or isinstance(last_element.comma, Comma)
isinstance(last_element.comma, Comma)
or (
isinstance(last_element, StarredElement)
and len(last_element.rpar) > 0

View file

@ -4,110 +4,117 @@
# LICENSE file in the root directory of this source tree.
# pyre-strict
from typing import Callable, Optional
from typing import Any, Callable
import libcst.nodes as cst
from libcst.nodes._internal import CodeRange
from libcst.nodes.tests.base import CSTNodeTest
from libcst.parser import parse_expression, parse_statement
from libcst.testing.utils import data_provider
class TupleTest(CSTNodeTest):
@data_provider(
(
[
# zero-element tuple
(cst.Tuple([]), "()"),
# one-element tuple
(cst.Tuple([cst.Element(cst.Name("single_element"))]), "(single_element,)"),
# two-element tuple
(
cst.Tuple([cst.Element(cst.Name("one")), cst.Element(cst.Name("two"))]),
"(one, two)",
),
{"node": cst.Tuple([]), "code": "()", "parser": parse_expression},
# one-element tuple, sentinel comma value
{
"node": cst.Tuple([cst.Element(cst.Name("single_element"))]),
"code": "(single_element,)",
"parser": None,
},
{
"node": cst.Tuple([cst.StarredElement(cst.Name("single_element"))]),
"code": "(*single_element,)",
"parser": None,
},
# two-element tuple, sentinel comma value
{
"node": cst.Tuple(
[cst.Element(cst.Name("one")), cst.Element(cst.Name("two"))]
),
"code": "(one, two)",
"parser": None,
},
# remove parenthesis
(
cst.Tuple(
{
"node": cst.Tuple(
[cst.Element(cst.Name("one")), cst.Element(cst.Name("two"))],
lpar=[],
rpar=[],
),
"one, two",
),
"code": "one, two",
"parser": None,
},
# add extra parenthesis
(
cst.Tuple(
{
"node": cst.Tuple(
[cst.Element(cst.Name("one")), cst.Element(cst.Name("two"))],
lpar=[cst.LeftParen(), cst.LeftParen()],
rpar=[cst.RightParen(), cst.RightParen()],
),
"((one, two))",
),
"code": "((one, two))",
"parser": None,
},
# starred element
(
cst.Tuple(
[cst.Element(cst.Name("one")), cst.StarredElement(cst.Name("two"))]
),
"(one, *two)",
),
# custom comma on Element
(
cst.Tuple(
{
"node": cst.Tuple(
[
cst.Element(cst.Name("one")),
cst.StarredElement(cst.Name("one")),
cst.StarredElement(cst.Name("two")),
]
),
"code": "(*one, *two)",
"parser": None,
},
# custom comma on Element
{
"node": cst.Tuple(
[
cst.Element(cst.Name("one"), comma=cst.Comma()),
cst.Element(cst.Name("two"), comma=cst.Comma()),
]
),
"(one, two,)",
),
"code": "(one,two,)",
"parser": parse_expression,
},
# custom comma on StarredElement
(
cst.Tuple(
{
"node": cst.Tuple(
[
cst.Element(cst.Name("one")),
cst.StarredElement(cst.Name("one"), comma=cst.Comma()),
cst.StarredElement(cst.Name("two"), comma=cst.Comma()),
]
),
"(one, *two,)",
CodeRange.create((1, 1), (1, 11)),
),
"code": "(*one,*two,)",
"parser": parse_expression,
"expected_position": CodeRange.create((1, 1), (1, 11)),
},
# custom parenthesis on StarredElement
(
cst.Tuple(
{
"node": cst.Tuple(
[
cst.StarredElement(
cst.Name("abc"),
lpar=[cst.LeftParen()],
rpar=[cst.RightParen()],
comma=cst.Comma(),
)
]
),
"((*abc),)",
CodeRange.create((1, 1), (1, 8)),
),
# custom whitespace on Element
(
cst.Tuple(
[
cst.Element(cst.Name("one")),
cst.Element(
cst.Name("two"), whitespace_after=cst.SimpleWhitespace(" ")
),
],
lpar=[],
rpar=[], # rpar can't own the trailing whitespace if it's not there
),
"one, two ",
CodeRange.create((1, 0), (1, 10)),
),
"code": "((*abc),)",
"parser": parse_expression,
"expected_position": CodeRange.create((1, 1), (1, 8)),
},
# custom whitespace on StarredElement
(
cst.Tuple(
{
"node": cst.Tuple(
[
cst.Element(cst.Name("one")),
cst.Element(cst.Name("one"), comma=cst.Comma()),
cst.StarredElement(
cst.Name("two"),
whitespace_before_value=cst.SimpleWhitespace(" "),
whitespace_after=cst.SimpleWhitespace(" "),
lpar=[cst.LeftParen()],
rpar=[cst.RightParen()],
),
@ -115,31 +122,37 @@ class TupleTest(CSTNodeTest):
lpar=[],
rpar=[], # rpar can't own the trailing whitespace if it's not there
),
"one, (* two) ",
CodeRange.create((1, 0), (1, 17)),
),
"code": "one,(* two)",
"parser": parse_expression,
"expected_position": CodeRange.create((1, 0), (1, 12)),
},
# missing spaces around tuple, okay with parenthesis
(
cst.For(
{
"node": cst.For(
target=cst.Tuple(
[cst.Element(cst.Name("k")), cst.Element(cst.Name("v"))]
[
cst.Element(cst.Name("k"), comma=cst.Comma()),
cst.Element(cst.Name("v")),
]
),
iter=cst.Name("abc"),
body=cst.SimpleStatementSuite([cst.Pass()]),
whitespace_after_for=cst.SimpleWhitespace(""),
whitespace_before_in=cst.SimpleWhitespace(""),
),
"for(k, v)in abc: pass\n",
),
"code": "for(k,v)in abc: pass\n",
"parser": parse_statement,
},
# no spaces around tuple, but using values that are parenthesized
(
cst.For(
{
"node": cst.For(
target=cst.Tuple(
[
cst.Element(
cst.Name(
"k", lpar=[cst.LeftParen()], rpar=[cst.RightParen()]
)
),
comma=cst.Comma(),
),
cst.Element(
cst.Name(
@ -155,26 +168,51 @@ class TupleTest(CSTNodeTest):
whitespace_after_for=cst.SimpleWhitespace(""),
whitespace_before_in=cst.SimpleWhitespace(""),
),
"for(k), (v)in abc: pass\n",
),
"code": "for(k),(v)in abc: pass\n",
"parser": parse_statement,
},
# starred elements are safe to use without a space before them
(
cst.For(
{
"node": cst.For(
target=cst.Tuple(
[cst.StarredElement(cst.Name("foo"))], lpar=[], rpar=[]
[cst.StarredElement(cst.Name("foo"), comma=cst.Comma())],
lpar=[],
rpar=[],
),
iter=cst.Name("bar"),
body=cst.SimpleStatementSuite([cst.Pass()]),
whitespace_after_for=cst.SimpleWhitespace(""),
),
"for*foo, in bar: pass\n",
),
)
"code": "for*foo, in bar: pass\n",
"parser": parse_statement,
},
# a trailing comma doesn't mess up TrailingWhitespace
{
"node": cst.SimpleStatementLine(
[
cst.Expr(
cst.Tuple(
[
cst.Element(cst.Name("one"), comma=cst.Comma()),
cst.Element(cst.Name("two"), comma=cst.Comma()),
],
lpar=[],
rpar=[],
)
)
],
trailing_whitespace=cst.TrailingWhitespace(
whitespace=cst.SimpleWhitespace(" "),
comment=cst.Comment("# comment"),
),
),
"code": "one,two, # comment\n",
"parser": parse_statement,
},
]
)
def test_valid(
self, node: cst.CSTNode, code: str, position: Optional[CodeRange] = None
) -> None:
self.validate_node(node, code, expected_position=position)
def test_valid(self, **kwargs: Any) -> None:
self.validate_node(**kwargs)
@data_provider(
(

View file

@ -75,15 +75,6 @@ def convert_expression_input(config: ParserConfig, children: Sequence[Any]) -> A
return child
@with_production("testlist_star_expr", "(test|star_expr) (',' (test|star_expr))* [',']")
def convert_testlist_star_expr(config: ParserConfig, children: Sequence[Any]) -> Any:
if len(children) == 1:
(child,) = children
return child
else:
return make_dummy_node(config, children)
@with_production("test", "or_test ['if' or_test 'else' test] | lambdef")
def convert_test(config: ParserConfig, children: Sequence[Any]) -> Any:
if len(children) == 1:
@ -342,7 +333,19 @@ def convert_comp_op(config: ParserConfig, children: Sequence[Any]) -> Any:
@with_production("star_expr", "'*' expr")
def convert_star_expr(config: ParserConfig, children: Sequence[Any]) -> Any:
return make_dummy_node(config, children)
star, expr = children
return WithLeadingWhitespace(
cst.StarredElement(
expr.value,
whitespace_before_value=parse_parenthesizable_whitespace(
config, expr.whitespace_before
),
# atom is responsible for parenthesis and trailing_whitespace if they exist
# testlist_comp, exprlist, dictorsetmaker, etc are responsible for the comma
# if it exists.
),
whitespace_before=star.whitespace_before,
)
@with_production("expr", "xor_expr ('|' xor_expr)*")
@ -731,7 +734,7 @@ def convert_atom_basic(config: ParserConfig, children: Sequence[Any]) -> Any:
raise Exception(f"Logic error, unexpected token {child.type.name}")
@with_production("atom_squarebrackets", "'[' [testlist_comp] ']'")
@with_production("atom_squarebrackets", "'[' [testlist_comp_list] ']'")
def convert_atom_squarebrackets(config: ParserConfig, children: Sequence[Any]) -> Any:
return make_dummy_node(config, children)
@ -741,9 +744,21 @@ def convert_atom_curlybrackets(config: ParserConfig, children: Sequence[Any]) ->
return make_dummy_node(config, children)
@with_production("atom_parens", "'(' [yield_expr|testlist_comp] ')'")
@with_production("atom_parens", "'(' [yield_expr|testlist_comp_tuple] ')'")
def convert_atom_parens(config: ParserConfig, children: Sequence[Any]) -> Any:
lpar, *atoms, rpar = children
lpar_tok, *atoms, rpar_tok = children
lpar = cst.LeftParen(
whitespace_after=parse_parenthesizable_whitespace(
config, lpar_tok.whitespace_after
)
)
rpar = cst.RightParen(
whitespace_before=parse_parenthesizable_whitespace(
config, rpar_tok.whitespace_before
)
)
if len(atoms) == 1:
inner_atom = atoms[0].value
@ -756,59 +771,22 @@ def convert_atom_parens(config: ParserConfig, children: Sequence[Any]) -> Any:
return WithLeadingWhitespace(
inner_atom.with_changes(
number=inner_atom.number.with_changes(
lpar=(
(
cst.LeftParen(
whitespace_after=parse_parenthesizable_whitespace(
config, lpar.whitespace_after
)
),
)
+ tuple(inner_atom.lpar)
),
rpar=(
tuple(inner_atom.rpar)
+ (
cst.RightParen(
whitespace_before=parse_parenthesizable_whitespace(
config, rpar.whitespace_before
)
),
)
),
lpar=(lpar, *inner_atom.lpar), rpar=(*inner_atom.rpar, rpar)
)
),
lpar.whitespace_before,
lpar_tok.whitespace_before,
)
else:
return WithLeadingWhitespace(
inner_atom.with_changes(
lpar=(
(
cst.LeftParen(
whitespace_after=parse_parenthesizable_whitespace(
config, lpar.whitespace_after
)
),
)
+ tuple(inner_atom.lpar)
),
rpar=(
tuple(inner_atom.rpar)
+ (
cst.RightParen(
whitespace_before=parse_parenthesizable_whitespace(
config, rpar.whitespace_before
)
),
)
),
lpar=(lpar, *inner_atom.lpar), rpar=(*inner_atom.rpar, rpar)
),
lpar.whitespace_before,
lpar_tok.whitespace_before,
)
else:
# We don't support tuples yet
return make_dummy_node(config, children)
return WithLeadingWhitespace(
cst.Tuple((), lpar=(lpar,), rpar=(rpar,)), lpar_tok.whitespace_before
)
@with_production("atom_ellipses", "'...'")
@ -938,23 +916,95 @@ def convert_fstring_format_spec(config: ParserConfig, children: Sequence[Any]) -
@with_production(
"testlist_comp", "(test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )"
"testlist_comp_tuple",
"(test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )",
)
def convert_testlist_comp(config: ParserConfig, children: Sequence[Any]) -> Any:
if len(children) == 1:
(child,) = children
return child
def convert_testlist_comp_tuple(config: ParserConfig, children: Sequence[Any]) -> Any:
return _convert_testlist_comp(
config,
children,
single_child_is_sequence=False, # should be true for testlist_comp_list
sequence_type=cst.Tuple,
)
@with_production(
"testlist_comp_list",
"(test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )",
)
def convert_testlist_comp_list(config: ParserConfig, children: Sequence[Any]) -> Any:
return make_dummy_node(config, children)
def _convert_testlist_comp(
config: ParserConfig,
children: Sequence[Any],
single_child_is_sequence: bool,
sequence_type: Type[cst.Tuple],
) -> Any:
# This is either a single-element list, or the second token is a comma, so we're not
# in a generator.
if len(children) == 1 or isinstance(children[1], Token):
return _convert_sequencelike(
config, children, single_child_is_sequence, sequence_type
)
else:
# TODO: make a generator/comprehension
return make_dummy_node(config, children)
@with_production("testlist_star_expr", "(test|star_expr) (',' (test|star_expr))* [',']")
@with_production("testlist", "test (',' test)* [',']")
def convert_testlist(config: ParserConfig, children: Sequence[Any]) -> Any:
if len(children) == 1:
(child,) = children
return child
else:
return make_dummy_node(config, children)
@with_production("exprlist", "(expr|star_expr) (',' (expr|star_expr))* [',']")
def convert_test_or_expr_list(config: ParserConfig, children: Sequence[Any]) -> Any:
# Used by expression statements and assignments. Neither of these cases want to
# treat a single child as a sequence.
return _convert_sequencelike(
config, children, single_child_is_sequence=False, sequence_type=cst.Tuple
)
def _convert_sequencelike(
config: ParserConfig,
children: Sequence[Any],
single_child_is_sequence: bool,
sequence_type: Type[cst.Tuple], # TODO: Type[Union[Tuple, List, Set]]
) -> Any:
if not single_child_is_sequence and len(children) == 1:
return children[0]
# N.B. The parent node (e.g. atom) is responsible for computing and attaching
# whitespace_after on our last Element/StarredElement node.
elements = []
for wrapped_expr_or_starred_element, comma_token in grouper(children, 2):
expr_or_starred_element = wrapped_expr_or_starred_element.value
if comma_token is None:
comma = MaybeSentinel.DEFAULT
else:
comma = cst.Comma(
whitespace_before=parse_parenthesizable_whitespace(
config, comma_token.whitespace_before
),
# Only compute whitespace_after if we're not a trailing comma.
# If we're a trailing comma, that whitespace should be consumed by the
# TrailingWhitespace, parenthesis, etc.
whitespace_after=parse_parenthesizable_whitespace(
config, comma_token.whitespace_after
)
if comma_token is not children[-1]
else cst.SimpleWhitespace(""),
)
if isinstance(expr_or_starred_element, cst.StarredElement):
starred_element = expr_or_starred_element
elements.append(starred_element.with_changes(comma=comma))
else:
expr = expr_or_starred_element
elements.append(cst.Element(value=expr, comma=comma))
# lpar/rpar are the responsibility of our parent
return WithLeadingWhitespace(
sequence_type(elements, lpar=(), rpar=()), children[0].whitespace_before
)
@with_production(
@ -970,15 +1020,6 @@ def convert_dictorsetmaker(config: ParserConfig, children: Sequence[Any]) -> Any
return make_dummy_node(config, children)
@with_production("exprlist", "(expr|star_expr) (',' (expr|star_expr))* [',']")
def convert_exprlist(config: ParserConfig, children: Sequence[Any]) -> Any:
if len(children) == 1:
(child,) = children
return child
else:
return make_dummy_node(config, children)
@with_production("arglist", "argument (',' argument)* [',']")
def convert_arglist(config: ParserConfig, children: Sequence[Any]) -> Any:
args = []

View file

@ -35,7 +35,6 @@ from libcst.parser._conversions.expression import (
convert_comparison,
convert_dictorsetmaker,
convert_expression_input,
convert_exprlist,
convert_factor,
convert_fstring,
convert_fstring_content,
@ -53,9 +52,9 @@ from libcst.parser._conversions.expression import (
convert_sync_comp_for,
convert_test,
convert_test_nocond,
convert_testlist,
convert_testlist_comp,
convert_testlist_star_expr,
convert_test_or_expr_list,
convert_testlist_comp_list,
convert_testlist_comp_tuple,
convert_trailer,
convert_trailer_arglist,
convert_trailer_attribute,
@ -213,7 +212,6 @@ _NONTERMINAL_CONVERSIONS_SEQUENCE: Tuple[NonterminalConversion, ...] = (
convert_funcdef_annotation,
convert_suite,
convert_indented_suite,
convert_testlist_star_expr,
convert_test,
convert_test_nocond,
convert_lambda,
@ -248,10 +246,10 @@ _NONTERMINAL_CONVERSIONS_SEQUENCE: Tuple[NonterminalConversion, ...] = (
convert_fstring_expr,
convert_fstring_format_spec,
convert_atom_ellipses,
convert_testlist_comp,
convert_testlist,
convert_testlist_comp_tuple,
convert_testlist_comp_list,
convert_test_or_expr_list,
convert_dictorsetmaker,
convert_exprlist,
convert_arglist,
convert_argument,
convert_arg_assign_comp_for,