From edd8496f19781dfcc363d6a8caae69ef33b11448 Mon Sep 17 00:00:00 2001 From: Benjamin Woodruff Date: Wed, 3 Jul 2019 17:20:08 -0700 Subject: [PATCH] Add parser logic for cst.Tuple There's a lot of different places tuples can get used, and `Element`/`StarredElement` require some tricky handling of commas, so this took a while. Hopefully, this work should make `List` easy to implement. `dictsetmaker` and comprehensions are still going to be a pain though... Along the way, I found out that `Element`/`StarredElement`'s `whitespace_after` wasn't needed, because the tuple's parenthesis (if they exist) or parent node would be a better owner anyways, so that's removed in this commit. --- libcst/nodes/_expression.py | 21 +-- libcst/nodes/tests/test_tuple.py | 204 ++++++++++++++--------- libcst/parser/_conversions/expression.py | 195 +++++++++++++--------- libcst/parser/_grammar.py | 14 +- 4 files changed, 247 insertions(+), 187 deletions(-) diff --git a/libcst/nodes/_expression.py b/libcst/nodes/_expression.py index ceeb2919..d30492b5 100644 --- a/libcst/nodes/_expression.py +++ b/libcst/nodes/_expression.py @@ -2055,9 +2055,6 @@ class BaseElement(CSTNode, ABC): # pyre-fixme[13]: Attribute `value` is never initialized. value: BaseExpression comma: Union[Comma, MaybeSentinel] = MaybeSentinel.DEFAULT - # Used if we don't have a comma, otherwise the parser will attach the whitespace to - # the comma. - whitespace_after: BaseParenthesizableWhitespace = SimpleWhitespace("") @abstractmethod def _codegen_impl( @@ -2077,16 +2074,10 @@ class Element(BaseElement): # Any trailing comma comma: Union[Comma, MaybeSentinel] = MaybeSentinel.DEFAULT - # Whitespace - whitespace_after: BaseParenthesizableWhitespace = SimpleWhitespace("") - def _visit_and_replace_children(self, visitor: CSTVisitor) -> "Element": return Element( value=visit_required("value", self.value, visitor), comma=visit_sentinel("comma", self.comma, visitor), - whitespace_after=visit_required( - "whitespace_after", self.whitespace_after, visitor - ), ) def _codegen_impl( @@ -2097,8 +2088,6 @@ class Element(BaseElement): ) -> None: with state.record_syntactic_position(self): self.value._codegen(state) - - self.whitespace_after._codegen(state) comma = self.comma if comma is MaybeSentinel.DEFAULT and default_comma: if default_comma_whitespace: @@ -2124,7 +2113,6 @@ class StarredElement(BaseElement, _BaseParenthesizedNode): # Whitespace whitespace_before_value: BaseParenthesizableWhitespace = SimpleWhitespace("") - whitespace_after: BaseParenthesizableWhitespace = SimpleWhitespace("") def _visit_and_replace_children(self, visitor: CSTVisitor) -> "StarredElement": return StarredElement( @@ -2133,9 +2121,6 @@ class StarredElement(BaseElement, _BaseParenthesizedNode): "whitespace_before_value", self.whitespace_before_value, visitor ), value=visit_required("value", self.value, visitor), - whitespace_after=visit_required( - "whitespace_after", self.whitespace_after, visitor - ), rpar=visit_sequence("rpar", self.rpar, visitor), comma=visit_sentinel("comma", self.comma, visitor), ) @@ -2159,12 +2144,11 @@ class StarredElement(BaseElement, _BaseParenthesizedNode): state.add_token(",") elif isinstance(comma, Comma): comma._codegen(state) - self.whitespace_after._codegen(state) @add_slots @dataclass(frozen=True) -class Tuple(BaseExpression): +class Tuple(BaseAtom, BaseAssignTargetExpression, BaseDelTargetExpression): elements: Sequence[Union[Element, StarredElement]] # Sequence of open parenthesis for precedence dictation. @@ -2183,8 +2167,7 @@ class Tuple(BaseExpression): if position == ExpressionPosition.LEFT: last_element = elements[-1] return ( - not last_element.whitespace_after.empty - or isinstance(last_element.comma, Comma) + isinstance(last_element.comma, Comma) or ( isinstance(last_element, StarredElement) and len(last_element.rpar) > 0 diff --git a/libcst/nodes/tests/test_tuple.py b/libcst/nodes/tests/test_tuple.py index 7f3d9689..aefa811f 100644 --- a/libcst/nodes/tests/test_tuple.py +++ b/libcst/nodes/tests/test_tuple.py @@ -4,110 +4,117 @@ # LICENSE file in the root directory of this source tree. # pyre-strict -from typing import Callable, Optional +from typing import Any, Callable import libcst.nodes as cst from libcst.nodes._internal import CodeRange from libcst.nodes.tests.base import CSTNodeTest +from libcst.parser import parse_expression, parse_statement from libcst.testing.utils import data_provider class TupleTest(CSTNodeTest): @data_provider( - ( + [ # zero-element tuple - (cst.Tuple([]), "()"), - # one-element tuple - (cst.Tuple([cst.Element(cst.Name("single_element"))]), "(single_element,)"), - # two-element tuple - ( - cst.Tuple([cst.Element(cst.Name("one")), cst.Element(cst.Name("two"))]), - "(one, two)", - ), + {"node": cst.Tuple([]), "code": "()", "parser": parse_expression}, + # one-element tuple, sentinel comma value + { + "node": cst.Tuple([cst.Element(cst.Name("single_element"))]), + "code": "(single_element,)", + "parser": None, + }, + { + "node": cst.Tuple([cst.StarredElement(cst.Name("single_element"))]), + "code": "(*single_element,)", + "parser": None, + }, + # two-element tuple, sentinel comma value + { + "node": cst.Tuple( + [cst.Element(cst.Name("one")), cst.Element(cst.Name("two"))] + ), + "code": "(one, two)", + "parser": None, + }, # remove parenthesis - ( - cst.Tuple( + { + "node": cst.Tuple( [cst.Element(cst.Name("one")), cst.Element(cst.Name("two"))], lpar=[], rpar=[], ), - "one, two", - ), + "code": "one, two", + "parser": None, + }, # add extra parenthesis - ( - cst.Tuple( + { + "node": cst.Tuple( [cst.Element(cst.Name("one")), cst.Element(cst.Name("two"))], lpar=[cst.LeftParen(), cst.LeftParen()], rpar=[cst.RightParen(), cst.RightParen()], ), - "((one, two))", - ), + "code": "((one, two))", + "parser": None, + }, # starred element - ( - cst.Tuple( - [cst.Element(cst.Name("one")), cst.StarredElement(cst.Name("two"))] - ), - "(one, *two)", - ), - # custom comma on Element - ( - cst.Tuple( + { + "node": cst.Tuple( [ - cst.Element(cst.Name("one")), + cst.StarredElement(cst.Name("one")), + cst.StarredElement(cst.Name("two")), + ] + ), + "code": "(*one, *two)", + "parser": None, + }, + # custom comma on Element + { + "node": cst.Tuple( + [ + cst.Element(cst.Name("one"), comma=cst.Comma()), cst.Element(cst.Name("two"), comma=cst.Comma()), ] ), - "(one, two,)", - ), + "code": "(one,two,)", + "parser": parse_expression, + }, # custom comma on StarredElement - ( - cst.Tuple( + { + "node": cst.Tuple( [ - cst.Element(cst.Name("one")), + cst.StarredElement(cst.Name("one"), comma=cst.Comma()), cst.StarredElement(cst.Name("two"), comma=cst.Comma()), ] ), - "(one, *two,)", - CodeRange.create((1, 1), (1, 11)), - ), + "code": "(*one,*two,)", + "parser": parse_expression, + "expected_position": CodeRange.create((1, 1), (1, 11)), + }, # custom parenthesis on StarredElement - ( - cst.Tuple( + { + "node": cst.Tuple( [ cst.StarredElement( cst.Name("abc"), lpar=[cst.LeftParen()], rpar=[cst.RightParen()], + comma=cst.Comma(), ) ] ), - "((*abc),)", - CodeRange.create((1, 1), (1, 8)), - ), - # custom whitespace on Element - ( - cst.Tuple( - [ - cst.Element(cst.Name("one")), - cst.Element( - cst.Name("two"), whitespace_after=cst.SimpleWhitespace(" ") - ), - ], - lpar=[], - rpar=[], # rpar can't own the trailing whitespace if it's not there - ), - "one, two ", - CodeRange.create((1, 0), (1, 10)), - ), + "code": "((*abc),)", + "parser": parse_expression, + "expected_position": CodeRange.create((1, 1), (1, 8)), + }, # custom whitespace on StarredElement - ( - cst.Tuple( + { + "node": cst.Tuple( [ - cst.Element(cst.Name("one")), + cst.Element(cst.Name("one"), comma=cst.Comma()), cst.StarredElement( cst.Name("two"), whitespace_before_value=cst.SimpleWhitespace(" "), - whitespace_after=cst.SimpleWhitespace(" "), lpar=[cst.LeftParen()], rpar=[cst.RightParen()], ), @@ -115,31 +122,37 @@ class TupleTest(CSTNodeTest): lpar=[], rpar=[], # rpar can't own the trailing whitespace if it's not there ), - "one, (* two) ", - CodeRange.create((1, 0), (1, 17)), - ), + "code": "one,(* two)", + "parser": parse_expression, + "expected_position": CodeRange.create((1, 0), (1, 12)), + }, # missing spaces around tuple, okay with parenthesis - ( - cst.For( + { + "node": cst.For( target=cst.Tuple( - [cst.Element(cst.Name("k")), cst.Element(cst.Name("v"))] + [ + cst.Element(cst.Name("k"), comma=cst.Comma()), + cst.Element(cst.Name("v")), + ] ), iter=cst.Name("abc"), body=cst.SimpleStatementSuite([cst.Pass()]), whitespace_after_for=cst.SimpleWhitespace(""), whitespace_before_in=cst.SimpleWhitespace(""), ), - "for(k, v)in abc: pass\n", - ), + "code": "for(k,v)in abc: pass\n", + "parser": parse_statement, + }, # no spaces around tuple, but using values that are parenthesized - ( - cst.For( + { + "node": cst.For( target=cst.Tuple( [ cst.Element( cst.Name( "k", lpar=[cst.LeftParen()], rpar=[cst.RightParen()] - ) + ), + comma=cst.Comma(), ), cst.Element( cst.Name( @@ -155,26 +168,51 @@ class TupleTest(CSTNodeTest): whitespace_after_for=cst.SimpleWhitespace(""), whitespace_before_in=cst.SimpleWhitespace(""), ), - "for(k), (v)in abc: pass\n", - ), + "code": "for(k),(v)in abc: pass\n", + "parser": parse_statement, + }, # starred elements are safe to use without a space before them - ( - cst.For( + { + "node": cst.For( target=cst.Tuple( - [cst.StarredElement(cst.Name("foo"))], lpar=[], rpar=[] + [cst.StarredElement(cst.Name("foo"), comma=cst.Comma())], + lpar=[], + rpar=[], ), iter=cst.Name("bar"), body=cst.SimpleStatementSuite([cst.Pass()]), whitespace_after_for=cst.SimpleWhitespace(""), ), - "for*foo, in bar: pass\n", - ), - ) + "code": "for*foo, in bar: pass\n", + "parser": parse_statement, + }, + # a trailing comma doesn't mess up TrailingWhitespace + { + "node": cst.SimpleStatementLine( + [ + cst.Expr( + cst.Tuple( + [ + cst.Element(cst.Name("one"), comma=cst.Comma()), + cst.Element(cst.Name("two"), comma=cst.Comma()), + ], + lpar=[], + rpar=[], + ) + ) + ], + trailing_whitespace=cst.TrailingWhitespace( + whitespace=cst.SimpleWhitespace(" "), + comment=cst.Comment("# comment"), + ), + ), + "code": "one,two, # comment\n", + "parser": parse_statement, + }, + ] ) - def test_valid( - self, node: cst.CSTNode, code: str, position: Optional[CodeRange] = None - ) -> None: - self.validate_node(node, code, expected_position=position) + def test_valid(self, **kwargs: Any) -> None: + self.validate_node(**kwargs) @data_provider( ( diff --git a/libcst/parser/_conversions/expression.py b/libcst/parser/_conversions/expression.py index 41255200..f33462bd 100644 --- a/libcst/parser/_conversions/expression.py +++ b/libcst/parser/_conversions/expression.py @@ -75,15 +75,6 @@ def convert_expression_input(config: ParserConfig, children: Sequence[Any]) -> A return child -@with_production("testlist_star_expr", "(test|star_expr) (',' (test|star_expr))* [',']") -def convert_testlist_star_expr(config: ParserConfig, children: Sequence[Any]) -> Any: - if len(children) == 1: - (child,) = children - return child - else: - return make_dummy_node(config, children) - - @with_production("test", "or_test ['if' or_test 'else' test] | lambdef") def convert_test(config: ParserConfig, children: Sequence[Any]) -> Any: if len(children) == 1: @@ -342,7 +333,19 @@ def convert_comp_op(config: ParserConfig, children: Sequence[Any]) -> Any: @with_production("star_expr", "'*' expr") def convert_star_expr(config: ParserConfig, children: Sequence[Any]) -> Any: - return make_dummy_node(config, children) + star, expr = children + return WithLeadingWhitespace( + cst.StarredElement( + expr.value, + whitespace_before_value=parse_parenthesizable_whitespace( + config, expr.whitespace_before + ), + # atom is responsible for parenthesis and trailing_whitespace if they exist + # testlist_comp, exprlist, dictorsetmaker, etc are responsible for the comma + # if it exists. + ), + whitespace_before=star.whitespace_before, + ) @with_production("expr", "xor_expr ('|' xor_expr)*") @@ -731,7 +734,7 @@ def convert_atom_basic(config: ParserConfig, children: Sequence[Any]) -> Any: raise Exception(f"Logic error, unexpected token {child.type.name}") -@with_production("atom_squarebrackets", "'[' [testlist_comp] ']'") +@with_production("atom_squarebrackets", "'[' [testlist_comp_list] ']'") def convert_atom_squarebrackets(config: ParserConfig, children: Sequence[Any]) -> Any: return make_dummy_node(config, children) @@ -741,9 +744,21 @@ def convert_atom_curlybrackets(config: ParserConfig, children: Sequence[Any]) -> return make_dummy_node(config, children) -@with_production("atom_parens", "'(' [yield_expr|testlist_comp] ')'") +@with_production("atom_parens", "'(' [yield_expr|testlist_comp_tuple] ')'") def convert_atom_parens(config: ParserConfig, children: Sequence[Any]) -> Any: - lpar, *atoms, rpar = children + lpar_tok, *atoms, rpar_tok = children + + lpar = cst.LeftParen( + whitespace_after=parse_parenthesizable_whitespace( + config, lpar_tok.whitespace_after + ) + ) + + rpar = cst.RightParen( + whitespace_before=parse_parenthesizable_whitespace( + config, rpar_tok.whitespace_before + ) + ) if len(atoms) == 1: inner_atom = atoms[0].value @@ -756,59 +771,22 @@ def convert_atom_parens(config: ParserConfig, children: Sequence[Any]) -> Any: return WithLeadingWhitespace( inner_atom.with_changes( number=inner_atom.number.with_changes( - lpar=( - ( - cst.LeftParen( - whitespace_after=parse_parenthesizable_whitespace( - config, lpar.whitespace_after - ) - ), - ) - + tuple(inner_atom.lpar) - ), - rpar=( - tuple(inner_atom.rpar) - + ( - cst.RightParen( - whitespace_before=parse_parenthesizable_whitespace( - config, rpar.whitespace_before - ) - ), - ) - ), + lpar=(lpar, *inner_atom.lpar), rpar=(*inner_atom.rpar, rpar) ) ), - lpar.whitespace_before, + lpar_tok.whitespace_before, ) else: return WithLeadingWhitespace( inner_atom.with_changes( - lpar=( - ( - cst.LeftParen( - whitespace_after=parse_parenthesizable_whitespace( - config, lpar.whitespace_after - ) - ), - ) - + tuple(inner_atom.lpar) - ), - rpar=( - tuple(inner_atom.rpar) - + ( - cst.RightParen( - whitespace_before=parse_parenthesizable_whitespace( - config, rpar.whitespace_before - ) - ), - ) - ), + lpar=(lpar, *inner_atom.lpar), rpar=(*inner_atom.rpar, rpar) ), - lpar.whitespace_before, + lpar_tok.whitespace_before, ) else: - # We don't support tuples yet - return make_dummy_node(config, children) + return WithLeadingWhitespace( + cst.Tuple((), lpar=(lpar,), rpar=(rpar,)), lpar_tok.whitespace_before + ) @with_production("atom_ellipses", "'...'") @@ -938,23 +916,95 @@ def convert_fstring_format_spec(config: ParserConfig, children: Sequence[Any]) - @with_production( - "testlist_comp", "(test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )" + "testlist_comp_tuple", + "(test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )", ) -def convert_testlist_comp(config: ParserConfig, children: Sequence[Any]) -> Any: - if len(children) == 1: - (child,) = children - return child +def convert_testlist_comp_tuple(config: ParserConfig, children: Sequence[Any]) -> Any: + return _convert_testlist_comp( + config, + children, + single_child_is_sequence=False, # should be true for testlist_comp_list + sequence_type=cst.Tuple, + ) + + +@with_production( + "testlist_comp_list", + "(test|star_expr) ( comp_for | (',' (test|star_expr))* [','] )", +) +def convert_testlist_comp_list(config: ParserConfig, children: Sequence[Any]) -> Any: + return make_dummy_node(config, children) + + +def _convert_testlist_comp( + config: ParserConfig, + children: Sequence[Any], + single_child_is_sequence: bool, + sequence_type: Type[cst.Tuple], +) -> Any: + # This is either a single-element list, or the second token is a comma, so we're not + # in a generator. + if len(children) == 1 or isinstance(children[1], Token): + return _convert_sequencelike( + config, children, single_child_is_sequence, sequence_type + ) else: + # TODO: make a generator/comprehension return make_dummy_node(config, children) +@with_production("testlist_star_expr", "(test|star_expr) (',' (test|star_expr))* [',']") @with_production("testlist", "test (',' test)* [',']") -def convert_testlist(config: ParserConfig, children: Sequence[Any]) -> Any: - if len(children) == 1: - (child,) = children - return child - else: - return make_dummy_node(config, children) +@with_production("exprlist", "(expr|star_expr) (',' (expr|star_expr))* [',']") +def convert_test_or_expr_list(config: ParserConfig, children: Sequence[Any]) -> Any: + # Used by expression statements and assignments. Neither of these cases want to + # treat a single child as a sequence. + return _convert_sequencelike( + config, children, single_child_is_sequence=False, sequence_type=cst.Tuple + ) + + +def _convert_sequencelike( + config: ParserConfig, + children: Sequence[Any], + single_child_is_sequence: bool, + sequence_type: Type[cst.Tuple], # TODO: Type[Union[Tuple, List, Set]] +) -> Any: + if not single_child_is_sequence and len(children) == 1: + return children[0] + # N.B. The parent node (e.g. atom) is responsible for computing and attaching + # whitespace_after on our last Element/StarredElement node. + elements = [] + for wrapped_expr_or_starred_element, comma_token in grouper(children, 2): + expr_or_starred_element = wrapped_expr_or_starred_element.value + if comma_token is None: + comma = MaybeSentinel.DEFAULT + else: + comma = cst.Comma( + whitespace_before=parse_parenthesizable_whitespace( + config, comma_token.whitespace_before + ), + # Only compute whitespace_after if we're not a trailing comma. + # If we're a trailing comma, that whitespace should be consumed by the + # TrailingWhitespace, parenthesis, etc. + whitespace_after=parse_parenthesizable_whitespace( + config, comma_token.whitespace_after + ) + if comma_token is not children[-1] + else cst.SimpleWhitespace(""), + ) + + if isinstance(expr_or_starred_element, cst.StarredElement): + starred_element = expr_or_starred_element + elements.append(starred_element.with_changes(comma=comma)) + else: + expr = expr_or_starred_element + elements.append(cst.Element(value=expr, comma=comma)) + + # lpar/rpar are the responsibility of our parent + return WithLeadingWhitespace( + sequence_type(elements, lpar=(), rpar=()), children[0].whitespace_before + ) @with_production( @@ -970,15 +1020,6 @@ def convert_dictorsetmaker(config: ParserConfig, children: Sequence[Any]) -> Any return make_dummy_node(config, children) -@with_production("exprlist", "(expr|star_expr) (',' (expr|star_expr))* [',']") -def convert_exprlist(config: ParserConfig, children: Sequence[Any]) -> Any: - if len(children) == 1: - (child,) = children - return child - else: - return make_dummy_node(config, children) - - @with_production("arglist", "argument (',' argument)* [',']") def convert_arglist(config: ParserConfig, children: Sequence[Any]) -> Any: args = [] diff --git a/libcst/parser/_grammar.py b/libcst/parser/_grammar.py index 448c8b1a..2b5d449c 100644 --- a/libcst/parser/_grammar.py +++ b/libcst/parser/_grammar.py @@ -35,7 +35,6 @@ from libcst.parser._conversions.expression import ( convert_comparison, convert_dictorsetmaker, convert_expression_input, - convert_exprlist, convert_factor, convert_fstring, convert_fstring_content, @@ -53,9 +52,9 @@ from libcst.parser._conversions.expression import ( convert_sync_comp_for, convert_test, convert_test_nocond, - convert_testlist, - convert_testlist_comp, - convert_testlist_star_expr, + convert_test_or_expr_list, + convert_testlist_comp_list, + convert_testlist_comp_tuple, convert_trailer, convert_trailer_arglist, convert_trailer_attribute, @@ -213,7 +212,6 @@ _NONTERMINAL_CONVERSIONS_SEQUENCE: Tuple[NonterminalConversion, ...] = ( convert_funcdef_annotation, convert_suite, convert_indented_suite, - convert_testlist_star_expr, convert_test, convert_test_nocond, convert_lambda, @@ -248,10 +246,10 @@ _NONTERMINAL_CONVERSIONS_SEQUENCE: Tuple[NonterminalConversion, ...] = ( convert_fstring_expr, convert_fstring_format_spec, convert_atom_ellipses, - convert_testlist_comp, - convert_testlist, + convert_testlist_comp_tuple, + convert_testlist_comp_list, + convert_test_or_expr_list, convert_dictorsetmaker, - convert_exprlist, convert_arglist, convert_argument, convert_arg_assign_comp_for,