# Copyright 2004-2005 Elemental Security, Inc. All Rights Reserved.
# Licensed to PSF under a Contributor Agreement.
#
# Modifications:
# Copyright David Halter and Contributors
# Modifications are dual-licensed: MIT and PSF.
# 99% of the code is different from pgen2, now.
#
# A fork of `parso.parser`.
# https://github.com/davidhalter/parso/blob/v0.3.4/parso/parser.py
#
# The following changes were made:
# - Typing was added.
# - Error recovery is removed.
# - The Jedi-specific _allowed_transition_names_and_token_types API is removed.
# - Improved error messages by using our exceptions module.
# - node_map/leaf_map were removed in favor of just calling convert_*.
# - convert_node/convert_leaf were renamed to convert_nonterminal/convert_terminal.
# - convert_nonterminal is called regardless of the number of children. Parso avoids
#   calling it in some cases to avoid creating extra nodes.
# - The parser is constructed with the tokens to allow us to track a bit more state.
#   As a consequence, each parser may only be used once.
# - Supports our custom Token class, instead of `parso.python.tokenize.Token`.

from dataclasses import dataclass, field
from typing import Generic, Iterable, List, Sequence, TypeVar, Union

from libcst._exceptions import EOFSentinel, ParserSyntaxError, PartialParserSyntaxError
from libcst._parser._parsing_check import get_expected_str
from libcst._parser.parso.pgen2.generator import DFAState, Grammar, ReservedString
from libcst._parser.parso.python.token import TokenType
from libcst._parser.types.token import Token

_NodeT = TypeVar("_NodeT")
_TokenTypeT = TypeVar("_TokenTypeT", bound=TokenType)
_TokenT = TypeVar("_TokenT", bound=Token)


@dataclass(frozen=False)
class StackNode(Generic[_TokenTypeT, _NodeT]):
    dfa: "DFAState[_TokenTypeT]"
    nodes: List[_NodeT] = field(default_factory=list)

    @property
    def nonterminal(self) -> str:
        return self.dfa.from_rule


def _token_to_transition(
    grammar: "Grammar[_TokenTypeT]", type_: _TokenTypeT, value: str
) -> Union[ReservedString, _TokenTypeT]:
    # Map from token to transition label.
    if type_.contains_syntax:
        # Check for reserved words (keywords).
        try:
            return grammar.reserved_syntax_strings[value]
        except KeyError:
            pass

    return type_
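

# For illustration: a token whose type may carry syntax (e.g. NAME) transitions on
# its string when that string is a reserved word, and on its TokenType otherwise,
# so `if` and `foo` take different DFA edges. Below is a minimal runnable sketch of
# that dispatch, using plain strings as stand-ins for `Grammar`, `ReservedString`,
# and `TokenType` (`_demo_token_to_transition` is a hypothetical helper added for
# documentation only, not part of the parso fork):
def _demo_token_to_transition(value: str) -> str:
    # Stand-in for grammar.reserved_syntax_strings.
    reserved_syntax_strings = {
        "if": "ReservedString('if')",
        "in": "ReservedString('in')",
    }
    # Stand-in for a TokenType whose contains_syntax flag is True.
    name_token_type = "TokenType(NAME)"
    # Mirrors the try/except KeyError fallthrough above: keywords map to their
    # ReservedString label; any other name falls back to the NAME token type.
    return reserved_syntax_strings.get(value, name_token_type)


# _demo_token_to_transition("if")  -> "ReservedString('if')"
# _demo_token_to_transition("foo") -> "TokenType(NAME)"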


# TODO: This should be an ABC, but there's a metaclass conflict between Generic and
# ABC that's fixed in Python 3.7.
class BaseParser(Generic[_TokenT, _TokenTypeT, _NodeT]):
    """Parser engine.

    A Parser instance contains state pertaining to the current token sequence, and
    should not be used concurrently by different threads to parse separate token
    sequences.

    See python/tokenize.py for how to get input tokens from a string.
    """

    tokens: Iterable[_TokenT]
    lines: Sequence[str]  # used when generating parse errors
    _pgen_grammar: "Grammar[_TokenTypeT]"
    stack: List[StackNode[_TokenTypeT, _NodeT]]
    # Keep track of whether parse was called. Because a parser may keep global mutable
    # state, each BaseParser instance should only be used once.
    __was_parse_called: bool

    def __init__(
        self,
        *,
        tokens: Iterable[_TokenT],
        lines: Sequence[str],
        pgen_grammar: "Grammar[_TokenTypeT]",
        start_nonterminal: str,
    ) -> None:
        self.tokens = tokens
        self.lines = lines
        self._pgen_grammar = pgen_grammar
        first_dfa = pgen_grammar.nonterminal_to_dfas[start_nonterminal][0]
        self.stack = [StackNode(first_dfa)]
        self.__was_parse_called = False

    def parse(self) -> _NodeT:
        # Ensure that we don't re-use parsers.
        if self.__was_parse_called:
            raise ValueError("Each parser object may only be used to parse once.")
        self.__was_parse_called = True

        for token in self.tokens:
            self._add_token(token)

        while True:
            tos = self.stack[-1]
            if not tos.dfa.is_final:
                expected_str = get_expected_str(
                    EOFSentinel.EOF, tos.dfa.transitions.keys()
                )
                raise ParserSyntaxError(
                    f"Incomplete input. {expected_str}",
                    lines=self.lines,
                    raw_line=len(self.lines),
                    raw_column=len(self.lines[-1]),
                )

            if len(self.stack) > 1:
                self._pop()
            else:
                return self.convert_nonterminal(tos.nonterminal, tos.nodes)

    def convert_nonterminal(
        self, nonterminal: str, children: Sequence[_NodeT]
    ) -> _NodeT:
        ...

    def convert_terminal(self, token: _TokenT) -> _NodeT:
        ...

    def _add_token(self, token: _TokenT) -> None:
        """
        This is the only core function for parsing; basically everything happens here.

        Everything is well prepared by the parser generator, so we only apply the
        necessary steps here.
        """
        grammar = self._pgen_grammar
        stack = self.stack
        # pyre-fixme[6]: Expected `_TokenTypeT` for 2nd param but got `TokenType`.
        transition = _token_to_transition(grammar, token.type, token.string)

        while True:
            try:
                plan = stack[-1].dfa.transitions[transition]
                break
            except KeyError:
                if stack[-1].dfa.is_final:
                    try:
                        self._pop()
                    except PartialParserSyntaxError as ex:
                        # Upconvert the PartialParserSyntaxError to a ParserSyntaxError
                        # by backfilling the line/column information.
                        raise ParserSyntaxError(
                            ex.message,
                            lines=self.lines,
                            raw_line=token.start_pos[0],
                            raw_column=token.start_pos[1],
                        )
                    except Exception as ex:
                        # convert_nonterminal may fail due to a bug in our code. Try to
                        # recover enough to at least tell us where in the file it
                        # failed.
                        raise ParserSyntaxError(
                            f"Internal error: {ex}",
                            lines=self.lines,
                            raw_line=token.start_pos[0],
                            raw_column=token.start_pos[1],
                        )
                else:
                    # We never broke out -- EOF is too soon -- unfinished statement.
                    #
                    # BUG: The `expected_str` may not be complete because we already
                    # popped the other possibilities off the stack at this point, but
                    # it still seems useful to list some of the possibilities that we
                    # could've expected.
                    expected_str = get_expected_str(
                        token, stack[-1].dfa.transitions.keys()
                    )
                    raise ParserSyntaxError(
                        f"Incomplete input. {expected_str}",
                        lines=self.lines,
                        raw_line=token.start_pos[0],
                        raw_column=token.start_pos[1],
                    )
            except IndexError:
                # I don't think this will ever happen with Python's grammar, because
                # if there are any extra tokens at the end of the input, we'll instead
                # complain that we expected ENDMARKER.
                #
                # However, let's leave it just in case.
                expected_str = get_expected_str(token, EOFSentinel.EOF)
                raise ParserSyntaxError(
                    f"Too much input. {expected_str}",
                    lines=self.lines,
                    raw_line=token.start_pos[0],
                    raw_column=token.start_pos[1],
                )

        # Logically, `plan` is always defined, but pyre can't reasonably determine
        # that.
        stack[-1].dfa = plan.next_dfa

        for push in plan.dfa_pushes:
            stack.append(StackNode(push))

        leaf = self.convert_terminal(token)
        stack[-1].nodes.append(leaf)

    def _pop(self) -> None:
        tos = self.stack.pop()

        # Unlike parso and lib2to3, we call `convert_nonterminal` unconditionally
        # instead of only when we have more than one child. This allows us to create a
        # far more consistent and predictable tree.
        new_node = self.convert_nonterminal(tos.dfa.from_rule, tos.nodes)
        self.stack[-1].nodes.append(new_node)
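

# Usage sketch (illustrative only; `_TupleTreeParser` is a hypothetical subclass
# added for documentation, not part of this module): concrete parsers subclass
# BaseParser and implement the two convert_* hooks. libcst's real subclass builds
# CST nodes; this one builds a plain tuple-based tree to show the contract.
class _TupleTreeParser(BaseParser[Token, TokenType, object]):
    def convert_nonterminal(
        self, nonterminal: str, children: Sequence[object]
    ) -> object:
        # Called once per reduced grammar rule, even for single-child rules.
        return (nonterminal, *children)

    def convert_terminal(self, token: Token) -> object:
        # Leaves keep the token's type and exact source string.
        return (str(token.type), token.string)


# A parser instance is single-use: construct it with the token stream and grammar,
# then call parse() exactly once. (Hypothetical wiring; obtaining `tokens`,
# `lines`, and `grammar` depends on the surrounding libcst internals.)
#
#     parser = _TupleTreeParser(
#         tokens=tokens,
#         lines=lines,
#         pgen_grammar=grammar,
#         start_nonterminal="file_input",
#     )
#     tree = parser.parse()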