mirror of
https://github.com/python/cpython.git
synced 2025-11-02 03:01:58 +00:00
bpo-40334: PEP 617 implementation: New PEG parser for CPython (GH-19503)
Co-authored-by: Guido van Rossum <guido@python.org> Co-authored-by: Lysandros Nikolaou <lisandrosnik@gmail.com>
This commit is contained in:
parent
a81849b031
commit
c5fc156852
91 changed files with 27057 additions and 146 deletions
153
Tools/peg_generator/pegen/first_sets.py
Executable file
153
Tools/peg_generator/pegen/first_sets.py
Executable file
|
|
@ -0,0 +1,153 @@
|
|||
#!/usr/bin/env python3.8
|
||||
|
||||
import argparse
|
||||
import collections
|
||||
import pprint
|
||||
import sys
|
||||
from typing import Optional, Set, Dict
|
||||
|
||||
from pegen.build import build_parser
|
||||
from pegen.grammar import (
|
||||
Alt,
|
||||
Cut,
|
||||
Gather,
|
||||
Grammar,
|
||||
GrammarVisitor,
|
||||
Group,
|
||||
Leaf,
|
||||
Lookahead,
|
||||
NamedItem,
|
||||
NameLeaf,
|
||||
NegativeLookahead,
|
||||
Opt,
|
||||
Repeat,
|
||||
Repeat0,
|
||||
Repeat1,
|
||||
Rhs,
|
||||
Rule,
|
||||
StringLeaf,
|
||||
PositiveLookahead,
|
||||
)
|
||||
|
||||
# Command-line interface: one required positional argument, the grammar file.
argparser = argparse.ArgumentParser(
    prog="calculate_first_sets",
    description="Calculate the first sets of a grammar",
)
argparser.add_argument(
    "grammar_file",
    help="The grammar file",
)
|
||||
|
||||
|
||||
class FirstSetCalculator(GrammarVisitor):
    """Compute the first set of every rule in a grammar.

    A first set is a ``Set[str]`` holding the leaf values that can begin a
    rule, plus the empty string ``""`` when the rule is flagged as nullable.
    Results are cached per rule name in ``self.first_sets``; rules whose
    computation is still running are tracked in ``self.in_process`` so that
    recursive references contribute an empty set instead of recursing forever.

    The ``visit_<NodeClass>`` methods are presumably dispatched by the
    inherited ``GrammarVisitor.visit`` based on the node's class name
    (confirm against ``pegen.grammar``).
    """

    def __init__(self, rules: Dict[str, Rule]) -> None:
        """Store *rules* (rule name -> ``Rule``) and prepare empty caches."""
        self.rules = rules
        # Presumably populates the ``nullable`` flag that ``visit_Rule`` reads
        # -- TODO confirm against ``Rule.nullable_visit``.
        for rule in rules.values():
            rule.nullable_visit(rules)
        # Finished results, keyed by rule name.
        self.first_sets: Dict[str, Set[str]] = {}
        # Names of the rules currently being computed (recursion guard).
        self.in_process: Set[str] = set()

    def calculate(self) -> Dict[str, Set[str]]:
        """Visit every rule and return the mapping rule name -> first set."""
        for rule in self.rules.values():
            self.visit(rule)
        return self.first_sets

    def visit_Alt(self, item: Alt) -> Set[str]:
        """Return the first set of one alternative.

        Items are scanned left to right and their terminals accumulated.
        Terminals produced by negative lookaheads are collected separately
        and subtracted from the result.  The scan continues past an item
        only while that item can be skipped.  The empty string is never
        part of the returned set.
        """
        result: Set[str] = set()
        to_remove: Set[str] = set()
        for other in item.items:
            new_terminals = self.visit(other)
            if isinstance(other.item, NegativeLookahead):
                to_remove |= new_terminals
            result |= new_terminals
            # Subtracting an empty set is a no-op, so no guard is needed.
            result -= to_remove

            # If the set of new terminals contains the empty string, the
            # item is completely nullable and we should also consider at
            # least the next item in case the current one fails to parse.
            # The same holds for items that may match nothing by
            # construction (Opt, NegativeLookahead, Repeat0).
            # NOTE(review): a Cut item yields an empty set and is not in the
            # tuple below, so the scan stops at a cut -- confirm intended.
            can_be_skipped = "" in new_terminals or isinstance(
                other.item, (Opt, NegativeLookahead, Repeat0)
            )
            if not can_be_skipped:
                break

        # Do not allow the empty string to propagate.
        result.discard("")

        return result

    def visit_Cut(self, item: Cut) -> Set[str]:
        """A cut contributes no terminals."""
        return set()

    def visit_Group(self, item: Group) -> Set[str]:
        """Return the first set of the group's right-hand side."""
        return self.visit(item.rhs)

    def visit_PositiveLookahead(self, item: PositiveLookahead) -> Set[str]:
        """Return the first set of the node being looked ahead for."""
        return self.visit(item.node)

    def visit_NegativeLookahead(self, item: NegativeLookahead) -> Set[str]:
        """Return the first set of the negated node.

        ``visit_Alt`` subtracts these terminals from its result instead of
        keeping them.
        """
        return self.visit(item.node)

    def visit_NamedItem(self, item: NamedItem) -> Set[str]:
        """Return the first set of the wrapped item."""
        return self.visit(item.item)

    def visit_Opt(self, item: Opt) -> Set[str]:
        """Return the first set of the optional node."""
        return self.visit(item.node)

    def visit_Gather(self, item: Gather) -> Set[str]:
        """Return the first set of the gathered node."""
        return self.visit(item.node)

    def visit_Repeat0(self, item: Repeat0) -> Set[str]:
        """Return the first set of the repeated node."""
        return self.visit(item.node)

    def visit_Repeat1(self, item: Repeat1) -> Set[str]:
        """Return the first set of the repeated node."""
        return self.visit(item.node)

    def visit_NameLeaf(self, item: NameLeaf) -> Set[str]:
        """Return the first set for a name reference.

        A name that is not a known rule stands for itself.  A known rule is
        computed on first use and cached; a rule that is still being
        computed contributes an empty set.
        """
        name = item.value
        if name not in self.rules:
            return {name}

        if name not in self.first_sets:
            self.first_sets[name] = self.visit(self.rules[name])
            return self.first_sets[name]
        if name in self.in_process:
            # Recursive reference to a rule whose set is not final yet.
            return set()

        return self.first_sets[name]

    def visit_StringLeaf(self, item: StringLeaf) -> Set[str]:
        """A string literal's first set is the literal's value itself."""
        return {item.value}

    def visit_Rhs(self, item: Rhs) -> Set[str]:
        """Return the union of the first sets of all alternatives."""
        result: Set[str] = set()
        for alt in item.alts:
            result |= self.visit(alt)
        return result

    def visit_Rule(self, item: Rule) -> Set[str]:
        """Return (computing and caching if needed) the rule's first set.

        While a rule is being computed, a nested visit of the same rule
        returns an empty set.  A rule flagged ``nullable`` gets ``""`` added
        to its set.
        """
        if item.name in self.in_process:
            return set()
        if item.name not in self.first_sets:
            self.in_process.add(item.name)
            terminals = self.visit(item.rhs)
            if item.nullable:
                terminals.add("")
            self.first_sets[item.name] = terminals
            self.in_process.remove(item.name)
        return self.first_sets[item.name]
|
||||
|
||||
|
||||
def main() -> None:
    """Parse the command line, build the grammar and print its first sets.

    Exits with status 1, after reporting on stderr, when the grammar file
    cannot be turned into a parser.
    """
    args = argparser.parse_args()

    try:
        # Only the grammar is needed here; the other two results are unused.
        grammar, _parser, _tokenizer = build_parser(args.grammar_file)
    except Exception as err:
        # Top-level boundary of a CLI tool: report and exit instead of
        # dumping a traceback, but do not hide the underlying cause.
        print("ERROR: Failed to parse grammar file", file=sys.stderr)
        print(f"  {type(err).__name__}: {err}", file=sys.stderr)
        sys.exit(1)

    first_sets = FirstSetCalculator(grammar.rules).calculate()
    pprint.pprint(first_sets)
|
||||
|
||||
|
||||
# Run the command-line tool only when executed as a script, not on import.
if __name__ == "__main__":
    main()
|
||||
Loading…
Add table
Add a link
Reference in a new issue