Extract visitors from the grammar nodes and call makers in the peg generator (GH-28172)

Simplify the peg generator logic by extracting as many visitors as possible to disentangle the flow and separate concerns.
This commit is contained in:
Pablo Galindo Salgado 2021-09-05 14:58:52 +01:00 committed by GitHub
parent 28264269de
commit b01fd533fe
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
13 changed files with 419 additions and 415 deletions

View file

@ -1,22 +1,76 @@
import ast
import contextlib
import re
from abc import abstractmethod
from typing import IO, AbstractSet, Dict, Iterator, List, Optional, Set, Text, Tuple
from typing import (
IO,
AbstractSet,
Any,
Dict,
Iterable,
Iterator,
List,
Optional,
Set,
Text,
Tuple,
Union,
)
from pegen import sccutils
from pegen.grammar import (
Alt,
Cut,
Forced,
Gather,
Grammar,
GrammarError,
GrammarVisitor,
Group,
Lookahead,
NamedItem,
NameLeaf,
Opt,
Plain,
Repeat0,
Repeat1,
Rhs,
Rule,
StringLeaf,
)
class RuleCollectorVisitor(GrammarVisitor):
    """Visitor that invokes a provided callmaker visitor with just the NamedItem nodes.

    Each rule is flattened first (``rule.flatten()``) and the flattened form
    is walked; every ``NamedItem`` reached is handed to the callmaker visitor
    rather than being descended into by this visitor.
    """

    def __init__(self, rules: Dict[str, Rule], callmakervisitor: GrammarVisitor) -> None:
        """Store the rule table and the visitor that receives each NamedItem."""
        # Spelled ``rules`` (previously the typo ``rulses``) so the attribute
        # name agrees with the other visitors defined in this module.
        self.rules = rules
        self.callmaker = callmakervisitor

    def visit_Rule(self, rule: Rule) -> None:
        """Walk the flattened form of *rule*."""
        self.visit(rule.flatten())

    def visit_NamedItem(self, item: NamedItem) -> None:
        """Hand *item* over to the callmaker visitor."""
        self.callmaker.visit(item)
class KeywordCollectorVisitor(GrammarVisitor):
    """Visitor that collects all the keywords and soft keywords in the Grammar.

    Results are written into the ``keywords`` mapping and ``soft_keywords``
    set passed to the constructor, so the caller sees them updated in place.
    """

    def __init__(self, gen: "ParserGenerator", keywords: Dict[str, int], soft_keywords: Set[str]):
        """Remember the generator and the two containers to fill.

        ``gen`` is asked for a new number through ``gen.keyword_type()`` each
        time a keyword is recorded for the first time.
        """
        self.generator = gen
        self.keywords = keywords
        self.soft_keywords = soft_keywords

    def visit_StringLeaf(self, node: StringLeaf) -> None:
        """Record *node* as a keyword or soft keyword when its text is identifier-like.

        A string whose source form ends with a single quote is recorded in
        ``keywords``; any other identifier-like string is recorded in
        ``soft_keywords``. Strings that are not identifier-like are ignored.
        """
        val = ast.literal_eval(node.value)
        if not re.match(r"[a-zA-Z_]\w*\Z", val):
            return  # Not a keyword (e.g. punctuation).
        if node.value.endswith("'"):
            # ``keywords`` is keyed by the unquoted text, so membership must be
            # tested with ``val``. Testing the quoted ``node.value`` never
            # matched, which allocated a fresh type number (and overwrote the
            # previous one) on every repeated occurrence of the same keyword.
            if val not in self.keywords:
                self.keywords[val] = self.generator.keyword_type()
        else:
            self.soft_keywords.add(node.value.replace('"', ""))
class RuleCheckingVisitor(GrammarVisitor):
def __init__(self, rules: Dict[str, Rule], tokens: Set[str]):
self.rules = rules
@ -39,6 +93,8 @@ class ParserGenerator:
def __init__(self, grammar: Grammar, tokens: Set[str], file: Optional[IO[Text]]):
self.grammar = grammar
self.tokens = tokens
self.keywords: Dict[str, int] = {}
self.soft_keywords: Set[str] = set()
self.rules = grammar.rules
self.validate_rule_names()
if "trailer" not in grammar.metas and "start" not in self.rules:
@ -48,12 +104,10 @@ class ParserGenerator:
checker.visit(rule)
self.file = file
self.level = 0
compute_nullables(self.rules)
self.first_graph, self.first_sccs = compute_left_recursives(self.rules)
self.todo = self.rules.copy() # Rules to generate
self.counter = 0 # For name_rule()/name_loop()
self.keyword_counter = 499 # For keyword_type()
self.all_rules: Dict[str, Rule] = {} # Rules + temporal rules
self.all_rules: Dict[str, Rule] = self.rules.copy() # Rules + temporal rules
self._local_variable_stack: List[List[str]] = []
def validate_rule_names(self) -> None:
@ -94,39 +148,43 @@ class ParserGenerator:
for line in lines.splitlines():
self.print(line)
def collect_todo(self) -> None:
def collect_rules(self) -> None:
keyword_collector = KeywordCollectorVisitor(self, self.keywords, self.soft_keywords)
for rule in self.all_rules.values():
keyword_collector.visit(rule)
rule_collector = RuleCollectorVisitor(self.rules, self.callmakervisitor)
done: Set[str] = set()
while True:
alltodo = list(self.todo)
self.all_rules.update(self.todo)
todo = [i for i in alltodo if i not in done]
computed_rules = list(self.all_rules)
todo = [i for i in computed_rules if i not in done]
if not todo:
break
done = set(self.all_rules)
for rulename in todo:
self.todo[rulename].collect_todo(self)
done = set(alltodo)
rule_collector.visit(self.all_rules[rulename])
def keyword_type(self) -> int:
    """Advance ``self.keyword_counter`` by one and return the new value."""
    next_type = self.keyword_counter + 1
    self.keyword_counter = next_type
    return next_type
def name_node(self, rhs: Rhs) -> str:
def artifical_rule_from_rhs(self, rhs: Rhs) -> str:
self.counter += 1
name = f"_tmp_{self.counter}" # TODO: Pick a nicer name.
self.todo[name] = Rule(name, None, rhs)
self.all_rules[name] = Rule(name, None, rhs)
return name
def name_loop(self, node: Plain, is_repeat1: bool) -> str:
def artificial_rule_from_repeat(self, node: Plain, is_repeat1: bool) -> str:
self.counter += 1
if is_repeat1:
prefix = "_loop1_"
else:
prefix = "_loop0_"
name = f"{prefix}{self.counter}" # TODO: It's ugly to signal via the name.
self.todo[name] = Rule(name, None, Rhs([Alt([NamedItem(None, node)])]))
name = f"{prefix}{self.counter}"
self.all_rules[name] = Rule(name, None, Rhs([Alt([NamedItem(None, node)])]))
return name
def name_gather(self, node: Gather) -> str:
def artifical_rule_from_gather(self, node: Gather) -> str:
self.counter += 1
name = f"_gather_{self.counter}"
self.counter += 1
@ -135,7 +193,7 @@ class ParserGenerator:
[NamedItem(None, node.separator), NamedItem("elem", node.node)],
action="elem",
)
self.todo[extra_function_name] = Rule(
self.all_rules[extra_function_name] = Rule(
extra_function_name,
None,
Rhs([extra_function_alt]),
@ -143,7 +201,7 @@ class ParserGenerator:
alt = Alt(
[NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name))],
)
self.todo[name] = Rule(
self.all_rules[name] = Rule(
name,
None,
Rhs([alt]),
@ -160,13 +218,120 @@ class ParserGenerator:
return name
def compute_nullables(rules: Dict[str, Rule]) -> None:
class NullableVisitor(GrammarVisitor):
    """Visitor that decides which grammar nodes are treated as nullable.

    Each ``visit_*`` method returns a truthy value when its node is treated
    as nullable. ``Rule`` and ``NamedItem`` nodes found nullable are
    accumulated in ``self.nullables``.
    """

    def __init__(self, rules: Dict[str, Rule]) -> None:
        self.rules = rules
        # Rules that visit_Rule has already entered.
        self.visited: Set[Any] = set()
        self.nullables: Set[Union[Rule, NamedItem]] = set()

    def visit_Rule(self, rule: Rule) -> bool:
        """Visit the rule's rhs once; record the rule if the rhs is nullable."""
        # A rule that was already entered reports False without descending.
        # NOTE(review): this also holds for a rule that finished earlier and
        # is in self.nullables - confirm that is intended.
        if rule in self.visited:
            return False
        self.visited.add(rule)
        rhs_nullable = self.visit(rule.rhs)
        if rhs_nullable:
            self.nullables.add(rule)
        return rule in self.nullables

    def visit_Rhs(self, rhs: Rhs) -> bool:
        """Nullable when at least one alternative is; stops at the first hit."""
        return any(self.visit(alternative) for alternative in rhs.alts)

    def visit_Alt(self, alt: Alt) -> bool:
        """Nullable only when every item is; stops at the first miss."""
        return all(self.visit(element) for element in alt.items)

    def visit_Forced(self, force: Forced) -> bool:
        return True

    # NOTE(review): the node class imported above is spelled ``Lookahead``
    # while this method is ``visit_LookAhead``. If GrammarVisitor dispatches
    # on the node's class name this method may never be selected - confirm.
    def visit_LookAhead(self, lookahead: Lookahead) -> bool:
        return True

    def visit_Opt(self, opt: Opt) -> bool:
        return True

    def visit_Repeat0(self, repeat: Repeat0) -> bool:
        return True

    def visit_Repeat1(self, repeat: Repeat1) -> bool:
        return False

    def visit_Gather(self, gather: Gather) -> bool:
        return False

    def visit_Cut(self, cut: Cut) -> bool:
        return False

    def visit_Group(self, group: Group) -> bool:
        """A group is as nullable as the rhs it wraps."""
        return self.visit(group.rhs)

    def visit_NamedItem(self, item: NamedItem) -> bool:
        """Record the item when its inner node is nullable.

        The answer is read back from ``self.nullables``, so an item recorded
        by an earlier visit stays nullable on later visits.
        """
        inner_nullable = self.visit(item.item)
        if inner_nullable:
            self.nullables.add(item)
        return item in self.nullables

    def visit_NameLeaf(self, node: NameLeaf) -> bool:
        """Follow a name to its rule when one exists."""
        if node.value not in self.rules:
            # Token or unknown; never empty.
            return False
        return self.visit(self.rules[node.value])

    def visit_StringLeaf(self, node: StringLeaf) -> bool:
        # The string token '' is considered empty.
        # NOTE(review): elsewhere in this file node.value is passed to
        # ast.literal_eval, which suggests it still carries its quotes; if so
        # this expression is never True - confirm.
        return not node.value
def compute_nullables(rules: Dict[str, Rule]) -> Set[Any]:
"""Compute which rules in a grammar are nullable.
Thanks to TatSu (tatsu/leftrec.py) for inspiration.
"""
nullable_visitor = NullableVisitor(rules)
for rule in rules.values():
rule.nullable_visit(rules)
nullable_visitor.visit(rule)
return nullable_visitor.nullables
class InitialNamesVisitor(GrammarVisitor):
    """Visitor that gathers the names a node can begin with.

    Every ``visit_*`` method returns a set. ``NameLeaf`` contributes its own
    name, and an ``Alt`` contributes the names of its leading items up to and
    including the first one that is not in ``self.nullables``.
    """

    def __init__(self, rules: Dict[str, Rule]) -> None:
        self.rules = rules
        # Rule / NamedItem nodes that compute_nullables() reported as nullable.
        self.nullables = compute_nullables(rules)

    def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> Set[Any]:
        """Union the results of visiting every child of *node*.

        A child that is a list has each of its elements visited individually.
        """
        collected: Set[str] = set()
        for child in node:
            members = child if isinstance(child, list) else [child]
            for member in members:
                collected |= self.visit(member, *args, **kwargs)
        return collected

    def visit_Alt(self, alt: Alt) -> Set[Any]:
        """Collect names from the leading items of *alt*."""
        leading: Set[str] = set()
        for element in alt.items:
            leading |= self.visit(element)
            if element not in self.nullables:
                # Nothing after a non-nullable item can come first.
                return leading
        return leading

    def visit_Forced(self, force: Forced) -> Set[Any]:
        return set()

    # NOTE(review): the node class imported above is spelled ``Lookahead``
    # while this method is ``visit_LookAhead``. If GrammarVisitor dispatches
    # on the node's class name this method may never be selected - confirm.
    def visit_LookAhead(self, lookahead: Lookahead) -> Set[Any]:
        return set()

    def visit_Cut(self, cut: Cut) -> Set[Any]:
        return set()

    def visit_NameLeaf(self, node: NameLeaf) -> Set[Any]:
        return {node.value}

    def visit_StringLeaf(self, node: StringLeaf) -> Set[Any]:
        return set()
def compute_left_recursives(
@ -207,10 +372,11 @@ def make_first_graph(rules: Dict[str, Rule]) -> Dict[str, AbstractSet[str]]:
Note that this requires the nullable flags to have been computed.
"""
initial_name_visitor = InitialNamesVisitor(rules)
graph = {}
vertices: Set[str] = set()
for rulename, rhs in rules.items():
graph[rulename] = names = rhs.initial_names()
graph[rulename] = names = initial_name_visitor.visit(rhs)
vertices |= names
for vertex in vertices:
graph.setdefault(vertex, set())