mirror of
https://github.com/python/cpython.git
synced 2025-08-04 17:08:35 +00:00
Extract visitors from the grammar nodes and call makers in the peg generator (GH-28172)
Simplify the peg generator logic by extracting as many visitors as possible to disentangle the flow and separate concerns.
This commit is contained in:
parent
28264269de
commit
b01fd533fe
13 changed files with 419 additions and 415 deletions
|
@ -1,22 +1,76 @@
|
|||
import ast
|
||||
import contextlib
|
||||
import re
|
||||
from abc import abstractmethod
|
||||
from typing import IO, AbstractSet, Dict, Iterator, List, Optional, Set, Text, Tuple
|
||||
from typing import (
|
||||
IO,
|
||||
AbstractSet,
|
||||
Any,
|
||||
Dict,
|
||||
Iterable,
|
||||
Iterator,
|
||||
List,
|
||||
Optional,
|
||||
Set,
|
||||
Text,
|
||||
Tuple,
|
||||
Union,
|
||||
)
|
||||
|
||||
from pegen import sccutils
|
||||
from pegen.grammar import (
|
||||
Alt,
|
||||
Cut,
|
||||
Forced,
|
||||
Gather,
|
||||
Grammar,
|
||||
GrammarError,
|
||||
GrammarVisitor,
|
||||
Group,
|
||||
Lookahead,
|
||||
NamedItem,
|
||||
NameLeaf,
|
||||
Opt,
|
||||
Plain,
|
||||
Repeat0,
|
||||
Repeat1,
|
||||
Rhs,
|
||||
Rule,
|
||||
StringLeaf,
|
||||
)
|
||||
|
||||
|
||||
class RuleCollectorVisitor(GrammarVisitor):
    """Visitor that invokes a provided callmaker visitor with just the NamedItem nodes"""

    def __init__(self, rules: Dict[str, Rule], callmakervisitor: GrammarVisitor) -> None:
        """Remember the rule table and the visitor that NamedItem nodes are handed to.

        Args:
            rules: Mapping of rule name to ``Rule``.
            callmakervisitor: Visitor whose ``visit`` is called for every
                ``NamedItem`` reached by this collector.
        """
        # Previously stored as ``self.rulses`` (typo); the sibling visitors in
        # this module all expose the mapping as ``rules``.
        self.rules = rules
        self.callmaker = callmakervisitor

    def visit_Rule(self, rule: Rule) -> None:
        """Continue the traversal on the result of ``rule.flatten()``."""
        self.visit(rule.flatten())

    def visit_NamedItem(self, item: NamedItem) -> None:
        """Hand the item to the callmaker visitor; this collector does not descend further."""
        self.callmaker.visit(item)
|
||||
|
||||
|
||||
class KeywordCollectorVisitor(GrammarVisitor):
    """Visitor that collects all the keywords and soft keywords in the Grammar"""

    def __init__(self, gen: "ParserGenerator", keywords: Dict[str, int], soft_keywords: Set[str]):
        """Keep references to the generator and to the two collections to fill.

        Args:
            gen: Generator whose ``keyword_type()`` supplies a type number for
                each newly seen hard keyword.
            keywords: Mapping updated in place: unquoted keyword -> type number.
            soft_keywords: Set updated in place with soft keyword names.
        """
        self.generator = gen
        self.keywords = keywords
        self.soft_keywords = soft_keywords

    def visit_StringLeaf(self, node: StringLeaf) -> None:
        """Record ``node`` as a hard or soft keyword if its text is identifier-like.

        A literal written with single quotes is a hard keyword; any other
        identifier-like literal is recorded as a soft keyword with its double
        quotes stripped. Non-identifier literals (operators, punctuation) are
        ignored.
        """
        val = ast.literal_eval(node.value)
        if not re.match(r"[a-zA-Z_]\w*\Z", val):
            return  # Not a keyword
        if node.value.endswith("'"):
            # ``keywords`` is keyed by the unquoted text, so the membership test
            # must use ``val``. Testing the quoted ``node.value`` never matched
            # and allocated a fresh type number on every occurrence.
            if val not in self.keywords:
                self.keywords[val] = self.generator.keyword_type()
        else:
            self.soft_keywords.add(node.value.replace('"', ""))
|
||||
|
||||
|
||||
class RuleCheckingVisitor(GrammarVisitor):
|
||||
def __init__(self, rules: Dict[str, Rule], tokens: Set[str]):
|
||||
self.rules = rules
|
||||
|
@ -39,6 +93,8 @@ class ParserGenerator:
|
|||
def __init__(self, grammar: Grammar, tokens: Set[str], file: Optional[IO[Text]]):
|
||||
self.grammar = grammar
|
||||
self.tokens = tokens
|
||||
self.keywords: Dict[str, int] = {}
|
||||
self.soft_keywords: Set[str] = set()
|
||||
self.rules = grammar.rules
|
||||
self.validate_rule_names()
|
||||
if "trailer" not in grammar.metas and "start" not in self.rules:
|
||||
|
@ -48,12 +104,10 @@ class ParserGenerator:
|
|||
checker.visit(rule)
|
||||
self.file = file
|
||||
self.level = 0
|
||||
compute_nullables(self.rules)
|
||||
self.first_graph, self.first_sccs = compute_left_recursives(self.rules)
|
||||
self.todo = self.rules.copy() # Rules to generate
|
||||
self.counter = 0 # For name_rule()/name_loop()
|
||||
self.keyword_counter = 499 # For keyword_type()
|
||||
self.all_rules: Dict[str, Rule] = {} # Rules + temporal rules
|
||||
self.all_rules: Dict[str, Rule] = self.rules.copy() # Rules + temporal rules
|
||||
self._local_variable_stack: List[List[str]] = []
|
||||
|
||||
def validate_rule_names(self) -> None:
|
||||
|
@ -94,39 +148,43 @@ class ParserGenerator:
|
|||
for line in lines.splitlines():
|
||||
self.print(line)
|
||||
|
||||
def collect_todo(self) -> None:
|
||||
def collect_rules(self) -> None:
|
||||
keyword_collector = KeywordCollectorVisitor(self, self.keywords, self.soft_keywords)
|
||||
for rule in self.all_rules.values():
|
||||
keyword_collector.visit(rule)
|
||||
|
||||
rule_collector = RuleCollectorVisitor(self.rules, self.callmakervisitor)
|
||||
done: Set[str] = set()
|
||||
while True:
|
||||
alltodo = list(self.todo)
|
||||
self.all_rules.update(self.todo)
|
||||
todo = [i for i in alltodo if i not in done]
|
||||
computed_rules = list(self.all_rules)
|
||||
todo = [i for i in computed_rules if i not in done]
|
||||
if not todo:
|
||||
break
|
||||
done = set(self.all_rules)
|
||||
for rulename in todo:
|
||||
self.todo[rulename].collect_todo(self)
|
||||
done = set(alltodo)
|
||||
rule_collector.visit(self.all_rules[rulename])
|
||||
|
||||
def keyword_type(self) -> int:
    """Advance ``self.keyword_counter`` by one and return the new value."""
    next_type = self.keyword_counter + 1
    self.keyword_counter = next_type
    return next_type
|
||||
|
||||
def name_node(self, rhs: Rhs) -> str:
|
||||
def artifical_rule_from_rhs(self, rhs: Rhs) -> str:
|
||||
self.counter += 1
|
||||
name = f"_tmp_{self.counter}" # TODO: Pick a nicer name.
|
||||
self.todo[name] = Rule(name, None, rhs)
|
||||
self.all_rules[name] = Rule(name, None, rhs)
|
||||
return name
|
||||
|
||||
def name_loop(self, node: Plain, is_repeat1: bool) -> str:
|
||||
def artificial_rule_from_repeat(self, node: Plain, is_repeat1: bool) -> str:
|
||||
self.counter += 1
|
||||
if is_repeat1:
|
||||
prefix = "_loop1_"
|
||||
else:
|
||||
prefix = "_loop0_"
|
||||
name = f"{prefix}{self.counter}" # TODO: It's ugly to signal via the name.
|
||||
self.todo[name] = Rule(name, None, Rhs([Alt([NamedItem(None, node)])]))
|
||||
name = f"{prefix}{self.counter}"
|
||||
self.all_rules[name] = Rule(name, None, Rhs([Alt([NamedItem(None, node)])]))
|
||||
return name
|
||||
|
||||
def name_gather(self, node: Gather) -> str:
|
||||
def artifical_rule_from_gather(self, node: Gather) -> str:
|
||||
self.counter += 1
|
||||
name = f"_gather_{self.counter}"
|
||||
self.counter += 1
|
||||
|
@ -135,7 +193,7 @@ class ParserGenerator:
|
|||
[NamedItem(None, node.separator), NamedItem("elem", node.node)],
|
||||
action="elem",
|
||||
)
|
||||
self.todo[extra_function_name] = Rule(
|
||||
self.all_rules[extra_function_name] = Rule(
|
||||
extra_function_name,
|
||||
None,
|
||||
Rhs([extra_function_alt]),
|
||||
|
@ -143,7 +201,7 @@ class ParserGenerator:
|
|||
alt = Alt(
|
||||
[NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name))],
|
||||
)
|
||||
self.todo[name] = Rule(
|
||||
self.all_rules[name] = Rule(
|
||||
name,
|
||||
None,
|
||||
Rhs([alt]),
|
||||
|
@ -160,13 +218,120 @@ class ParserGenerator:
|
|||
return name
|
||||
|
||||
|
||||
def compute_nullables(rules: Dict[str, Rule]) -> None:
|
||||
class NullableVisitor(GrammarVisitor):
    """Visitor that records which rules and named items can match the empty string.

    Results accumulate in ``self.nullables``; ``self.visited`` holds the rules
    already entered so that each rule is analysed at most once.
    """

    def __init__(self, rules: Dict[str, Rule]) -> None:
        self.rules = rules
        self.visited: Set[Any] = set()
        self.nullables: Set[Union[Rule, NamedItem]] = set()

    def visit_Rule(self, rule: Rule) -> bool:
        """Return whether ``rule`` is nullable; a rule already entered yields False."""
        seen = self.visited
        if rule in seen:
            return False
        seen.add(rule)
        rhs_is_nullable = self.visit(rule.rhs)
        if rhs_is_nullable:
            self.nullables.add(rule)
        return rule in self.nullables

    def visit_Rhs(self, rhs: Rhs) -> bool:
        """An Rhs is nullable as soon as one alternative is (later ones are not visited)."""
        return any(self.visit(alternative) for alternative in rhs.alts)

    def visit_Alt(self, alt: Alt) -> bool:
        """An Alt is nullable only if every item is; stops at the first non-nullable item."""
        return all(self.visit(element) for element in alt.items)

    def visit_Forced(self, force: Forced) -> bool:
        return True

    # NOTE(review): the node class imported by this module is spelled
    # ``Lookahead``; if GrammarVisitor dispatches on ``visit_<ClassName>`` this
    # handler may never be selected -- confirm against pegen.grammar.
    def visit_LookAhead(self, lookahead: Lookahead) -> bool:
        return True

    def visit_Opt(self, opt: Opt) -> bool:
        return True

    def visit_Repeat0(self, repeat: Repeat0) -> bool:
        return True

    def visit_Repeat1(self, repeat: Repeat1) -> bool:
        return False

    def visit_Gather(self, gather: Gather) -> bool:
        return False

    def visit_Cut(self, cut: Cut) -> bool:
        return False

    def visit_Group(self, group: Group) -> bool:
        """A group is exactly as nullable as the Rhs it wraps."""
        return self.visit(group.rhs)

    def visit_NamedItem(self, item: NamedItem) -> bool:
        """Record ``item`` as nullable when the node it wraps is, and report the result."""
        wrapped_is_nullable = self.visit(item.item)
        if wrapped_is_nullable:
            self.nullables.add(item)
        return item in self.nullables

    def visit_NameLeaf(self, node: NameLeaf) -> bool:
        """Follow a reference to a known rule; any other name is never empty."""
        if node.value not in self.rules:
            # Token or unknown; never empty.
            return False
        return self.visit(self.rules[node.value])

    def visit_StringLeaf(self, node: StringLeaf) -> bool:
        # Only a leaf whose ``value`` is falsy counts as empty.
        return not node.value
|
||||
|
||||
|
||||
def compute_nullables(rules: Dict[str, Rule]) -> Set[Any]:
    """Compute which rules in a grammar are nullable.

    Every rule in ``rules`` is run through a single ``NullableVisitor``.

    Args:
        rules: Mapping of rule name to ``Rule``.

    Returns:
        The visitor's ``nullables`` set (the rules and named items it found
        able to match the empty string).

    Thanks to TatSu (tatsu/leftrec.py) for inspiration.
    """
    nullable_visitor = NullableVisitor(rules)
    for rule in rules.values():
        # The former per-node ``rule.nullable_visit(rules)`` call is dropped:
        # its result was discarded and the visitor supersedes it.
        nullable_visitor.visit(rule)
    return nullable_visitor.nullables
|
||||
|
||||
|
||||
class InitialNamesVisitor(GrammarVisitor):
    """Visitor that gathers the names a grammar node can start with.

    Every ``visit_*`` method returns a set of names; nullability information
    comes from ``compute_nullables`` and is computed once at construction.
    """

    def __init__(self, rules: Dict[str, Rule]) -> None:
        self.rules = rules
        self.nullables = compute_nullables(rules)

    def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> Set[Any]:
        """Union the results of visiting every child of ``node``.

        A child that is a list is expanded one level, so each of its elements
        is visited individually.
        """
        collected: Set[str] = set()
        for child in node:
            members = child if isinstance(child, list) else [child]
            for member in members:
                collected |= self.visit(member, *args, **kwargs)
        return collected

    def visit_Alt(self, alt: Alt) -> Set[Any]:
        """Collect names from the leading items, up to and including the first non-nullable one."""
        collected: Set[str] = set()
        for element in alt.items:
            collected |= self.visit(element)
            if element not in self.nullables:
                # A non-nullable item always consumes input, so nothing after
                # it can be in initial position.
                break
        return collected

    def visit_Forced(self, force: Forced) -> Set[Any]:
        return set()

    # NOTE(review): the node class imported by this module is spelled
    # ``Lookahead``; if GrammarVisitor dispatches on ``visit_<ClassName>`` this
    # handler may never be selected and ``generic_visit`` would run instead --
    # confirm against pegen.grammar.
    def visit_LookAhead(self, lookahead: Lookahead) -> Set[Any]:
        return set()

    def visit_Cut(self, cut: Cut) -> Set[Any]:
        return set()

    def visit_NameLeaf(self, node: NameLeaf) -> Set[Any]:
        """A name leaf contributes its own name."""
        return {node.value}

    def visit_StringLeaf(self, node: StringLeaf) -> Set[Any]:
        return set()
|
||||
|
||||
|
||||
def compute_left_recursives(
|
||||
|
@ -207,10 +372,11 @@ def make_first_graph(rules: Dict[str, Rule]) -> Dict[str, AbstractSet[str]]:
|
|||
|
||||
Note that this requires the nullable flags to have been computed.
|
||||
"""
|
||||
initial_name_visitor = InitialNamesVisitor(rules)
|
||||
graph = {}
|
||||
vertices: Set[str] = set()
|
||||
for rulename, rhs in rules.items():
|
||||
graph[rulename] = names = rhs.initial_names()
|
||||
graph[rulename] = names = initial_name_visitor.visit(rhs)
|
||||
vertices |= names
|
||||
for vertex in vertices:
|
||||
graph.setdefault(vertex, set())
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue