mirror of
https://github.com/python/cpython.git
synced 2025-11-24 12:20:42 +00:00
gh-138970: Add general metadata system to the peg generator (#138971)
This commit is contained in:
parent
b485e50fde
commit
0ce9fb7e3b
5 changed files with 113 additions and 26 deletions
|
|
@ -1106,3 +1106,49 @@ class TestGrammarVisualizer(unittest.TestCase):
|
|||
)
|
||||
|
||||
self.assertEqual(output, expected_output)
|
||||
|
||||
def test_rule_flags(self) -> None:
    """Check that a rule header accepts an arbitrary, comma-separated flag list.

    Covers a memo-only rule, a rule with several flags, a rule with a single
    non-memo flag, and a rule with no flag list at all.
    """
    # NOTE(review): alternatives must be indented under their rule header; the
    # indentation inside this literal was reconstructed — confirm it against
    # the repository copy of the test.
    grammar_source = """
    start: simple_rule

    simple_rule (memo):
        | "hello"

    multi_flag_rule (memo, custom, test):
        | "world"

    single_custom_flag (custom):
        | "test"

    no_flags_rule:
        | "plain"
    """

    grammar: Grammar = parse_string(grammar_source, GrammarParser)
    rules = grammar.rules

    # Memoization is checked through the flag set rather than a dedicated
    # ``memo`` attribute, because ``Rule`` stores its metadata in ``flags``.

    # Test memo-only rule
    simple_rule = rules['simple_rule']
    self.assertIn('memo', simple_rule.flags,
                  "simple_rule should carry the memo flag")
    self.assertEqual(simple_rule.flags, frozenset({'memo'}),
                     f"simple_rule flags should be {{'memo'}}, got {simple_rule.flags}")

    # Test multi-flag rule
    multi_flag_rule = rules['multi_flag_rule']
    self.assertIn('memo', multi_flag_rule.flags,
                  "multi_flag_rule should carry the memo flag")
    self.assertEqual(multi_flag_rule.flags, frozenset({'memo', 'custom', 'test'}),
                     f"multi_flag_rule flags should contain memo, custom, test, got {multi_flag_rule.flags}")

    # Test single custom flag rule
    single_custom_rule = rules['single_custom_flag']
    self.assertNotIn('memo', single_custom_rule.flags,
                     "single_custom_flag should not carry the memo flag")
    self.assertEqual(single_custom_rule.flags, frozenset({'custom'}),
                     f"single_custom_flag flags should be {{'custom'}}, got {single_custom_rule.flags}")

    # Test no flags rule: a missing flag list is normalized to an empty
    # frozenset, which never compares equal to an empty list.
    no_flags_rule = rules['no_flags_rule']
    self.assertNotIn('memo', no_flags_rule.flags,
                     "no_flags_rule should not carry the memo flag")
    self.assertEqual(no_flags_rule.flags, frozenset(),
                     f"no_flags_rule flags should be the empty set, got {no_flags_rule.flags}")
|
||||
|
|
|
|||
|
|
@ -595,7 +595,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
|
|||
self.print(f"{node.name}_raw(Parser *p)")
|
||||
|
||||
def _should_memoize(self, node: Rule) -> bool:
|
||||
return node.memo and not node.left_recursive
|
||||
return "memo" in node.flags and not node.left_recursive
|
||||
|
||||
def _handle_default_rule_body(self, node: Rule, rhs: Rhs, result_type: str) -> None:
|
||||
memoize = self._should_memoize(node)
|
||||
|
|
|
|||
|
|
@ -58,11 +58,11 @@ SIMPLE_STR = True
|
|||
|
||||
|
||||
class Rule:
|
||||
def __init__(self, name: str, type: str | None, rhs: Rhs, memo: object | None = None):
|
||||
def __init__(self, name: str, type: str | None, rhs: Rhs, flags: frozenset[str] | None = None):
|
||||
self.name = name
|
||||
self.type = type
|
||||
self.rhs = rhs
|
||||
self.memo = bool(memo)
|
||||
self.flags = flags or frozenset()
|
||||
self.left_recursive = False
|
||||
self.leader = False
|
||||
|
||||
|
|
@ -135,7 +135,6 @@ class StringLeaf(Leaf):
|
|||
class Rhs:
|
||||
def __init__(self, alts: list[Alt]):
    """Create a right-hand side from its list of alternatives."""
    self.alts = alts
    # NOTE(review): the hunk header (-135,7 +135,6) indicates the commit
    # deletes this attribute — confirm before relying on it.
    self.memo: tuple[str | None, str] | None = None
|
||||
|
||||
def __str__(self) -> str:
|
||||
return " | ".join(str(alt) for alt in self.alts)
|
||||
|
|
@ -263,7 +262,6 @@ class Repeat:
|
|||
|
||||
def __init__(self, node: Plain):
    """Wrap *node* as the single item of this node."""
    self.node = node
    # NOTE(review): the hunk header (-263,7 +262,6) indicates the commit
    # deletes this attribute — confirm before relying on it.
    self.memo: tuple[str | None, str] | None = None
|
||||
|
||||
def __iter__(self) -> Iterator[Plain]:
|
||||
yield self.node
|
||||
|
|
|
|||
67
Tools/peg_generator/pegen/grammar_parser.py
generated
67
Tools/peg_generator/pegen/grammar_parser.py
generated
|
|
@ -147,12 +147,12 @@ class GeneratedParser(Parser):
|
|||
|
||||
@memoize
|
||||
def rule(self) -> Optional[Rule]:
|
||||
# rule: rulename memoflag? ":" alts NEWLINE INDENT more_alts DEDENT | rulename memoflag? ":" NEWLINE INDENT more_alts DEDENT | rulename memoflag? ":" alts NEWLINE
|
||||
# rule: rulename flags? ":" alts NEWLINE INDENT more_alts DEDENT | rulename flags? ":" NEWLINE INDENT more_alts DEDENT | rulename flags? ":" alts NEWLINE
|
||||
mark = self._mark()
|
||||
if (
|
||||
(rulename := self.rulename())
|
||||
and
|
||||
(opt := self.memoflag(),)
|
||||
(flags := self.flags(),)
|
||||
and
|
||||
(literal := self.expect(":"))
|
||||
and
|
||||
|
|
@ -166,12 +166,12 @@ class GeneratedParser(Parser):
|
|||
and
|
||||
(_dedent := self.expect('DEDENT'))
|
||||
):
|
||||
return Rule ( rulename [0] , rulename [1] , Rhs ( alts . alts + more_alts . alts ) , memo = opt )
|
||||
return Rule ( rulename [0] , rulename [1] , Rhs ( alts . alts + more_alts . alts ) , flags = flags )
|
||||
self._reset(mark)
|
||||
if (
|
||||
(rulename := self.rulename())
|
||||
and
|
||||
(opt := self.memoflag(),)
|
||||
(flags := self.flags(),)
|
||||
and
|
||||
(literal := self.expect(":"))
|
||||
and
|
||||
|
|
@ -183,12 +183,12 @@ class GeneratedParser(Parser):
|
|||
and
|
||||
(_dedent := self.expect('DEDENT'))
|
||||
):
|
||||
return Rule ( rulename [0] , rulename [1] , more_alts , memo = opt )
|
||||
return Rule ( rulename [0] , rulename [1] , more_alts , flags = flags )
|
||||
self._reset(mark)
|
||||
if (
|
||||
(rulename := self.rulename())
|
||||
and
|
||||
(opt := self.memoflag(),)
|
||||
(flags := self.flags(),)
|
||||
and
|
||||
(literal := self.expect(":"))
|
||||
and
|
||||
|
|
@ -196,7 +196,7 @@ class GeneratedParser(Parser):
|
|||
and
|
||||
(_newline := self.expect('NEWLINE'))
|
||||
):
|
||||
return Rule ( rulename [0] , rulename [1] , alts , memo = opt )
|
||||
return Rule ( rulename [0] , rulename [1] , alts , flags = flags )
|
||||
self._reset(mark)
|
||||
return None
|
||||
|
||||
|
|
@ -219,17 +219,28 @@ class GeneratedParser(Parser):
|
|||
return None
|
||||
|
||||
@memoize
def flags(self) -> Optional[frozenset [str]]:
    """Parse a parenthesized, comma-separated list of flag names.

    Returns the flag names as a frozenset, or None (after rewinding to the
    starting position) when the input does not match.
    """
    # flags: '(' ','.flag+ ')'
    mark = self._mark()
    if (
        (literal := self.expect('('))
        and
        (a := self._gather_2())
        and
        (literal_1 := self.expect(')'))
    ):
        return frozenset ( a )
    self._reset(mark)
    return None
|
||||
|
||||
@memoize
def flag(self) -> Optional[str]:
    """Parse one flag and return the text of its NAME token, or None."""
    # flag: NAME
    start = self._mark()
    token = self.name()
    if token:
        return token.string
    # No NAME here: rewind so the caller can try something else.
    self._reset(start)
    return None
|
||||
|
||||
|
|
@ -661,8 +672,38 @@ class GeneratedParser(Parser):
|
|||
self._reset(mark)
|
||||
return None
|
||||
|
||||
@memoize
def _loop0_1(self) -> Optional[Any]:
    """Collect zero or more ``',' flag`` repetitions and return the flags."""
    # _loop0_1: ',' flag
    checkpoint = self._mark()
    collected = []
    while True:
        if not self.expect(','):
            break
        item = self.flag()
        if not item:
            break
        collected.append(item)
        # Remember the position after the last complete repetition.
        checkpoint = self._mark()
    # Drop any partially consumed repetition (e.g. a trailing comma).
    self._reset(checkpoint)
    return collected
|
||||
|
||||
@memoize
def _gather_2(self) -> Optional[Any]:
    """Parse one flag followed by the ``',' flag`` repetitions.

    Returns the list of all parsed flags, or None after rewinding when the
    leading flag or the repetition result is None.
    """
    # _gather_2: flag _loop0_1
    start = self._mark()
    first = self.flag()
    if first is not None:
        rest = self._loop0_1()
        if rest is not None:
            return [first] + rest
    self._reset(start)
    return None
|
||||
|
||||
KEYWORDS = ()
|
||||
SOFT_KEYWORDS = ('memo',)
|
||||
SOFT_KEYWORDS = ()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
|
|||
|
|
@ -50,19 +50,21 @@ rules[RuleList]:
|
|||
| rule { [rule] }
|
||||
|
||||
# A rule is a rule name, an optional parenthesized flag list, and its
# alternatives written inline, on indented lines, or both.
rule[Rule]:
    | rulename flags=flags? ":" alts NEWLINE INDENT more_alts DEDENT {
          Rule(rulename[0], rulename[1], Rhs(alts.alts + more_alts.alts), flags=flags) }
    | rulename flags=flags? ":" NEWLINE INDENT more_alts DEDENT {
          Rule(rulename[0], rulename[1], more_alts, flags=flags) }
    | rulename flags=flags? ":" alts NEWLINE { Rule(rulename[0], rulename[1], alts, flags=flags) }

# Yields a (name, annotation) tuple; the annotation is None when absent.
rulename[RuleName]:
    | NAME annotation { (name.string, annotation) }
    | NAME { (name.string, None) }

# Rule metadata: a parenthesized, comma-separated list of flag names,
# collected into a frozenset.
flags[frozenset[str]]:
    | '(' a=','.flag+ ')' { frozenset(a) }

# A single flag is any NAME token; its text is the flag name.
flag[str]:
    | NAME { name.string }
|
||||
|
||||
alts[Rhs]:
|
||||
| alt "|" alts { Rhs([alt] + alts.alts)}
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue