gh-138970: Add general metadata system to the peg generator (#138971)

This commit is contained in:
Pablo Galindo Salgado 2025-09-18 02:17:04 +01:00 committed by GitHub
parent b485e50fde
commit 0ce9fb7e3b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 113 additions and 26 deletions

View file

@ -1106,3 +1106,49 @@ class TestGrammarVisualizer(unittest.TestCase):
)
self.assertEqual(output, expected_output)
def test_rule_flags(self) -> None:
    """Check that a rule header accepts an arbitrary, comma-separated flag list.

    Each rule in the grammar below is parsed with the meta-grammar parser and
    its ``flags`` attribute is compared against the expected ``frozenset``.
    A rule without a flag list must expose an *empty frozenset* (``Rule``
    stores ``flags or frozenset()``), not a list.
    """
    # Alternatives must be indented under the rule header: the multi-line rule
    # form is ``rulename flags? ":" NEWLINE INDENT more_alts DEDENT``.
    grammar_source = """
start: simple_rule
simple_rule (memo):
    | "hello"
multi_flag_rule (memo, custom, test):
    | "world"
single_custom_flag (custom):
    | "test"
no_flags_rule:
    | "plain"
"""
    grammar: Grammar = parse_string(grammar_source, GrammarParser)
    rules = grammar.rules

    # rule name -> (expected flag set, whether the rule asks for memoization)
    expectations = {
        "simple_rule": (frozenset({"memo"}), True),
        "multi_flag_rule": (frozenset({"memo", "custom", "test"}), True),
        "single_custom_flag": (frozenset({"custom"}), False),
        "no_flags_rule": (frozenset(), False),
    }

    for rule_name, (expected_flags, expect_memo) in expectations.items():
        with self.subTest(rule=rule_name):
            actual_flags = rules[rule_name].flags
            self.assertEqual(
                actual_flags,
                expected_flags,
                f"{rule_name} flags should be {sorted(expected_flags)}, "
                f"got {sorted(actual_flags)}",
            )
            # Memoization is expressed as membership of "memo" in the flag
            # set, so check it through ``flags`` rather than a separate
            # ``memo`` attribute.
            self.assertIs(
                "memo" in actual_flags,
                expect_memo,
                f"{rule_name} should have memo={expect_memo}",
            )

View file

@ -595,7 +595,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
self.print(f"{node.name}_raw(Parser *p)")
def _should_memoize(self, node: Rule) -> bool:
return node.memo and not node.left_recursive
return "memo" in node.flags and not node.left_recursive
def _handle_default_rule_body(self, node: Rule, rhs: Rhs, result_type: str) -> None:
memoize = self._should_memoize(node)

View file

@ -58,11 +58,11 @@ SIMPLE_STR = True
class Rule:
def __init__(self, name: str, type: str | None, rhs: Rhs, memo: object | None = None):
def __init__(self, name: str, type: str | None, rhs: Rhs, flags: frozenset[str] | None = None):
self.name = name
self.type = type
self.rhs = rhs
self.memo = bool(memo)
self.flags = flags or frozenset()
self.left_recursive = False
self.leader = False
@ -135,7 +135,6 @@ class StringLeaf(Leaf):
class Rhs:
def __init__(self, alts: list[Alt]):
self.alts = alts
self.memo: tuple[str | None, str] | None = None
def __str__(self) -> str:
return " | ".join(str(alt) for alt in self.alts)
@ -263,7 +262,6 @@ class Repeat:
def __init__(self, node: Plain):
self.node = node
self.memo: tuple[str | None, str] | None = None
def __iter__(self) -> Iterator[Plain]:
yield self.node

View file

@ -147,12 +147,12 @@ class GeneratedParser(Parser):
@memoize
def rule(self) -> Optional[Rule]:
# rule: rulename memoflag? ":" alts NEWLINE INDENT more_alts DEDENT | rulename memoflag? ":" NEWLINE INDENT more_alts DEDENT | rulename memoflag? ":" alts NEWLINE
# rule: rulename flags? ":" alts NEWLINE INDENT more_alts DEDENT | rulename flags? ":" NEWLINE INDENT more_alts DEDENT | rulename flags? ":" alts NEWLINE
mark = self._mark()
if (
(rulename := self.rulename())
and
(opt := self.memoflag(),)
(flags := self.flags(),)
and
(literal := self.expect(":"))
and
@ -166,12 +166,12 @@ class GeneratedParser(Parser):
and
(_dedent := self.expect('DEDENT'))
):
return Rule ( rulename [0] , rulename [1] , Rhs ( alts . alts + more_alts . alts ) , memo = opt )
return Rule ( rulename [0] , rulename [1] , Rhs ( alts . alts + more_alts . alts ) , flags = flags )
self._reset(mark)
if (
(rulename := self.rulename())
and
(opt := self.memoflag(),)
(flags := self.flags(),)
and
(literal := self.expect(":"))
and
@ -183,12 +183,12 @@ class GeneratedParser(Parser):
and
(_dedent := self.expect('DEDENT'))
):
return Rule ( rulename [0] , rulename [1] , more_alts , memo = opt )
return Rule ( rulename [0] , rulename [1] , more_alts , flags = flags )
self._reset(mark)
if (
(rulename := self.rulename())
and
(opt := self.memoflag(),)
(flags := self.flags(),)
and
(literal := self.expect(":"))
and
@ -196,7 +196,7 @@ class GeneratedParser(Parser):
and
(_newline := self.expect('NEWLINE'))
):
return Rule ( rulename [0] , rulename [1] , alts , memo = opt )
return Rule ( rulename [0] , rulename [1] , alts , flags = flags )
self._reset(mark)
return None
@ -219,17 +219,28 @@ class GeneratedParser(Parser):
return None
@memoize
def memoflag(self) -> Optional[str]:
# memoflag: '(' "memo" ')'
def flags(self) -> Optional[frozenset [str]]:
# flags: '(' ','.flag+ ')'
mark = self._mark()
if (
(literal := self.expect('('))
and
(literal_1 := self.expect("memo"))
(a := self._gather_2())
and
(literal_2 := self.expect(')'))
(literal_1 := self.expect(')'))
):
return "memo"
return frozenset ( a )
self._reset(mark)
return None
@memoize
def flag(self) -> Optional[str]:
    """Parse one flag (``flag: NAME``) and return its text.

    Returns the matched NAME token's ``string``; when no NAME is found the
    position is reset to where parsing started and ``None`` is returned.

    NOTE(review): this method sits in ``GeneratedParser`` and looks like
    generator output; if so, change the grammar and regenerate instead of
    hand-editing -- confirm.
    """
    start = self._mark()
    token = self.name()
    if not token:
        self._reset(start)
        return None
    return token.string
@ -661,8 +672,38 @@ class GeneratedParser(Parser):
self._reset(mark)
return None
@memoize
def _loop0_1(self) -> Optional[Any]:
    """Collect zero or more ``',' flag`` repetitions (``_loop0_1: ',' flag``).

    Returns the list of parsed flags, possibly empty. The position is left
    just after the last complete ``',' flag`` pair: a trailing comma that is
    not followed by a flag is rolled back.

    NOTE(review): the numbered helper name suggests generator output; if so,
    change the grammar and regenerate instead of hand-editing -- confirm.
    """
    collected = []
    checkpoint = self._mark()
    while True:
        if not self.expect(','):
            break
        item = self.flag()
        if not item:
            break
        collected.append(item)
        # Only advance the checkpoint once a full "',' flag" pair matched.
        checkpoint = self._mark()
    self._reset(checkpoint)
    return collected
@memoize
def _gather_2(self) -> Optional[Any]:
    """Parse a comma-separated flag sequence (``_gather_2: flag _loop0_1``).

    Returns a list holding the first flag followed by the flags gathered by
    ``_loop0_1``. If either part yields ``None`` the position is reset to
    where parsing started and ``None`` is returned.

    NOTE(review): the numbered helper name suggests generator output; if so,
    change the grammar and regenerate instead of hand-editing -- confirm.
    """
    start = self._mark()
    first = self.flag()
    if first is not None:
        rest = self._loop0_1()
        if rest is not None:
            return [first] + rest
    self._reset(start)
    return None
KEYWORDS = ()
SOFT_KEYWORDS = ('memo',)
SOFT_KEYWORDS = ()
if __name__ == '__main__':

View file

@ -50,19 +50,21 @@ rules[RuleList]:
| rule { [rule] }
rule[Rule]:
| rulename memoflag? ":" alts NEWLINE INDENT more_alts DEDENT {
Rule(rulename[0], rulename[1], Rhs(alts.alts + more_alts.alts), memo=opt) }
| rulename memoflag? ":" NEWLINE INDENT more_alts DEDENT {
Rule(rulename[0], rulename[1], more_alts, memo=opt) }
| rulename memoflag? ":" alts NEWLINE { Rule(rulename[0], rulename[1], alts, memo=opt) }
| rulename flags=flags? ":" alts NEWLINE INDENT more_alts DEDENT {
Rule(rulename[0], rulename[1], Rhs(alts.alts + more_alts.alts), flags=flags) }
| rulename flags=flags? ":" NEWLINE INDENT more_alts DEDENT {
Rule(rulename[0], rulename[1], more_alts, flags=flags) }
| rulename flags=flags? ":" alts NEWLINE { Rule(rulename[0], rulename[1], alts, flags=flags) }
rulename[RuleName]:
| NAME annotation { (name.string, annotation) }
| NAME { (name.string, None) }
# In the future this may return something more complicated
memoflag[str]:
| '(' "memo" ')' { "memo" }
flags[frozenset[str]]:
| '(' a=','.flag+ ')' { frozenset(a) }
flag[str]:
| NAME { name.string }
alts[Rhs]:
| alt "|" alts { Rhs([alt] + alts.alts)}