cpython/Tools/peg_generator/pegen/validator.py
Petr Viktorin 48f21b3631
gh-118235: Move RAISE_SYNTAX_ERROR actions to invalid rules and make sure they stay there (GH-119731)
The Full Grammar specification in the docs omits rule actions, so grammar rules that raise a syntax error looked like valid syntax.
This was solved in ef940de by hiding those rules in the custom syntax highlighter.

This moves all syntax-error alternatives to invalid rules, adds a validator that ensures that actions containing RAISE_SYNTAX_ERROR are in invalid rules, and reverts the syntax highlighter hack.
2024-05-30 09:27:32 +02:00

53 lines
1.8 KiB
Python

from typing import Optional
from pegen import grammar
from pegen.grammar import Alt, GrammarVisitor, Rhs, Rule
class ValidationError(Exception):
    """Raised when a grammar fails one of the validation checks."""
class GrammarValidator(GrammarVisitor):
    """Base class for grammar checks that are run one rule at a time.

    While a rule is being visited, ``self.rulename`` holds that rule's name
    so visitor methods can mention it in their error messages.
    """

    def __init__(self, grammar: grammar.Grammar) -> None:
        # No rule is being validated yet.
        self.rulename: Optional[str] = None
        self.grammar = grammar

    def validate_rule(self, rulename: str, node: Rule) -> None:
        """Visit *node* with ``self.rulename`` set to *rulename*.

        The name is reset to ``None`` once the visit returns.
        """
        self.rulename = rulename
        self.visit(node)
        self.rulename = None
class SubRuleValidator(GrammarValidator):
    """Reject rules in which an earlier alternative shadows a later one.

    Alternatives are tried in order, so a later alternative that begins with
    exactly the items of an earlier one can never be selected.
    """

    def visit_Rhs(self, node: Rhs) -> None:
        """Compare every alternative of *node* with each one that follows it."""
        for index, alt in enumerate(node.alts):
            for later_alt in node.alts[index + 1 :]:
                self.check_intersection(alt, later_alt)

    def check_intersection(self, first_alt: Alt, second_alt: Alt) -> None:
        """Raise if *second_alt* starts with all the items of *first_alt*.

        The comparison is made item by item rather than on the rendered
        strings: a plain string-prefix test would wrongly report ``a bc`` as
        shadowed by ``a b``, because the text of the last item happens to be
        the beginning of a longer, unrelated item.

        Raises:
            ValidationError: if *second_alt* can never be reached.
        """
        first_items = [str(item) for item in first_alt.items]
        second_items = [str(item) for item in second_alt.items]
        # An empty first alternative is a prefix of everything, so it still
        # shadows every later alternative.
        if second_items[: len(first_items)] == first_items:
            raise ValidationError(
                f"In {self.rulename} there is an alternative that will "
                f"never be visited:\n{second_alt}"
            )
class RaiseRuleValidator(GrammarValidator):
    """Ensure RAISE_SYNTAX_ERROR actions appear only in ``invalid_`` rules."""

    def visit_Alt(self, node: Alt) -> None:
        """Check the action of *node* against the name of the current rule.

        Raises:
            ValidationError: if the action contains RAISE_SYNTAX_ERROR and
                the rule being validated is not an ``invalid_`` rule.
        """
        # Test for the full "invalid_" prefix named in the error message
        # below, so that a rule such as "invalidate" is not exempted by
        # accident.
        if self.rulename and self.rulename.startswith('invalid_'):
            # raising is allowed in invalid rules
            return
        if node.action and 'RAISE_SYNTAX_ERROR' in node.action:
            raise ValidationError(
                f"In {self.rulename!r} there is an alternative that contains "
                f"RAISE_SYNTAX_ERROR; this is only allowed in invalid_ rules"
            )
def validate_grammar(the_grammar: grammar.Grammar) -> None:
    """Run every direct ``GrammarValidator`` subclass over *the_grammar*.

    Each validator class is instantiated once with the grammar and then
    applied to every entry of ``the_grammar.rules``. Whatever a validator
    raises propagates to the caller.
    """
    for checker_type in GrammarValidator.__subclasses__():
        run_check = checker_type(the_grammar).validate_rule
        for name, rule_node in the_grammar.rules.items():
            run_check(name, rule_node)