Update pegen to use the latest upstream developments (GH-27586)

Authored by Pablo Galindo Salgado on 2021-08-12 17:37:30 +01:00; committed by GitHub
parent 8e832fb2a2
commit 953d27261e
26 changed files with 1243 additions and 673 deletions


@@ -11,8 +11,8 @@ from test import support
from test.support import os_helper
from test.support.script_helper import assert_python_ok

-_py_cflags_nodist = sysconfig.get_config_var('PY_CFLAGS_NODIST')
-_pgo_flag = sysconfig.get_config_var('PGO_PROF_USE_FLAG')
+_py_cflags_nodist = sysconfig.get_config_var("PY_CFLAGS_NODIST")
+_pgo_flag = sysconfig.get_config_var("PGO_PROF_USE_FLAG")
if _pgo_flag and _py_cflags_nodist and _pgo_flag in _py_cflags_nodist:
    raise unittest.SkipTest("peg_generator test disabled under PGO build")
@@ -458,3 +458,28 @@ class TestCParser(unittest.TestCase):
        self.check_input_strings_for_grammar(valid_cases, invalid_cases)
        """
        self.run_test(grammar_source, test_source)

    def test_forced(self) -> None:
        grammar_source = """
        start: NAME &&':' | NAME
        """
        test_source = """
        self.assertEqual(parse.parse_string("number :", mode=0), None)
        with self.assertRaises(SyntaxError) as e:
            parse.parse_string("a", mode=0)
        self.assertIn("expected ':'", str(e.exception))
        """
        self.run_test(grammar_source, test_source)

    def test_forced_with_group(self) -> None:
        grammar_source = """
        start: NAME &&(':' | ';') | NAME
        """
        test_source = """
        self.assertEqual(parse.parse_string("number :", mode=0), None)
        self.assertEqual(parse.parse_string("number ;", mode=0), None)
        with self.assertRaises(SyntaxError) as e:
            parse.parse_string("a", mode=0)
        self.assertIn("expected (':' | ';')", e.exception.args[0])
        """
        self.run_test(grammar_source, test_source)
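The forced-match operator exercised by these two new tests (&&item) commits the parser to item: once the alternative reaches it, a miss is reported as a SyntaxError with an "expected ..." message instead of silently backtracking to the next alternative. A minimal sketch of the same behaviour with the pure-Python helpers from Tools/peg_generator (same grammar as the test; the helper calls mirror how the test_pegen tests later in this commit use them, and are an illustration rather than part of the change):

from pegen.testutil import make_parser, parse_string

# start: NAME &&':' | NAME  -- after NAME has matched, the ':' is mandatory.
grammar = """
start: NAME &&':' | NAME
"""
parser_class = make_parser(grammar)

parse_string("number :", parser_class)    # matches the first alternative
try:
    parse_string("number", parser_class)  # forced ':' is missing
except SyntaxError as exc:
    print(exc)                            # message includes "expected ':'"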


@@ -3,8 +3,8 @@ import unittest
from test import test_tools
from typing import Dict, Set

-test_tools.skip_if_missing('peg_generator')
-with test_tools.imports_under_tool('peg_generator'):
+test_tools.skip_if_missing("peg_generator")
+with test_tools.imports_under_tool("peg_generator"):
    from pegen.grammar_parser import GeneratedParser as GrammarParser
    from pegen.testutil import parse_string
    from pegen.first_sets import FirstSetCalculator
@@ -23,29 +23,38 @@ class TestFirstSets(unittest.TestCase):
        A: 'a' | '-'
        B: 'b' | '+'
        """
-        self.assertEqual(self.calculate_first_sets(grammar), {
-            "A": {"'a'", "'-'"},
-            "B": {"'+'", "'b'"},
-            "expr": {"'+'", "'a'", "'b'", "'-'"},
-            "start": {"'+'", "'a'", "'b'", "'-'"},
-        })
+        self.assertEqual(
+            self.calculate_first_sets(grammar),
+            {
+                "A": {"'a'", "'-'"},
+                "B": {"'+'", "'b'"},
+                "expr": {"'+'", "'a'", "'b'", "'-'"},
+                "start": {"'+'", "'a'", "'b'", "'-'"},
+            },
+        )

    def test_optionals(self) -> None:
        grammar = """
        start: expr NEWLINE
        expr: ['a'] ['b'] 'c'
        """
-        self.assertEqual(self.calculate_first_sets(grammar), {
-            "expr": {"'c'", "'a'", "'b'"},
-            "start": {"'c'", "'a'", "'b'"},
-        })
+        self.assertEqual(
+            self.calculate_first_sets(grammar),
+            {
+                "expr": {"'c'", "'a'", "'b'"},
+                "start": {"'c'", "'a'", "'b'"},
+            },
+        )

    def test_repeat_with_separator(self) -> None:
        grammar = """
        start: ','.thing+ NEWLINE
        thing: NUMBER
        """
-        self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}})
+        self.assertEqual(
+            self.calculate_first_sets(grammar),
+            {"thing": {"NUMBER"}, "start": {"NUMBER"}},
+        )

    def test_optional_operator(self) -> None:
        grammar = """
@@ -53,11 +62,14 @@ class TestFirstSets(unittest.TestCase):
        sum: (term)? 'b'
        term: NUMBER
        """
-        self.assertEqual(self.calculate_first_sets(grammar), {
-            "term": {"NUMBER"},
-            "sum": {"NUMBER", "'b'"},
-            "start": {"'b'", "NUMBER"},
-        })
+        self.assertEqual(
+            self.calculate_first_sets(grammar),
+            {
+                "term": {"NUMBER"},
+                "sum": {"NUMBER", "'b'"},
+                "start": {"'b'", "NUMBER"},
+            },
+        )

    def test_optional_literal(self) -> None:
        grammar = """
@@ -65,60 +77,83 @@ class TestFirstSets(unittest.TestCase):
        sum: '+' ? term
        term: NUMBER
        """
-        self.assertEqual(self.calculate_first_sets(grammar), {
-            "term": {"NUMBER"},
-            "sum": {"'+'", "NUMBER"},
-            "start": {"'+'", "NUMBER"},
-        })
+        self.assertEqual(
+            self.calculate_first_sets(grammar),
+            {
+                "term": {"NUMBER"},
+                "sum": {"'+'", "NUMBER"},
+                "start": {"'+'", "NUMBER"},
+            },
+        )

    def test_optional_after(self) -> None:
        grammar = """
        start: term NEWLINE
        term: NUMBER ['+']
        """
-        self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"NUMBER"}})
+        self.assertEqual(
+            self.calculate_first_sets(grammar),
+            {"term": {"NUMBER"}, "start": {"NUMBER"}},
+        )

    def test_optional_before(self) -> None:
        grammar = """
        start: term NEWLINE
        term: ['+'] NUMBER
        """
-        self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER", "'+'"}, "start": {"NUMBER", "'+'"}})
+        self.assertEqual(
+            self.calculate_first_sets(grammar),
+            {"term": {"NUMBER", "'+'"}, "start": {"NUMBER", "'+'"}},
+        )

    def test_repeat_0(self) -> None:
        grammar = """
        start: thing* "+" NEWLINE
        thing: NUMBER
        """
-        self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {'"+"', "NUMBER"}})
+        self.assertEqual(
+            self.calculate_first_sets(grammar),
+            {"thing": {"NUMBER"}, "start": {'"+"', "NUMBER"}},
+        )

    def test_repeat_0_with_group(self) -> None:
        grammar = """
        start: ('+' '-')* term NEWLINE
        term: NUMBER
        """
-        self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'", "NUMBER"}})
+        self.assertEqual(
+            self.calculate_first_sets(grammar),
+            {"term": {"NUMBER"}, "start": {"'+'", "NUMBER"}},
+        )

    def test_repeat_1(self) -> None:
        grammar = """
        start: thing+ '-' NEWLINE
        thing: NUMBER
        """
-        self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}})
+        self.assertEqual(
+            self.calculate_first_sets(grammar),
+            {"thing": {"NUMBER"}, "start": {"NUMBER"}},
+        )

    def test_repeat_1_with_group(self) -> None:
        grammar = """
        start: ('+' term)+ term NEWLINE
        term: NUMBER
        """
-        self.assertEqual(self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'"}})
+        self.assertEqual(
+            self.calculate_first_sets(grammar), {"term": {"NUMBER"}, "start": {"'+'"}}
+        )

    def test_gather(self) -> None:
        grammar = """
        start: ','.thing+ NEWLINE
        thing: NUMBER
        """
-        self.assertEqual(self.calculate_first_sets(grammar), {"thing": {"NUMBER"}, "start": {"NUMBER"}})
+        self.assertEqual(
+            self.calculate_first_sets(grammar),
+            {"thing": {"NUMBER"}, "start": {"NUMBER"}},
+        )

    def test_positive_lookahead(self) -> None:
        grammar = """
@@ -126,11 +161,14 @@ class TestFirstSets(unittest.TestCase):
        expr: &'a' opt
        opt: 'a' | 'b' | 'c'
        """
-        self.assertEqual(self.calculate_first_sets(grammar), {
-            "expr": {"'a'"},
-            "start": {"'a'"},
-            "opt": {"'b'", "'c'", "'a'"},
-        })
+        self.assertEqual(
+            self.calculate_first_sets(grammar),
+            {
+                "expr": {"'a'"},
+                "start": {"'a'"},
+                "opt": {"'b'", "'c'", "'a'"},
+            },
+        )

    def test_negative_lookahead(self) -> None:
        grammar = """
@@ -138,11 +176,14 @@ class TestFirstSets(unittest.TestCase):
        expr: !'a' opt
        opt: 'a' | 'b' | 'c'
        """
-        self.assertEqual(self.calculate_first_sets(grammar), {
-            "opt": {"'b'", "'a'", "'c'"},
-            "expr": {"'b'", "'c'"},
-            "start": {"'b'", "'c'"},
-        })
+        self.assertEqual(
+            self.calculate_first_sets(grammar),
+            {
+                "opt": {"'b'", "'a'", "'c'"},
+                "expr": {"'b'", "'c'"},
+                "start": {"'b'", "'c'"},
+            },
+        )

    def test_left_recursion(self) -> None:
        grammar = """
@@ -153,21 +194,27 @@ class TestFirstSets(unittest.TestCase):
        bar: 'bar'
        baz: 'baz'
        """
-        self.assertEqual(self.calculate_first_sets(grammar), {
-            "expr": {"NUMBER", "'-'"},
-            "term": {"NUMBER"},
-            "start": {"NUMBER", "'-'"},
-            "foo": {"'foo'"},
-            "bar": {"'bar'"},
-            "baz": {"'baz'"},
-        })
+        self.assertEqual(
+            self.calculate_first_sets(grammar),
+            {
+                "expr": {"NUMBER", "'-'"},
+                "term": {"NUMBER"},
+                "start": {"NUMBER", "'-'"},
+                "foo": {"'foo'"},
+                "bar": {"'bar'"},
+                "baz": {"'baz'"},
+            },
+        )

    def test_advance_left_recursion(self) -> None:
        grammar = """
        start: NUMBER | sign start
        sign: ['-']
        """
-        self.assertEqual(self.calculate_first_sets(grammar), {"sign": {"'-'", ""}, "start": {"'-'", "NUMBER"}})
+        self.assertEqual(
+            self.calculate_first_sets(grammar),
+            {"sign": {"'-'", ""}, "start": {"'-'", "NUMBER"}},
+        )

    def test_mutual_left_recursion(self) -> None:
        grammar = """
@@ -175,11 +222,14 @@ class TestFirstSets(unittest.TestCase):
        foo: bar 'A' | 'B'
        bar: foo 'C' | 'D'
        """
-        self.assertEqual(self.calculate_first_sets(grammar), {
-            "foo": {"'D'", "'B'"},
-            "bar": {"'D'"},
-            "start": {"'D'", "'B'"},
-        })
+        self.assertEqual(
+            self.calculate_first_sets(grammar),
+            {
+                "foo": {"'D'", "'B'"},
+                "bar": {"'D'"},
+                "start": {"'D'", "'B'"},
+            },
+        )

    def test_nasty_left_recursion(self) -> None:
        # TODO: Validate this
@@ -188,7 +238,10 @@ class TestFirstSets(unittest.TestCase):
        target: maybe '+' | NAME
        maybe: maybe '-' | target
        """
-        self.assertEqual(self.calculate_first_sets(grammar), {"maybe": set(), "target": {"NAME"}, "start": {"NAME"}})
+        self.assertEqual(
+            self.calculate_first_sets(grammar),
+            {"maybe": set(), "target": {"NAME"}, "start": {"NAME"}},
+        )

    def test_nullable_rule(self) -> None:
        grammar = """
@@ -196,17 +249,22 @@ class TestFirstSets(unittest.TestCase):
        sign: ['-']
        thing: NUMBER
        """
-        self.assertEqual(self.calculate_first_sets(grammar), {
-            "sign": {"", "'-'"},
-            "thing": {"NUMBER"},
-            "start": {"NUMBER", "'-'"},
-        })
+        self.assertEqual(
+            self.calculate_first_sets(grammar),
+            {
+                "sign": {"", "'-'"},
+                "thing": {"NUMBER"},
+                "start": {"NUMBER", "'-'"},
+            },
+        )

    def test_epsilon_production_in_start_rule(self) -> None:
        grammar = """
        start: ['-'] $
        """
-        self.assertEqual(self.calculate_first_sets(grammar), {"start": {"ENDMARKER", "'-'"}})
+        self.assertEqual(
+            self.calculate_first_sets(grammar), {"start": {"ENDMARKER", "'-'"}}
+        )

    def test_multiple_nullable_rules(self) -> None:
        grammar = """
@@ -216,10 +274,13 @@ class TestFirstSets(unittest.TestCase):
        other: '*'
        another: '/'
        """
-        self.assertEqual(self.calculate_first_sets(grammar), {
-            "sign": {"", "'-'"},
-            "thing": {"'+'", ""},
-            "start": {"'+'", "'-'", "'*'"},
-            "other": {"'*'"},
-            "another": {"'/'"},
-        })
+        self.assertEqual(
+            self.calculate_first_sets(grammar),
+            {
+                "sign": {"", "'-'"},
+                "thing": {"'+'", ""},
+                "start": {"'+'", "'-'", "'*'"},
+                "other": {"'*'"},
+                "another": {"'/'"},
+            },
+        )


@@ -1,8 +1,8 @@
import unittest
from test import test_tools

-test_tools.skip_if_missing('peg_generator')
-with test_tools.imports_under_tool('peg_generator'):
+test_tools.skip_if_missing("peg_generator")
+with test_tools.imports_under_tool("peg_generator"):
    from pegen.grammar_parser import GeneratedParser as GrammarParser
    from pegen.validator import SubRuleValidator, ValidationError
    from pegen.testutil import parse_string


@@ -1,3 +1,5 @@
+import ast
+import difflib
import io
import textwrap
import unittest
@@ -6,14 +8,10 @@ from test import test_tools
from typing import Dict, Any
from tokenize import TokenInfo, NAME, NEWLINE, NUMBER, OP

-test_tools.skip_if_missing('peg_generator')
-with test_tools.imports_under_tool('peg_generator'):
+test_tools.skip_if_missing("peg_generator")
+with test_tools.imports_under_tool("peg_generator"):
    from pegen.grammar_parser import GeneratedParser as GrammarParser
-    from pegen.testutil import (
-        parse_string,
-        generate_parser,
-        make_parser
-    )
+    from pegen.testutil import parse_string, generate_parser, make_parser
    from pegen.grammar import GrammarVisitor, GrammarError, Grammar
    from pegen.grammar_visualizer import ASTGrammarPrinter
    from pegen.parser import Parser
@@ -38,7 +36,9 @@ class TestPegen(unittest.TestCase):
        # Check the str() and repr() of a few rules; AST nodes don't support ==.
        self.assertEqual(str(rules["start"]), "start: sum NEWLINE")
        self.assertEqual(str(rules["sum"]), "sum: term '+' term | term")
-        expected_repr = "Rule('term', None, Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))"
+        expected_repr = (
+            "Rule('term', None, Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))"
+        )
        self.assertEqual(repr(rules["term"]), expected_repr)

    def test_long_rule_str(self) -> None:
@@ -71,7 +71,7 @@ class TestPegen(unittest.TestCase):
        self.assertEqual(str(rules["sum"]), "sum: term '+' term | term")
        self.assertEqual(
            repr(rules["term"]),
-            "Rule('term', 'int', Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))"
+            "Rule('term', 'int', Rhs([Alt([NamedItem(None, NameLeaf('NUMBER'))])]))",
        )

    def test_gather(self) -> None:
@ -81,24 +81,31 @@ class TestPegen(unittest.TestCase):
""" """
rules = parse_string(grammar, GrammarParser).rules rules = parse_string(grammar, GrammarParser).rules
self.assertEqual(str(rules["start"]), "start: ','.thing+ NEWLINE") self.assertEqual(str(rules["start"]), "start: ','.thing+ NEWLINE")
self.assertTrue(repr(rules["start"]).startswith( self.assertTrue(
"Rule('start', None, Rhs([Alt([NamedItem(None, Gather(StringLeaf(\"','\"), NameLeaf('thing'" repr(rules["start"]).startswith(
)) "Rule('start', None, Rhs([Alt([NamedItem(None, Gather(StringLeaf(\"','\"), NameLeaf('thing'"
)
)
self.assertEqual(str(rules["thing"]), "thing: NUMBER") self.assertEqual(str(rules["thing"]), "thing: NUMBER")
parser_class = make_parser(grammar) parser_class = make_parser(grammar)
node = parse_string("42\n", parser_class) node = parse_string("42\n", parser_class)
assert node == [
[[TokenInfo(NUMBER, string="42", start=(1, 0), end=(1, 2), line="42\n")]],
TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="42\n"),
]
node = parse_string("1, 2\n", parser_class) node = parse_string("1, 2\n", parser_class)
assert node == [ self.assertEqual(
node,
[ [
[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2\n")], [
[TokenInfo(NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2\n")], TokenInfo(
NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2\n"
),
TokenInfo(
NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2\n"
),
],
TokenInfo(
NEWLINE, string="\n", start=(1, 4), end=(1, 5), line="1, 2\n"
),
], ],
TokenInfo(NEWLINE, string="\n", start=(1, 4), end=(1, 5), line="1, 2\n"), )
]
    def test_expr_grammar(self) -> None:
        grammar = """
@@ -108,10 +115,13 @@ class TestPegen(unittest.TestCase):
        """
        parser_class = make_parser(grammar)
        node = parse_string("42\n", parser_class)
-        self.assertEqual(node, [
-            [[TokenInfo(NUMBER, string="42", start=(1, 0), end=(1, 2), line="42\n")]],
-            TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="42\n"),
-        ])
+        self.assertEqual(
+            node,
+            [
+                TokenInfo(NUMBER, string="42", start=(1, 0), end=(1, 2), line="42\n"),
+                TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="42\n"),
+            ],
+        )

    def test_optional_operator(self) -> None:
        grammar = """
@ -120,22 +130,39 @@ class TestPegen(unittest.TestCase):
term: NUMBER term: NUMBER
""" """
parser_class = make_parser(grammar) parser_class = make_parser(grammar)
node = parse_string("1+2\n", parser_class) node = parse_string("1 + 2\n", parser_class)
self.assertEqual(node, [ self.assertEqual(
node,
[ [
[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1+2\n")],
[ [
TokenInfo(OP, string="+", start=(1, 1), end=(1, 2), line="1+2\n"), TokenInfo(
[TokenInfo(NUMBER, string="2", start=(1, 2), end=(1, 3), line="1+2\n")], NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2\n"
),
[
TokenInfo(
OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2\n"
),
TokenInfo(
NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2\n"
),
],
], ],
TokenInfo(
NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 + 2\n"
),
], ],
TokenInfo(NEWLINE, string="\n", start=(1, 3), end=(1, 4), line="1+2\n"), )
])
node = parse_string("1\n", parser_class) node = parse_string("1\n", parser_class)
self.assertEqual(node, [ self.assertEqual(
[[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")], None], node,
TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"), [
]) [
TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
None,
],
TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
],
)
def test_optional_literal(self) -> None: def test_optional_literal(self) -> None:
grammar = """ grammar = """
@ -145,18 +172,29 @@ class TestPegen(unittest.TestCase):
""" """
parser_class = make_parser(grammar) parser_class = make_parser(grammar)
node = parse_string("1+\n", parser_class) node = parse_string("1+\n", parser_class)
self.assertEqual(node, [ self.assertEqual(
node,
[ [
[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1+\n")], [
TokenInfo(OP, string="+", start=(1, 1), end=(1, 2), line="1+\n"), TokenInfo(
NUMBER, string="1", start=(1, 0), end=(1, 1), line="1+\n"
),
TokenInfo(OP, string="+", start=(1, 1), end=(1, 2), line="1+\n"),
],
TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="1+\n"),
], ],
TokenInfo(NEWLINE, string="\n", start=(1, 2), end=(1, 3), line="1+\n"), )
])
node = parse_string("1\n", parser_class) node = parse_string("1\n", parser_class)
self.assertEqual(node, [ self.assertEqual(
[[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")], None], node,
TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"), [
]) [
TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
None,
],
TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
],
)
def test_alt_optional_operator(self) -> None: def test_alt_optional_operator(self) -> None:
grammar = """ grammar = """
@ -166,21 +204,38 @@ class TestPegen(unittest.TestCase):
""" """
parser_class = make_parser(grammar) parser_class = make_parser(grammar)
node = parse_string("1 + 2\n", parser_class) node = parse_string("1 + 2\n", parser_class)
self.assertEqual(node, [ self.assertEqual(
node,
[ [
[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2\n")],
[ [
TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2\n"), TokenInfo(
[TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2\n")], NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2\n"
),
[
TokenInfo(
OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2\n"
),
TokenInfo(
NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2\n"
),
],
], ],
TokenInfo(
NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 + 2\n"
),
], ],
TokenInfo(NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 + 2\n"), )
])
node = parse_string("1\n", parser_class) node = parse_string("1\n", parser_class)
self.assertEqual(node, [ self.assertEqual(
[[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")], None], node,
TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"), [
]) [
TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
None,
],
TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
],
)
def test_repeat_0_simple(self) -> None: def test_repeat_0_simple(self) -> None:
grammar = """ grammar = """
@ -189,20 +244,32 @@ class TestPegen(unittest.TestCase):
""" """
parser_class = make_parser(grammar) parser_class = make_parser(grammar)
node = parse_string("1 2 3\n", parser_class) node = parse_string("1 2 3\n", parser_class)
self.assertEqual(node, [ self.assertEqual(
[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n")], node,
[ [
[[TokenInfo(NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n")]], TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n"),
[[TokenInfo(NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n")]], [
TokenInfo(
NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n"
),
TokenInfo(
NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n"
),
],
TokenInfo(
NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"
),
], ],
TokenInfo(NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"), )
])
node = parse_string("1\n", parser_class) node = parse_string("1\n", parser_class)
self.assertEqual(node, [ self.assertEqual(
[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n")], node,
[], [
TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"), TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1\n"),
]) [],
TokenInfo(NEWLINE, string="\n", start=(1, 1), end=(1, 2), line="1\n"),
],
)
def test_repeat_0_complex(self) -> None: def test_repeat_0_complex(self) -> None:
grammar = """ grammar = """
@ -211,24 +278,43 @@ class TestPegen(unittest.TestCase):
""" """
parser_class = make_parser(grammar) parser_class = make_parser(grammar)
node = parse_string("1 + 2 + 3\n", parser_class) node = parse_string("1 + 2 + 3\n", parser_class)
self.assertEqual(node, [ self.assertEqual(
[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n")], node,
[ [
TokenInfo(
NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n"
),
[ [
[ [
TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"), TokenInfo(
[TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2 + 3\n")], OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"
] ),
], TokenInfo(
[ NUMBER,
string="2",
start=(1, 4),
end=(1, 5),
line="1 + 2 + 3\n",
),
],
[ [
TokenInfo(OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"), TokenInfo(
[TokenInfo(NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n")], OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"
] ),
TokenInfo(
NUMBER,
string="3",
start=(1, 8),
end=(1, 9),
line="1 + 2 + 3\n",
),
],
], ],
TokenInfo(
NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"
),
], ],
TokenInfo(NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"), )
])
def test_repeat_1_simple(self) -> None: def test_repeat_1_simple(self) -> None:
grammar = """ grammar = """
@ -237,14 +323,23 @@ class TestPegen(unittest.TestCase):
""" """
parser_class = make_parser(grammar) parser_class = make_parser(grammar)
node = parse_string("1 2 3\n", parser_class) node = parse_string("1 2 3\n", parser_class)
self.assertEqual(node, [ self.assertEqual(
[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n")], node,
[ [
[[TokenInfo(NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n")]], TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 2 3\n"),
[[TokenInfo(NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n")]], [
TokenInfo(
NUMBER, string="2", start=(1, 2), end=(1, 3), line="1 2 3\n"
),
TokenInfo(
NUMBER, string="3", start=(1, 4), end=(1, 5), line="1 2 3\n"
),
],
TokenInfo(
NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"
),
], ],
TokenInfo(NEWLINE, string="\n", start=(1, 5), end=(1, 6), line="1 2 3\n"), )
])
with self.assertRaises(SyntaxError): with self.assertRaises(SyntaxError):
parse_string("1\n", parser_class) parse_string("1\n", parser_class)
@ -255,24 +350,43 @@ class TestPegen(unittest.TestCase):
""" """
parser_class = make_parser(grammar) parser_class = make_parser(grammar)
node = parse_string("1 + 2 + 3\n", parser_class) node = parse_string("1 + 2 + 3\n", parser_class)
self.assertEqual(node, [ self.assertEqual(
[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n")], node,
[ [
TokenInfo(
NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n"
),
[ [
[ [
TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"), TokenInfo(
[TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2 + 3\n")], OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"
] ),
], TokenInfo(
[ NUMBER,
string="2",
start=(1, 4),
end=(1, 5),
line="1 + 2 + 3\n",
),
],
[ [
TokenInfo(OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"), TokenInfo(
[TokenInfo(NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n")], OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"
] ),
TokenInfo(
NUMBER,
string="3",
start=(1, 8),
end=(1, 9),
line="1 + 2 + 3\n",
),
],
], ],
TokenInfo(
NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"
),
], ],
TokenInfo(NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"), )
])
with self.assertRaises(SyntaxError): with self.assertRaises(SyntaxError):
parse_string("1\n", parser_class) parse_string("1\n", parser_class)
@ -283,14 +397,25 @@ class TestPegen(unittest.TestCase):
""" """
parser_class = make_parser(grammar) parser_class = make_parser(grammar)
node = parse_string("1, 2, 3\n", parser_class) node = parse_string("1, 2, 3\n", parser_class)
self.assertEqual(node, [ self.assertEqual(
node,
[ [
[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2, 3\n")], [
[TokenInfo(NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2, 3\n")], TokenInfo(
[TokenInfo(NUMBER, string="3", start=(1, 6), end=(1, 7), line="1, 2, 3\n")], NUMBER, string="1", start=(1, 0), end=(1, 1), line="1, 2, 3\n"
),
TokenInfo(
NUMBER, string="2", start=(1, 3), end=(1, 4), line="1, 2, 3\n"
),
TokenInfo(
NUMBER, string="3", start=(1, 6), end=(1, 7), line="1, 2, 3\n"
),
],
TokenInfo(
NEWLINE, string="\n", start=(1, 7), end=(1, 8), line="1, 2, 3\n"
),
], ],
TokenInfo(NEWLINE, string="\n", start=(1, 7), end=(1, 8), line="1, 2, 3\n"), )
])
def test_left_recursive(self) -> None: def test_left_recursive(self) -> None:
grammar_source = """ grammar_source = """
@ -311,18 +436,41 @@ class TestPegen(unittest.TestCase):
self.assertFalse(rules["bar"].left_recursive) self.assertFalse(rules["bar"].left_recursive)
self.assertFalse(rules["baz"].left_recursive) self.assertFalse(rules["baz"].left_recursive)
node = parse_string("1 + 2 + 3\n", parser_class) node = parse_string("1 + 2 + 3\n", parser_class)
self.assertEqual(node, [ self.assertEqual(
node,
[ [
[ [
[[TokenInfo(NUMBER, string="1", start=(1, 0), end=(1, 1), line="1 + 2 + 3\n")]], [
TokenInfo(OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"), TokenInfo(
[TokenInfo(NUMBER, string="2", start=(1, 4), end=(1, 5), line="1 + 2 + 3\n")], NUMBER,
string="1",
start=(1, 0),
end=(1, 1),
line="1 + 2 + 3\n",
),
TokenInfo(
OP, string="+", start=(1, 2), end=(1, 3), line="1 + 2 + 3\n"
),
TokenInfo(
NUMBER,
string="2",
start=(1, 4),
end=(1, 5),
line="1 + 2 + 3\n",
),
],
TokenInfo(
OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"
),
TokenInfo(
NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n"
),
], ],
TokenInfo(OP, string="+", start=(1, 6), end=(1, 7), line="1 + 2 + 3\n"), TokenInfo(
[TokenInfo(NUMBER, string="3", start=(1, 8), end=(1, 9), line="1 + 2 + 3\n")], NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"
),
], ],
TokenInfo(NEWLINE, string="\n", start=(1, 9), end=(1, 10), line="1 + 2 + 3\n"), )
])
def test_python_expr(self) -> None: def test_python_expr(self) -> None:
grammar = """ grammar = """
@ -392,31 +540,79 @@ class TestPegen(unittest.TestCase):
exec(out.getvalue(), ns) exec(out.getvalue(), ns)
parser_class: Type[Parser] = ns["GeneratedParser"] parser_class: Type[Parser] = ns["GeneratedParser"]
node = parse_string("D A C A E", parser_class) node = parse_string("D A C A E", parser_class)
self.assertEqual(node, [
self.assertEqual(
node,
[ [
[ [
[ [
[TokenInfo(type=NAME, string="D", start=(1, 0), end=(1, 1), line="D A C A E")], [
TokenInfo(type=NAME, string="A", start=(1, 2), end=(1, 3), line="D A C A E"), TokenInfo(
type=NAME,
string="D",
start=(1, 0),
end=(1, 1),
line="D A C A E",
),
TokenInfo(
type=NAME,
string="A",
start=(1, 2),
end=(1, 3),
line="D A C A E",
),
],
TokenInfo(
type=NAME,
string="C",
start=(1, 4),
end=(1, 5),
line="D A C A E",
),
], ],
TokenInfo(type=NAME, string="C", start=(1, 4), end=(1, 5), line="D A C A E"), TokenInfo(
type=NAME,
string="A",
start=(1, 6),
end=(1, 7),
line="D A C A E",
),
], ],
TokenInfo(type=NAME, string="A", start=(1, 6), end=(1, 7), line="D A C A E"), TokenInfo(
type=NAME, string="E", start=(1, 8), end=(1, 9), line="D A C A E"
),
], ],
TokenInfo(type=NAME, string="E", start=(1, 8), end=(1, 9), line="D A C A E"), )
])
node = parse_string("B C A E", parser_class) node = parse_string("B C A E", parser_class)
self.assertIsNotNone(node) self.assertEqual(
self.assertEqual(node, [ node,
[ [
[ [
[TokenInfo(type=NAME, string="B", start=(1, 0), end=(1, 1), line="B C A E")], [
TokenInfo(type=NAME, string="C", start=(1, 2), end=(1, 3), line="B C A E"), TokenInfo(
type=NAME,
string="B",
start=(1, 0),
end=(1, 1),
line="B C A E",
),
TokenInfo(
type=NAME,
string="C",
start=(1, 2),
end=(1, 3),
line="B C A E",
),
],
TokenInfo(
type=NAME, string="A", start=(1, 4), end=(1, 5), line="B C A E"
),
], ],
TokenInfo(type=NAME, string="A", start=(1, 4), end=(1, 5), line="B C A E"), TokenInfo(
type=NAME, string="E", start=(1, 6), end=(1, 7), line="B C A E"
),
], ],
TokenInfo(type=NAME, string="E", start=(1, 6), end=(1, 7), line="B C A E"), )
])
def test_nasty_mutually_left_recursive(self) -> None: def test_nasty_mutually_left_recursive(self) -> None:
# This grammar does not recognize 'x - + =', much to my chagrin. # This grammar does not recognize 'x - + =', much to my chagrin.
@ -454,43 +650,44 @@ class TestPegen(unittest.TestCase):
""" """
parser_class = make_parser(grammar) parser_class = make_parser(grammar)
node = parse_string("foo = 12 + 12 .", parser_class) node = parse_string("foo = 12 + 12 .", parser_class)
self.assertEqual(node, [ self.assertEqual(
node,
[ [
TokenInfo(
NAME, string="foo", start=(1, 0), end=(1, 3), line="foo = 12 + 12 ."
),
TokenInfo(
OP, string="=", start=(1, 4), end=(1, 5), line="foo = 12 + 12 ."
),
[ [
[TokenInfo(NAME, string="foo", start=(1, 0), end=(1, 3), line="foo = 12 + 12 .")], TokenInfo(
TokenInfo(OP, string="=", start=(1, 4), end=(1, 5), line="foo = 12 + 12 ."), NUMBER,
string="12",
start=(1, 6),
end=(1, 8),
line="foo = 12 + 12 .",
),
[ [
[ [
TokenInfo( TokenInfo(
NUMBER, string="12", start=(1, 6), end=(1, 8), line="foo = 12 + 12 ." OP,
) string="+",
], start=(1, 9),
[ end=(1, 10),
[ line="foo = 12 + 12 .",
[ ),
TokenInfo( TokenInfo(
OP, NUMBER,
string="+", string="12",
start=(1, 9), start=(1, 11),
end=(1, 10), end=(1, 13),
line="foo = 12 + 12 .", line="foo = 12 + 12 .",
), ),
[ ]
TokenInfo(
NUMBER,
string="12",
start=(1, 11),
end=(1, 13),
line="foo = 12 + 12 .",
)
],
]
]
],
], ],
] ],
] ],
]) )
    def test_named_lookahead_error(self) -> None:
        grammar = """
@@ -533,11 +730,14 @@ class TestPegen(unittest.TestCase):
        """
        parser_class = make_parser(grammar)
        node = parse_string("(1)", parser_class)
-        self.assertEqual(node, [
-            TokenInfo(OP, string="(", start=(1, 0), end=(1, 1), line="(1)"),
-            [TokenInfo(NUMBER, string="1", start=(1, 1), end=(1, 2), line="(1)")],
-            TokenInfo(OP, string=")", start=(1, 2), end=(1, 3), line="(1)"),
-        ])
+        self.assertEqual(
+            node,
+            [
+                TokenInfo(OP, string="(", start=(1, 0), end=(1, 1), line="(1)"),
+                TokenInfo(NUMBER, string="1", start=(1, 1), end=(1, 2), line="(1)"),
+                TokenInfo(OP, string=")", start=(1, 2), end=(1, 3), line="(1)"),
+            ],
+        )

    def test_dangling_reference(self) -> None:
        grammar = """
@@ -589,6 +789,124 @@ class TestPegen(unittest.TestCase):
        with self.assertRaisesRegex(GrammarError, "cannot start with underscore: '_x'"):
            parser_class = make_parser(grammar)

def test_soft_keyword(self) -> None:
grammar = """
start:
| "number" n=NUMBER { eval(n.string) }
| "string" n=STRING { n.string }
| SOFT_KEYWORD l=NAME n=(NUMBER | NAME | STRING) { f"{l.string} = {n.string}"}
"""
parser_class = make_parser(grammar)
self.assertEqual(parse_string("number 1", parser_class, verbose=True), 1)
self.assertEqual(parse_string("string 'b'", parser_class, verbose=True), "'b'")
self.assertEqual(
parse_string("number test 1", parser_class, verbose=True), "test = 1"
)
assert (
parse_string("string test 'b'", parser_class, verbose=True) == "test = 'b'"
)
with self.assertRaises(SyntaxError):
parse_string("test 1", parser_class, verbose=True)
def test_forced(self) -> None:
grammar = """
start: NAME &&':' | NAME
"""
parser_class = make_parser(grammar)
self.assertTrue(parse_string("number :", parser_class, verbose=True))
with self.assertRaises(SyntaxError) as e:
parse_string("a", parser_class, verbose=True)
self.assertIn("expected ':'", str(e.exception))
def test_forced_with_group(self) -> None:
grammar = """
start: NAME &&(':' | ';') | NAME
"""
parser_class = make_parser(grammar)
self.assertTrue(parse_string("number :", parser_class, verbose=True))
self.assertTrue(parse_string("number ;", parser_class, verbose=True))
with self.assertRaises(SyntaxError) as e:
parse_string("a", parser_class, verbose=True)
self.assertIn("expected (':' | ';')", e.exception.args[0])
def test_unreachable_explicit(self) -> None:
source = """
start: NAME { UNREACHABLE }
"""
grammar = parse_string(source, GrammarParser)
out = io.StringIO()
genr = PythonParserGenerator(
grammar, out, unreachable_formatting="This is a test"
)
genr.generate("<string>")
self.assertIn("This is a test", out.getvalue())
def test_unreachable_implicit1(self) -> None:
source = """
start: NAME | invalid_input
invalid_input: NUMBER { None }
"""
grammar = parse_string(source, GrammarParser)
out = io.StringIO()
genr = PythonParserGenerator(
grammar, out, unreachable_formatting="This is a test"
)
genr.generate("<string>")
self.assertIn("This is a test", out.getvalue())
def test_unreachable_implicit2(self) -> None:
source = """
start: NAME | '(' invalid_input ')'
invalid_input: NUMBER { None }
"""
grammar = parse_string(source, GrammarParser)
out = io.StringIO()
genr = PythonParserGenerator(
grammar, out, unreachable_formatting="This is a test"
)
genr.generate("<string>")
self.assertIn("This is a test", out.getvalue())
def test_unreachable_implicit3(self) -> None:
source = """
start: NAME | invalid_input { None }
invalid_input: NUMBER
"""
grammar = parse_string(source, GrammarParser)
out = io.StringIO()
genr = PythonParserGenerator(
grammar, out, unreachable_formatting="This is a test"
)
genr.generate("<string>")
self.assertNotIn("This is a test", out.getvalue())
def test_locations_in_alt_action_and_group(self) -> None:
grammar = """
start: t=term NEWLINE? $ { ast.Expression(t, LOCATIONS) }
term:
| l=term '*' r=factor { ast.BinOp(l, ast.Mult(), r, LOCATIONS) }
| l=term '/' r=factor { ast.BinOp(l, ast.Div(), r, LOCATIONS) }
| factor
factor:
| (
n=NAME { ast.Name(id=n.string, ctx=ast.Load(), LOCATIONS) } |
n=NUMBER { ast.Constant(value=ast.literal_eval(n.string), LOCATIONS) }
)
"""
parser_class = make_parser(grammar)
source = "2*3\n"
o = ast.dump(parse_string(source, parser_class).body, include_attributes=True)
p = ast.dump(ast.parse(source).body[0].value, include_attributes=True).replace(
" kind=None,", ""
)
diff = "\n".join(
difflib.unified_diff(
o.split("\n"), p.split("\n"), "cpython", "python-pegen"
)
)
self.assertFalse(diff)
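Several of the new tests above (test_soft_keyword, test_forced, test_forced_with_group, the unreachable and LOCATIONS tests) cover grammar features this sync brings to the Python generator: double-quoted literals act as soft keywords, SOFT_KEYWORD matches any declared soft keyword, and &&item forces a match. A small sketch of the soft-keyword behaviour, modelled on test_soft_keyword above (a hedged illustration using the same Tools/peg_generator helpers, not part of the commit itself):

from pegen.testutil import make_parser, parse_string

# "number" (double quotes) is a soft keyword: it only acts as a keyword where
# the grammar asks for it; elsewhere the same word is still an ordinary NAME.
grammar = """
start:
    | "number" n=NUMBER { int(n.string) }
    | SOFT_KEYWORD l=NAME n=NUMBER { f"{l.string} = {n.string}" }
"""
parser_class = make_parser(grammar)

print(parse_string("number 1", parser_class))       # 1, via the soft-keyword branch
print(parse_string("number test 1", parser_class))  # 'test = 1', via SOFT_KEYWORD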
class TestGrammarVisitor:
    class Visitor(GrammarVisitor):


@@ -1176,7 +1176,7 @@ statements_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ statements[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "statement+"));
-_res = ( asdl_stmt_seq * ) _PyPegen_seq_flatten ( p , a );
+_res = ( asdl_stmt_seq* ) _PyPegen_seq_flatten ( p , a );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -1217,7 +1217,7 @@ statement_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ statement[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "compound_stmt"));
-_res = ( asdl_stmt_seq * ) _PyPegen_singleton_seq ( p , a );
+_res = ( asdl_stmt_seq* ) _PyPegen_singleton_seq ( p , a );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -1294,7 +1294,7 @@ statement_newline_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ statement_newline[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "compound_stmt NEWLINE"));
-_res = ( asdl_stmt_seq * ) _PyPegen_singleton_seq ( p , a );
+_res = ( asdl_stmt_seq* ) _PyPegen_singleton_seq ( p , a );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -1346,7 +1346,7 @@ statement_newline_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
-_res = ( asdl_stmt_seq * ) _PyPegen_singleton_seq ( p , CHECK ( stmt_ty , _PyAST_Pass ( EXTRA ) ) );
+_res = ( asdl_stmt_seq* ) _PyPegen_singleton_seq ( p , CHECK ( stmt_ty , _PyAST_Pass ( EXTRA ) ) );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -1416,7 +1416,7 @@ simple_stmts_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ simple_stmts[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "simple_stmt !';' NEWLINE"));
-_res = ( asdl_stmt_seq * ) _PyPegen_singleton_seq ( p , a );
+_res = ( asdl_stmt_seq* ) _PyPegen_singleton_seq ( p , a );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -2403,7 +2403,7 @@ augassign_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ augassign[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'@='"));
-_res = CHECK_VERSION ( AugOperator * , 5 , "The '@' operator is" , _PyPegen_augoperator ( p , MatMult ) );
+_res = CHECK_VERSION ( AugOperator* , 5 , "The '@' operator is" , _PyPegen_augoperator ( p , MatMult ) );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -2841,7 +2841,7 @@ global_stmt_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
-_res = _PyAST_Global ( CHECK ( asdl_identifier_seq * , _PyPegen_map_names_to_ids ( p , a ) ) , EXTRA );
+_res = _PyAST_Global ( CHECK ( asdl_identifier_seq* , _PyPegen_map_names_to_ids ( p , a ) ) , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -2903,7 +2903,7 @@ nonlocal_stmt_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
-_res = _PyAST_Nonlocal ( CHECK ( asdl_identifier_seq * , _PyPegen_map_names_to_ids ( p , a ) ) , EXTRA );
+_res = _PyAST_Nonlocal ( CHECK ( asdl_identifier_seq* , _PyPegen_map_names_to_ids ( p , a ) ) , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -3460,7 +3460,7 @@ import_from_targets_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
-_res = ( asdl_alias_seq * ) _PyPegen_singleton_seq ( p , CHECK ( alias_ty , _PyPegen_alias_for_star ( p , EXTRA ) ) );
+_res = ( asdl_alias_seq* ) _PyPegen_singleton_seq ( p , CHECK ( alias_ty , _PyPegen_alias_for_star ( p , EXTRA ) ) );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -4649,7 +4649,7 @@ slash_with_default_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ slash_with_default[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "param_no_default* param_with_default+ '/' ','"));
-_res = _PyPegen_slash_with_default ( p , ( asdl_arg_seq * ) a , b );
+_res = _PyPegen_slash_with_default ( p , ( asdl_arg_seq* ) a , b );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -4681,7 +4681,7 @@ slash_with_default_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ slash_with_default[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "param_no_default* param_with_default+ '/' &')'"));
-_res = _PyPegen_slash_with_default ( p , ( asdl_arg_seq * ) a , b );
+_res = _PyPegen_slash_with_default ( p , ( asdl_arg_seq* ) a , b );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -5340,7 +5340,7 @@ if_stmt_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
-_res = _PyAST_If ( a , b , CHECK ( asdl_stmt_seq * , _PyPegen_singleton_seq ( p , c ) ) , EXTRA );
+_res = _PyAST_If ( a , b , CHECK ( asdl_stmt_seq* , _PyPegen_singleton_seq ( p , c ) ) , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -5478,7 +5478,7 @@ elif_stmt_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
-_res = _PyAST_If ( a , b , CHECK ( asdl_stmt_seq * , _PyPegen_singleton_seq ( p , c ) ) , EXTRA );
+_res = _PyAST_If ( a , b , CHECK ( asdl_stmt_seq* , _PyPegen_singleton_seq ( p , c ) ) , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -6756,7 +6756,7 @@ subject_expr_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
-_res = _PyAST_Tuple ( CHECK ( asdl_expr_seq * , _PyPegen_seq_insert_in_front ( p , value , values ) ) , Load , EXTRA );
+_res = _PyAST_Tuple ( CHECK ( asdl_expr_seq* , _PyPegen_seq_insert_in_front ( p , value , values ) ) , Load , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -9049,7 +9049,7 @@ mapping_pattern_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
-_res = _PyAST_MatchMapping ( CHECK ( asdl_expr_seq * , _PyPegen_get_pattern_keys ( p , items ) ) , CHECK ( asdl_pattern_seq * , _PyPegen_get_patterns ( p , items ) ) , rest -> v . Name . id , EXTRA );
+_res = _PyAST_MatchMapping ( CHECK ( asdl_expr_seq* , _PyPegen_get_pattern_keys ( p , items ) ) , CHECK ( asdl_pattern_seq* , _PyPegen_get_patterns ( p , items ) ) , rest -> v . Name . id , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -9092,7 +9092,7 @@ mapping_pattern_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
-_res = _PyAST_MatchMapping ( CHECK ( asdl_expr_seq * , _PyPegen_get_pattern_keys ( p , items ) ) , CHECK ( asdl_pattern_seq * , _PyPegen_get_patterns ( p , items ) ) , NULL , EXTRA );
+_res = _PyAST_MatchMapping ( CHECK ( asdl_expr_seq* , _PyPegen_get_pattern_keys ( p , items ) ) , CHECK ( asdl_pattern_seq* , _PyPegen_get_patterns ( p , items ) ) , NULL , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -9381,7 +9381,7 @@ class_pattern_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
-_res = _PyAST_MatchClass ( cls , NULL , CHECK ( asdl_identifier_seq * , _PyPegen_map_names_to_ids ( p , CHECK ( asdl_expr_seq * , _PyPegen_get_pattern_keys ( p , keywords ) ) ) ) , CHECK ( asdl_pattern_seq * , _PyPegen_get_patterns ( p , keywords ) ) , EXTRA );
+_res = _PyAST_MatchClass ( cls , NULL , CHECK ( asdl_identifier_seq* , _PyPegen_map_names_to_ids ( p , CHECK ( asdl_expr_seq* , _PyPegen_get_pattern_keys ( p , keywords ) ) ) ) , CHECK ( asdl_pattern_seq* , _PyPegen_get_patterns ( p , keywords ) ) , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -9433,7 +9433,7 @@ class_pattern_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
-_res = _PyAST_MatchClass ( cls , patterns , CHECK ( asdl_identifier_seq * , _PyPegen_map_names_to_ids ( p , CHECK ( asdl_expr_seq * , _PyPegen_get_pattern_keys ( p , keywords ) ) ) ) , CHECK ( asdl_pattern_seq * , _PyPegen_get_patterns ( p , keywords ) ) , EXTRA );
+_res = _PyAST_MatchClass ( cls , patterns , CHECK ( asdl_identifier_seq* , _PyPegen_map_names_to_ids ( p , CHECK ( asdl_expr_seq* , _PyPegen_get_pattern_keys ( p , keywords ) ) ) ) , CHECK ( asdl_pattern_seq* , _PyPegen_get_patterns ( p , keywords ) ) , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -9642,7 +9642,7 @@ expressions_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
-_res = _PyAST_Tuple ( CHECK ( asdl_expr_seq * , _PyPegen_seq_insert_in_front ( p , a , b ) ) , Load , EXTRA );
+_res = _PyAST_Tuple ( CHECK ( asdl_expr_seq* , _PyPegen_seq_insert_in_front ( p , a , b ) ) , Load , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -9678,7 +9678,7 @@ expressions_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
-_res = _PyAST_Tuple ( CHECK ( asdl_expr_seq * , _PyPegen_singleton_seq ( p , a ) ) , Load , EXTRA );
+_res = _PyAST_Tuple ( CHECK ( asdl_expr_seq* , _PyPegen_singleton_seq ( p , a ) ) , Load , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -10004,7 +10004,7 @@ star_expressions_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
-_res = _PyAST_Tuple ( CHECK ( asdl_expr_seq * , _PyPegen_seq_insert_in_front ( p , a , b ) ) , Load , EXTRA );
+_res = _PyAST_Tuple ( CHECK ( asdl_expr_seq* , _PyPegen_seq_insert_in_front ( p , a , b ) ) , Load , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -10040,7 +10040,7 @@ star_expressions_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
-_res = _PyAST_Tuple ( CHECK ( asdl_expr_seq * , _PyPegen_singleton_seq ( p , a ) ) , Load , EXTRA );
+_res = _PyAST_Tuple ( CHECK ( asdl_expr_seq* , _PyPegen_singleton_seq ( p , a ) ) , Load , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -10485,7 +10485,7 @@ disjunction_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
-_res = _PyAST_BoolOp ( Or , CHECK ( asdl_expr_seq * , _PyPegen_seq_insert_in_front ( p , a , b ) ) , EXTRA );
+_res = _PyAST_BoolOp ( Or , CHECK ( asdl_expr_seq* , _PyPegen_seq_insert_in_front ( p , a , b ) ) , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -10571,7 +10571,7 @@ conjunction_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
-_res = _PyAST_BoolOp ( And , CHECK ( asdl_expr_seq * , _PyPegen_seq_insert_in_front ( p , a , b ) ) , EXTRA );
+_res = _PyAST_BoolOp ( And , CHECK ( asdl_expr_seq* , _PyPegen_seq_insert_in_front ( p , a , b ) ) , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -10739,7 +10739,7 @@ comparison_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
-_res = _PyAST_Compare ( a , CHECK ( asdl_int_seq * , _PyPegen_get_cmpops ( p , b ) ) , CHECK ( asdl_expr_seq * , _PyPegen_get_exprs ( p , b ) ) , EXTRA );
+_res = _PyAST_Compare ( a , CHECK ( asdl_int_seq* , _PyPegen_get_cmpops ( p , b ) ) , CHECK ( asdl_expr_seq* , _PyPegen_get_exprs ( p , b ) ) , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -12837,7 +12837,7 @@ primary_raw(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
-_res = _PyAST_Call ( a , CHECK ( asdl_expr_seq * , ( asdl_expr_seq * ) _PyPegen_singleton_seq ( p , b ) ) , NULL , EXTRA );
+_res = _PyAST_Call ( a , CHECK ( asdl_expr_seq* , ( asdl_expr_seq* ) _PyPegen_singleton_seq ( p , b ) ) , NULL , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -13896,7 +13896,7 @@ lambda_slash_with_default_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ lambda_slash_with_default[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "lambda_param_no_default* lambda_param_with_default+ '/' ','"));
-_res = _PyPegen_slash_with_default ( p , ( asdl_arg_seq * ) a , b );
+_res = _PyPegen_slash_with_default ( p , ( asdl_arg_seq* ) a , b );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -13928,7 +13928,7 @@ lambda_slash_with_default_rule(Parser *p)
)
{
D(fprintf(stderr, "%*c+ lambda_slash_with_default[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "lambda_param_no_default* lambda_param_with_default+ '/' &':'"));
-_res = _PyPegen_slash_with_default ( p , ( asdl_arg_seq * ) a , b );
+_res = _PyPegen_slash_with_default ( p , ( asdl_arg_seq* ) a , b );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -14689,7 +14689,7 @@ dict_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
-_res = _PyAST_Dict ( CHECK ( asdl_expr_seq * , _PyPegen_get_keys ( p , a ) ) , CHECK ( asdl_expr_seq * , _PyPegen_get_values ( p , a ) ) , EXTRA );
+_res = _PyAST_Dict ( CHECK ( asdl_expr_seq* , _PyPegen_get_keys ( p , a ) ) , CHECK ( asdl_expr_seq* , _PyPegen_get_values ( p , a ) ) , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -15556,7 +15556,7 @@ args_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro
-_res = _PyAST_Call ( _PyPegen_dummy_name ( p ) , CHECK_NULL_ALLOWED ( asdl_expr_seq * , _PyPegen_seq_extract_starred_exprs ( p , a ) ) , CHECK_NULL_ALLOWED ( asdl_keyword_seq * , _PyPegen_seq_delete_starred_exprs ( p , a ) ) , EXTRA );
+_res = _PyAST_Call ( _PyPegen_dummy_name ( p ) , CHECK_NULL_ALLOWED ( asdl_expr_seq* , _PyPegen_seq_extract_starred_exprs ( p , a ) ) , CHECK_NULL_ALLOWED ( asdl_keyword_seq* , _PyPegen_seq_delete_starred_exprs ( p , a ) ) , EXTRA );
if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1;
D(p->level--);
@@ -16026,7 +16026,7 @@ star_targets_rule(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro UNUSED(_end_col_offset); // Only used by EXTRA macro
_res = _PyAST_Tuple ( CHECK ( asdl_expr_seq * , _PyPegen_seq_insert_in_front ( p , a , b ) ) , Store , EXTRA ); _res = _PyAST_Tuple ( CHECK ( asdl_expr_seq* , _PyPegen_seq_insert_in_front ( p , a , b ) ) , Store , EXTRA );
if (_res == NULL && PyErr_Occurred()) { if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1; p->error_indicator = 1;
D(p->level--); D(p->level--);
@ -16119,7 +16119,7 @@ star_targets_tuple_seq_rule(Parser *p)
) )
{ {
D(fprintf(stderr, "%*c+ star_targets_tuple_seq[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "star_target ((',' star_target))+ ','?")); D(fprintf(stderr, "%*c+ star_targets_tuple_seq[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "star_target ((',' star_target))+ ','?"));
_res = ( asdl_expr_seq * ) _PyPegen_seq_insert_in_front ( p , a , b ); _res = ( asdl_expr_seq* ) _PyPegen_seq_insert_in_front ( p , a , b );
if (_res == NULL && PyErr_Occurred()) { if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1; p->error_indicator = 1;
D(p->level--); D(p->level--);
@ -16146,7 +16146,7 @@ star_targets_tuple_seq_rule(Parser *p)
) )
{ {
D(fprintf(stderr, "%*c+ star_targets_tuple_seq[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "star_target ','")); D(fprintf(stderr, "%*c+ star_targets_tuple_seq[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "star_target ','"));
_res = ( asdl_expr_seq * ) _PyPegen_singleton_seq ( p , a ); _res = ( asdl_expr_seq* ) _PyPegen_singleton_seq ( p , a );
if (_res == NULL && PyErr_Occurred()) { if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1; p->error_indicator = 1;
D(p->level--); D(p->level--);
@ -16923,7 +16923,7 @@ t_primary_raw(Parser *p)
UNUSED(_end_lineno); // Only used by EXTRA macro UNUSED(_end_lineno); // Only used by EXTRA macro
int _end_col_offset = _token->end_col_offset; int _end_col_offset = _token->end_col_offset;
UNUSED(_end_col_offset); // Only used by EXTRA macro UNUSED(_end_col_offset); // Only used by EXTRA macro
_res = _PyAST_Call ( a , CHECK ( asdl_expr_seq * , ( asdl_expr_seq * ) _PyPegen_singleton_seq ( p , b ) ) , NULL , EXTRA ); _res = _PyAST_Call ( a , CHECK ( asdl_expr_seq* , ( asdl_expr_seq* ) _PyPegen_singleton_seq ( p , b ) ) , NULL , EXTRA );
if (_res == NULL && PyErr_Occurred()) { if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1; p->error_indicator = 1;
D(p->level--); D(p->level--);
@ -17474,7 +17474,7 @@ type_expressions_rule(Parser *p)
) )
{ {
D(fprintf(stderr, "%*c+ type_expressions[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','.expression+ ',' '*' expression ',' '**' expression")); D(fprintf(stderr, "%*c+ type_expressions[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','.expression+ ',' '*' expression ',' '**' expression"));
_res = ( asdl_expr_seq * ) _PyPegen_seq_append_to_end ( p , CHECK ( asdl_seq * , _PyPegen_seq_append_to_end ( p , a , b ) ) , c ); _res = ( asdl_expr_seq* ) _PyPegen_seq_append_to_end ( p , CHECK ( asdl_seq* , _PyPegen_seq_append_to_end ( p , a , b ) ) , c );
if (_res == NULL && PyErr_Occurred()) { if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1; p->error_indicator = 1;
D(p->level--); D(p->level--);
@ -17507,7 +17507,7 @@ type_expressions_rule(Parser *p)
) )
{ {
D(fprintf(stderr, "%*c+ type_expressions[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','.expression+ ',' '*' expression")); D(fprintf(stderr, "%*c+ type_expressions[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','.expression+ ',' '*' expression"));
_res = ( asdl_expr_seq * ) _PyPegen_seq_append_to_end ( p , a , b ); _res = ( asdl_expr_seq* ) _PyPegen_seq_append_to_end ( p , a , b );
if (_res == NULL && PyErr_Occurred()) { if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1; p->error_indicator = 1;
D(p->level--); D(p->level--);
@ -17540,7 +17540,7 @@ type_expressions_rule(Parser *p)
) )
{ {
D(fprintf(stderr, "%*c+ type_expressions[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','.expression+ ',' '**' expression")); D(fprintf(stderr, "%*c+ type_expressions[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "','.expression+ ',' '**' expression"));
_res = ( asdl_expr_seq * ) _PyPegen_seq_append_to_end ( p , a , b ); _res = ( asdl_expr_seq* ) _PyPegen_seq_append_to_end ( p , a , b );
if (_res == NULL && PyErr_Occurred()) { if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1; p->error_indicator = 1;
D(p->level--); D(p->level--);
@ -17576,7 +17576,7 @@ type_expressions_rule(Parser *p)
) )
{ {
D(fprintf(stderr, "%*c+ type_expressions[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'*' expression ',' '**' expression")); D(fprintf(stderr, "%*c+ type_expressions[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'*' expression ',' '**' expression"));
_res = ( asdl_expr_seq * ) _PyPegen_seq_append_to_end ( p , CHECK ( asdl_seq * , _PyPegen_singleton_seq ( p , a ) ) , b ); _res = ( asdl_expr_seq* ) _PyPegen_seq_append_to_end ( p , CHECK ( asdl_seq* , _PyPegen_singleton_seq ( p , a ) ) , b );
if (_res == NULL && PyErr_Occurred()) { if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1; p->error_indicator = 1;
D(p->level--); D(p->level--);
@ -17603,7 +17603,7 @@ type_expressions_rule(Parser *p)
) )
{ {
D(fprintf(stderr, "%*c+ type_expressions[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'*' expression")); D(fprintf(stderr, "%*c+ type_expressions[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'*' expression"));
_res = ( asdl_expr_seq * ) _PyPegen_singleton_seq ( p , a ); _res = ( asdl_expr_seq* ) _PyPegen_singleton_seq ( p , a );
if (_res == NULL && PyErr_Occurred()) { if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1; p->error_indicator = 1;
D(p->level--); D(p->level--);
@ -17630,7 +17630,7 @@ type_expressions_rule(Parser *p)
) )
{ {
D(fprintf(stderr, "%*c+ type_expressions[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'**' expression")); D(fprintf(stderr, "%*c+ type_expressions[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "'**' expression"));
_res = ( asdl_expr_seq * ) _PyPegen_singleton_seq ( p , a ); _res = ( asdl_expr_seq* ) _PyPegen_singleton_seq ( p , a );
if (_res == NULL && PyErr_Occurred()) { if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1; p->error_indicator = 1;
D(p->level--); D(p->level--);
@ -20149,7 +20149,7 @@ invalid_match_stmt_rule(Parser *p)
) )
{ {
D(fprintf(stderr, "%*c+ invalid_match_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "\"match\" subject_expr !':'")); D(fprintf(stderr, "%*c+ invalid_match_stmt[%d-%d]: %s succeeded!\n", p->level, ' ', _mark, p->mark, "\"match\" subject_expr !':'"));
_res = CHECK_VERSION ( void * , 10 , "Pattern matching is" , RAISE_SYNTAX_ERROR ( "expected ':'" ) ); _res = CHECK_VERSION ( void* , 10 , "Pattern matching is" , RAISE_SYNTAX_ERROR ( "expected ':'" ) );
if (_res == NULL && PyErr_Occurred()) { if (_res == NULL && PyErr_Occurred()) {
p->error_indicator = 1; p->error_indicator = 1;
D(p->level--); D(p->level--);
View file
@ -897,6 +897,19 @@ _PyPegen_expect_token(Parser *p, int type)
return t; return t;
} }
void*
_PyPegen_expect_forced_result(Parser *p, void* result, const char* expected) {
if (p->error_indicator == 1) {
return NULL;
}
if (result == NULL) {
RAISE_SYNTAX_ERROR("expected (%s)", expected);
return NULL;
}
return result;
}
Token * Token *
_PyPegen_expect_forced_token(Parser *p, int type, const char* expected) { _PyPegen_expect_forced_token(Parser *p, int type, const char* expected) {
View file
@ -130,6 +130,7 @@ int _PyPegen_lookahead_with_string(int , expr_ty (func)(Parser *, const char*),
int _PyPegen_lookahead(int, void *(func)(Parser *), Parser *); int _PyPegen_lookahead(int, void *(func)(Parser *), Parser *);
Token *_PyPegen_expect_token(Parser *p, int type); Token *_PyPegen_expect_token(Parser *p, int type);
void* _PyPegen_expect_forced_result(Parser *p, void* result, const char* expected);
Token *_PyPegen_expect_forced_token(Parser *p, int type, const char* expected); Token *_PyPegen_expect_forced_token(Parser *p, int type, const char* expected);
expr_ty _PyPegen_expect_soft_keyword(Parser *p, const char *keyword); expr_ty _PyPegen_expect_soft_keyword(Parser *p, const char *keyword);
expr_ty _PyPegen_soft_keyword_token(Parser *p); expr_ty _PyPegen_soft_keyword_token(Parser *p);
View file
@ -1,5 +1,5 @@
[mypy] [mypy]
files = pegen, scripts files = pegen
follow_imports = error follow_imports = error
no_implicit_optional = True no_implicit_optional = True
View file
@ -100,7 +100,9 @@ c_parser.add_argument(
"--optimized", action="store_true", help="Compile the extension in optimized mode" "--optimized", action="store_true", help="Compile the extension in optimized mode"
) )
c_parser.add_argument( c_parser.add_argument(
"--skip-actions", action="store_true", help="Suppress code emission for rule actions", "--skip-actions",
action="store_true",
help="Suppress code emission for rule actions",
) )
python_parser = subparsers.add_parser("python", help="Generate Python code") python_parser = subparsers.add_parser("python", help="Generate Python code")
@ -114,7 +116,9 @@ python_parser.add_argument(
help="Where to write the generated parser", help="Where to write the generated parser",
) )
python_parser.add_argument( python_parser.add_argument(
"--skip-actions", action="store_true", help="Suppress code emission for rule actions", "--skip-actions",
action="store_true",
help="Suppress code emission for rule actions",
) )
View file
@ -6,9 +6,17 @@ always fail. We rely on string comparison of the base classes instead.
TODO: Remove the above-described hack. TODO: Remove the above-described hack.
""" """
from typing import Any, Optional, Tuple
def ast_dump(node, annotate_fields=True, include_attributes=False, *, indent=None):
def _format(node, level=0): def ast_dump(
node: Any,
annotate_fields: bool = True,
include_attributes: bool = False,
*,
indent: Optional[str] = None,
) -> str:
def _format(node: Any, level: int = 0) -> Tuple[str, bool]:
if indent is not None: if indent is not None:
level += 1 level += 1
prefix = "\n" + indent * level prefix = "\n" + indent * level
View file
@ -58,7 +58,7 @@ def compile_c_extension(
extra_compile_args = get_extra_flags("CFLAGS", "PY_CFLAGS_NODIST") extra_compile_args = get_extra_flags("CFLAGS", "PY_CFLAGS_NODIST")
extra_compile_args.append("-DPy_BUILD_CORE_MODULE") extra_compile_args.append("-DPy_BUILD_CORE_MODULE")
# Define _Py_TEST_PEGEN to not call PyAST_Validate() in Parser/pegen.c # Define _Py_TEST_PEGEN to not call PyAST_Validate() in Parser/pegen.c
extra_compile_args.append('-D_Py_TEST_PEGEN') extra_compile_args.append("-D_Py_TEST_PEGEN")
extra_link_args = get_extra_flags("LDFLAGS", "PY_LDFLAGS_NODIST") extra_link_args = get_extra_flags("LDFLAGS", "PY_LDFLAGS_NODIST")
if keep_asserts: if keep_asserts:
extra_compile_args.append("-UNDEBUG") extra_compile_args.append("-UNDEBUG")
@ -175,7 +175,10 @@ def build_c_generator(
def build_python_generator( def build_python_generator(
grammar: Grammar, grammar_file: str, output_file: str, skip_actions: bool = False, grammar: Grammar,
grammar_file: str,
output_file: str,
skip_actions: bool = False,
) -> ParserGenerator: ) -> ParserGenerator:
with open(output_file, "w") as file: with open(output_file, "w") as file:
gen: ParserGenerator = PythonParserGenerator(grammar, file) # TODO: skip_actions gen: ParserGenerator = PythonParserGenerator(grammar, file) # TODO: skip_actions
@ -246,5 +249,10 @@ def build_python_parser_and_generator(
skip_actions (bool, optional): Whether to pretend no rule has any actions. skip_actions (bool, optional): Whether to pretend no rule has any actions.
""" """
grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser) grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
gen = build_python_generator(grammar, grammar_file, output_file, skip_actions=skip_actions,) gen = build_python_generator(
grammar,
grammar_file,
output_file,
skip_actions=skip_actions,
)
return grammar, parser, tokenizer, gen return grammar, parser, tokenizer, gen
View file
@ -12,6 +12,7 @@ from pegen.grammar import (
Gather, Gather,
GrammarVisitor, GrammarVisitor,
Group, Group,
Leaf,
Lookahead, Lookahead,
NamedItem, NamedItem,
NameLeaf, NameLeaf,
@ -91,7 +92,16 @@ class FunctionCall:
parts.append(", 1") parts.append(", 1")
if self.assigned_variable: if self.assigned_variable:
if self.assigned_variable_type: if self.assigned_variable_type:
parts = ["(", self.assigned_variable, " = ", '(', self.assigned_variable_type, ')', *parts, ")"] parts = [
"(",
self.assigned_variable,
" = ",
"(",
self.assigned_variable_type,
")",
*parts,
")",
]
else: else:
parts = ["(", self.assigned_variable, " = ", *parts, ")"] parts = ["(", self.assigned_variable, " = ", *parts, ")"]
if self.comment: if self.comment:
@ -256,9 +266,10 @@ class CCallMakerVisitor(GrammarVisitor):
def visit_Forced(self, node: Forced) -> FunctionCall: def visit_Forced(self, node: Forced) -> FunctionCall:
call = self.generate_call(node.node) call = self.generate_call(node.node)
if call.nodetype == NodeTypes.GENERIC_TOKEN: if isinstance(node.node, Leaf):
assert isinstance(node.node, Leaf)
val = ast.literal_eval(node.node.value) val = ast.literal_eval(node.node.value)
assert val in self.exact_tokens, f"{node.value} is not a known literal" assert val in self.exact_tokens, f"{node.node.value} is not a known literal"
type = self.exact_tokens[val] type = self.exact_tokens[val]
return FunctionCall( return FunctionCall(
assigned_variable="_literal", assigned_variable="_literal",
@ -268,9 +279,19 @@ class CCallMakerVisitor(GrammarVisitor):
return_type="Token *", return_type="Token *",
comment=f"forced_token='{val}'", comment=f"forced_token='{val}'",
) )
if isinstance(node.node, Group):
call = self.visit(node.node.rhs)
call.assigned_variable = None
call.comment = None
return FunctionCall(
assigned_variable="_literal",
function=f"_PyPegen_expect_forced_result",
arguments=["p", str(call), f'"{node.node.rhs!s}"'],
return_type="void *",
comment=f"forced_token=({node.node.rhs!s})",
)
else: else:
raise NotImplementedError( raise NotImplementedError(f"Forced tokens don't work with {node.node} nodes")
f"Forced tokens don't work with {call.nodetype} tokens")
def visit_Opt(self, node: Opt) -> FunctionCall: def visit_Opt(self, node: Opt) -> FunctionCall:
call = self.generate_call(node.node) call = self.generate_call(node.node)
@ -347,7 +368,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
debug: bool = False, debug: bool = False,
skip_actions: bool = False, skip_actions: bool = False,
): ):
super().__init__(grammar, tokens, file) super().__init__(grammar, set(tokens.values()), file)
self.callmakervisitor: CCallMakerVisitor = CCallMakerVisitor( self.callmakervisitor: CCallMakerVisitor = CCallMakerVisitor(
self, exact_tokens, non_exact_tokens self, exact_tokens, non_exact_tokens
) )
@ -386,7 +407,11 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
self.print(f"goto {goto_target};") self.print(f"goto {goto_target};")
self.print(f"}}") self.print(f"}}")
def out_of_memory_return(self, expr: str, cleanup_code: Optional[str] = None,) -> None: def out_of_memory_return(
self,
expr: str,
cleanup_code: Optional[str] = None,
) -> None:
self.print(f"if ({expr}) {{") self.print(f"if ({expr}) {{")
with self.indent(): with self.indent():
if cleanup_code is not None: if cleanup_code is not None:
@ -568,7 +593,10 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
if any(alt.action and "EXTRA" in alt.action for alt in rhs.alts): if any(alt.action and "EXTRA" in alt.action for alt in rhs.alts):
self._set_up_token_start_metadata_extraction() self._set_up_token_start_metadata_extraction()
self.visit( self.visit(
rhs, is_loop=False, is_gather=node.is_gather(), rulename=node.name, rhs,
is_loop=False,
is_gather=node.is_gather(),
rulename=node.name,
) )
if self.debug: if self.debug:
self.print(f'D(fprintf(stderr, "Fail at %d: {node.name}\\n", p->mark));') self.print(f'D(fprintf(stderr, "Fail at %d: {node.name}\\n", p->mark));')
@ -601,7 +629,10 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
if any(alt.action and "EXTRA" in alt.action for alt in rhs.alts): if any(alt.action and "EXTRA" in alt.action for alt in rhs.alts):
self._set_up_token_start_metadata_extraction() self._set_up_token_start_metadata_extraction()
self.visit( self.visit(
rhs, is_loop=True, is_gather=node.is_gather(), rulename=node.name, rhs,
is_loop=True,
is_gather=node.is_gather(),
rulename=node.name,
) )
if is_repeat1: if is_repeat1:
self.print("if (_n == 0 || p->error_indicator) {") self.print("if (_n == 0 || p->error_indicator) {")
@ -771,7 +802,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
def visit_Alt( def visit_Alt(
self, node: Alt, is_loop: bool, is_gather: bool, rulename: Optional[str] self, node: Alt, is_loop: bool, is_gather: bool, rulename: Optional[str]
) -> None: ) -> None:
if len(node.items) == 1 and str(node.items[0]).startswith('invalid_'): if len(node.items) == 1 and str(node.items[0]).startswith("invalid_"):
self.print(f"if (p->call_invalid_rules) {{ // {node}") self.print(f"if (p->call_invalid_rules) {{ // {node}")
else: else:
self.print(f"{{ // {node}") self.print(f"{{ // {node}")
@ -791,7 +822,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
if v == "_cut_var": if v == "_cut_var":
v += " = 0" # cut_var must be initialized v += " = 0" # cut_var must be initialized
self.print(f"{var_type}{v};") self.print(f"{var_type}{v};")
if v.startswith("_opt_var"): if v and v.startswith("_opt_var"):
self.print(f"UNUSED({v}); // Silence compiler warnings") self.print(f"UNUSED({v}); // Silence compiler warnings")
with self.local_variable_context(): with self.local_variable_context():
View file
@ -29,7 +29,8 @@ from pegen.grammar import (
) )
argparser = argparse.ArgumentParser( argparser = argparse.ArgumentParser(
prog="calculate_first_sets", description="Calculate the first sets of a grammar", prog="calculate_first_sets",
description="Calculate the first sets of a grammar",
) )
argparser.add_argument("grammar_file", help="The grammar file") argparser.add_argument("grammar_file", help="The grammar file")
View file
@ -2,7 +2,10 @@
# @generated by pegen from ./Tools/peg_generator/pegen/metagrammar.gram # @generated by pegen from ./Tools/peg_generator/pegen/metagrammar.gram
import ast import ast
from typing import Optional, Any import sys
import tokenize
from typing import Any, Optional
from pegen.parser import memoize, memoize_left_rec, logger, Parser from pegen.parser import memoize, memoize_left_rec, logger, Parser
from ast import literal_eval from ast import literal_eval
@ -35,83 +38,71 @@ from pegen.grammar import (
StringLeaf, StringLeaf,
) )
# Keywords and soft keywords are listed at the end of the parser definition.
class GeneratedParser(Parser): class GeneratedParser(Parser):
@memoize @memoize
def start(self) -> Optional[Grammar]: def start(self) -> Optional[Grammar]:
# start: grammar $ # start: grammar $
mark = self.mark() mark = self._mark()
cut = False
if ( if (
(grammar := self.grammar()) (grammar := self.grammar())
and and
(endmarker := self.expect('ENDMARKER')) (_endmarker := self.expect('ENDMARKER'))
): ):
return grammar return grammar
self.reset(mark) self._reset(mark)
if cut: return None
return None return None
@memoize @memoize
def grammar(self) -> Optional[Grammar]: def grammar(self) -> Optional[Grammar]:
# grammar: metas rules | rules # grammar: metas rules | rules
mark = self.mark() mark = self._mark()
cut = False
if ( if (
(metas := self.metas()) (metas := self.metas())
and and
(rules := self.rules()) (rules := self.rules())
): ):
return Grammar ( rules , metas ) return Grammar ( rules , metas )
self.reset(mark) self._reset(mark)
if cut: return None
cut = False
if ( if (
(rules := self.rules()) (rules := self.rules())
): ):
return Grammar ( rules , [ ] ) return Grammar ( rules , [] )
self.reset(mark) self._reset(mark)
if cut: return None
return None return None
@memoize @memoize
def metas(self) -> Optional[MetaList]: def metas(self) -> Optional[MetaList]:
# metas: meta metas | meta # metas: meta metas | meta
mark = self.mark() mark = self._mark()
cut = False
if ( if (
(meta := self.meta()) (meta := self.meta())
and and
(metas := self.metas()) (metas := self.metas())
): ):
return [ meta ] + metas return [meta] + metas
self.reset(mark) self._reset(mark)
if cut: return None
cut = False
if ( if (
(meta := self.meta()) (meta := self.meta())
): ):
return [ meta ] return [meta]
self.reset(mark) self._reset(mark)
if cut: return None
return None return None
@memoize @memoize
def meta(self) -> Optional[MetaTuple]: def meta(self) -> Optional[MetaTuple]:
# meta: "@" NAME NEWLINE | "@" NAME NAME NEWLINE | "@" NAME STRING NEWLINE # meta: "@" NAME NEWLINE | "@" NAME NAME NEWLINE | "@" NAME STRING NEWLINE
mark = self.mark() mark = self._mark()
cut = False
if ( if (
(literal := self.expect("@")) (literal := self.expect("@"))
and and
(name := self.name()) (name := self.name())
and and
(newline := self.expect('NEWLINE')) (_newline := self.expect('NEWLINE'))
): ):
return ( name . string , None ) return ( name . string , None )
self.reset(mark) self._reset(mark)
if cut: return None
cut = False
if ( if (
(literal := self.expect("@")) (literal := self.expect("@"))
and and
@ -119,12 +110,10 @@ class GeneratedParser(Parser):
and and
(b := self.name()) (b := self.name())
and and
(newline := self.expect('NEWLINE')) (_newline := self.expect('NEWLINE'))
): ):
return ( a . string , b . string ) return ( a . string , b . string )
self.reset(mark) self._reset(mark)
if cut: return None
cut = False
if ( if (
(literal := self.expect("@")) (literal := self.expect("@"))
and and
@ -132,40 +121,34 @@ class GeneratedParser(Parser):
and and
(string := self.string()) (string := self.string())
and and
(newline := self.expect('NEWLINE')) (_newline := self.expect('NEWLINE'))
): ):
return ( name . string , literal_eval ( string . string ) ) return ( name . string , literal_eval ( string . string ) )
self.reset(mark) self._reset(mark)
if cut: return None
return None return None
@memoize @memoize
def rules(self) -> Optional[RuleList]: def rules(self) -> Optional[RuleList]:
# rules: rule rules | rule # rules: rule rules | rule
mark = self.mark() mark = self._mark()
cut = False
if ( if (
(rule := self.rule()) (rule := self.rule())
and and
(rules := self.rules()) (rules := self.rules())
): ):
return [ rule ] + rules return [rule] + rules
self.reset(mark) self._reset(mark)
if cut: return None
cut = False
if ( if (
(rule := self.rule()) (rule := self.rule())
): ):
return [ rule ] return [rule]
self.reset(mark) self._reset(mark)
if cut: return None
return None return None
@memoize @memoize
def rule(self) -> Optional[Rule]: def rule(self) -> Optional[Rule]:
# rule: rulename memoflag? ":" alts NEWLINE INDENT more_alts DEDENT | rulename memoflag? ":" NEWLINE INDENT more_alts DEDENT | rulename memoflag? ":" alts NEWLINE # rule: rulename memoflag? ":" alts NEWLINE INDENT more_alts DEDENT | rulename memoflag? ":" NEWLINE INDENT more_alts DEDENT | rulename memoflag? ":" alts NEWLINE
mark = self.mark() mark = self._mark()
cut = False
if ( if (
(rulename := self.rulename()) (rulename := self.rulename())
and and
@ -175,18 +158,16 @@ class GeneratedParser(Parser):
and and
(alts := self.alts()) (alts := self.alts())
and and
(newline := self.expect('NEWLINE')) (_newline := self.expect('NEWLINE'))
and and
(indent := self.expect('INDENT')) (_indent := self.expect('INDENT'))
and and
(more_alts := self.more_alts()) (more_alts := self.more_alts())
and and
(dedent := self.expect('DEDENT')) (_dedent := self.expect('DEDENT'))
): ):
return Rule ( rulename [ 0 ] , rulename [ 1 ] , Rhs ( alts . alts + more_alts . alts ) , memo = opt ) return Rule ( rulename [0] , rulename [1] , Rhs ( alts . alts + more_alts . alts ) , memo = opt )
self.reset(mark) self._reset(mark)
if cut: return None
cut = False
if ( if (
(rulename := self.rulename()) (rulename := self.rulename())
and and
@ -194,18 +175,16 @@ class GeneratedParser(Parser):
and and
(literal := self.expect(":")) (literal := self.expect(":"))
and and
(newline := self.expect('NEWLINE')) (_newline := self.expect('NEWLINE'))
and and
(indent := self.expect('INDENT')) (_indent := self.expect('INDENT'))
and and
(more_alts := self.more_alts()) (more_alts := self.more_alts())
and and
(dedent := self.expect('DEDENT')) (_dedent := self.expect('DEDENT'))
): ):
return Rule ( rulename [ 0 ] , rulename [ 1 ] , more_alts , memo = opt ) return Rule ( rulename [0] , rulename [1] , more_alts , memo = opt )
self.reset(mark) self._reset(mark)
if cut: return None
cut = False
if ( if (
(rulename := self.rulename()) (rulename := self.rulename())
and and
@ -215,76 +194,49 @@ class GeneratedParser(Parser):
and and
(alts := self.alts()) (alts := self.alts())
and and
(newline := self.expect('NEWLINE')) (_newline := self.expect('NEWLINE'))
): ):
return Rule ( rulename [ 0 ] , rulename [ 1 ] , alts , memo = opt ) return Rule ( rulename [0] , rulename [1] , alts , memo = opt )
self.reset(mark) self._reset(mark)
if cut: return None
return None return None
@memoize @memoize
def rulename(self) -> Optional[RuleName]: def rulename(self) -> Optional[RuleName]:
# rulename: NAME '[' NAME '*' ']' | NAME '[' NAME ']' | NAME # rulename: NAME annotation | NAME
mark = self.mark() mark = self._mark()
cut = False
if ( if (
(name := self.name()) (name := self.name())
and and
(literal := self.expect('[')) (annotation := self.annotation())
and
(type := self.name())
and
(literal_1 := self.expect('*'))
and
(literal_2 := self.expect(']'))
): ):
return ( name . string , type . string + "*" ) return ( name . string , annotation )
self.reset(mark) self._reset(mark)
if cut: return None
cut = False
if (
(name := self.name())
and
(literal := self.expect('['))
and
(type := self.name())
and
(literal_1 := self.expect(']'))
):
return ( name . string , type . string )
self.reset(mark)
if cut: return None
cut = False
if ( if (
(name := self.name()) (name := self.name())
): ):
return ( name . string , None ) return ( name . string , None )
self.reset(mark) self._reset(mark)
if cut: return None
return None return None
@memoize @memoize
def memoflag(self) -> Optional[str]: def memoflag(self) -> Optional[str]:
# memoflag: '(' 'memo' ')' # memoflag: '(' "memo" ')'
mark = self.mark() mark = self._mark()
cut = False
if ( if (
(literal := self.expect('(')) (literal := self.expect('('))
and and
(literal_1 := self.expect('memo')) (literal_1 := self.expect("memo"))
and and
(literal_2 := self.expect(')')) (literal_2 := self.expect(')'))
): ):
return "memo" return "memo"
self.reset(mark) self._reset(mark)
if cut: return None
return None return None
@memoize @memoize
def alts(self) -> Optional[Rhs]: def alts(self) -> Optional[Rhs]:
# alts: alt "|" alts | alt # alts: alt "|" alts | alt
mark = self.mark() mark = self._mark()
cut = False
if ( if (
(alt := self.alt()) (alt := self.alt())
and and
@ -292,53 +244,45 @@ class GeneratedParser(Parser):
and and
(alts := self.alts()) (alts := self.alts())
): ):
return Rhs ( [ alt ] + alts . alts ) return Rhs ( [alt] + alts . alts )
self.reset(mark) self._reset(mark)
if cut: return None
cut = False
if ( if (
(alt := self.alt()) (alt := self.alt())
): ):
return Rhs ( [ alt ] ) return Rhs ( [alt] )
self.reset(mark) self._reset(mark)
if cut: return None
return None return None
@memoize @memoize
def more_alts(self) -> Optional[Rhs]: def more_alts(self) -> Optional[Rhs]:
# more_alts: "|" alts NEWLINE more_alts | "|" alts NEWLINE # more_alts: "|" alts NEWLINE more_alts | "|" alts NEWLINE
mark = self.mark() mark = self._mark()
cut = False
if ( if (
(literal := self.expect("|")) (literal := self.expect("|"))
and and
(alts := self.alts()) (alts := self.alts())
and and
(newline := self.expect('NEWLINE')) (_newline := self.expect('NEWLINE'))
and and
(more_alts := self.more_alts()) (more_alts := self.more_alts())
): ):
return Rhs ( alts . alts + more_alts . alts ) return Rhs ( alts . alts + more_alts . alts )
self.reset(mark) self._reset(mark)
if cut: return None
cut = False
if ( if (
(literal := self.expect("|")) (literal := self.expect("|"))
and and
(alts := self.alts()) (alts := self.alts())
and and
(newline := self.expect('NEWLINE')) (_newline := self.expect('NEWLINE'))
): ):
return Rhs ( alts . alts ) return Rhs ( alts . alts )
self.reset(mark) self._reset(mark)
if cut: return None
return None return None
@memoize @memoize
def alt(self) -> Optional[Alt]: def alt(self) -> Optional[Alt]:
# alt: items '$' action | items '$' | items action | items # alt: items '$' action | items '$' | items action | items
mark = self.mark() mark = self._mark()
cut = False
if ( if (
(items := self.items()) (items := self.items())
and and
@ -346,101 +290,65 @@ class GeneratedParser(Parser):
and and
(action := self.action()) (action := self.action())
): ):
return Alt ( items + [ NamedItem ( None , NameLeaf ( 'ENDMARKER' ) ) ] , action = action ) return Alt ( items + [NamedItem ( None , NameLeaf ( 'ENDMARKER' ) )] , action = action )
self.reset(mark) self._reset(mark)
if cut: return None
cut = False
if ( if (
(items := self.items()) (items := self.items())
and and
(literal := self.expect('$')) (literal := self.expect('$'))
): ):
return Alt ( items + [ NamedItem ( None , NameLeaf ( 'ENDMARKER' ) ) ] , action = None ) return Alt ( items + [NamedItem ( None , NameLeaf ( 'ENDMARKER' ) )] , action = None )
self.reset(mark) self._reset(mark)
if cut: return None
cut = False
if ( if (
(items := self.items()) (items := self.items())
and and
(action := self.action()) (action := self.action())
): ):
return Alt ( items , action = action ) return Alt ( items , action = action )
self.reset(mark) self._reset(mark)
if cut: return None
cut = False
if ( if (
(items := self.items()) (items := self.items())
): ):
return Alt ( items , action = None ) return Alt ( items , action = None )
self.reset(mark) self._reset(mark)
if cut: return None
return None return None
@memoize @memoize
def items(self) -> Optional[NamedItemList]: def items(self) -> Optional[NamedItemList]:
# items: named_item items | named_item # items: named_item items | named_item
mark = self.mark() mark = self._mark()
cut = False
if ( if (
(named_item := self.named_item()) (named_item := self.named_item())
and and
(items := self.items()) (items := self.items())
): ):
return [ named_item ] + items return [named_item] + items
self.reset(mark) self._reset(mark)
if cut: return None
cut = False
if ( if (
(named_item := self.named_item()) (named_item := self.named_item())
): ):
return [ named_item ] return [named_item]
self.reset(mark) self._reset(mark)
if cut: return None
return None return None
@memoize @memoize
def named_item(self) -> Optional[NamedItem]: def named_item(self) -> Optional[NamedItem]:
# named_item: NAME '[' NAME '*' ']' '=' ~ item | NAME '[' NAME ']' '=' ~ item | NAME '=' ~ item | item | forced_atom | lookahead # named_item: NAME annotation '=' ~ item | NAME '=' ~ item | item | forced_atom | lookahead
mark = self.mark() mark = self._mark()
cut = False cut = False
if ( if (
(name := self.name()) (name := self.name())
and and
(literal := self.expect('[')) (annotation := self.annotation())
and and
(type := self.name()) (literal := self.expect('='))
and
(literal_1 := self.expect('*'))
and
(literal_2 := self.expect(']'))
and
(literal_3 := self.expect('='))
and and
(cut := True) (cut := True)
and and
(item := self.item()) (item := self.item())
): ):
return NamedItem ( name . string , item , f"{type.string}*" ) return NamedItem ( name . string , item , annotation )
self.reset(mark) self._reset(mark)
if cut: return None
cut = False
if (
(name := self.name())
and
(literal := self.expect('['))
and
(type := self.name())
and
(literal_1 := self.expect(']'))
and
(literal_2 := self.expect('='))
and
(cut := True)
and
(item := self.item())
):
return NamedItem ( name . string , item , type . string )
self.reset(mark)
if cut: return None if cut: return None
cut = False cut = False
if ( if (
@ -453,35 +361,29 @@ class GeneratedParser(Parser):
(item := self.item()) (item := self.item())
): ):
return NamedItem ( name . string , item ) return NamedItem ( name . string , item )
self.reset(mark) self._reset(mark)
if cut: return None if cut: return None
cut = False
if ( if (
(item := self.item()) (item := self.item())
): ):
return NamedItem ( None , item ) return NamedItem ( None , item )
self.reset(mark) self._reset(mark)
if cut: return None
cut = False
if ( if (
(it := self.forced_atom()) (forced := self.forced_atom())
): ):
return NamedItem ( None , it ) return NamedItem ( None , forced )
self.reset(mark) self._reset(mark)
if cut: return None
cut = False
if ( if (
(it := self.lookahead()) (it := self.lookahead())
): ):
return NamedItem ( None , it ) return NamedItem ( None , it )
self.reset(mark) self._reset(mark)
if cut: return None
return None return None
@memoize @memoize
def forced_atom(self) -> Optional[NamedItem]: def forced_atom(self) -> Optional[Forced]:
# forced_atom: '&' '&' ~ atom # forced_atom: '&' '&' ~ atom
mark = self.mark() mark = self._mark()
cut = False cut = False
if ( if (
(literal := self.expect('&')) (literal := self.expect('&'))
@ -493,14 +395,14 @@ class GeneratedParser(Parser):
(atom := self.atom()) (atom := self.atom())
): ):
return Forced ( atom ) return Forced ( atom )
self.reset(mark) self._reset(mark)
if cut: return None if cut: return None
return None return None
@memoize @memoize
def lookahead(self) -> Optional[LookaheadOrCut]: def lookahead(self) -> Optional[LookaheadOrCut]:
# lookahead: '&' ~ atom | '!' ~ atom | '~' # lookahead: '&' ~ atom | '!' ~ atom | '~'
mark = self.mark() mark = self._mark()
cut = False cut = False
if ( if (
(literal := self.expect('&')) (literal := self.expect('&'))
@ -510,7 +412,7 @@ class GeneratedParser(Parser):
(atom := self.atom()) (atom := self.atom())
): ):
return PositiveLookahead ( atom ) return PositiveLookahead ( atom )
self.reset(mark) self._reset(mark)
if cut: return None if cut: return None
cut = False cut = False
if ( if (
@ -521,21 +423,19 @@ class GeneratedParser(Parser):
(atom := self.atom()) (atom := self.atom())
): ):
return NegativeLookahead ( atom ) return NegativeLookahead ( atom )
self.reset(mark) self._reset(mark)
if cut: return None if cut: return None
cut = False
if ( if (
(literal := self.expect('~')) (literal := self.expect('~'))
): ):
return Cut ( ) return Cut ( )
self.reset(mark) self._reset(mark)
if cut: return None
return None return None
@memoize @memoize
def item(self) -> Optional[Item]: def item(self) -> Optional[Item]:
# item: '[' ~ alts ']' | atom '?' | atom '*' | atom '+' | atom '.' atom '+' | atom # item: '[' ~ alts ']' | atom '?' | atom '*' | atom '+' | atom '.' atom '+' | atom
mark = self.mark() mark = self._mark()
cut = False cut = False
if ( if (
(literal := self.expect('[')) (literal := self.expect('['))
@ -547,36 +447,29 @@ class GeneratedParser(Parser):
(literal_1 := self.expect(']')) (literal_1 := self.expect(']'))
): ):
return Opt ( alts ) return Opt ( alts )
self.reset(mark) self._reset(mark)
if cut: return None if cut: return None
cut = False
if ( if (
(atom := self.atom()) (atom := self.atom())
and and
(literal := self.expect('?')) (literal := self.expect('?'))
): ):
return Opt ( atom ) return Opt ( atom )
self.reset(mark) self._reset(mark)
if cut: return None
cut = False
if ( if (
(atom := self.atom()) (atom := self.atom())
and and
(literal := self.expect('*')) (literal := self.expect('*'))
): ):
return Repeat0 ( atom ) return Repeat0 ( atom )
self.reset(mark) self._reset(mark)
if cut: return None
cut = False
if ( if (
(atom := self.atom()) (atom := self.atom())
and and
(literal := self.expect('+')) (literal := self.expect('+'))
): ):
return Repeat1 ( atom ) return Repeat1 ( atom )
self.reset(mark) self._reset(mark)
if cut: return None
cut = False
if ( if (
(sep := self.atom()) (sep := self.atom())
and and
@ -587,21 +480,18 @@ class GeneratedParser(Parser):
(literal_1 := self.expect('+')) (literal_1 := self.expect('+'))
): ):
return Gather ( sep , node ) return Gather ( sep , node )
self.reset(mark) self._reset(mark)
if cut: return None
cut = False
if ( if (
(atom := self.atom()) (atom := self.atom())
): ):
return atom return atom
self.reset(mark) self._reset(mark)
if cut: return None
return None return None
@memoize @memoize
def atom(self) -> Optional[Plain]: def atom(self) -> Optional[Plain]:
# atom: '(' ~ alts ')' | NAME | STRING # atom: '(' ~ alts ')' | NAME | STRING
mark = self.mark() mark = self._mark()
cut = False cut = False
if ( if (
(literal := self.expect('(')) (literal := self.expect('('))
@ -613,28 +503,24 @@ class GeneratedParser(Parser):
(literal_1 := self.expect(')')) (literal_1 := self.expect(')'))
): ):
return Group ( alts ) return Group ( alts )
self.reset(mark) self._reset(mark)
if cut: return None if cut: return None
cut = False
if ( if (
(name := self.name()) (name := self.name())
): ):
return NameLeaf ( name . string ) return NameLeaf ( name . string )
self.reset(mark) self._reset(mark)
if cut: return None
cut = False
if ( if (
(string := self.string()) (string := self.string())
): ):
return StringLeaf ( string . string ) return StringLeaf ( string . string )
self.reset(mark) self._reset(mark)
if cut: return None
return None return None
@memoize @memoize
def action(self) -> Optional[str]: def action(self) -> Optional[str]:
# action: "{" ~ target_atoms "}" # action: "{" ~ target_atoms "}"
mark = self.mark() mark = self._mark()
cut = False cut = False
if ( if (
(literal := self.expect("{")) (literal := self.expect("{"))
@ -646,95 +532,123 @@ class GeneratedParser(Parser):
(literal_1 := self.expect("}")) (literal_1 := self.expect("}"))
): ):
return target_atoms return target_atoms
self.reset(mark) self._reset(mark)
if cut: return None
return None
@memoize
def annotation(self) -> Optional[str]:
# annotation: "[" ~ target_atoms "]"
mark = self._mark()
cut = False
if (
(literal := self.expect("["))
and
(cut := True)
and
(target_atoms := self.target_atoms())
and
(literal_1 := self.expect("]"))
):
return target_atoms
self._reset(mark)
if cut: return None if cut: return None
return None return None
@memoize @memoize
def target_atoms(self) -> Optional[str]: def target_atoms(self) -> Optional[str]:
# target_atoms: target_atom target_atoms | target_atom # target_atoms: target_atom target_atoms | target_atom
mark = self.mark() mark = self._mark()
cut = False
if ( if (
(target_atom := self.target_atom()) (target_atom := self.target_atom())
and and
(target_atoms := self.target_atoms()) (target_atoms := self.target_atoms())
): ):
return target_atom + " " + target_atoms return target_atom + " " + target_atoms
self.reset(mark) self._reset(mark)
if cut: return None
cut = False
if ( if (
(target_atom := self.target_atom()) (target_atom := self.target_atom())
): ):
return target_atom return target_atom
self.reset(mark) self._reset(mark)
if cut: return None
return None return None
@memoize @memoize
def target_atom(self) -> Optional[str]: def target_atom(self) -> Optional[str]:
# target_atom: "{" ~ target_atoms "}" | NAME | NUMBER | STRING | "?" | ":" | !"}" OP # target_atom: "{" ~ target_atoms? "}" | "[" ~ target_atoms? "]" | NAME "*" | NAME | NUMBER | STRING | "?" | ":" | !"}" !"]" OP
mark = self.mark() mark = self._mark()
cut = False cut = False
if ( if (
(literal := self.expect("{")) (literal := self.expect("{"))
and and
(cut := True) (cut := True)
and and
(target_atoms := self.target_atoms()) (atoms := self.target_atoms(),)
and and
(literal_1 := self.expect("}")) (literal_1 := self.expect("}"))
): ):
return "{" + target_atoms + "}" return "{" + ( atoms or "" ) + "}"
self.reset(mark) self._reset(mark)
if cut: return None if cut: return None
cut = False cut = False
if (
(literal := self.expect("["))
and
(cut := True)
and
(atoms := self.target_atoms(),)
and
(literal_1 := self.expect("]"))
):
return "[" + ( atoms or "" ) + "]"
self._reset(mark)
if cut: return None
if (
(name := self.name())
and
(literal := self.expect("*"))
):
return name . string + "*"
self._reset(mark)
if ( if (
(name := self.name()) (name := self.name())
): ):
return name . string return name . string
self.reset(mark) self._reset(mark)
if cut: return None
cut = False
if ( if (
(number := self.number()) (number := self.number())
): ):
return number . string return number . string
self.reset(mark) self._reset(mark)
if cut: return None
cut = False
if ( if (
(string := self.string()) (string := self.string())
): ):
return string . string return string . string
self.reset(mark) self._reset(mark)
if cut: return None
cut = False
if ( if (
(literal := self.expect("?")) (literal := self.expect("?"))
): ):
return "?" return "?"
self.reset(mark) self._reset(mark)
if cut: return None
cut = False
if ( if (
(literal := self.expect(":")) (literal := self.expect(":"))
): ):
return ":" return ":"
self.reset(mark) self._reset(mark)
if cut: return None
cut = False
if ( if (
self.negative_lookahead(self.expect, "}") self.negative_lookahead(self.expect, "}")
and and
self.negative_lookahead(self.expect, "]")
and
(op := self.op()) (op := self.op())
): ):
return op . string return op . string
self.reset(mark) self._reset(mark)
if cut: return None
return None return None
KEYWORDS = ()
SOFT_KEYWORDS = ('memo',)
if __name__ == '__main__': if __name__ == '__main__':
from pegen.parser import simple_parser_main from pegen.parser import simple_parser_main
View file
@ -38,7 +38,7 @@ issoftkeyword = frozenset(softkwlist).__contains__
EXTRA_KEYWORDS = ["async", "await"] EXTRA_KEYWORDS = ["async", "await"]
def main(): def main() -> None:
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description="Generate the Lib/keywords.py file from the grammar." description="Generate the Lib/keywords.py file from the grammar."
) )
@ -58,9 +58,7 @@ def main():
grammar, _, _ = build_parser(args.grammar) grammar, _, _ = build_parser(args.grammar)
with args.tokens_file as tok_file: with args.tokens_file as tok_file:
all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file) all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file)
gen: ParserGenerator = CParserGenerator( gen = CParserGenerator(grammar, all_tokens, exact_tok, non_exact_tok, file=None)
grammar, all_tokens, exact_tok, non_exact_tok, file=None
)
gen.collect_todo() gen.collect_todo()
with args.keyword_file as thefile: with args.keyword_file as thefile:
@ -68,7 +66,9 @@ def main():
all_soft_keywords = sorted(gen.callmakervisitor.soft_keywords) all_soft_keywords = sorted(gen.callmakervisitor.soft_keywords)
keywords = "" if not all_keywords else " " + ",\n ".join(map(repr, all_keywords)) keywords = "" if not all_keywords else " " + ",\n ".join(map(repr, all_keywords))
soft_keywords = "" if not all_soft_keywords else " " + ",\n ".join(map(repr, all_soft_keywords)) soft_keywords = (
"" if not all_soft_keywords else " " + ",\n ".join(map(repr, all_soft_keywords))
)
thefile.write(TEMPLATE.format(keywords=keywords, soft_keywords=soft_keywords)) thefile.write(TEMPLATE.format(keywords=keywords, soft_keywords=soft_keywords))
View file
@ -57,13 +57,12 @@ rule[Rule]:
| rulename memoflag? ":" alts NEWLINE { Rule(rulename[0], rulename[1], alts, memo=opt) } | rulename memoflag? ":" alts NEWLINE { Rule(rulename[0], rulename[1], alts, memo=opt) }
rulename[RuleName]: rulename[RuleName]:
| NAME '[' type=NAME '*' ']' { (name.string, type.string+"*") } | NAME annotation { (name.string, annotation) }
| NAME '[' type=NAME ']' { (name.string, type.string) }
| NAME { (name.string, None) } | NAME { (name.string, None) }
# In the future this may return something more complicated # In the future this may return something more complicated
memoflag[str]: memoflag[str]:
| '(' 'memo' ')' { "memo" } | '(' "memo" ')' { "memo" }
alts[Rhs]: alts[Rhs]:
| alt "|" alts { Rhs([alt] + alts.alts)} | alt "|" alts { Rhs([alt] + alts.alts)}
@ -84,14 +83,13 @@ items[NamedItemList]:
| named_item { [named_item] } | named_item { [named_item] }
named_item[NamedItem]: named_item[NamedItem]:
| NAME '[' type=NAME '*' ']' '=' ~ item {NamedItem(name.string, item, f"{type.string}*")} | NAME annotation '=' ~ item {NamedItem(name.string, item, annotation)}
| NAME '[' type=NAME ']' '=' ~ item {NamedItem(name.string, item, type.string)}
| NAME '=' ~ item {NamedItem(name.string, item)} | NAME '=' ~ item {NamedItem(name.string, item)}
| item {NamedItem(None, item)} | item {NamedItem(None, item)}
| it=forced_atom {NamedItem(None, it)} | forced=forced_atom {NamedItem(None, forced)}
| it=lookahead {NamedItem(None, it)} | it=lookahead {NamedItem(None, it)}
forced_atom[NamedItem]: forced_atom[Forced]:
| '&''&' ~ atom {Forced(atom)} | '&''&' ~ atom {Forced(atom)}
lookahead[LookaheadOrCut]: lookahead[LookaheadOrCut]:
@ -112,19 +110,22 @@ atom[Plain]:
| NAME {NameLeaf(name.string) } | NAME {NameLeaf(name.string) }
| STRING {StringLeaf(string.string)} | STRING {StringLeaf(string.string)}
# Mini-grammar for the actions # Mini-grammar for the actions and annotations
action[str]: "{" ~ target_atoms "}" { target_atoms } action[str]: "{" ~ target_atoms "}" { target_atoms }
annotation[str]: "[" ~ target_atoms "]" { target_atoms }
target_atoms[str]: target_atoms[str]:
| target_atom target_atoms { target_atom + " " + target_atoms } | target_atom target_atoms { target_atom + " " + target_atoms }
| target_atom { target_atom } | target_atom { target_atom }
target_atom[str]: target_atom[str]:
| "{" ~ target_atoms "}" { "{" + target_atoms + "}" } | "{" ~ atoms=target_atoms? "}" { "{" + (atoms or "") + "}" }
| "[" ~ atoms=target_atoms? "]" { "[" + (atoms or "") + "]" }
| NAME "*" { name.string + "*" }
| NAME { name.string } | NAME { name.string }
| NUMBER { number.string } | NUMBER { number.string }
| STRING { string.string } | STRING { string.string }
| "?" { "?" } | "?" { "?" }
| ":" { ":" } | ":" { ":" }
| !"}" OP { op.string } | !"}" !"]" OP { op.string }
View file
@ -4,13 +4,10 @@ import time
import token import token
import tokenize import tokenize
import traceback import traceback
from abc import abstractmethod from abc import abstractmethod
from typing import Any, Callable, cast, Dict, Optional, Tuple, Type, TypeVar from typing import Any, Callable, ClassVar, Dict, Optional, Tuple, Type, TypeVar, cast
from pegen.tokenizer import exact_token_types from pegen.tokenizer import Mark, Tokenizer, exact_token_types
from pegen.tokenizer import Mark
from pegen.tokenizer import Tokenizer
T = TypeVar("T") T = TypeVar("T")
P = TypeVar("P", bound="Parser") P = TypeVar("P", bound="Parser")
@ -45,12 +42,12 @@ def memoize(method: F) -> F:
method_name = method.__name__ method_name = method.__name__
def memoize_wrapper(self: P, *args: object) -> T: def memoize_wrapper(self: P, *args: object) -> T:
mark = self.mark() mark = self._mark()
key = mark, method_name, args key = mark, method_name, args
# Fast path: cache hit, and not verbose. # Fast path: cache hit, and not verbose.
if key in self._cache and not self._verbose: if key in self._cache and not self._verbose:
tree, endmark = self._cache[key] tree, endmark = self._cache[key]
self.reset(endmark) self._reset(endmark)
return tree return tree
# Slow path: no cache hit, or verbose. # Slow path: no cache hit, or verbose.
verbose = self._verbose verbose = self._verbose
@ -64,13 +61,13 @@ def memoize(method: F) -> F:
self._level -= 1 self._level -= 1
if verbose: if verbose:
print(f"{fill}... {method_name}({argsr}) -> {tree!s:.200}") print(f"{fill}... {method_name}({argsr}) -> {tree!s:.200}")
endmark = self.mark() endmark = self._mark()
self._cache[key] = tree, endmark self._cache[key] = tree, endmark
else: else:
tree, endmark = self._cache[key] tree, endmark = self._cache[key]
if verbose: if verbose:
print(f"{fill}{method_name}({argsr}) -> {tree!s:.200}") print(f"{fill}{method_name}({argsr}) -> {tree!s:.200}")
self.reset(endmark) self._reset(endmark)
return tree return tree
memoize_wrapper.__wrapped__ = method # type: ignore memoize_wrapper.__wrapped__ = method # type: ignore
@ -82,12 +79,12 @@ def memoize_left_rec(method: Callable[[P], Optional[T]]) -> Callable[[P], Option
method_name = method.__name__ method_name = method.__name__
def memoize_left_rec_wrapper(self: P) -> Optional[T]: def memoize_left_rec_wrapper(self: P) -> Optional[T]:
mark = self.mark() mark = self._mark()
key = mark, method_name, () key = mark, method_name, ()
# Fast path: cache hit, and not verbose. # Fast path: cache hit, and not verbose.
if key in self._cache and not self._verbose: if key in self._cache and not self._verbose:
tree, endmark = self._cache[key] tree, endmark = self._cache[key]
self.reset(endmark) self._reset(endmark)
return tree return tree
# Slow path: no cache hit, or verbose. # Slow path: no cache hit, or verbose.
verbose = self._verbose verbose = self._verbose
@ -113,9 +110,13 @@ def memoize_left_rec(method: Callable[[P], Optional[T]]) -> Callable[[P], Option
print(f"{fill}Recursive {method_name} at {mark} depth {depth}") print(f"{fill}Recursive {method_name} at {mark} depth {depth}")
while True: while True:
self.reset(mark) self._reset(mark)
result = method(self) self.in_recursive_rule += 1
endmark = self.mark() try:
result = method(self)
finally:
self.in_recursive_rule -= 1
endmark = self._mark()
depth += 1 depth += 1
if verbose: if verbose:
print( print(
@ -131,24 +132,24 @@ def memoize_left_rec(method: Callable[[P], Optional[T]]) -> Callable[[P], Option
break break
self._cache[key] = lastresult, lastmark = result, endmark self._cache[key] = lastresult, lastmark = result, endmark
self.reset(lastmark) self._reset(lastmark)
tree = lastresult tree = lastresult
self._level -= 1 self._level -= 1
if verbose: if verbose:
print(f"{fill}{method_name}() -> {tree!s:.200} [cached]") print(f"{fill}{method_name}() -> {tree!s:.200} [cached]")
if tree: if tree:
endmark = self.mark() endmark = self._mark()
else: else:
endmark = mark endmark = mark
self.reset(endmark) self._reset(endmark)
self._cache[key] = tree, endmark self._cache[key] = tree, endmark
else: else:
tree, endmark = self._cache[key] tree, endmark = self._cache[key]
if verbose: if verbose:
print(f"{fill}{method_name}() -> {tree!s:.200} [fresh]") print(f"{fill}{method_name}() -> {tree!s:.200} [fresh]")
if tree: if tree:
self.reset(endmark) self._reset(endmark)
return tree return tree
memoize_left_rec_wrapper.__wrapped__ = method # type: ignore memoize_left_rec_wrapper.__wrapped__ = method # type: ignore
@ -158,15 +159,21 @@ def memoize_left_rec(method: Callable[[P], Optional[T]]) -> Callable[[P], Option
class Parser: class Parser:
"""Parsing base class.""" """Parsing base class."""
KEYWORDS: ClassVar[Tuple[str, ...]]
SOFT_KEYWORDS: ClassVar[Tuple[str, ...]]
def __init__(self, tokenizer: Tokenizer, *, verbose: bool = False): def __init__(self, tokenizer: Tokenizer, *, verbose: bool = False):
self._tokenizer = tokenizer self._tokenizer = tokenizer
self._verbose = verbose self._verbose = verbose
self._level = 0 self._level = 0
self._cache: Dict[Tuple[Mark, str, Tuple[Any, ...]], Tuple[Any, Mark]] = {} self._cache: Dict[Tuple[Mark, str, Tuple[Any, ...]], Tuple[Any, Mark]] = {}
# Integer tracking whether we are in a left recursive rule or not. Can be useful
# for error reporting.
self.in_recursive_rule = 0
# Pass through common tokenizer methods. # Pass through common tokenizer methods.
# TODO: Rename to _mark and _reset. self._mark = self._tokenizer.mark
self.mark = self._tokenizer.mark self._reset = self._tokenizer.reset
self.reset = self._tokenizer.reset
@abstractmethod @abstractmethod
def start(self) -> Any: def start(self) -> Any:
@ -179,7 +186,7 @@ class Parser:
@memoize @memoize
def name(self) -> Optional[tokenize.TokenInfo]: def name(self) -> Optional[tokenize.TokenInfo]:
tok = self._tokenizer.peek() tok = self._tokenizer.peek()
if tok.type == token.NAME: if tok.type == token.NAME and tok.string not in self.KEYWORDS:
return self._tokenizer.getnext() return self._tokenizer.getnext()
return None return None
@ -204,6 +211,20 @@ class Parser:
return self._tokenizer.getnext() return self._tokenizer.getnext()
return None return None
@memoize
def type_comment(self) -> Optional[tokenize.TokenInfo]:
tok = self._tokenizer.peek()
if tok.type == token.TYPE_COMMENT:
return self._tokenizer.getnext()
return None
@memoize
def soft_keyword(self) -> Optional[tokenize.TokenInfo]:
tok = self._tokenizer.peek()
if tok.type == token.NAME and tok.string in self.SOFT_KEYWORDS:
return self._tokenizer.getnext()
return None
@memoize @memoize
def expect(self, type: str) -> Optional[tokenize.TokenInfo]: def expect(self, type: str) -> Optional[tokenize.TokenInfo]:
tok = self._tokenizer.peek() tok = self._tokenizer.peek()
@ -219,23 +240,26 @@ class Parser:
return self._tokenizer.getnext() return self._tokenizer.getnext()
return None return None
def expect_forced(self, res: Any, expectation: str) -> Optional[tokenize.TokenInfo]:
if res is None:
raise self.make_syntax_error(f"expected {expectation}")
return res
def positive_lookahead(self, func: Callable[..., T], *args: object) -> T: def positive_lookahead(self, func: Callable[..., T], *args: object) -> T:
mark = self.mark() mark = self._mark()
ok = func(*args) ok = func(*args)
self.reset(mark) self._reset(mark)
return ok return ok
def negative_lookahead(self, func: Callable[..., object], *args: object) -> bool: def negative_lookahead(self, func: Callable[..., object], *args: object) -> bool:
mark = self.mark() mark = self._mark()
ok = func(*args) ok = func(*args)
self.reset(mark) self._reset(mark)
return not ok return not ok
def make_syntax_error(self, filename: str = "<unknown>") -> SyntaxError: def make_syntax_error(self, message: str, filename: str = "<unknown>") -> SyntaxError:
tok = self._tokenizer.diagnose() tok = self._tokenizer.diagnose()
return SyntaxError( return SyntaxError(message, (filename, tok.start[0], 1 + tok.start[1], tok.line))
"pegen parse failure", (filename, tok.start[0], 1 + tok.start[1], tok.line)
)
def simple_parser_main(parser_class: Type[Parser]) -> None: def simple_parser_main(parser_class: Type[Parser]) -> None:
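Taken together, the new KEYWORDS/SOFT_KEYWORDS class variables, the soft_keyword() helper and expect_forced() give generated parsers their keyword handling and the "&&" forced-token errors. A hedged, hand-written illustration of how a generated subclass is expected to use them (assuming Tools/peg_generator is importable; this is not real generator output):

import io
import tokenize

from pegen.parser import Parser, memoize
from pegen.tokenizer import Tokenizer

class ToyParser(Parser):
    # The generator fills these in from the grammar's string leaves.
    KEYWORDS = ("pass",)
    SOFT_KEYWORDS = ("match",)

    @memoize
    def start(self):
        # start: NAME &&':'
        mark = self._mark()
        if (name := self.name()) and self.expect_forced(self.expect(":"), "':'"):
            return name
        self._reset(mark)
        return None

toks = tokenize.generate_tokens(io.StringIO("x :").readline)
print(ToyParser(Tokenizer(toks)).start())   # NAME token for "x"; plain "x" would raise SyntaxError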

View file

@ -1,30 +1,29 @@
import contextlib import contextlib
from abc import abstractmethod from abc import abstractmethod
from typing import IO, AbstractSet, Dict, Iterator, List, Optional, Set, Text, Tuple
from typing import AbstractSet, Dict, IO, Iterator, List, Optional, Set, Text, Tuple
from pegen import sccutils from pegen import sccutils
from pegen.grammar import ( from pegen.grammar import (
Grammar,
Rule,
Rhs,
Alt, Alt,
NamedItem,
Plain,
NameLeaf,
Gather, Gather,
Grammar,
GrammarError,
GrammarVisitor,
NamedItem,
NameLeaf,
Plain,
Rhs,
Rule,
) )
from pegen.grammar import GrammarError, GrammarVisitor
class RuleCheckingVisitor(GrammarVisitor): class RuleCheckingVisitor(GrammarVisitor):
def __init__(self, rules: Dict[str, Rule], tokens: Dict[int, str]): def __init__(self, rules: Dict[str, Rule], tokens: Set[str]):
self.rules = rules self.rules = rules
self.tokens = tokens self.tokens = tokens
def visit_NameLeaf(self, node: NameLeaf) -> None: def visit_NameLeaf(self, node: NameLeaf) -> None:
if node.value not in self.rules and node.value not in self.tokens.values(): if node.value not in self.rules and node.value not in self.tokens:
# TODO: Add line/col info to (leaf) nodes
raise GrammarError(f"Dangling reference to rule {node.value!r}") raise GrammarError(f"Dangling reference to rule {node.value!r}")
def visit_NamedItem(self, node: NamedItem) -> None: def visit_NamedItem(self, node: NamedItem) -> None:
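RuleCheckingVisitor now takes a plain set of token names instead of the token.tok_name mapping, which is how ParserGenerator hands it over below. A small sketch of the updated contract (assuming pegen is importable), using a grammar with a deliberately dangling reference:

import token

from pegen.grammar import GrammarError
from pegen.grammar_parser import GeneratedParser as GrammarParser
from pegen.parser_generator import RuleCheckingVisitor
from pegen.testutil import parse_string

grammar = parse_string("start: bogus_rule NEWLINE\n", GrammarParser)
checker = RuleCheckingVisitor(grammar.rules, set(token.tok_name.values()))
try:
    for rule in grammar.rules.values():
        checker.visit(rule)
except GrammarError as err:
    print(err)   # Dangling reference to rule 'bogus_rule'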
@ -37,7 +36,7 @@ class ParserGenerator:
callmakervisitor: GrammarVisitor callmakervisitor: GrammarVisitor
def __init__(self, grammar: Grammar, tokens: Dict[int, str], file: Optional[IO[Text]]): def __init__(self, grammar: Grammar, tokens: Set[str], file: Optional[IO[Text]]):
self.grammar = grammar self.grammar = grammar
self.tokens = tokens self.tokens = tokens
self.rules = grammar.rules self.rules = grammar.rules
@ -133,13 +132,22 @@ class ParserGenerator:
self.counter += 1 self.counter += 1
extra_function_name = f"_loop0_{self.counter}" extra_function_name = f"_loop0_{self.counter}"
extra_function_alt = Alt( extra_function_alt = Alt(
[NamedItem(None, node.separator), NamedItem("elem", node.node)], action="elem", [NamedItem(None, node.separator), NamedItem("elem", node.node)],
action="elem",
) )
self.todo[extra_function_name] = Rule( self.todo[extra_function_name] = Rule(
extra_function_name, None, Rhs([extra_function_alt]), extra_function_name,
None,
Rhs([extra_function_alt]),
)
alt = Alt(
[NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name))],
)
self.todo[name] = Rule(
name,
None,
Rhs([alt]),
) )
alt = Alt([NamedItem("elem", node.node), NamedItem("seq", NameLeaf(extra_function_name))],)
self.todo[name] = Rule(name, None, Rhs([alt]),)
return name return name
def dedupe(self, name: str) -> str: def dedupe(self, name: str) -> str:
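The gather rewrite above turns a rule such as start: ','.NAME+ NEWLINE into two synthesized rules: a loop that matches (separator, element) pairs and keeps only the element, plus a wrapper whose action prepends the first element to that sequence. One way to see the result (a sketch assuming Tools/peg_generator is importable; the numeric suffixes depend on the generator's counter):

import io

from pegen.grammar_parser import GeneratedParser as GrammarParser
from pegen.python_generator import PythonParserGenerator
from pegen.testutil import parse_string

grammar = parse_string("start: ','.NAME+ NEWLINE\n", GrammarParser)
out = io.StringIO()
PythonParserGenerator(grammar, out).generate("<string>")
print(out.getvalue())   # includes the synthesized _gather_* and _loop0_* methods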

View file

@ -1,25 +1,28 @@
import ast
import re
import token import token
from typing import Any, Dict, Optional, IO, Text, Tuple from typing import IO, Any, Dict, Optional, Sequence, Set, Text, Tuple
from pegen import grammar
from pegen.grammar import ( from pegen.grammar import (
Alt,
Cut, Cut,
Forced,
Gather,
GrammarVisitor, GrammarVisitor,
NameLeaf, Group,
StringLeaf,
Rhs,
NamedItem,
Lookahead, Lookahead,
PositiveLookahead, NamedItem,
NameLeaf,
NegativeLookahead, NegativeLookahead,
Opt, Opt,
PositiveLookahead,
Repeat0, Repeat0,
Repeat1, Repeat1,
Gather, Rhs,
Group,
Rule, Rule,
Alt, StringLeaf,
) )
from pegen import grammar
from pegen.parser_generator import ParserGenerator from pegen.parser_generator import ParserGenerator
MODULE_PREFIX = """\ MODULE_PREFIX = """\
@ -27,7 +30,10 @@ MODULE_PREFIX = """\
# @generated by pegen from {filename} # @generated by pegen from {filename}
import ast import ast
from typing import Optional, Any import sys
import tokenize
from typing import Any, Optional
from pegen.parser import memoize, memoize_left_rec, logger, Parser from pegen.parser import memoize, memoize_left_rec, logger, Parser
@ -36,25 +42,81 @@ MODULE_SUFFIX = """
if __name__ == '__main__': if __name__ == '__main__':
from pegen.parser import simple_parser_main from pegen.parser import simple_parser_main
simple_parser_main(GeneratedParser) simple_parser_main({class_name})
""" """
class InvalidNodeVisitor(GrammarVisitor):
def visit_NameLeaf(self, node: NameLeaf) -> bool:
name = node.value
return name.startswith("invalid")
def visit_StringLeaf(self, node: StringLeaf) -> bool:
return False
def visit_NamedItem(self, node: NamedItem) -> bool:
return self.visit(node.item)
def visit_Rhs(self, node: Rhs) -> bool:
return any(self.visit(alt) for alt in node.alts)
def visit_Alt(self, node: Alt) -> bool:
return any(self.visit(item) for item in node.items)
def lookahead_call_helper(self, node: Lookahead) -> bool:
return self.visit(node.node)
def visit_PositiveLookahead(self, node: PositiveLookahead) -> bool:
return self.lookahead_call_helper(node)
def visit_NegativeLookahead(self, node: NegativeLookahead) -> bool:
return self.lookahead_call_helper(node)
def visit_Opt(self, node: Opt) -> bool:
return self.visit(node.node)
def visit_Repeat(self, node: Repeat0) -> bool:
return self.visit(node.node)
def visit_Gather(self, node: Gather) -> bool:
return self.visit(node.node)
def visit_Group(self, node: Group) -> bool:
return self.visit(node.rhs)
def visit_Cut(self, node: Cut) -> bool:
return False
def visit_Forced(self, node: Forced) -> bool:
return self.visit(node.node)
class PythonCallMakerVisitor(GrammarVisitor): class PythonCallMakerVisitor(GrammarVisitor):
def __init__(self, parser_generator: ParserGenerator): def __init__(self, parser_generator: ParserGenerator):
self.gen = parser_generator self.gen = parser_generator
self.cache: Dict[Any, Any] = {} self.cache: Dict[Any, Any] = {}
self.keywords: Set[str] = set()
self.soft_keywords: Set[str] = set()
def visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]: def visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]:
name = node.value name = node.value
if name in ("NAME", "NUMBER", "STRING", "OP"): if name == "SOFT_KEYWORD":
return "soft_keyword", "self.soft_keyword()"
if name in ("NAME", "NUMBER", "STRING", "OP", "TYPE_COMMENT"):
name = name.lower() name = name.lower()
return name, f"self.{name}()" return name, f"self.{name}()"
if name in ("NEWLINE", "DEDENT", "INDENT", "ENDMARKER", "ASYNC", "AWAIT"): if name in ("NEWLINE", "DEDENT", "INDENT", "ENDMARKER", "ASYNC", "AWAIT"):
return name.lower(), f"self.expect({name!r})" # Avoid using names that can be Python keywords
return "_" + name.lower(), f"self.expect({name!r})"
return name, f"self.{name}()" return name, f"self.{name}()"
def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]: def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]:
val = ast.literal_eval(node.value)
if re.match(r"[a-zA-Z_]\w*\Z", val): # This is a keyword
if node.value.endswith("'"):
self.keywords.add(val)
else:
self.soft_keywords.add(val)
return "literal", f"self.expect({node.value})" return "literal", f"self.expect({node.value})"
def visit_Rhs(self, node: Rhs) -> Tuple[Optional[str], str]: def visit_Rhs(self, node: Rhs) -> Tuple[Optional[str], str]:
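In visit_StringLeaf above, the quote style of an identifier-like string leaf decides which bucket it lands in: single-quoted literals are recorded as hard keywords, double-quoted ones as soft keywords. A small check of that convention (a sketch assuming pegen is importable):

import io

from pegen.grammar_parser import GeneratedParser as GrammarParser
from pegen.python_generator import PythonParserGenerator
from pegen.testutil import parse_string

source = """
start: "match" NAME NEWLINE | 'pass' NEWLINE
"""
gen = PythonParserGenerator(parse_string(source, GrammarParser), io.StringIO())
gen.generate("<string>")
print(gen.callmakervisitor.keywords)        # {'pass'}
print(gen.callmakervisitor.soft_keywords)   # {'match'}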
@ -125,16 +187,36 @@ class PythonCallMakerVisitor(GrammarVisitor):
def visit_Cut(self, node: Cut) -> Tuple[str, str]: def visit_Cut(self, node: Cut) -> Tuple[str, str]:
return "cut", "True" return "cut", "True"
def visit_Forced(self, node: Forced) -> Tuple[str, str]:
if isinstance(node.node, Group):
_, val = self.visit(node.node.rhs)
return "forced", f"self.expect_forced({val}, '''({node.node.rhs!s})''')"
else:
return (
"forced",
f"self.expect_forced(self.expect({node.node.value}), {node.node.value!r})",
)
class PythonParserGenerator(ParserGenerator, GrammarVisitor): class PythonParserGenerator(ParserGenerator, GrammarVisitor):
def __init__( def __init__(
self, self,
grammar: grammar.Grammar, grammar: grammar.Grammar,
file: Optional[IO[Text]], file: Optional[IO[Text]],
tokens: Dict[int, str] = token.tok_name, tokens: Set[str] = set(token.tok_name.values()),
location_formatting: Optional[str] = None,
unreachable_formatting: Optional[str] = None,
): ):
tokens.add("SOFT_KEYWORD")
super().__init__(grammar, tokens, file) super().__init__(grammar, tokens, file)
self.callmakervisitor = PythonCallMakerVisitor(self) self.callmakervisitor: PythonCallMakerVisitor = PythonCallMakerVisitor(self)
self.invalidvisitor: InvalidNodeVisitor = InvalidNodeVisitor()
self.unreachable_formatting = unreachable_formatting or "None # pragma: no cover"
self.location_formatting = (
location_formatting
or "lineno=start_lineno, col_offset=start_col_offset, "
"end_lineno=end_lineno, end_col_offset=end_col_offset"
)
def generate(self, filename: str) -> None: def generate(self, filename: str) -> None:
header = self.grammar.metas.get("header", MODULE_PREFIX) header = self.grammar.metas.get("header", MODULE_PREFIX)
@ -142,18 +224,35 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
self.print(header.rstrip("\n").format(filename=filename)) self.print(header.rstrip("\n").format(filename=filename))
subheader = self.grammar.metas.get("subheader", "") subheader = self.grammar.metas.get("subheader", "")
if subheader: if subheader:
self.print(subheader.format(filename=filename)) self.print(subheader)
self.print("class GeneratedParser(Parser):") cls_name = self.grammar.metas.get("class", "GeneratedParser")
self.print("# Keywords and soft keywords are listed at the end of the parser definition.")
self.print(f"class {cls_name}(Parser):")
while self.todo: while self.todo:
for rulename, rule in list(self.todo.items()): for rulename, rule in list(self.todo.items()):
del self.todo[rulename] del self.todo[rulename]
self.print() self.print()
with self.indent(): with self.indent():
self.visit(rule) self.visit(rule)
trailer = self.grammar.metas.get("trailer", MODULE_SUFFIX)
self.print()
with self.indent():
self.print(f"KEYWORDS = {tuple(self.callmakervisitor.keywords)}")
self.print(f"SOFT_KEYWORDS = {tuple(self.callmakervisitor.soft_keywords)}")
trailer = self.grammar.metas.get("trailer", MODULE_SUFFIX.format(class_name=cls_name))
if trailer is not None: if trailer is not None:
self.print(trailer.rstrip("\n")) self.print(trailer.rstrip("\n"))
def alts_uses_locations(self, alts: Sequence[Alt]) -> bool:
for alt in alts:
if alt.action and "LOCATIONS" in alt.action:
return True
for n in alt.items:
if isinstance(n.item, Group) and self.alts_uses_locations(n.item.rhs.alts):
return True
return False
def visit_Rule(self, node: Rule) -> None: def visit_Rule(self, node: Rule) -> None:
is_loop = node.is_loop() is_loop = node.is_loop()
is_gather = node.is_gather() is_gather = node.is_gather()
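alts_uses_locations() decides whether a rule needs the start-position bookkeeping, and visit_Alt later swaps the LOCATIONS placeholder in an action for the configured location_formatting string. A sketch using a hypothetical grammar action (assuming pegen is importable):

import io

from pegen.grammar_parser import GeneratedParser as GrammarParser
from pegen.python_generator import PythonParserGenerator
from pegen.testutil import parse_string

source = "start: n=NAME NEWLINE { ast.Name(id=n.string, ctx=ast.Load(), LOCATIONS) }\n"
grammar = parse_string(source, GrammarParser)
out = io.StringIO()
PythonParserGenerator(grammar, out).generate("<string>")
generated = out.getvalue()
print("start_lineno, start_col_offset = tok.start" in generated)         # True
print("lineno=start_lineno, col_offset=start_col_offset" in generated)   # True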
@ -173,7 +272,10 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
self.print(f"# {node.name}: {rhs}") self.print(f"# {node.name}: {rhs}")
if node.nullable: if node.nullable:
self.print(f"# nullable={node.nullable}") self.print(f"# nullable={node.nullable}")
self.print("mark = self.mark()") self.print("mark = self._mark()")
if self.alts_uses_locations(node.rhs.alts):
self.print("tok = self._tokenizer.peek()")
self.print("start_lineno, start_col_offset = tok.start")
if is_loop: if is_loop:
self.print("children = []") self.print("children = []")
self.visit(rhs, is_loop=is_loop, is_gather=is_gather) self.visit(rhs, is_loop=is_loop, is_gather=is_gather)
@ -200,8 +302,10 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
self.visit(alt, is_loop=is_loop, is_gather=is_gather) self.visit(alt, is_loop=is_loop, is_gather=is_gather)
def visit_Alt(self, node: Alt, is_loop: bool, is_gather: bool) -> None: def visit_Alt(self, node: Alt, is_loop: bool, is_gather: bool) -> None:
has_cut = any(isinstance(item.item, Cut) for item in node.items)
with self.local_variable_context(): with self.local_variable_context():
self.print("cut = False") # TODO: Only if needed. if has_cut:
self.print("cut = False")
if is_loop: if is_loop:
self.print("while (") self.print("while (")
else: else:
@ -227,12 +331,26 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
f"[{self.local_variable_names[0]}] + {self.local_variable_names[1]}" f"[{self.local_variable_names[0]}] + {self.local_variable_names[1]}"
) )
else: else:
action = f"[{', '.join(self.local_variable_names)}]" if self.invalidvisitor.visit(node):
action = "UNREACHABLE"
elif len(self.local_variable_names) == 1:
action = f"{self.local_variable_names[0]}"
else:
action = f"[{', '.join(self.local_variable_names)}]"
elif "LOCATIONS" in action:
self.print("tok = self._tokenizer.get_last_non_whitespace_token()")
self.print("end_lineno, end_col_offset = tok.end")
action = action.replace("LOCATIONS", self.location_formatting)
if is_loop: if is_loop:
self.print(f"children.append({action})") self.print(f"children.append({action})")
self.print(f"mark = self.mark()") self.print(f"mark = self._mark()")
else: else:
if "UNREACHABLE" in action:
action = action.replace("UNREACHABLE", self.unreachable_formatting)
self.print(f"return {action}") self.print(f"return {action}")
self.print("self.reset(mark)")
self.print("self._reset(mark)")
# Skip remaining alternatives if a cut was reached. # Skip remaining alternatives if a cut was reached.
self.print("if cut: return None") # TODO: Only if needed. if has_cut:
self.print("if cut: return None")

View file

@ -18,7 +18,7 @@ from pegen.python_generator import PythonParserGenerator
from pegen.tokenizer import Tokenizer from pegen.tokenizer import Tokenizer
ALL_TOKENS = token.tok_name ALL_TOKENS = token.tok_name
EXACT_TOKENS = token.EXACT_TOKEN_TYPES # type: ignore EXACT_TOKENS = token.EXACT_TOKEN_TYPES
NON_EXACT_TOKENS = { NON_EXACT_TOKENS = {
name for index, name in token.tok_name.items() if index not in EXACT_TOKENS.values() name for index, name in token.tok_name.items() if index not in EXACT_TOKENS.values()
} }
@ -42,7 +42,7 @@ def run_parser(file: IO[bytes], parser_class: Type[Parser], *, verbose: bool = F
parser = parser_class(tokenizer, verbose=verbose) parser = parser_class(tokenizer, verbose=verbose)
result = parser.start() result = parser.start()
if result is None: if result is None:
raise parser.make_syntax_error() raise parser.make_syntax_error("invalid syntax")
return result return result
@ -66,6 +66,7 @@ def import_file(full_name: str, path: str) -> Any:
"""Import a python module from a path""" """Import a python module from a path"""
spec = importlib.util.spec_from_file_location(full_name, path) spec = importlib.util.spec_from_file_location(full_name, path)
assert spec is not None
mod = importlib.util.module_from_spec(spec) mod = importlib.util.module_from_spec(spec)
# We assume this is not None and has an exec_module() method. # We assume this is not None and has an exec_module() method.

View file

@ -1,10 +1,10 @@
import token import token
import tokenize import tokenize
from typing import List, Iterator from typing import Dict, Iterator, List
Mark = int # NewType('Mark', int) Mark = int # NewType('Mark', int)
exact_token_types = token.EXACT_TOKEN_TYPES # type: ignore exact_token_types = token.EXACT_TOKEN_TYPES
def shorttok(tok: tokenize.TokenInfo) -> str: def shorttok(tok: tokenize.TokenInfo) -> str:
@ -19,26 +19,22 @@ class Tokenizer:
_tokens: List[tokenize.TokenInfo] _tokens: List[tokenize.TokenInfo]
def __init__(self, tokengen: Iterator[tokenize.TokenInfo], *, verbose: bool = False): def __init__(
self, tokengen: Iterator[tokenize.TokenInfo], *, path: str = "", verbose: bool = False
):
self._tokengen = tokengen self._tokengen = tokengen
self._tokens = [] self._tokens = []
self._index = 0 self._index = 0
self._verbose = verbose self._verbose = verbose
self._lines: Dict[int, str] = {}
self._path = path
if verbose: if verbose:
self.report(False, False) self.report(False, False)
def getnext(self) -> tokenize.TokenInfo: def getnext(self) -> tokenize.TokenInfo:
"""Return the next token and updates the index.""" """Return the next token and updates the index."""
cached = True cached = self._index != len(self._tokens)
while self._index == len(self._tokens): tok = self.peek()
tok = next(self._tokengen)
if tok.type in (tokenize.NL, tokenize.COMMENT):
continue
if tok.type == token.ERRORTOKEN and tok.string.isspace():
continue
self._tokens.append(tok)
cached = False
tok = self._tokens[self._index]
self._index += 1 self._index += 1
if self._verbose: if self._verbose:
self.report(cached, False) self.report(cached, False)
@ -52,7 +48,15 @@ class Tokenizer:
continue continue
if tok.type == token.ERRORTOKEN and tok.string.isspace(): if tok.type == token.ERRORTOKEN and tok.string.isspace():
continue continue
if (
tok.type == token.NEWLINE
and self._tokens
and self._tokens[-1].type == token.NEWLINE
):
continue
self._tokens.append(tok) self._tokens.append(tok)
if not self._path:
self._lines[tok.start[0]] = tok.line
return self._tokens[self._index] return self._tokens[self._index]
def diagnose(self) -> tokenize.TokenInfo: def diagnose(self) -> tokenize.TokenInfo:
@ -60,6 +64,34 @@ class Tokenizer:
self.getnext() self.getnext()
return self._tokens[-1] return self._tokens[-1]
def get_last_non_whitespace_token(self) -> tokenize.TokenInfo:
for tok in reversed(self._tokens[: self._index]):
if tok.type != tokenize.ENDMARKER and (
tok.type < tokenize.NEWLINE or tok.type > tokenize.DEDENT
):
break
return tok
def get_lines(self, line_numbers: List[int]) -> List[str]:
"""Retrieve source lines corresponding to line numbers."""
if self._lines:
lines = self._lines
else:
n = len(line_numbers)
lines = {}
count = 0
seen = 0
with open(self._path) as f:
for l in f:
count += 1
if count in line_numbers:
seen += 1
lines[count] = l
if seen == n:
break
return [lines[n] for n in line_numbers]
def mark(self) -> Mark: def mark(self) -> Mark:
return self._index return self._index
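The new helpers support richer error reporting: get_last_non_whitespace_token() anchors end locations, and get_lines() fetches only the source lines a diagnostic needs, either from the cached lines (when no path was given) or by scanning the file at the stored path. A small sketch of the cached-lines case:

import io
import tokenize

from pegen.tokenizer import Tokenizer

source = "x = 1\ny = 2\nz = 3\n"
tokenizer = Tokenizer(tokenize.generate_tokens(io.StringIO(source).readline))
while tokenizer.peek().type != tokenize.ENDMARKER:   # lines are cached as tokens are read
    tokenizer.getnext()
print(tokenizer.get_lines([1, 3]))   # ['x = 1\n', 'z = 3\n']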

View file

@ -1,51 +1,45 @@
from typing import Optional
from pegen import grammar from pegen import grammar
from pegen.grammar import ( from pegen.grammar import (
Alt, Alt,
Cut,
Gather,
GrammarVisitor, GrammarVisitor,
Group,
Lookahead,
NamedItem,
NameLeaf,
NegativeLookahead,
Opt,
PositiveLookahead,
Repeat0,
Repeat1,
Rhs,
Rule, Rule,
StringLeaf, Rhs,
) )
class ValidationError(Exception): class ValidationError(Exception):
pass pass
class GrammarValidator(GrammarVisitor):
def __init__(self, grammar: grammar.Grammar):
self.grammar = grammar
self.rulename = None
def validate_rule(self, rulename: str, node: Rule): class GrammarValidator(GrammarVisitor):
def __init__(self, grammar: grammar.Grammar) -> None:
self.grammar = grammar
self.rulename: Optional[str] = None
def validate_rule(self, rulename: str, node: Rule) -> None:
self.rulename = rulename self.rulename = rulename
self.visit(node) self.visit(node)
self.rulename = None self.rulename = None
class SubRuleValidator(GrammarValidator): class SubRuleValidator(GrammarValidator):
def visit_Rhs(self, node: Rule): def visit_Rhs(self, node: Rhs) -> None:
for index, alt in enumerate(node.alts): for index, alt in enumerate(node.alts):
alts_to_consider = node.alts[index+1:] alts_to_consider = node.alts[index + 1 :]
for other_alt in alts_to_consider: for other_alt in alts_to_consider:
self.check_intersection(alt, other_alt) self.check_intersection(alt, other_alt)
def check_intersection(self, first_alt: Alt, second_alt: Alt) -> bool: def check_intersection(self, first_alt: Alt, second_alt: Alt) -> None:
if str(second_alt).startswith(str(first_alt)): if str(second_alt).startswith(str(first_alt)):
raise ValidationError( raise ValidationError(
f"In {self.rulename} there is an alternative that will " f"In {self.rulename} there is an alternative that will "
f"never be visited:\n{second_alt}") f"never be visited:\n{second_alt}"
)
def validate_grammar(the_grammar: grammar.Grammar):
def validate_grammar(the_grammar: grammar.Grammar) -> None:
for validator_cls in GrammarValidator.__subclasses__(): for validator_cls in GrammarValidator.__subclasses__():
validator = validator_cls(the_grammar) validator = validator_cls(the_grammar)
for rule_name, rule in the_grammar.rules.items(): for rule_name, rule in the_grammar.rules.items():

View file

@ -76,7 +76,10 @@ def run_benchmark_stdlib(subcommand):
parse_directory( parse_directory(
"../../Lib", "../../Lib",
verbose=False, verbose=False,
excluded_files=["*/bad*", "*/lib2to3/tests/data/*",], excluded_files=[
"*/bad*",
"*/lib2to3/tests/data/*",
],
short=True, short=True,
mode=modes[subcommand], mode=modes[subcommand],
) )

View file

@ -8,7 +8,8 @@ from typing import Dict, Any
from urllib.request import urlretrieve from urllib.request import urlretrieve
argparser = argparse.ArgumentParser( argparser = argparse.ArgumentParser(
prog="download_pypi_packages", description="Helper program to download PyPI packages", prog="download_pypi_packages",
description="Helper program to download PyPI packages",
) )
argparser.add_argument( argparser.add_argument(
"-n", "--number", type=int, default=100, help="Number of packages to download" "-n", "--number", type=int, default=100, help="Number of packages to download"

View file

@ -41,7 +41,10 @@ from pegen.grammar import (
Rhs, Rhs,
) )
argparser = argparse.ArgumentParser(prog="graph_grammar", description="Graph a grammar tree",) argparser = argparse.ArgumentParser(
prog="graph_grammar",
description="Graph a grammar tree",
)
argparser.add_argument( argparser.add_argument(
"-s", "-s",
"--start", "--start",

View file

@ -19,7 +19,8 @@ from scripts import test_parse_directory
HERE = pathlib.Path(__file__).resolve().parent HERE = pathlib.Path(__file__).resolve().parent
argparser = argparse.ArgumentParser( argparser = argparse.ArgumentParser(
prog="test_pypi_packages", description="Helper program to test parsing PyPI packages", prog="test_pypi_packages",
description="Helper program to test parsing PyPI packages",
) )
argparser.add_argument( argparser.add_argument(
"-t", "--tree", action="count", help="Compare parse tree to official AST", default=0 "-t", "--tree", action="count", help="Compare parse tree to official AST", default=0