[3.13] gh-138281: Run ruff on Tools/peg_generator (GH-138282) (#138472)

(cherry picked from commit 0d1f4e1639)

Co-authored-by: Adam Turner <9087854+AA-Turner@users.noreply.github.com>
This commit is contained in:
sobolevn 2025-09-03 21:46:52 +03:00 committed by GitHub
parent e8280e3f91
commit 826d4ebfb5
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
16 changed files with 247 additions and 226 deletions

View file

@ -14,6 +14,10 @@ repos:
name: Run Ruff (lint) on Argument Clinic
args: [--exit-non-zero-on-fix, --config=Tools/clinic/.ruff.toml]
files: ^Tools/clinic/|Lib/test/test_clinic.py
- id: ruff
name: Run Ruff (lint) on Tools/peg_generator/
args: [--exit-non-zero-on-fix, --config=Tools/peg_generator/.ruff.toml]
files: ^Tools/peg_generator/
- id: ruff-format
name: Run Ruff (format) on Doc/
args: [--check]

View file

@ -0,0 +1,27 @@
# Ruff lint configuration for the PEG generator tooling.
# Settings not overridden here come from the project-wide config it extends.
extend = "../../.ruff.toml" # Inherit the project-wide settings
extend-exclude = [
# Generated files:
"Tools/peg_generator/pegen/grammar_parser.py",
]
[lint]
# Rule families and individual rules that are enforced.
select = [
"F", # pyflakes
"I", # isort
"UP", # pyupgrade
"RUF100", # Ban unused `# noqa` comments
"PGH004", # Ban blanket `# noqa` comments (only ignore specific error codes)
]
# Rules that would otherwise be pulled in by `select` but are deliberately turned off.
ignore = [
# UP038 asks for PEP-604 unions rather than tuples in isinstance() checks.
# Disabled because that makes code slower and more verbose.
# See https://github.com/astral-sh/ruff/issues/7871.
"UP038",
]
# Rules that are still reported but never auto-fixed.
unfixable = [
# The autofixes sometimes do the wrong things for these;
# it's better to have to manually look at the code and see how it needs fixing
"F841", # Detects unused local variables
"F601", # Detects dict literals that repeat the same literal key
"F602", # Detects dict literals that repeat the same variable as a key
]

View file

@ -10,7 +10,6 @@ import sys
import time
import token
import traceback
from typing import Tuple
from pegen.grammar import Grammar
from pegen.parser import Parser
@ -21,7 +20,7 @@ from pegen.validator import validate_grammar
def generate_c_code(
args: argparse.Namespace,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
) -> tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
from pegen.build import build_c_parser_and_generator
verbose = args.verbose
@ -50,7 +49,7 @@ def generate_c_code(
def generate_python_code(
args: argparse.Namespace,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
) -> tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
from pegen.build import build_python_parser_and_generator
verbose = args.verbose
@ -185,7 +184,7 @@ def main() -> None:
if __name__ == "__main__":
if sys.version_info < (3, 8):
if sys.version_info < (3, 8): # noqa: UP036
print("ERROR: using pegen requires at least Python 3.8!", file=sys.stderr)
sys.exit(1)
main()

View file

@ -6,7 +6,7 @@ always fail. We rely on string comparison of the base classes instead.
TODO: Remove the above-described hack.
"""
from typing import Any, Optional, Tuple
from typing import Any
def ast_dump(
@ -14,9 +14,9 @@ def ast_dump(
annotate_fields: bool = True,
include_attributes: bool = False,
*,
indent: Optional[str] = None,
indent: str | None = None,
) -> str:
def _format(node: Any, level: int = 0) -> Tuple[str, bool]:
def _format(node: Any, level: int = 0) -> tuple[str, bool]:
if indent is not None:
level += 1
prefix = "\n" + indent * level
@ -41,7 +41,7 @@ def ast_dump(
value, simple = _format(value, level)
allsimple = allsimple and simple
if keywords:
args.append("%s=%s" % (name, value))
args.append(f"{name}={value}")
else:
args.append(value)
if include_attributes and node._attributes:
@ -54,16 +54,16 @@ def ast_dump(
continue
value, simple = _format(value, level)
allsimple = allsimple and simple
args.append("%s=%s" % (name, value))
args.append(f"{name}={value}")
if allsimple and len(args) <= 3:
return "%s(%s)" % (node.__class__.__name__, ", ".join(args)), not args
return "%s(%s%s)" % (node.__class__.__name__, prefix, sep.join(args)), False
return "{}({})".format(node.__class__.__name__, ", ".join(args)), not args
return f"{node.__class__.__name__}({prefix}{sep.join(args)})", False
elif isinstance(node, list):
if not node:
return "[]", True
return "[%s%s]" % (prefix, sep.join(_format(x, level)[0] for x in node)), False
return f"[{prefix}{sep.join(_format(x, level)[0] for x in node)}]", False
return repr(node), True
if all(cls.__name__ != "AST" for cls in node.__class__.__mro__):
raise TypeError("expected AST, got %r" % node.__class__.__name__)
raise TypeError(f"expected AST, got {node.__class__.__name__!r}")
return _format(node)[0]

View file

@ -6,7 +6,7 @@ import sys
import sysconfig
import tempfile
import tokenize
from typing import IO, Any, Dict, List, Optional, Set, Tuple
from typing import IO, Any
from pegen.c_generator import CParserGenerator
from pegen.grammar import Grammar
@ -18,11 +18,11 @@ from pegen.tokenizer import Tokenizer
MOD_DIR = pathlib.Path(__file__).resolve().parent
TokenDefinitions = Tuple[Dict[int, str], Dict[str, int], Set[str]]
TokenDefinitions = tuple[dict[int, str], dict[str, int], set[str]]
Incomplete = Any # TODO: install `types-setuptools` and remove this alias
def get_extra_flags(compiler_flags: str, compiler_py_flags_nodist: str) -> List[str]:
def get_extra_flags(compiler_flags: str, compiler_py_flags_nodist: str) -> list[str]:
flags = sysconfig.get_config_var(compiler_flags)
py_flags_nodist = sysconfig.get_config_var(compiler_py_flags_nodist)
if flags is None or py_flags_nodist is None:
@ -71,11 +71,11 @@ def fixup_build_ext(cmd: Incomplete) -> None:
def compile_c_extension(
generated_source_path: str,
build_dir: Optional[str] = None,
build_dir: str | None = None,
verbose: bool = False,
keep_asserts: bool = True,
disable_optimization: bool = False,
library_dir: Optional[str] = None,
library_dir: str | None = None,
) -> pathlib.Path:
"""Compile the generated source for a parser generator into an extension module.
@ -93,8 +93,7 @@ def compile_c_extension(
"""
import setuptools.command.build_ext
import setuptools.logging
from setuptools import Extension, Distribution
from setuptools import Distribution, Extension
try:
from setuptools.modified import newer_group
except ImportError:
@ -242,7 +241,7 @@ def compile_c_extension(
def build_parser(
grammar_file: str, verbose_tokenizer: bool = False, verbose_parser: bool = False
) -> Tuple[Grammar, Parser, Tokenizer]:
) -> tuple[Grammar, Parser, Tokenizer]:
with open(grammar_file) as file:
tokenizer = Tokenizer(tokenize.generate_tokens(file.readline), verbose=verbose_tokenizer)
parser = GrammarParser(tokenizer, verbose=verbose_parser)
@ -293,7 +292,7 @@ def build_c_generator(
keep_asserts_in_extension: bool = True,
skip_actions: bool = False,
) -> ParserGenerator:
with open(tokens_file, "r") as tok_file:
with open(tokens_file) as tok_file:
all_tokens, exact_tok, non_exact_tok = generate_token_definitions(tok_file)
with open(output_file, "w") as file:
gen: ParserGenerator = CParserGenerator(
@ -334,7 +333,7 @@ def build_c_parser_and_generator(
verbose_c_extension: bool = False,
keep_asserts_in_extension: bool = True,
skip_actions: bool = False,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
) -> tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
"""Generate rules, C parser, tokenizer, parser generator for a given grammar
Args:
@ -374,7 +373,7 @@ def build_python_parser_and_generator(
verbose_tokenizer: bool = False,
verbose_parser: bool = False,
skip_actions: bool = False,
) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
) -> tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
"""Generate rules, python parser, tokenizer, parser generator for a given grammar
Args:

View file

@ -3,7 +3,7 @@ import os.path
import re
from dataclasses import dataclass, field
from enum import Enum
from typing import IO, Any, Dict, List, Optional, Set, Text, Tuple
from typing import IO, Any
from pegen import grammar
from pegen.grammar import (
@ -86,13 +86,13 @@ BASE_NODETYPES = {
@dataclass
class FunctionCall:
function: str
arguments: List[Any] = field(default_factory=list)
assigned_variable: Optional[str] = None
assigned_variable_type: Optional[str] = None
return_type: Optional[str] = None
nodetype: Optional[NodeTypes] = None
arguments: list[Any] = field(default_factory=list)
assigned_variable: str | None = None
assigned_variable_type: str | None = None
return_type: str | None = None
nodetype: NodeTypes | None = None
force_true: bool = False
comment: Optional[str] = None
comment: str | None = None
def __str__(self) -> str:
parts = []
@ -124,14 +124,14 @@ class CCallMakerVisitor(GrammarVisitor):
def __init__(
self,
parser_generator: ParserGenerator,
exact_tokens: Dict[str, int],
non_exact_tokens: Set[str],
exact_tokens: dict[str, int],
non_exact_tokens: set[str],
):
self.gen = parser_generator
self.exact_tokens = exact_tokens
self.non_exact_tokens = non_exact_tokens
self.cache: Dict[Any, FunctionCall] = {}
self.cleanup_statements: List[str] = []
self.cache: dict[Any, FunctionCall] = {}
self.cleanup_statements: list[str] = []
def keyword_helper(self, keyword: str) -> FunctionCall:
return FunctionCall(
@ -167,7 +167,7 @@ class CCallMakerVisitor(GrammarVisitor):
)
return FunctionCall(
assigned_variable=f"{name.lower()}_var",
function=f"_PyPegen_expect_token",
function="_PyPegen_expect_token",
arguments=["p", name],
nodetype=NodeTypes.GENERIC_TOKEN,
return_type="Token *",
@ -199,7 +199,7 @@ class CCallMakerVisitor(GrammarVisitor):
type = self.exact_tokens[val]
return FunctionCall(
assigned_variable="_literal",
function=f"_PyPegen_expect_token",
function="_PyPegen_expect_token",
arguments=["p", type],
nodetype=NodeTypes.GENERIC_TOKEN,
return_type="Token *",
@ -286,7 +286,7 @@ class CCallMakerVisitor(GrammarVisitor):
type = self.exact_tokens[val]
return FunctionCall(
assigned_variable="_literal",
function=f"_PyPegen_expect_forced_token",
function="_PyPegen_expect_forced_token",
arguments=["p", type, f'"{val}"'],
nodetype=NodeTypes.GENERIC_TOKEN,
return_type="Token *",
@ -298,7 +298,7 @@ class CCallMakerVisitor(GrammarVisitor):
call.comment = None
return FunctionCall(
assigned_variable="_literal",
function=f"_PyPegen_expect_forced_result",
function="_PyPegen_expect_forced_result",
arguments=["p", str(call), f'"{node.node.rhs!s}"'],
return_type="void *",
comment=f"forced_token=({node.node.rhs!s})",
@ -374,10 +374,10 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
def __init__(
self,
grammar: grammar.Grammar,
tokens: Dict[int, str],
exact_tokens: Dict[str, int],
non_exact_tokens: Set[str],
file: Optional[IO[Text]],
tokens: dict[int, str],
exact_tokens: dict[str, int],
non_exact_tokens: set[str],
file: IO[str] | None,
debug: bool = False,
skip_actions: bool = False,
):
@ -388,7 +388,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
self._varname_counter = 0
self.debug = debug
self.skip_actions = skip_actions
self.cleanup_statements: List[str] = []
self.cleanup_statements: list[str] = []
def add_level(self) -> None:
self.print("if (p->level++ == MAXSTACK) {")
@ -424,12 +424,12 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
self.print(f"if ({error_var}) {{")
with self.indent():
self.print(f"goto {goto_target};")
self.print(f"}}")
self.print("}")
def out_of_memory_return(
self,
expr: str,
cleanup_code: Optional[str] = None,
cleanup_code: str | None = None,
) -> None:
self.print(f"if ({expr}) {{")
with self.indent():
@ -438,14 +438,14 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
self.print("p->error_indicator = 1;")
self.print("PyErr_NoMemory();")
self.add_return("NULL")
self.print(f"}}")
self.print("}")
def out_of_memory_goto(self, expr: str, goto_target: str) -> None:
self.print(f"if ({expr}) {{")
with self.indent():
self.print("PyErr_NoMemory();")
self.print(f"goto {goto_target};")
self.print(f"}}")
self.print("}")
def generate(self, filename: str) -> None:
self.collect_rules()
@ -488,8 +488,8 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
if trailer:
self.print(trailer.rstrip("\n") % dict(mode=mode, modulename=modulename))
def _group_keywords_by_length(self) -> Dict[int, List[Tuple[str, int]]]:
groups: Dict[int, List[Tuple[str, int]]] = {}
def _group_keywords_by_length(self) -> dict[int, list[tuple[str, int]]]:
groups: dict[int, list[tuple[str, int]]] = {}
for keyword_str, keyword_type in self.keywords.items():
length = len(keyword_str)
if length in groups:
@ -581,10 +581,10 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
self.print("if (_raw == NULL || p->mark <= _resmark)")
with self.indent():
self.print("break;")
self.print(f"_resmark = p->mark;")
self.print("_resmark = p->mark;")
self.print("_res = _raw;")
self.print("}")
self.print(f"p->mark = _resmark;")
self.print("p->mark = _resmark;")
self.add_return("_res")
self.print("}")
self.print(f"static {result_type}")
@ -640,7 +640,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
if memoize:
self.print("int _start_mark = p->mark;")
self.print("void **_children = PyMem_Malloc(sizeof(void *));")
self.out_of_memory_return(f"!_children")
self.out_of_memory_return("!_children")
self.print("Py_ssize_t _children_capacity = 1;")
self.print("Py_ssize_t _n = 0;")
if any(alt.action and "EXTRA" in alt.action for alt in rhs.alts):
@ -658,7 +658,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
self.add_return("NULL")
self.print("}")
self.print("asdl_seq *_seq = (asdl_seq*)_Py_asdl_generic_seq_new(_n, p->arena);")
self.out_of_memory_return(f"!_seq", cleanup_code="PyMem_Free(_children);")
self.out_of_memory_return("!_seq", cleanup_code="PyMem_Free(_children);")
self.print("for (int i = 0; i < _n; i++) asdl_seq_SET_UNTYPED(_seq, i, _children[i]);")
self.print("PyMem_Free(_children);")
if memoize and node.name:
@ -712,7 +712,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
self.print(call)
def visit_Rhs(
self, node: Rhs, is_loop: bool, is_gather: bool, rulename: Optional[str]
self, node: Rhs, is_loop: bool, is_gather: bool, rulename: str | None
) -> None:
if is_loop:
assert len(node.alts) == 1
@ -731,7 +731,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
self.visit(item)
self.print(")")
def emit_action(self, node: Alt, cleanup_code: Optional[str] = None) -> None:
def emit_action(self, node: Alt, cleanup_code: str | None = None) -> None:
self.print(f"_res = {node.action};")
self.print("if (_res == NULL && PyErr_Occurred()) {")
@ -773,7 +773,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
def emit_dummy_action(self) -> None:
self.print("_res = _PyPegen_dummy_name(p);")
def handle_alt_normal(self, node: Alt, is_gather: bool, rulename: Optional[str]) -> None:
def handle_alt_normal(self, node: Alt, is_gather: bool, rulename: str | None) -> None:
self.join_conditions(keyword="if", node=node)
self.print("{")
# We have parsed successfully all the conditions for the option.
@ -793,10 +793,10 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
self.emit_default_action(is_gather, node)
# As the current option has parsed correctly, do not continue with the rest.
self.print(f"goto done;")
self.print("goto done;")
self.print("}")
def handle_alt_loop(self, node: Alt, is_gather: bool, rulename: Optional[str]) -> None:
def handle_alt_loop(self, node: Alt, is_gather: bool, rulename: str | None) -> None:
# Condition of the main body of the alternative
self.join_conditions(keyword="while", node=node)
self.print("{")
@ -820,7 +820,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
self.print(
"void **_new_children = PyMem_Realloc(_children, _children_capacity*sizeof(void *));"
)
self.out_of_memory_return(f"!_new_children", cleanup_code="PyMem_Free(_children);")
self.out_of_memory_return("!_new_children", cleanup_code="PyMem_Free(_children);")
self.print("_children = _new_children;")
self.print("}")
self.print("_children[_n++] = _res;")
@ -828,7 +828,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
self.print("}")
def visit_Alt(
self, node: Alt, is_loop: bool, is_gather: bool, rulename: Optional[str]
self, node: Alt, is_loop: bool, is_gather: bool, rulename: str | None
) -> None:
if len(node.items) == 1 and str(node.items[0]).startswith("invalid_"):
self.print(f"if (p->call_invalid_rules) {{ // {node}")
@ -872,7 +872,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
self.print("}")
self.print("}")
def collect_vars(self, node: Alt) -> Dict[Optional[str], Optional[str]]:
def collect_vars(self, node: Alt) -> dict[str | None, str | None]:
types = {}
with self.local_variable_context():
for item in node.items:
@ -880,7 +880,7 @@ class CParserGenerator(ParserGenerator, GrammarVisitor):
types[name] = type
return types
def add_var(self, node: NamedItem) -> Tuple[Optional[str], Optional[str]]:
def add_var(self, node: NamedItem) -> tuple[str | None, str | None]:
call = self.callmakervisitor.generate_call(node.item)
name = node.name if node.name else call.assigned_variable
if name is not None:

View file

@ -3,7 +3,6 @@
import argparse
import pprint
import sys
from typing import Dict, Set
from pegen.build import build_parser
from pegen.grammar import (
@ -33,20 +32,20 @@ argparser.add_argument("grammar_file", help="The grammar file")
class FirstSetCalculator(GrammarVisitor):
def __init__(self, rules: Dict[str, Rule]) -> None:
def __init__(self, rules: dict[str, Rule]) -> None:
self.rules = rules
self.nullables = compute_nullables(rules)
self.first_sets: Dict[str, Set[str]] = dict()
self.in_process: Set[str] = set()
self.first_sets: dict[str, set[str]] = dict()
self.in_process: set[str] = set()
def calculate(self) -> Dict[str, Set[str]]:
def calculate(self) -> dict[str, set[str]]:
for name, rule in self.rules.items():
self.visit(rule)
return self.first_sets
def visit_Alt(self, item: Alt) -> Set[str]:
result: Set[str] = set()
to_remove: Set[str] = set()
def visit_Alt(self, item: Alt) -> set[str]:
result: set[str] = set()
to_remove: set[str] = set()
for other in item.items:
new_terminals = self.visit(other)
if isinstance(other.item, NegativeLookahead):
@ -71,34 +70,34 @@ class FirstSetCalculator(GrammarVisitor):
return result
def visit_Cut(self, item: Cut) -> Set[str]:
def visit_Cut(self, item: Cut) -> set[str]:
return set()
def visit_Group(self, item: Group) -> Set[str]:
def visit_Group(self, item: Group) -> set[str]:
return self.visit(item.rhs)
def visit_PositiveLookahead(self, item: Lookahead) -> Set[str]:
def visit_PositiveLookahead(self, item: Lookahead) -> set[str]:
return self.visit(item.node)
def visit_NegativeLookahead(self, item: NegativeLookahead) -> Set[str]:
def visit_NegativeLookahead(self, item: NegativeLookahead) -> set[str]:
return self.visit(item.node)
def visit_NamedItem(self, item: NamedItem) -> Set[str]:
def visit_NamedItem(self, item: NamedItem) -> set[str]:
return self.visit(item.item)
def visit_Opt(self, item: Opt) -> Set[str]:
def visit_Opt(self, item: Opt) -> set[str]:
return self.visit(item.node)
def visit_Gather(self, item: Gather) -> Set[str]:
def visit_Gather(self, item: Gather) -> set[str]:
return self.visit(item.node)
def visit_Repeat0(self, item: Repeat0) -> Set[str]:
def visit_Repeat0(self, item: Repeat0) -> set[str]:
return self.visit(item.node)
def visit_Repeat1(self, item: Repeat1) -> Set[str]:
def visit_Repeat1(self, item: Repeat1) -> set[str]:
return self.visit(item.node)
def visit_NameLeaf(self, item: NameLeaf) -> Set[str]:
def visit_NameLeaf(self, item: NameLeaf) -> set[str]:
if item.value not in self.rules:
return {item.value}
@ -110,16 +109,16 @@ class FirstSetCalculator(GrammarVisitor):
return self.first_sets[item.value]
def visit_StringLeaf(self, item: StringLeaf) -> Set[str]:
def visit_StringLeaf(self, item: StringLeaf) -> set[str]:
return {item.value}
def visit_Rhs(self, item: Rhs) -> Set[str]:
result: Set[str] = set()
def visit_Rhs(self, item: Rhs) -> set[str]:
result: set[str] = set()
for alt in item.alts:
result |= self.visit(alt)
return result
def visit_Rule(self, item: Rule) -> Set[str]:
def visit_Rule(self, item: Rule) -> set[str]:
if item.name in self.in_process:
return set()
elif item.name not in self.first_sets:
@ -138,7 +137,7 @@ def main() -> None:
try:
grammar, parser, tokenizer = build_parser(args.grammar_file)
except Exception as err:
print("ERROR: Failed to parse grammar file", file=sys.stderr)
print("ERROR: Failed to parse grammar file", err, file=sys.stderr)
sys.exit(1)
firs_sets = FirstSetCalculator(grammar.rules).calculate()

View file

@ -1,15 +1,7 @@
from __future__ import annotations
from typing import (
AbstractSet,
Any,
Iterable,
Iterator,
List,
Optional,
Tuple,
Union,
)
from collections.abc import Iterable, Iterator, Set
from typing import Any
class GrammarError(Exception):
@ -34,7 +26,7 @@ class GrammarVisitor:
class Grammar:
def __init__(self, rules: Iterable[Rule], metas: Iterable[Tuple[str, Optional[str]]]):
def __init__(self, rules: Iterable[Rule], metas: Iterable[tuple[str, str | None]]):
# Check if there are repeated rules in "rules"
all_rules = {}
for rule in rules:
@ -66,7 +58,7 @@ SIMPLE_STR = True
class Rule:
def __init__(self, name: str, type: Optional[str], rhs: Rhs, memo: Optional[object] = None):
def __init__(self, name: str, type: str | None, rhs: Rhs, memo: object | None = None):
self.name = name
self.type = type
self.rhs = rhs
@ -141,9 +133,9 @@ class StringLeaf(Leaf):
class Rhs:
def __init__(self, alts: List[Alt]):
def __init__(self, alts: list[Alt]):
self.alts = alts
self.memo: Optional[Tuple[Optional[str], str]] = None
self.memo: tuple[str | None, str] | None = None
def __str__(self) -> str:
return " | ".join(str(alt) for alt in self.alts)
@ -151,7 +143,7 @@ class Rhs:
def __repr__(self) -> str:
return f"Rhs({self.alts!r})"
def __iter__(self) -> Iterator[List[Alt]]:
def __iter__(self) -> Iterator[list[Alt]]:
yield self.alts
@property
@ -165,7 +157,7 @@ class Rhs:
class Alt:
def __init__(self, items: List[NamedItem], *, icut: int = -1, action: Optional[str] = None):
def __init__(self, items: list[NamedItem], *, icut: int = -1, action: str | None = None):
self.items = items
self.icut = icut
self.action = action
@ -185,12 +177,12 @@ class Alt:
args.append(f"action={self.action!r}")
return f"Alt({', '.join(args)})"
def __iter__(self) -> Iterator[List[NamedItem]]:
def __iter__(self) -> Iterator[list[NamedItem]]:
yield self.items
class NamedItem:
def __init__(self, name: Optional[str], item: Item, type: Optional[str] = None):
def __init__(self, name: str | None, item: Item, type: str | None = None):
self.name = name
self.item = item
self.type = type
@ -271,7 +263,7 @@ class Repeat:
def __init__(self, node: Plain):
self.node = node
self.memo: Optional[Tuple[Optional[str], str]] = None
self.memo: tuple[str | None, str] | None = None
def __iter__(self) -> Iterator[Plain]:
yield self.node
@ -334,12 +326,12 @@ class Cut:
pass
def __repr__(self) -> str:
return f"Cut()"
return "Cut()"
def __str__(self) -> str:
return f"~"
return "~"
def __iter__(self) -> Iterator[Tuple[str, str]]:
def __iter__(self) -> Iterator[tuple[str, str]]:
yield from ()
def __eq__(self, other: object) -> bool:
@ -347,15 +339,15 @@ class Cut:
return NotImplemented
return True
def initial_names(self) -> AbstractSet[str]:
def initial_names(self) -> Set[str]:
return set()
Plain = Union[Leaf, Group]
Item = Union[Plain, Opt, Repeat, Forced, Lookahead, Rhs, Cut]
RuleName = Tuple[str, Optional[str]]
MetaTuple = Tuple[str, Optional[str]]
MetaList = List[MetaTuple]
RuleList = List[Rule]
NamedItemList = List[NamedItem]
LookaheadOrCut = Union[Lookahead, Cut]
Plain = Leaf | Group
Item = Plain | Opt | Repeat | Forced | Lookahead | Rhs | Cut
RuleName = tuple[str, str | None]
MetaTuple = tuple[str, str | None]
MetaList = list[MetaTuple]
RuleList = list[Rule]
NamedItemList = list[NamedItem]
LookaheadOrCut = Lookahead | Cut

View file

@ -1,6 +1,7 @@
import argparse
import sys
from typing import Any, Callable, Iterator
from collections.abc import Callable, Iterator
from typing import Any
from pegen.build import build_parser
from pegen.grammar import Grammar, Rule
@ -52,7 +53,7 @@ def main() -> None:
try:
grammar, parser, tokenizer = build_parser(args.filename)
except Exception as err:
print("ERROR: Failed to parse grammar file", file=sys.stderr)
print("ERROR: Failed to parse grammar file", err, file=sys.stderr)
sys.exit(1)
visitor = ASTGrammarPrinter()

View file

@ -5,7 +5,13 @@ import token
import tokenize
import traceback
from abc import abstractmethod
from typing import Any, Callable, ClassVar, Dict, Optional, Tuple, Type, TypeVar, cast
from collections.abc import Callable
from typing import (
Any,
ClassVar,
TypeVar,
cast,
)
from pegen.tokenizer import Mark, Tokenizer, exact_token_types
@ -74,12 +80,12 @@ def memoize(method: F) -> F:
def memoize_left_rec(
method: Callable[["Parser"], Optional[T]]
) -> Callable[["Parser"], Optional[T]]:
method: Callable[["Parser"], T | None]
) -> Callable[["Parser"], T | None]:
"""Memoize a left-recursive symbol method."""
method_name = method.__name__
def memoize_left_rec_wrapper(self: "Parser") -> Optional[T]:
def memoize_left_rec_wrapper(self: "Parser") -> T | None:
mark = self._mark()
key = mark, method_name, ()
# Fast path: cache hit, and not verbose.
@ -160,15 +166,15 @@ def memoize_left_rec(
class Parser:
"""Parsing base class."""
KEYWORDS: ClassVar[Tuple[str, ...]]
KEYWORDS: ClassVar[tuple[str, ...]]
SOFT_KEYWORDS: ClassVar[Tuple[str, ...]]
SOFT_KEYWORDS: ClassVar[tuple[str, ...]]
def __init__(self, tokenizer: Tokenizer, *, verbose: bool = False):
self._tokenizer = tokenizer
self._verbose = verbose
self._level = 0
self._cache: Dict[Tuple[Mark, str, Tuple[Any, ...]], Tuple[Any, Mark]] = {}
self._cache: dict[tuple[Mark, str, tuple[Any, ...]], tuple[Any, Mark]] = {}
# Integer tracking whether we are in a left recursive rule or not. Can be useful
# for error reporting.
self.in_recursive_rule = 0
@ -185,49 +191,49 @@ class Parser:
return f"{tok.start[0]}.{tok.start[1]}: {token.tok_name[tok.type]}:{tok.string!r}"
@memoize
def name(self) -> Optional[tokenize.TokenInfo]:
def name(self) -> tokenize.TokenInfo | None:
tok = self._tokenizer.peek()
if tok.type == token.NAME and tok.string not in self.KEYWORDS:
return self._tokenizer.getnext()
return None
@memoize
def number(self) -> Optional[tokenize.TokenInfo]:
def number(self) -> tokenize.TokenInfo | None:
tok = self._tokenizer.peek()
if tok.type == token.NUMBER:
return self._tokenizer.getnext()
return None
@memoize
def string(self) -> Optional[tokenize.TokenInfo]:
def string(self) -> tokenize.TokenInfo | None:
tok = self._tokenizer.peek()
if tok.type == token.STRING:
return self._tokenizer.getnext()
return None
@memoize
def op(self) -> Optional[tokenize.TokenInfo]:
def op(self) -> tokenize.TokenInfo | None:
tok = self._tokenizer.peek()
if tok.type == token.OP:
return self._tokenizer.getnext()
return None
@memoize
def type_comment(self) -> Optional[tokenize.TokenInfo]:
def type_comment(self) -> tokenize.TokenInfo | None:
tok = self._tokenizer.peek()
if tok.type == token.TYPE_COMMENT:
return self._tokenizer.getnext()
return None
@memoize
def soft_keyword(self) -> Optional[tokenize.TokenInfo]:
def soft_keyword(self) -> tokenize.TokenInfo | None:
tok = self._tokenizer.peek()
if tok.type == token.NAME and tok.string in self.SOFT_KEYWORDS:
return self._tokenizer.getnext()
return None
@memoize
def expect(self, type: str) -> Optional[tokenize.TokenInfo]:
def expect(self, type: str) -> tokenize.TokenInfo | None:
tok = self._tokenizer.peek()
if tok.string == type:
return self._tokenizer.getnext()
@ -241,7 +247,7 @@ class Parser:
return self._tokenizer.getnext()
return None
def expect_forced(self, res: Any, expectation: str) -> Optional[tokenize.TokenInfo]:
def expect_forced(self, res: Any, expectation: str) -> tokenize.TokenInfo | None:
if res is None:
raise self.make_syntax_error(f"expected {expectation}")
return res
@ -263,7 +269,7 @@ class Parser:
return SyntaxError(message, (filename, tok.start[0], 1 + tok.start[1], tok.line))
def simple_parser_main(parser_class: Type[Parser]) -> None:
def simple_parser_main(parser_class: type[Parser]) -> None:
argparser = argparse.ArgumentParser()
argparser.add_argument(
"-v",
@ -300,7 +306,7 @@ def simple_parser_main(parser_class: Type[Parser]) -> None:
endpos = 0
else:
endpos = file.tell()
except IOError:
except OSError:
endpos = 0
finally:
if file is not sys.stdin:

View file

@ -2,19 +2,10 @@ import ast
import contextlib
import re
from abc import abstractmethod
from collections.abc import Iterable, Iterator, Set
from typing import (
IO,
AbstractSet,
Any,
Dict,
Iterable,
Iterator,
List,
Optional,
Set,
Text,
Tuple,
Union,
)
from pegen import sccutils
@ -43,7 +34,7 @@ from pegen.grammar import (
class RuleCollectorVisitor(GrammarVisitor):
"""Visitor that invokes a provided callmaker visitor with just the NamedItem nodes"""
def __init__(self, rules: Dict[str, Rule], callmakervisitor: GrammarVisitor) -> None:
def __init__(self, rules: dict[str, Rule], callmakervisitor: GrammarVisitor) -> None:
self.rules = rules
self.callmaker = callmakervisitor
@ -57,7 +48,7 @@ class RuleCollectorVisitor(GrammarVisitor):
class KeywordCollectorVisitor(GrammarVisitor):
"""Visitor that collects all the keywords and soft keywords in the Grammar"""
def __init__(self, gen: "ParserGenerator", keywords: Dict[str, int], soft_keywords: Set[str]):
def __init__(self, gen: "ParserGenerator", keywords: dict[str, int], soft_keywords: set[str]):
self.generator = gen
self.keywords = keywords
self.soft_keywords = soft_keywords
@ -72,7 +63,7 @@ class KeywordCollectorVisitor(GrammarVisitor):
class RuleCheckingVisitor(GrammarVisitor):
def __init__(self, rules: Dict[str, Rule], tokens: Set[str]):
def __init__(self, rules: dict[str, Rule], tokens: set[str]):
self.rules = rules
self.tokens = tokens
@ -89,11 +80,11 @@ class RuleCheckingVisitor(GrammarVisitor):
class ParserGenerator:
callmakervisitor: GrammarVisitor
def __init__(self, grammar: Grammar, tokens: Set[str], file: Optional[IO[Text]]):
def __init__(self, grammar: Grammar, tokens: set[str], file: IO[str] | None):
self.grammar = grammar
self.tokens = tokens
self.keywords: Dict[str, int] = {}
self.soft_keywords: Set[str] = set()
self.keywords: dict[str, int] = {}
self.soft_keywords: set[str] = set()
self.rules = grammar.rules
self.validate_rule_names()
if "trailer" not in grammar.metas and "start" not in self.rules:
@ -106,8 +97,8 @@ class ParserGenerator:
self.first_graph, self.first_sccs = compute_left_recursives(self.rules)
self.counter = 0 # For name_rule()/name_loop()
self.keyword_counter = 499 # For keyword_type()
self.all_rules: Dict[str, Rule] = self.rules.copy() # Rules + temporal rules
self._local_variable_stack: List[List[str]] = []
self.all_rules: dict[str, Rule] = self.rules.copy() # Rules + temporal rules
self._local_variable_stack: list[list[str]] = []
def validate_rule_names(self) -> None:
for rule in self.rules:
@ -121,7 +112,7 @@ class ParserGenerator:
self._local_variable_stack.pop()
@property
def local_variable_names(self) -> List[str]:
def local_variable_names(self) -> list[str]:
return self._local_variable_stack[-1]
@abstractmethod
@ -153,7 +144,7 @@ class ParserGenerator:
keyword_collector.visit(rule)
rule_collector = RuleCollectorVisitor(self.rules, self.callmakervisitor)
done: Set[str] = set()
done: set[str] = set()
while True:
computed_rules = list(self.all_rules)
todo = [i for i in computed_rules if i not in done]
@ -218,10 +209,10 @@ class ParserGenerator:
class NullableVisitor(GrammarVisitor):
def __init__(self, rules: Dict[str, Rule]) -> None:
def __init__(self, rules: dict[str, Rule]) -> None:
self.rules = rules
self.visited: Set[Any] = set()
self.nullables: Set[Union[Rule, NamedItem]] = set()
self.visited: set[Any] = set()
self.nullables: set[Rule | NamedItem] = set()
def visit_Rule(self, rule: Rule) -> bool:
if rule in self.visited:
@ -283,7 +274,7 @@ class NullableVisitor(GrammarVisitor):
return not node.value
def compute_nullables(rules: Dict[str, Rule]) -> Set[Any]:
def compute_nullables(rules: dict[str, Rule]) -> set[Any]:
"""Compute which rules in a grammar are nullable.
Thanks to TatSu (tatsu/leftrec.py) for inspiration.
@ -295,12 +286,12 @@ def compute_nullables(rules: Dict[str, Rule]) -> Set[Any]:
class InitialNamesVisitor(GrammarVisitor):
def __init__(self, rules: Dict[str, Rule]) -> None:
def __init__(self, rules: dict[str, Rule]) -> None:
self.rules = rules
self.nullables = compute_nullables(rules)
def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> Set[Any]:
names: Set[str] = set()
def generic_visit(self, node: Iterable[Any], *args: Any, **kwargs: Any) -> set[Any]:
names: set[str] = set()
for value in node:
if isinstance(value, list):
for item in value:
@ -309,33 +300,33 @@ class InitialNamesVisitor(GrammarVisitor):
names |= self.visit(value, *args, **kwargs)
return names
def visit_Alt(self, alt: Alt) -> Set[Any]:
names: Set[str] = set()
def visit_Alt(self, alt: Alt) -> set[Any]:
names: set[str] = set()
for item in alt.items:
names |= self.visit(item)
if item not in self.nullables:
break
return names
def visit_Forced(self, force: Forced) -> Set[Any]:
def visit_Forced(self, force: Forced) -> set[Any]:
return set()
def visit_LookAhead(self, lookahead: Lookahead) -> Set[Any]:
def visit_LookAhead(self, lookahead: Lookahead) -> set[Any]:
return set()
def visit_Cut(self, cut: Cut) -> Set[Any]:
def visit_Cut(self, cut: Cut) -> set[Any]:
return set()
def visit_NameLeaf(self, node: NameLeaf) -> Set[Any]:
def visit_NameLeaf(self, node: NameLeaf) -> set[Any]:
return {node.value}
def visit_StringLeaf(self, node: StringLeaf) -> Set[Any]:
def visit_StringLeaf(self, node: StringLeaf) -> set[Any]:
return set()
def compute_left_recursives(
rules: Dict[str, Rule]
) -> Tuple[Dict[str, AbstractSet[str]], List[AbstractSet[str]]]:
rules: dict[str, Rule]
) -> tuple[dict[str, Set[str]], list[Set[str]]]:
graph = make_first_graph(rules)
sccs = list(sccutils.strongly_connected_components(graph.keys(), graph))
for scc in sccs:
@ -363,7 +354,7 @@ def compute_left_recursives(
return graph, sccs
def make_first_graph(rules: Dict[str, Rule]) -> Dict[str, AbstractSet[str]]:
def make_first_graph(rules: dict[str, Rule]) -> dict[str, Set[str]]:
"""Compute the graph of left-invocations.
There's an edge from A to B if A may invoke B at its initial
@ -373,7 +364,7 @@ def make_first_graph(rules: Dict[str, Rule]) -> Dict[str, AbstractSet[str]]:
"""
initial_name_visitor = InitialNamesVisitor(rules)
graph = {}
vertices: Set[str] = set()
vertices: set[str] = set()
for rulename, rhs in rules.items():
graph[rulename] = names = initial_name_visitor.visit(rhs)
vertices |= names

View file

@ -1,6 +1,7 @@
import os.path
import token
from typing import IO, Any, Dict, Optional, Sequence, Set, Text, Tuple
from collections.abc import Sequence
from typing import IO, Any
from pegen import grammar
from pegen.grammar import (
@ -74,10 +75,10 @@ class InvalidNodeVisitor(GrammarVisitor):
def visit_Opt(self, node: Opt) -> bool:
return self.visit(node.node)
def visit_Repeat(self, node: Repeat0) -> Tuple[str, str]:
def visit_Repeat(self, node: Repeat0) -> tuple[str, str]:
return self.visit(node.node)
def visit_Gather(self, node: Gather) -> Tuple[str, str]:
def visit_Gather(self, node: Gather) -> tuple[str, str]:
return self.visit(node.node)
def visit_Group(self, node: Group) -> bool:
@ -93,9 +94,9 @@ class InvalidNodeVisitor(GrammarVisitor):
class PythonCallMakerVisitor(GrammarVisitor):
def __init__(self, parser_generator: ParserGenerator):
self.gen = parser_generator
self.cache: Dict[Any, Any] = {}
self.cache: dict[Any, Any] = {}
def visit_NameLeaf(self, node: NameLeaf) -> Tuple[Optional[str], str]:
def visit_NameLeaf(self, node: NameLeaf) -> tuple[str | None, str]:
name = node.value
if name == "SOFT_KEYWORD":
return "soft_keyword", "self.soft_keyword()"
@ -107,10 +108,10 @@ class PythonCallMakerVisitor(GrammarVisitor):
return "_" + name.lower(), f"self.expect({name!r})"
return name, f"self.{name}()"
def visit_StringLeaf(self, node: StringLeaf) -> Tuple[str, str]:
def visit_StringLeaf(self, node: StringLeaf) -> tuple[str, str]:
return "literal", f"self.expect({node.value})"
def visit_Rhs(self, node: Rhs) -> Tuple[Optional[str], str]:
def visit_Rhs(self, node: Rhs) -> tuple[str | None, str]:
if node in self.cache:
return self.cache[node]
if len(node.alts) == 1 and len(node.alts[0].items) == 1:
@ -120,28 +121,28 @@ class PythonCallMakerVisitor(GrammarVisitor):
self.cache[node] = name, f"self.{name}()"
return self.cache[node]
def visit_NamedItem(self, node: NamedItem) -> Tuple[Optional[str], str]:
def visit_NamedItem(self, node: NamedItem) -> tuple[str | None, str]:
name, call = self.visit(node.item)
if node.name:
name = node.name
return name, call
def lookahead_call_helper(self, node: Lookahead) -> Tuple[str, str]:
def lookahead_call_helper(self, node: Lookahead) -> tuple[str, str]:
name, call = self.visit(node.node)
head, tail = call.split("(", 1)
assert tail[-1] == ")"
tail = tail[:-1]
return head, tail
def visit_PositiveLookahead(self, node: PositiveLookahead) -> Tuple[None, str]:
def visit_PositiveLookahead(self, node: PositiveLookahead) -> tuple[None, str]:
head, tail = self.lookahead_call_helper(node)
return None, f"self.positive_lookahead({head}, {tail})"
def visit_NegativeLookahead(self, node: NegativeLookahead) -> Tuple[None, str]:
def visit_NegativeLookahead(self, node: NegativeLookahead) -> tuple[None, str]:
head, tail = self.lookahead_call_helper(node)
return None, f"self.negative_lookahead({head}, {tail})"
def visit_Opt(self, node: Opt) -> Tuple[str, str]:
def visit_Opt(self, node: Opt) -> tuple[str, str]:
name, call = self.visit(node.node)
# Note trailing comma (the call may already have one comma
# at the end, for example when rules have both repeat0 and optional
@ -151,34 +152,34 @@ class PythonCallMakerVisitor(GrammarVisitor):
else:
return "opt", f"{call},"
def visit_Repeat0(self, node: Repeat0) -> Tuple[str, str]:
def visit_Repeat0(self, node: Repeat0) -> tuple[str, str]:
if node in self.cache:
return self.cache[node]
name = self.gen.artificial_rule_from_repeat(node.node, False)
self.cache[node] = name, f"self.{name}()," # Also a trailing comma!
return self.cache[node]
def visit_Repeat1(self, node: Repeat1) -> Tuple[str, str]:
def visit_Repeat1(self, node: Repeat1) -> tuple[str, str]:
if node in self.cache:
return self.cache[node]
name = self.gen.artificial_rule_from_repeat(node.node, True)
self.cache[node] = name, f"self.{name}()" # But no trailing comma here!
return self.cache[node]
def visit_Gather(self, node: Gather) -> Tuple[str, str]:
def visit_Gather(self, node: Gather) -> tuple[str, str]:
if node in self.cache:
return self.cache[node]
name = self.gen.artifical_rule_from_gather(node)
self.cache[node] = name, f"self.{name}()" # No trailing comma here either!
return self.cache[node]
def visit_Group(self, node: Group) -> Tuple[Optional[str], str]:
def visit_Group(self, node: Group) -> tuple[str | None, str]:
return self.visit(node.rhs)
def visit_Cut(self, node: Cut) -> Tuple[str, str]:
def visit_Cut(self, node: Cut) -> tuple[str, str]:
return "cut", "True"
def visit_Forced(self, node: Forced) -> Tuple[str, str]:
def visit_Forced(self, node: Forced) -> tuple[str, str]:
if isinstance(node.node, Group):
_, val = self.visit(node.node.rhs)
return "forced", f"self.expect_forced({val}, '''({node.node.rhs!s})''')"
@ -193,10 +194,10 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
def __init__(
self,
grammar: grammar.Grammar,
file: Optional[IO[Text]],
tokens: Set[str] = set(token.tok_name.values()),
location_formatting: Optional[str] = None,
unreachable_formatting: Optional[str] = None,
file: IO[str] | None,
tokens: set[str] = set(token.tok_name.values()),
location_formatting: str | None = None,
unreachable_formatting: str | None = None,
):
tokens.add("SOFT_KEYWORD")
super().__init__(grammar, tokens, file)
@ -333,7 +334,7 @@ class PythonParserGenerator(ParserGenerator, GrammarVisitor):
if is_loop:
self.print(f"children.append({action})")
self.print(f"mark = self._mark()")
self.print("mark = self._mark()")
else:
if "UNREACHABLE" in action:
action = action.replace("UNREACHABLE", self.unreachable_formatting)

View file

@ -1,11 +1,11 @@
# Adapted from mypy (mypy/build.py) under the MIT license.
from typing import *
from collections.abc import Iterable, Iterator, Set
def strongly_connected_components(
vertices: AbstractSet[str], edges: Dict[str, AbstractSet[str]]
) -> Iterator[AbstractSet[str]]:
vertices: Set[str], edges: dict[str, Set[str]]
) -> Iterator[Set[str]]:
"""Compute Strongly Connected Components of a directed graph.
Args:
@ -20,12 +20,12 @@ def strongly_connected_components(
From https://code.activestate.com/recipes/578507-strongly-connected-components-of-a-directed-graph/.
"""
identified: Set[str] = set()
stack: List[str] = []
index: Dict[str, int] = {}
boundaries: List[int] = []
identified: set[str] = set()
stack: list[str] = []
index: dict[str, int] = {}
boundaries: list[int] = []
def dfs(v: str) -> Iterator[Set[str]]:
def dfs(v: str) -> Iterator[set[str]]:
index[v] = len(stack)
stack.append(v)
boundaries.append(index[v])
@ -50,8 +50,8 @@ def strongly_connected_components(
def topsort(
data: Dict[AbstractSet[str], Set[AbstractSet[str]]]
) -> Iterable[AbstractSet[AbstractSet[str]]]:
data: dict[Set[str], set[Set[str]]]
) -> Iterable[Set[Set[str]]]:
"""Topological sort.
Args:
@ -94,12 +94,12 @@ def topsort(
break
yield ready
data = {item: (dep - ready) for item, dep in data.items() if item not in ready}
assert not data, "A cyclic dependency exists amongst %r" % data
assert not data, f"A cyclic dependency exists amongst {data}"
def find_cycles_in_scc(
graph: Dict[str, AbstractSet[str]], scc: AbstractSet[str], start: str
) -> Iterable[List[str]]:
graph: dict[str, Set[str]], scc: Set[str], start: str
) -> Iterable[list[str]]:
"""Find cycles in SCC emanating from start.
Yields lists of the form ['A', 'B', 'C', 'A'], which means there's
@ -117,7 +117,7 @@ def find_cycles_in_scc(
assert start in graph
# Recursive helper that yields cycles.
def dfs(node: str, path: List[str]) -> Iterator[List[str]]:
def dfs(node: str, path: list[str]) -> Iterator[list[str]]:
if node in path:
yield path + [node]
return

View file

@ -6,7 +6,7 @@ import sys
import textwrap
import token
import tokenize
from typing import IO, Any, Dict, Final, Optional, Type, cast
from typing import IO, Any, Final, cast
from pegen.build import compile_c_extension
from pegen.c_generator import CParserGenerator
@ -23,19 +23,19 @@ NON_EXACT_TOKENS = {
}
def generate_parser(grammar: Grammar) -> Type[Parser]:
def generate_parser(grammar: Grammar) -> type[Parser]:
# Generate a parser.
out = io.StringIO()
genr = PythonParserGenerator(grammar, out)
genr.generate("<string>")
# Load the generated parser class.
ns: Dict[str, Any] = {}
ns: dict[str, Any] = {}
exec(out.getvalue(), ns)
return ns["GeneratedParser"]
def run_parser(file: IO[bytes], parser_class: Type[Parser], *, verbose: bool = False) -> Any:
def run_parser(file: IO[bytes], parser_class: type[Parser], *, verbose: bool = False) -> Any:
# Run a parser on a file (stream).
tokenizer = Tokenizer(tokenize.generate_tokens(file.readline)) # type: ignore[arg-type] # typeshed issue #3515
parser = parser_class(tokenizer, verbose=verbose)
@ -46,7 +46,7 @@ def run_parser(file: IO[bytes], parser_class: Type[Parser], *, verbose: bool = F
def parse_string(
source: str, parser_class: Type[Parser], *, dedent: bool = True, verbose: bool = False
source: str, parser_class: type[Parser], *, dedent: bool = True, verbose: bool = False
) -> Any:
# Run the parser on a string.
if dedent:
@ -55,7 +55,7 @@ def parse_string(
return run_parser(file, parser_class, verbose=verbose) # type: ignore[arg-type] # typeshed issue #3515
def make_parser(source: str) -> Type[Parser]:
def make_parser(source: str) -> type[Parser]:
# Combine parse_string() and generate_parser().
grammar = parse_string(source, GrammarParser)
return generate_parser(grammar)
@ -86,7 +86,7 @@ def generate_parser_c_extension(
grammar: Grammar,
path: pathlib.PurePath,
debug: bool = False,
library_dir: Optional[str] = None,
library_dir: str | None = None,
) -> Any:
"""Generate a parser c extension for the given grammar in the given path

View file

@ -1,6 +1,6 @@
import token
import tokenize
from typing import Dict, Iterator, List
from collections.abc import Iterator
Mark = int # NewType('Mark', int)
@ -8,7 +8,11 @@ exact_token_types = token.EXACT_TOKEN_TYPES
def shorttok(tok: tokenize.TokenInfo) -> str:
return "%-25.25s" % f"{tok.start[0]}.{tok.start[1]}: {token.tok_name[tok.type]}:{tok.string!r}"
formatted = (
f"{tok.start[0]}.{tok.start[1]}: "
f"{token.tok_name[tok.type]}:{tok.string!r}"
)
return f"{formatted:<25.25}"
class Tokenizer:
@ -17,7 +21,7 @@ class Tokenizer:
This is pretty tied to Python's syntax.
"""
_tokens: List[tokenize.TokenInfo]
_tokens: list[tokenize.TokenInfo]
def __init__(
self, tokengen: Iterator[tokenize.TokenInfo], *, path: str = "", verbose: bool = False
@ -26,7 +30,7 @@ class Tokenizer:
self._tokens = []
self._index = 0
self._verbose = verbose
self._lines: Dict[int, str] = {}
self._lines: dict[int, str] = {}
self._path = path
if verbose:
self.report(False, False)
@ -72,7 +76,7 @@ class Tokenizer:
break
return tok
def get_lines(self, line_numbers: List[int]) -> List[str]:
def get_lines(self, line_numbers: list[int]) -> list[str]:
"""Retrieve source lines corresponding to line numbers."""
if self._lines:
lines = self._lines

View file

@ -1,5 +1,3 @@
from typing import Optional
from pegen import grammar
from pegen.grammar import Alt, GrammarVisitor, Rhs, Rule
@ -11,7 +9,7 @@ class ValidationError(Exception):
class GrammarValidator(GrammarVisitor):
def __init__(self, grammar: grammar.Grammar) -> None:
self.grammar = grammar
self.rulename: Optional[str] = None
self.rulename: str | None = None
def validate_rule(self, rulename: str, node: Rule) -> None:
self.rulename = rulename