Mirror of https://github.com/python/cpython.git (synced 2025-08-04 08:59:19 +00:00)
bpo-40334: Refactor peg_generator to receive a Tokens file when building c code (GH-19745)
This commit is contained in:
parent 3d53d8756f
commit 5b9f4988c9

8 changed files with 220 additions and 91 deletions
@@ -3,8 +3,9 @@ import shutil
 import tokenize
 import sys
 import sysconfig
+import itertools
 
-from typing import Optional, Tuple
+from typing import Optional, Tuple, List, IO, Iterator, Set, Dict
 
 from pegen.c_generator import CParserGenerator
 from pegen.grammar import Grammar
@@ -17,12 +18,12 @@ from pegen.tokenizer import Tokenizer
 MOD_DIR = pathlib.Path(__file__).parent
 
 
-def get_extra_flags(compiler_flags, compiler_py_flags_nodist):
+def get_extra_flags(compiler_flags: str, compiler_py_flags_nodist: str) -> List[str]:
     flags = sysconfig.get_config_var(compiler_flags)
     py_flags_nodist = sysconfig.get_config_var(compiler_py_flags_nodist)
     if flags is None or py_flags_nodist is None:
         return []
-    return f'{flags} {py_flags_nodist}'.split()
+    return f"{flags} {py_flags_nodist}".split()
 
 
 def compile_c_extension(
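Note on get_extra_flags: it concatenates two sysconfig variables and splits the result into an argv-style list, returning [] when either variable is unset. A minimal sketch of the behavior with made-up flag values (real values depend on how the interpreter was configured):

    # Hypothetical values; on a real build they come from
    # sysconfig.get_config_var("CFLAGS") and
    # sysconfig.get_config_var("PY_CFLAGS_NODIST"), either of which
    # may be None, in which case the function returns [].
    flags = "-O2 -g"
    py_flags_nodist = "-std=c99"
    print(f"{flags} {py_flags_nodist}".split())  # ['-O2', '-g', '-std=c99']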
@@ -45,15 +46,15 @@ def compile_c_extension(
     from distutils.core import Distribution, Extension
     from distutils.command.clean import clean  # type: ignore
     from distutils.command.build_ext import build_ext  # type: ignore
-    from distutils.tests.support import fixup_build_ext
+    from distutils.tests.support import fixup_build_ext  # type: ignore
 
     if verbose:
         distutils.log.set_verbosity(distutils.log.DEBUG)
 
     source_file_path = pathlib.Path(generated_source_path)
     extension_name = source_file_path.stem
-    extra_compile_args = get_extra_flags('CFLAGS', 'PY_CFLAGS_NODIST')
-    extra_link_args = get_extra_flags('LDFLAGS', 'PY_LDFLAGS_NODIST')
+    extra_compile_args = get_extra_flags("CFLAGS", "PY_CFLAGS_NODIST")
+    extra_link_args = get_extra_flags("LDFLAGS", "PY_LDFLAGS_NODIST")
     if keep_asserts:
         extra_compile_args.append("-UNDEBUG")
     extension = [
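Note on keep_asserts: appending "-UNDEBUG" after the inherited release flags keeps C assert() active even when CFLAGS contains -DNDEBUG, because gcc and clang honor the last -D/-U given for a macro. A small illustration (the flag values are assumptions):

    extra_compile_args = ["-O2", "-DNDEBUG"]  # assumed release CFLAGS
    keep_asserts = True
    if keep_asserts:
        # The later -UNDEBUG overrides the earlier -DNDEBUG on the
        # compiler command line, so assert() is not compiled out.
        extra_compile_args.append("-UNDEBUG")
    print(extra_compile_args)  # ['-O2', '-DNDEBUG', '-UNDEBUG']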
@@ -111,39 +112,69 @@ def build_parser(
     return grammar, parser, tokenizer
 
 
-def build_generator(
-    tokenizer: Tokenizer,
+def generate_token_definitions(tokens: IO[str]) -> Tuple[Dict[str, int], Set[str]]:
+    exact_tokens = {}
+    non_exact_tokens = set()
+    numbers = itertools.count(0)
+
+    for line in tokens:
+        line = line.strip()
+
+        if not line or line.startswith("#"):
+            continue
+
+        pieces = line.split()
+        index = next(numbers)
+
+        if len(pieces) == 1:
+            (token,) = pieces
+            non_exact_tokens.add(token)
+        elif len(pieces) == 2:
+            _, op = pieces
+            exact_tokens[op.strip("'")] = index
+        else:
+            raise ValueError(f"Unexpected line found in Tokens file: {line}")
+
+    return exact_tokens, non_exact_tokens
+
+
+def build_c_generator(
     grammar: Grammar,
     grammar_file: str,
+    tokens_file: str,
     output_file: str,
     compile_extension: bool = False,
     verbose_c_extension: bool = False,
     keep_asserts_in_extension: bool = True,
     skip_actions: bool = False,
 ) -> ParserGenerator:
-    # TODO: Allow other extensions; pass the output type as an argument.
-    if not output_file.endswith((".c", ".py")):
-        raise RuntimeError("Your output file must either be a .c or .py file")
+    with open(tokens_file, "r") as tok_file:
+        exact_tok, non_exact_tok = generate_token_definitions(tok_file)
     with open(output_file, "w") as file:
-        gen: ParserGenerator
-        if output_file.endswith(".c"):
-            gen = CParserGenerator(grammar, file, skip_actions=skip_actions)
-        elif output_file.endswith(".py"):
-            gen = PythonParserGenerator(grammar, file)  # TODO: skip_actions
-        else:
-            assert False  # Should have been checked above
+        gen: ParserGenerator = CParserGenerator(
+            grammar, exact_tok, non_exact_tok, file, skip_actions=skip_actions
+        )
         gen.generate(grammar_file)
 
-    if compile_extension and output_file.endswith(".c"):
+    if compile_extension:
         compile_c_extension(
             output_file, verbose=verbose_c_extension, keep_asserts=keep_asserts_in_extension
         )
 
     return gen
 
 
-def build_parser_and_generator(
+def build_python_generator(
+    grammar: Grammar, grammar_file: str, output_file: str, skip_actions: bool = False,
+) -> ParserGenerator:
+    with open(output_file, "w") as file:
+        gen: ParserGenerator = PythonParserGenerator(grammar, file)  # TODO: skip_actions
+        gen.generate(grammar_file)
+    return gen
+
+
+def build_c_parser_and_generator(
     grammar_file: str,
+    tokens_file: str,
     output_file: str,
     compile_extension: bool = False,
     verbose_tokenizer: bool = False,
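Note on generate_token_definitions: it parses the format of CPython's Grammar/Tokens file, where each non-blank, non-comment line is either a bare token name (a non-exact token such as NAME) or a token name followed by its quoted literal string (an exact token such as LPAR '('), and each such line consumes one token index. A minimal sketch, assuming the function is importable from pegen.build as in the upstream tree:

    import io
    from pegen.build import generate_token_definitions

    # Fragment in the Grammar/Tokens format; the ordering here is
    # illustrative, since each token's index is its position in the file.
    sample = io.StringIO(
        "# comments and blank lines are skipped without using an index\n"
        "ENDMARKER\n"
        "NAME\n"
        "LPAR '('\n"
        "RPAR ')'\n"
    )
    exact, non_exact = generate_token_definitions(sample)
    print(exact)      # {'(': 2, ')': 3}  (exact token string -> token index)
    print(non_exact)  # {'ENDMARKER', 'NAME'}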
@@ -152,10 +183,11 @@ def build_parser_and_generator(
     keep_asserts_in_extension: bool = True,
     skip_actions: bool = False,
 ) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
-    """Generate rules, parser, tokenizer, parser generator for a given grammar
+    """Generate rules, C parser, tokenizer, parser generator for a given grammar
 
     Args:
         grammar_file (string): Path for the grammar file
+        tokens_file (string): Path for the tokens file
         output_file (string): Path for the output file
         compile_extension (bool, optional): Whether to compile the C extension.
           Defaults to False.
@@ -170,10 +202,10 @@ def build_parser_and_generator(
         skip_actions (bool, optional): Whether to pretend no rule has any actions.
     """
     grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
-    gen = build_generator(
-        tokenizer,
+    gen = build_c_generator(
         grammar,
         grammar_file,
+        tokens_file,
         output_file,
         compile_extension,
         verbose_c_extension,
@@ -182,3 +214,26 @@ def build_parser_and_generator(
     )
 
     return grammar, parser, tokenizer, gen
+
+
+def build_python_parser_and_generator(
+    grammar_file: str,
+    output_file: str,
+    verbose_tokenizer: bool = False,
+    verbose_parser: bool = False,
+    skip_actions: bool = False,
+) -> Tuple[Grammar, Parser, Tokenizer, ParserGenerator]:
+    """Generate rules, python parser, tokenizer, parser generator for a given grammar
+
+    Args:
+        grammar_file (string): Path for the grammar file
+        output_file (string): Path for the output file
+        verbose_tokenizer (bool, optional): Whether to display additional output
+          when generating the tokenizer. Defaults to False.
+        verbose_parser (bool, optional): Whether to display additional output
+          when generating the parser. Defaults to False.
+        skip_actions (bool, optional): Whether to pretend no rule has any actions.
+    """
+    grammar, parser, tokenizer = build_parser(grammar_file, verbose_tokenizer, verbose_parser)
+    gen = build_python_generator(grammar, grammar_file, output_file, skip_actions=skip_actions,)
+    return grammar, parser, tokenizer, gen
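Note on the new entry points: the old build_parser_and_generator is split so that the C path takes an explicit Tokens file while the Python path does not. A minimal usage sketch (the file paths are assumptions, following the layout of the CPython source tree):

    from pegen.build import (
        build_c_parser_and_generator,
        build_python_parser_and_generator,
    )

    # C parser: token definitions are read from the Tokens file passed in.
    grammar, parser, tokenizer, gen = build_c_parser_and_generator(
        grammar_file="Grammar/python.gram",
        tokens_file="Grammar/Tokens",
        output_file="parse.c",
        compile_extension=False,
    )

    # Python parser: no Tokens file is needed.
    grammar, parser, tokenizer, gen = build_python_parser_and_generator(
        grammar_file="Grammar/python.gram",
        output_file="parse.py",
    )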