bpo-46576: Speed up test_peg_generator by using a static library for shared sources (GH-32338)

Speed up test_peg_generator by using a static library for shared sources to avoid recompiling as much code.
This commit is contained in:
Jeremy Kloth 2022-04-06 15:55:58 -06:00 committed by GitHub
parent 1ba82d4419
commit 612e422c6e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 121 additions and 42 deletions

View file

@ -72,13 +72,30 @@ unittest.main()
@support.requires_subprocess()
class TestCParser(unittest.TestCase):
@classmethod
def setUpClass(cls):
# When running under regtest, a separate tempdir is used
# as the current directory and watched for left-overs.
# Reusing that as the base for temporary directories
# ensures everything is cleaned up properly and
# cleans up afterwards if not (with warnings).
cls.tmp_base = os.getcwd()
# If the current directory is the same as os_helper.SAVEDCWD, use
# None so the mkdtemp() calls fall back to their default directory.
if os.path.samefile(cls.tmp_base, os_helper.SAVEDCWD):
cls.tmp_base = None
# Create a directory for the reusable static library part of
# the pegen extension build process. This greatly reduces the
# runtime overhead of spawning compiler processes.
cls.library_dir = tempfile.mkdtemp(dir=cls.tmp_base)
# Registered as a class-level cleanup so the shared library
# directory is removed with shutil.rmtree.
cls.addClassCleanup(shutil.rmtree, cls.library_dir)
def setUp(self):
self._backup_config_vars = dict(sysconfig._CONFIG_VARS)
cmd = support.missing_compiler_executable()
if cmd is not None:
self.skipTest("The %r command is not found" % cmd)
self.old_cwd = os.getcwd()
self.tmp_path = tempfile.mkdtemp()
self.tmp_path = tempfile.mkdtemp(dir=self.tmp_base)
change_cwd = os_helper.change_cwd(self.tmp_path)
change_cwd.__enter__()
self.addCleanup(change_cwd.__exit__, None, None, None)
@ -91,7 +108,10 @@ class TestCParser(unittest.TestCase):
def build_extension(self, grammar_source):
grammar = parse_string(grammar_source, GrammarParser)
generate_parser_c_extension(grammar, Path(self.tmp_path))
# Because setUp() already changes the current directory to the
# temporary path, use a relative path here to prevent excessive
# path lengths when compiling.
generate_parser_c_extension(grammar, Path('.'), library_dir=self.library_dir)
def run_test(self, grammar_source, test_source):
self.build_extension(grammar_source)

View file

@ -1,6 +1,5 @@
import itertools
import pathlib
import shutil
import sys
import sysconfig
import tempfile
@ -33,7 +32,8 @@ def compile_c_extension(
build_dir: Optional[str] = None,
verbose: bool = False,
keep_asserts: bool = True,
disable_optimization: bool = True, # Significant test_peg_generator speedup.
disable_optimization: bool = False,
library_dir: Optional[str] = None,
) -> str:
"""Compile the generated source for a parser generator into an extension module.
@ -44,15 +44,21 @@ def compile_c_extension(
If *build_dir* is provided, that path will be used as the temporary build directory
of distutils (this is useful in case you want to use a temporary directory).
If *library_dir* is provided, that path will be used as the directory for a
static library of the common parser sources (this is useful in case you are
creating multiple extensions).
"""
import distutils.log
from distutils.command.build_ext import build_ext # type: ignore
from distutils.command.clean import clean # type: ignore
from distutils.core import Distribution, Extension
from distutils.tests.support import fixup_build_ext # type: ignore
from distutils.ccompiler import new_compiler
from distutils.dep_util import newer_group
from distutils.sysconfig import customize_compiler
if verbose:
distutils.log.set_verbosity(distutils.log.DEBUG)
distutils.log.set_threshold(distutils.log.DEBUG)
source_file_path = pathlib.Path(generated_source_path)
extension_name = source_file_path.stem
@ -71,46 +77,92 @@ def compile_c_extension(
extra_compile_args.append("-O0")
if sysconfig.get_config_var("GNULD") == "yes":
extra_link_args.append("-fno-lto")
extension = [
Extension(
extension_name,
sources=[
str(MOD_DIR.parent.parent.parent / "Python" / "Python-ast.c"),
str(MOD_DIR.parent.parent.parent / "Python" / "asdl.c"),
str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer.c"),
str(MOD_DIR.parent.parent.parent / "Parser" / "pegen.c"),
str(MOD_DIR.parent.parent.parent / "Parser" / "pegen_errors.c"),
str(MOD_DIR.parent.parent.parent / "Parser" / "action_helpers.c"),
str(MOD_DIR.parent.parent.parent / "Parser" / "string_parser.c"),
str(MOD_DIR.parent / "peg_extension" / "peg_extension.c"),
generated_source_path,
],
include_dirs=[
str(MOD_DIR.parent.parent.parent / "Include" / "internal"),
str(MOD_DIR.parent.parent.parent / "Parser"),
],
extra_compile_args=extra_compile_args,
extra_link_args=extra_link_args,
)
common_sources = [
str(MOD_DIR.parent.parent.parent / "Python" / "Python-ast.c"),
str(MOD_DIR.parent.parent.parent / "Python" / "asdl.c"),
str(MOD_DIR.parent.parent.parent / "Parser" / "tokenizer.c"),
str(MOD_DIR.parent.parent.parent / "Parser" / "pegen.c"),
str(MOD_DIR.parent.parent.parent / "Parser" / "pegen_errors.c"),
str(MOD_DIR.parent.parent.parent / "Parser" / "action_helpers.c"),
str(MOD_DIR.parent.parent.parent / "Parser" / "string_parser.c"),
str(MOD_DIR.parent / "peg_extension" / "peg_extension.c"),
]
dist = Distribution({"name": extension_name, "ext_modules": extension})
cmd = build_ext(dist)
include_dirs = [
str(MOD_DIR.parent.parent.parent / "Include" / "internal"),
str(MOD_DIR.parent.parent.parent / "Parser"),
]
extension = Extension(
extension_name,
sources=[generated_source_path],
extra_compile_args=extra_compile_args,
extra_link_args=extra_link_args,
)
dist = Distribution({"name": extension_name, "ext_modules": [extension]})
cmd = dist.get_command_obj("build_ext")
fixup_build_ext(cmd)
cmd.inplace = True
cmd.build_lib = str(source_file_path.parent)
cmd.include_dirs = include_dirs
if build_dir:
cmd.build_temp = build_dir
cmd.build_lib = build_dir
cmd.ensure_finalized()
cmd.run()
extension_path = source_file_path.parent / cmd.get_ext_filename(extension_name)
shutil.move(cmd.get_ext_fullpath(extension_name), extension_path)
compiler = new_compiler()
customize_compiler(compiler)
compiler.set_include_dirs(cmd.include_dirs)
compiler.set_library_dirs(cmd.library_dirs)
# build static lib
if library_dir:
library_filename = compiler.library_filename(extension_name,
output_dir=library_dir)
if newer_group(common_sources, library_filename, 'newer'):
if sys.platform == 'win32':
pdb = compiler.static_lib_format % (extension_name, '.pdb')
compile_opts = [f"/Fd{library_dir}\\{pdb}"]
compile_opts.extend(extra_compile_args)
else:
compile_opts = extra_compile_args
objects = compiler.compile(common_sources,
output_dir=library_dir,
debug=cmd.debug,
extra_postargs=compile_opts)
compiler.create_static_lib(objects, extension_name,
output_dir=library_dir,
debug=cmd.debug)
if sys.platform == 'win32':
compiler.add_library_dir(library_dir)
extension.libraries = [extension_name]
elif sys.platform == 'darwin':
compiler.set_link_objects([
'-Wl,-force_load', library_filename,
])
else:
compiler.set_link_objects([
'-Wl,--whole-archive', library_filename, '-Wl,--no-whole-archive',
])
else:
extension.sources[0:0] = common_sources
cmd = clean(dist)
cmd.finalize_options()
cmd.run()
# Compile the source code to object files.
ext_path = cmd.get_ext_fullpath(extension_name)
if newer_group(extension.sources, ext_path, 'newer'):
objects = compiler.compile(extension.sources,
output_dir=cmd.build_temp,
debug=cmd.debug,
extra_postargs=extra_compile_args)
else:
objects = compiler.object_filenames(extension.sources,
output_dir=cmd.build_temp)
# Now link the object files together into a "shared object"
compiler.link_shared_object(
objects, ext_path,
libraries=cmd.get_libraries(extension),
extra_postargs=extra_link_args,
export_symbols=cmd.get_export_symbols(extension),
debug=cmd.debug,
build_temp=cmd.build_temp)
return extension_path
return pathlib.Path(ext_path)
def build_parser(

View file

@ -6,7 +6,7 @@ import sys
import textwrap
import token
import tokenize
from typing import IO, Any, Dict, Final, Type, cast
from typing import IO, Any, Dict, Final, Optional, Type, cast
from pegen.build import compile_c_extension
from pegen.c_generator import CParserGenerator
@ -83,7 +83,8 @@ def generate_c_parser_source(grammar: Grammar) -> str:
def generate_parser_c_extension(
grammar: Grammar, path: pathlib.PurePath, debug: bool = False
grammar: Grammar, path: pathlib.PurePath, debug: bool = False,
library_dir: Optional[str] = None,
) -> Any:
"""Generate a parser c extension for the given grammar in the given path
@ -101,7 +102,13 @@ def generate_parser_c_extension(
grammar, ALL_TOKENS, EXACT_TOKENS, NON_EXACT_TOKENS, file, debug=debug
)
genr.generate("parse.c")
compile_c_extension(str(source), build_dir=str(path))
compile_c_extension(
str(source),
build_dir=str(path),
# Significant test_peg_generator speedups
disable_optimization=True,
library_dir=library_dir,
)
def print_memstats() -> bool: