closes bpo-31650: PEP 552 (Deterministic pycs) implementation (#4575)

Python now supports checking bytecode cache up-to-dateness with a hash of the
source contents rather than volatile source metadata. See the PEP for details.

While a fairly straightforward idea, quite a lot of code had to be modified due
to the pervasiveness of pyc implementation details in the codebase. Changes in
this commit include:

- The core changes to importlib to understand how to read, validate, and
  regenerate hash-based pycs.

- Support for generating hash-based pycs in py_compile and compileall.

- Modifications to our siphash implementation to support passing a custom
  key. We then expose it to importlib through _imp.

- Updates to all places in the interpreter, standard library, and tests that
  manually generate or parse pyc files to grok the new format.

- Support in the interpreter command line code for long options like
  --check-hash-based-pycs.

- Tests and documentation for all of the above.
This commit is contained in:
Benjamin Peterson 2017-12-09 10:26:52 -08:00 committed by GitHub
parent 28d8d14013
commit 42aa93b8ff
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
33 changed files with 3364 additions and 2505 deletions

View file

@ -3,6 +3,7 @@
This module has intimate knowledge of the format of .pyc files.
"""
import enum
import importlib._bootstrap_external
import importlib.machinery
import importlib.util
@ -11,7 +12,7 @@ import os.path
import sys
import traceback
__all__ = ["compile", "main", "PyCompileError"]
__all__ = ["compile", "main", "PyCompileError", "PycInvalidationMode"]
class PyCompileError(Exception):
@ -62,7 +63,14 @@ class PyCompileError(Exception):
return self.msg
def compile(file, cfile=None, dfile=None, doraise=False, optimize=-1):
class PycInvalidationMode(enum.Enum):
TIMESTAMP = 1
CHECKED_HASH = 2
UNCHECKED_HASH = 3
def compile(file, cfile=None, dfile=None, doraise=False, optimize=-1,
invalidation_mode=PycInvalidationMode.TIMESTAMP):
"""Byte-compile one Python source file to Python bytecode.
:param file: The source file name.
@ -79,6 +87,7 @@ def compile(file, cfile=None, dfile=None, doraise=False, optimize=-1):
:param optimize: The optimization level for the compiler. Valid values
are -1, 0, 1 and 2. A value of -1 means to use the optimization
level of the current interpreter, as given by -O command line options.
:param invalidation_mode:
:return: Path to the resulting byte compiled file.
@ -136,9 +145,17 @@ def compile(file, cfile=None, dfile=None, doraise=False, optimize=-1):
os.makedirs(dirname)
except FileExistsError:
pass
source_stats = loader.path_stats(file)
bytecode = importlib._bootstrap_external._code_to_bytecode(
if invalidation_mode == PycInvalidationMode.TIMESTAMP:
source_stats = loader.path_stats(file)
bytecode = importlib._bootstrap_external._code_to_timestamp_pyc(
code, source_stats['mtime'], source_stats['size'])
else:
source_hash = importlib.util.source_hash(source_bytes)
bytecode = importlib._bootstrap_external._code_to_hash_pyc(
code,
source_hash,
(invalidation_mode == PycInvalidationMode.CHECKED_HASH),
)
mode = importlib._bootstrap_external._calc_mode(file)
importlib._bootstrap_external._write_atomic(cfile, bytecode, mode)
return cfile