closes bpo-31650: PEP 552 (Deterministic pycs) implementation (#4575)

Python now supports checking bytecode cache up-to-dateness with a hash of the
source contents rather than volatile source metadata. See the PEP for details.

While a fairly straightforward idea, quite a lot of code had to be modified due
to the pervasiveness of pyc implementation details in the codebase. Changes in
this commit include:

- The core changes to importlib to understand how to read, validate, and
  regenerate hash-based pycs.

- Support for generating hash-based pycs in py_compile and compileall.

- Modifications to our siphash implementation to support passing a custom
  key. We then expose it to importlib through _imp.

- Updates to all places in the interpreter, standard library, and tests that
  manually generate or parse pyc files to grok the new format.

- Support in the interpreter command line code for long options like
  --check-hash-based-pycs.

- Tests and documentation for all of the above.
This commit is contained in:
Benjamin Peterson 2017-12-09 10:26:52 -08:00 committed by GitHub
parent 28d8d14013
commit 42aa93b8ff
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
33 changed files with 3364 additions and 2505 deletions

View file

@ -52,7 +52,8 @@ def _walk_dir(dir, ddir=None, maxlevels=10, quiet=0):
maxlevels=maxlevels - 1, quiet=quiet)
def compile_dir(dir, maxlevels=10, ddir=None, force=False, rx=None,
quiet=0, legacy=False, optimize=-1, workers=1):
quiet=0, legacy=False, optimize=-1, workers=1,
invalidation_mode=py_compile.PycInvalidationMode.TIMESTAMP):
"""Byte-compile all modules in the given directory tree.
Arguments (only dir is required):
@ -67,6 +68,7 @@ def compile_dir(dir, maxlevels=10, ddir=None, force=False, rx=None,
legacy: if True, produce legacy pyc paths instead of PEP 3147 paths
optimize: optimization level or -1 for level of the interpreter
workers: maximum number of parallel workers
invalidation_mode: how the up-to-dateness of the pyc will be checked
"""
if workers is not None and workers < 0:
raise ValueError('workers must be greater or equal to 0')
@ -81,18 +83,20 @@ def compile_dir(dir, maxlevels=10, ddir=None, force=False, rx=None,
ddir=ddir, force=force,
rx=rx, quiet=quiet,
legacy=legacy,
optimize=optimize),
optimize=optimize,
invalidation_mode=invalidation_mode),
files)
success = min(results, default=True)
else:
for file in files:
if not compile_file(file, ddir, force, rx, quiet,
legacy, optimize):
legacy, optimize, invalidation_mode):
success = False
return success
def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
legacy=False, optimize=-1):
legacy=False, optimize=-1,
invalidation_mode=py_compile.PycInvalidationMode.TIMESTAMP):
"""Byte-compile one file.
Arguments (only fullname is required):
@ -105,6 +109,7 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
no output with 2
legacy: if True, produce legacy pyc paths instead of PEP 3147 paths
optimize: optimization level or -1 for level of the interpreter
invalidation_mode: how the up-to-dateness of the pyc will be checked
"""
success = True
if quiet < 2 and isinstance(fullname, os.PathLike):
@ -134,10 +139,10 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
if not force:
try:
mtime = int(os.stat(fullname).st_mtime)
expect = struct.pack('<4sl', importlib.util.MAGIC_NUMBER,
mtime)
expect = struct.pack('<4sll', importlib.util.MAGIC_NUMBER,
0, mtime)
with open(cfile, 'rb') as chandle:
actual = chandle.read(8)
actual = chandle.read(12)
if expect == actual:
return success
except OSError:
@ -146,7 +151,8 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
print('Compiling {!r}...'.format(fullname))
try:
ok = py_compile.compile(fullname, cfile, dfile, True,
optimize=optimize)
optimize=optimize,
invalidation_mode=invalidation_mode)
except py_compile.PyCompileError as err:
success = False
if quiet >= 2:
@ -175,7 +181,8 @@ def compile_file(fullname, ddir=None, force=False, rx=None, quiet=0,
return success
def compile_path(skip_curdir=1, maxlevels=0, force=False, quiet=0,
legacy=False, optimize=-1):
legacy=False, optimize=-1,
invalidation_mode=py_compile.PycInvalidationMode.TIMESTAMP):
"""Byte-compile all module on sys.path.
Arguments (all optional):
@ -186,6 +193,7 @@ def compile_path(skip_curdir=1, maxlevels=0, force=False, quiet=0,
quiet: as for compile_dir() (default 0)
legacy: as for compile_dir() (default False)
optimize: as for compile_dir() (default -1)
invalidation_mode: as for compiler_dir()
"""
success = True
for dir in sys.path:
@ -193,9 +201,16 @@ def compile_path(skip_curdir=1, maxlevels=0, force=False, quiet=0,
if quiet < 2:
print('Skipping current directory')
else:
success = success and compile_dir(dir, maxlevels, None,
force, quiet=quiet,
legacy=legacy, optimize=optimize)
success = success and compile_dir(
dir,
maxlevels,
None,
force,
quiet=quiet,
legacy=legacy,
optimize=optimize,
invalidation_mode=invalidation_mode,
)
return success
@ -238,6 +253,11 @@ def main():
'to the equivalent of -l sys.path'))
parser.add_argument('-j', '--workers', default=1,
type=int, help='Run compileall concurrently')
invalidation_modes = [mode.name.lower().replace('_', '-')
for mode in py_compile.PycInvalidationMode]
parser.add_argument('--invalidation-mode', default='timestamp',
choices=sorted(invalidation_modes),
help='How the pycs will be invalidated at runtime')
args = parser.parse_args()
compile_dests = args.compile_dest
@ -266,23 +286,29 @@ def main():
if args.workers is not None:
args.workers = args.workers or None
ivl_mode = args.invalidation_mode.replace('-', '_').upper()
invalidation_mode = py_compile.PycInvalidationMode[ivl_mode]
success = True
try:
if compile_dests:
for dest in compile_dests:
if os.path.isfile(dest):
if not compile_file(dest, args.ddir, args.force, args.rx,
args.quiet, args.legacy):
args.quiet, args.legacy,
invalidation_mode=invalidation_mode):
success = False
else:
if not compile_dir(dest, maxlevels, args.ddir,
args.force, args.rx, args.quiet,
args.legacy, workers=args.workers):
args.legacy, workers=args.workers,
invalidation_mode=invalidation_mode):
success = False
return success
else:
return compile_path(legacy=args.legacy, force=args.force,
quiet=args.quiet)
quiet=args.quiet,
invalidation_mode=invalidation_mode)
except KeyboardInterrupt:
if args.quiet < 2:
print("\n[interrupted]")