closes bpo-31650: PEP 552 (Deterministic pycs) implementation (#4575)

Python now supports checking bytecode cache up-to-dateness with a hash of the
source contents rather than volatile source metadata. See the PEP for details.

While a fairly straightforward idea, quite a lot of code had to be modified due
to the pervasiveness of pyc implementation details in the codebase. Changes in
this commit include:

- The core changes to importlib to understand how to read, validate, and
  regenerate hash-based pycs.

- Support for generating hash-based pycs in py_compile and compileall.

- Modifications to our siphash implementation to support passing a custom
  key. We then expose it to importlib through _imp.

- Updates to all places in the interpreter, standard library, and tests that
  manually generate or parse pyc files to grok the new format.

- Support in the interpreter command line code for long options like
  --check-hash-based-pycs.

- Tests and documentation for all of the above.
This commit is contained in:
Benjamin Peterson 2017-12-09 10:26:52 -08:00 committed by GitHub
parent 28d8d14013
commit 42aa93b8ff
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
33 changed files with 3364 additions and 2505 deletions

View file

@ -40,7 +40,7 @@ def make_pyc(co, mtime, size):
else:
mtime = int(-0x100000000 + int(mtime))
pyc = (importlib.util.MAGIC_NUMBER +
struct.pack("<ii", int(mtime), size & 0xFFFFFFFF) + data)
struct.pack("<iii", 0, int(mtime), size & 0xFFFFFFFF) + data)
return pyc
def module_path_to_dotted_name(path):
@ -187,6 +187,20 @@ class UncompressedZipImportTestCase(ImportHooksBaseTestCase):
TESTMOD + pyc_ext: (NOW, test_pyc)}
self.doTest(pyc_ext, files, TESTMOD)
def testUncheckedHashBasedPyc(self):
source = b"state = 'old'"
source_hash = importlib.util.source_hash(source)
bytecode = importlib._bootstrap_external._code_to_hash_pyc(
compile(source, "???", "exec"),
source_hash,
False, # unchecked
)
files = {TESTMOD + ".py": (NOW, "state = 'new'"),
TESTMOD + ".pyc": (NOW - 20, bytecode)}
def check(mod):
self.assertEqual(mod.state, 'old')
self.doTest(None, files, TESTMOD, call=check)
def testEmptyPy(self):
files = {TESTMOD + ".py": (NOW, "")}
self.doTest(None, files, TESTMOD)
@ -215,7 +229,7 @@ class UncompressedZipImportTestCase(ImportHooksBaseTestCase):
badtime_pyc = bytearray(test_pyc)
# flip the second bit -- not the first as that one isn't stored in the
# .py's mtime in the zip archive.
badtime_pyc[7] ^= 0x02
badtime_pyc[11] ^= 0x02
files = {TESTMOD + ".py": (NOW, test_src),
TESTMOD + pyc_ext: (NOW, badtime_pyc)}
self.doTest(".py", files, TESTMOD)