closes bpo-31650: PEP 552 (Deterministic pycs) implementation (#4575)

Python now supports checking bytecode cache up-to-dateness with a hash of the
source contents rather than volatile source metadata. See the PEP for details.

While the idea is fairly straightforward, quite a lot of code had to be
modified because pyc implementation details are pervasive in the codebase.
Changes in this commit include:

- The core changes to importlib to understand how to read, validate, and
  regenerate hash-based pycs.

- Support for generating hash-based pycs in py_compile and compileall.

- Modifications to our siphash implementation to support passing a custom
  key. We then expose it to importlib through _imp.

- Updates to all places in the interpreter, standard library, and tests that
  manually generate or parse pyc files so that they understand the new format.

- Support in the interpreter command line code for long options like
  --check-hash-based-pycs.

- Tests and documentation for all of the above.
This commit is contained in:
Benjamin Peterson 2017-12-09 10:26:52 -08:00 committed by GitHub
parent 28d8d14013
commit 42aa93b8ff
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
33 changed files with 3364 additions and 2505 deletions

View file

@ -284,7 +284,6 @@ static PyHash_FuncDef PyHash_Func = {fnv, "fnv", 8 * SIZEOF_PY_HASH_T,
#endif /* Py_HASH_ALGORITHM == Py_HASH_FNV */
#if Py_HASH_ALGORITHM == Py_HASH_SIPHASH24
/* **************************************************************************
<MIT License>
Copyright (c) 2013 Marek Majkowski <marek@popcount.org>
@ -364,10 +363,10 @@ static PyHash_FuncDef PyHash_Func = {fnv, "fnv", 8 * SIZEOF_PY_HASH_T,
HALF_ROUND(v2,v1,v0,v3,17,21);
static Py_hash_t
siphash24(const void *src, Py_ssize_t src_sz) {
uint64_t k0 = _le64toh(_Py_HashSecret.siphash.k0);
uint64_t k1 = _le64toh(_Py_HashSecret.siphash.k1);
static uint64_t
siphash24(uint64_t key0, uint64_t key1, const void *src, Py_ssize_t src_sz) {
uint64_t k0 = _le64toh(key0);
uint64_t k1 = _le64toh(key1);
uint64_t b = (uint64_t)src_sz << 56;
const uint64_t *in = (uint64_t*)src;
@ -412,12 +411,26 @@ siphash24(const void *src, Py_ssize_t src_sz) {
/* modified */
t = (v0 ^ v1) ^ (v2 ^ v3);
return (Py_hash_t)t;
return t;
}
static PyHash_FuncDef PyHash_Func = {siphash24, "siphash24", 64, 128};
/* Hash the src_sz bytes at src with siphash24, using
   _Py_HashSecret.siphash.k0 and _Py_HashSecret.siphash.k1 as the two key
   words. The 64-bit digest is cast to Py_hash_t before being returned. */
static Py_hash_t
pysiphash(const void *src, Py_ssize_t src_sz)
{
    const uint64_t digest = siphash24(_Py_HashSecret.siphash.k0,
                                      _Py_HashSecret.siphash.k1,
                                      src, src_sz);

    return (Py_hash_t)digest;
}
#endif /* Py_HASH_ALGORITHM == Py_HASH_SIPHASH24 */
/* Keyed hash of the src_sz bytes at src: runs siphash24 with `key` as the
   first key word and 0 as the second, and returns the 64-bit digest
   unchanged. */
uint64_t
_Py_KeyedHash(uint64_t key, const void *src, Py_ssize_t src_sz)
{
    const uint64_t second_key_word = 0;

    return siphash24(key, second_key_word, src, src_sz);
}
#if Py_HASH_ALGORITHM == Py_HASH_SIPHASH24
/* Hash descriptor compiled in when Py_HASH_ALGORITHM is Py_HASH_SIPHASH24:
   pysiphash is the hash function and "siphash24" is its name.
   NOTE(review): 64 and 128 are presumably the hash and seed widths in bits --
   confirm against the PyHash_FuncDef declaration. */
static PyHash_FuncDef PyHash_Func = {pysiphash, "siphash24", 64, 128};
#endif
#ifdef __cplusplus
}