mirror of
https://github.com/python/cpython.git
synced 2025-08-28 20:56:54 +00:00
closes bpo-31650: PEP 552 (Deterministic pycs) implementation (#4575)
Python now supports checking bytecode cache up-to-dateness with a hash of the source contents rather than volatile source metadata. See the PEP for details. While a fairly straightforward idea, quite a lot of code had to be modified due to the pervasiveness of pyc implementation details in the codebase. Changes in this commit include: - The core changes to importlib to understand how to read, validate, and regenerate hash-based pycs. - Support for generating hash-based pycs in py_compile and compileall. - Modifications to our siphash implementation to support passing a custom key. We then expose it to importlib through _imp. - Updates to all places in the interpreter, standard library, and tests that manually generate or parse pyc files to grok the new format. - Support in the interpreter command line code for long options like --check-hash-based-pycs. - Tests and documentation for all of the above.
This commit is contained in:
parent
28d8d14013
commit
42aa93b8ff
33 changed files with 3364 additions and 2505 deletions
|
@ -354,6 +354,41 @@ exit:
|
|||
return return_value;
|
||||
}
|
||||
|
||||
PyDoc_STRVAR(_imp_source_hash__doc__,
|
||||
"source_hash($module, /, key, source)\n"
|
||||
"--\n"
|
||||
"\n");
|
||||
|
||||
#define _IMP_SOURCE_HASH_METHODDEF \
|
||||
{"source_hash", (PyCFunction)_imp_source_hash, METH_FASTCALL|METH_KEYWORDS, _imp_source_hash__doc__},
|
||||
|
||||
static PyObject *
|
||||
_imp_source_hash_impl(PyObject *module, long key, Py_buffer *source);
|
||||
|
||||
/* Argument-parsing wrapper for _imp.source_hash(key, source).
 *
 * Parses `key` as a C long and `source` as a Py_buffer (format "ly*"),
 * forwards both to _imp_source_hash_impl(), and releases the buffer
 * before returning.  Returns whatever the impl returns, or NULL when
 * argument parsing fails.
 *
 * NOTE(review): this sits inside a "clinic ... generated code" region, so
 * it is presumably Argument Clinic output -- confirm before hand-editing. */
static PyObject *
|
||||
_imp_source_hash(PyObject *module, PyObject **args, Py_ssize_t nargs, PyObject *kwnames)
|
||||
{
|
||||
PyObject *return_value = NULL;
|
||||
/* Keyword names accepted for the two arguments, in positional order. */
static const char * const _keywords[] = {"key", "source", NULL};
|
||||
static _PyArg_Parser _parser = {"ly*:source_hash", _keywords, 0};
|
||||
long key;
|
||||
/* Start with NULL members so the exit path can tell whether a buffer
   was filled in by the parser. */
Py_buffer source = {NULL, NULL};
|
||||
|
||||
/* On a parse failure return_value is still NULL when we reach exit. */
if (!_PyArg_ParseStackAndKeywords(args, nargs, kwnames, &_parser,
|
||||
&key, &source)) {
|
||||
goto exit;
|
||||
}
|
||||
return_value = _imp_source_hash_impl(module, key, &source);
|
||||
|
||||
exit:
|
||||
/* Cleanup for source */
|
||||
/* Release only if source.obj was set; it is NULL-initialised above. */
if (source.obj) {
|
||||
PyBuffer_Release(&source);
|
||||
}
|
||||
|
||||
return return_value;
|
||||
}
|
||||
|
||||
#ifndef _IMP_CREATE_DYNAMIC_METHODDEF
|
||||
#define _IMP_CREATE_DYNAMIC_METHODDEF
|
||||
#endif /* !defined(_IMP_CREATE_DYNAMIC_METHODDEF) */
|
||||
|
@ -361,4 +396,4 @@ exit:
|
|||
#ifndef _IMP_EXEC_DYNAMIC_METHODDEF
|
||||
#define _IMP_EXEC_DYNAMIC_METHODDEF
|
||||
#endif /* !defined(_IMP_EXEC_DYNAMIC_METHODDEF) */
|
||||
/*[clinic end generated code: output=d068dd493e513604 input=a9049054013a1b77]*/
|
||||
/*[clinic end generated code: output=e8b2c0b0d0a75da8 input=a9049054013a1b77]*/
|
||||
|
|
|
@ -51,7 +51,8 @@ void _PyOS_ResetGetOpt(void)
|
|||
opt_ptr = L"";
|
||||
}
|
||||
|
||||
int _PyOS_GetOpt(int argc, wchar_t **argv, wchar_t *optstring)
|
||||
int _PyOS_GetOpt(int argc, wchar_t **argv, wchar_t *optstring,
|
||||
const _PyOS_LongOption *longopts, int *longindex)
|
||||
{
|
||||
wchar_t *ptr;
|
||||
wchar_t option;
|
||||
|
@ -86,13 +87,41 @@ int _PyOS_GetOpt(int argc, wchar_t **argv, wchar_t *optstring)
|
|||
return 'V';
|
||||
}
|
||||
|
||||
|
||||
opt_ptr = &argv[_PyOS_optind++][1];
|
||||
}
|
||||
|
||||
if ((option = *opt_ptr++) == L'\0')
|
||||
return -1;
|
||||
|
||||
if (option == L'-') {
|
||||
// Parse long option.
|
||||
if (*opt_ptr == L'\0') {
|
||||
fprintf(stderr, "expected long option\n");
|
||||
return -1;
|
||||
}
|
||||
*longindex = 0;
|
||||
const _PyOS_LongOption *opt;
|
||||
for (opt = &longopts[*longindex]; opt->name; opt = &longopts[++(*longindex)]) {
|
||||
if (!wcscmp(opt->name, opt_ptr))
|
||||
break;
|
||||
}
|
||||
if (!opt->name) {
|
||||
fprintf(stderr, "unknown option %ls\n", argv[_PyOS_optind - 1]);
|
||||
return '_';
|
||||
}
|
||||
opt_ptr = L"";
|
||||
if (!opt->has_arg) {
|
||||
return opt->val;
|
||||
}
|
||||
if (_PyOS_optind >= argc) {
|
||||
fprintf(stderr, "Argument expected for the %ls options\n",
|
||||
argv[_PyOS_optind - 1]);
|
||||
return '_';
|
||||
}
|
||||
_PyOS_optarg = argv[_PyOS_optind++];
|
||||
return opt->val;
|
||||
}
|
||||
|
||||
if (option == 'J') {
|
||||
if (_PyOS_opterr)
|
||||
fprintf(stderr, "-J is reserved for Jython\n");
|
||||
|
|
|
@ -5,6 +5,8 @@
|
|||
|
||||
#include "Python-ast.h"
|
||||
#undef Yield /* undefine macro conflicting with winbase.h */
|
||||
#include "internal/hash.h"
|
||||
#include "internal/import.h"
|
||||
#include "internal/pystate.h"
|
||||
#include "errcode.h"
|
||||
#include "marshal.h"
|
||||
|
@ -2184,6 +2186,34 @@ _imp_exec_builtin_impl(PyObject *module, PyObject *mod)
|
|||
return exec_builtin_or_dynamic(mod);
|
||||
}
|
||||
|
||||
/*[clinic input]
|
||||
_imp.source_hash
|
||||
|
||||
key: long
|
||||
source: Py_buffer
|
||||
[clinic start generated code]*/
|
||||
|
||||
static PyObject *
|
||||
_imp_source_hash_impl(PyObject *module, long key, Py_buffer *source)
|
||||
/*[clinic end generated code: output=edb292448cf399ea input=9aaad1e590089789]*/
|
||||
{
|
||||
uint64_t hash = _Py_KeyedHash((uint64_t)key, source->buf, source->len);
|
||||
#if !PY_LITTLE_ENDIAN
|
||||
// Force to little-endian. There really ought to be a succinct standard way
|
||||
// to do this.
|
||||
union {
|
||||
uint64_t x;
|
||||
unsigned char data[sizeof(uint64_t)];
|
||||
} pun;
|
||||
pun.x = hash;
|
||||
for (size_t i = 0; i < sizeof(pun.data); i++) {
|
||||
pun.data[sizeof(pun.data) - i - 1] = pun.data[i];
|
||||
}
|
||||
hash = pun.x;
|
||||
#endif
|
||||
return PyBytes_FromStringAndSize((const char *)&hash, sizeof(hash));
|
||||
}
|
||||
|
||||
|
||||
PyDoc_STRVAR(doc_imp,
|
||||
"(Extremely) low-level import machinery bits as used by importlib and imp.");
|
||||
|
@ -2203,6 +2233,7 @@ static PyMethodDef imp_methods[] = {
|
|||
_IMP_EXEC_DYNAMIC_METHODDEF
|
||||
_IMP_EXEC_BUILTIN_METHODDEF
|
||||
_IMP__FIX_CO_FILENAME_METHODDEF
|
||||
_IMP_SOURCE_HASH_METHODDEF
|
||||
{NULL, NULL} /* sentinel */
|
||||
};
|
||||
|
||||
|
@ -2219,6 +2250,8 @@ static struct PyModuleDef impmodule = {
|
|||
NULL
|
||||
};
|
||||
|
||||
const char *_Py_CheckHashBasedPycsMode = "default";
|
||||
|
||||
PyMODINIT_FUNC
|
||||
PyInit_imp(void)
|
||||
{
|
||||
|
@ -2230,6 +2263,15 @@ PyInit_imp(void)
|
|||
d = PyModule_GetDict(m);
|
||||
if (d == NULL)
|
||||
goto failure;
|
||||
PyObject *pyc_mode = PyUnicode_FromString(_Py_CheckHashBasedPycsMode);
|
||||
if (pyc_mode == NULL) {
|
||||
goto failure;
|
||||
}
|
||||
if (PyDict_SetItemString(d, "check_hash_based_pycs", pyc_mode) < 0) {
|
||||
Py_DECREF(pyc_mode);
|
||||
goto failure;
|
||||
}
|
||||
Py_DECREF(pyc_mode);
|
||||
|
||||
return m;
|
||||
failure:
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -284,7 +284,6 @@ static PyHash_FuncDef PyHash_Func = {fnv, "fnv", 8 * SIZEOF_PY_HASH_T,
|
|||
#endif /* Py_HASH_ALGORITHM == Py_HASH_FNV */
|
||||
|
||||
|
||||
#if Py_HASH_ALGORITHM == Py_HASH_SIPHASH24
|
||||
/* **************************************************************************
|
||||
<MIT License>
|
||||
Copyright (c) 2013 Marek Majkowski <marek@popcount.org>
|
||||
|
@ -364,10 +363,10 @@ static PyHash_FuncDef PyHash_Func = {fnv, "fnv", 8 * SIZEOF_PY_HASH_T,
|
|||
HALF_ROUND(v2,v1,v0,v3,17,21);
|
||||
|
||||
|
||||
static Py_hash_t
|
||||
siphash24(const void *src, Py_ssize_t src_sz) {
|
||||
uint64_t k0 = _le64toh(_Py_HashSecret.siphash.k0);
|
||||
uint64_t k1 = _le64toh(_Py_HashSecret.siphash.k1);
|
||||
static uint64_t
|
||||
siphash24(uint64_t key0, uint64_t key1, const void *src, Py_ssize_t src_sz) {
|
||||
uint64_t k0 = _le64toh(key0);
|
||||
uint64_t k1 = _le64toh(key1);
|
||||
uint64_t b = (uint64_t)src_sz << 56;
|
||||
const uint64_t *in = (uint64_t*)src;
|
||||
|
||||
|
@ -412,12 +411,26 @@ siphash24(const void *src, Py_ssize_t src_sz) {
|
|||
|
||||
/* modified */
|
||||
t = (v0 ^ v1) ^ (v2 ^ v3);
|
||||
return (Py_hash_t)t;
|
||||
return t;
|
||||
}
|
||||
|
||||
static PyHash_FuncDef PyHash_Func = {siphash24, "siphash24", 64, 128};
|
||||
static Py_hash_t
|
||||
pysiphash(const void *src, Py_ssize_t src_sz) {
|
||||
return (Py_hash_t)siphash24(
|
||||
_Py_HashSecret.siphash.k0, _Py_HashSecret.siphash.k1,
|
||||
src, src_sz);
|
||||
}
|
||||
|
||||
#endif /* Py_HASH_ALGORITHM == Py_HASH_SIPHASH24 */
|
||||
uint64_t
|
||||
_Py_KeyedHash(uint64_t key, const void *src, Py_ssize_t src_sz)
|
||||
{
|
||||
return siphash24(key, 0, src, src_sz);
|
||||
}
|
||||
|
||||
|
||||
#if Py_HASH_ALGORITHM == Py_HASH_SIPHASH24
|
||||
static PyHash_FuncDef PyHash_Func = {pysiphash, "siphash24", 64, 128};
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
|
|
|
@ -1053,7 +1053,8 @@ run_pyc_file(FILE *fp, const char *filename, PyObject *globals,
|
|||
"Bad magic number in .pyc file");
|
||||
return NULL;
|
||||
}
|
||||
/* Skip mtime and size */
|
||||
/* Skip the rest of the header. */
|
||||
(void) PyMarshal_ReadLongFromFile(fp);
|
||||
(void) PyMarshal_ReadLongFromFile(fp);
|
||||
(void) PyMarshal_ReadLongFromFile(fp);
|
||||
if (PyErr_Occurred())
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue