closes bpo-31650: PEP 552 (Deterministic pycs) implementation (#4575)

Python now supports checking bytecode cache up-to-dateness with a hash of the
source contents rather than volatile source metadata. See the PEP for details.

While a fairly straightforward idea, quite a lot of code had to be modified due
to the pervasiveness of pyc implementation details in the codebase. Changes in
this commit include:

- The core changes to importlib to understand how to read, validate, and
  regenerate hash-based pycs.

- Support for generating hash-based pycs in py_compile and compileall.

- Modifications to our siphash implementation to support passing a custom
  key. We then expose it to importlib through _imp.

- Updates to all places in the interpreter, standard library, and tests that
  manually generate or parse pyc files to grok the new format.

- Support in the interpreter command line code for long options like
  --check-hash-based-pycs.

- Tests and documentation for all of the above.
This commit is contained in:
Benjamin Peterson 2017-12-09 10:26:52 -08:00 committed by GitHub
parent 28d8d14013
commit 42aa93b8ff
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
33 changed files with 3364 additions and 2505 deletions

View file

@ -2,6 +2,7 @@
#include "Python.h"
#include "osdefs.h"
#include "internal/import.h"
#include "internal/pystate.h"
#include <locale.h>
@ -61,6 +62,11 @@ static int orig_argc;
#define PROGRAM_OPTS BASE_OPTS
static const _PyOS_LongOption longoptions[] = {
{L"check-hash-based-pycs", 1, 0},
{NULL, 0, 0},
};
/* Short usage message (with %s for argv0) */
static const char usage_line[] =
"usage: %ls [option] ... [-c cmd | -m mod | file | -] [arg] ...\n";
@ -98,6 +104,8 @@ static const char usage_3[] = "\
also PYTHONWARNINGS=arg\n\
-x : skip first line of source, allowing use of non-Unix forms of #!cmd\n\
-X opt : set implementation-specific option\n\
--check-hash-based-pycs always|default|never:\n\
control how Python invalidates hash-based .pyc files\n\
";
static const char usage_4[] = "\
file : program read from script file\n\
@ -393,6 +401,7 @@ typedef struct {
int quiet_flag; /* Py_QuietFlag, -q */
int skip_first_line; /* -x option */
_Py_OptList xoptions; /* -X options */
const char *check_hash_pycs_mode; /* --check-hash-based-pycs */
#ifdef MS_WINDOWS
int legacy_windows_fs_encoding; /* Py_LegacyWindowsFSEncodingFlag,
PYTHONLEGACYWINDOWSFSENCODING */
@ -577,7 +586,9 @@ pymain_parse_cmdline_impl(_PyMain *pymain)
_PyOS_ResetGetOpt();
do {
int c = _PyOS_GetOpt(pymain->argc, pymain->argv, PROGRAM_OPTS);
int longindex = -1;
int c = _PyOS_GetOpt(pymain->argc, pymain->argv, PROGRAM_OPTS,
longoptions, &longindex);
if (c == EOF) {
break;
}
@ -608,6 +619,22 @@ pymain_parse_cmdline_impl(_PyMain *pymain)
}
switch (c) {
case 0:
// Handle long option.
assert(longindex == 0); // Only one long option now.
if (!wcscmp(_PyOS_optarg, L"always")) {
cmdline->check_hash_pycs_mode = "always";
} else if (!wcscmp(_PyOS_optarg, L"never")) {
cmdline->check_hash_pycs_mode = "never";
} else if (!wcscmp(_PyOS_optarg, L"default")) {
cmdline->check_hash_pycs_mode = "default";
} else {
fprintf(stderr, "--check-hash-based-pycs must be one of "
"'default', 'always', or 'never'\n");
return 1;
}
break;
case 'b':
cmdline->bytes_warning++;
break;
@ -1085,6 +1112,8 @@ pymain_set_global_config(_PyMain *pymain)
pymain_set_flag(&Py_UnbufferedStdioFlag, cmdline->use_unbuffered_io);
pymain_set_flag(&Py_VerboseFlag, cmdline->verbosity);
pymain_set_flag(&Py_QuietFlag, cmdline->quiet_flag);
if (cmdline->check_hash_pycs_mode)
_Py_CheckHashBasedPycsMode = cmdline->check_hash_pycs_mode;
#ifdef MS_WINDOWS
pymain_set_flag(&Py_LegacyWindowsFSEncodingFlag, cmdline->legacy_windows_fs_encoding);
pymain_set_flag(&Py_LegacyWindowsStdioFlag, cmdline->legacy_windows_stdio);