mirror of
https://github.com/python/cpython.git
synced 2025-10-04 06:06:44 +00:00
[3.5] bpo-29537: Tolerate legacy invalid bytecode (#169)
bpo-27286 fixed a problem where BUILD_MAP_UNPACK_WITH_CALL could be emitted with an incorrect oparg value, causing the eval loop to access the wrong stack entry when attempting to read the function name. The associated magic number change caused significant problems when attempting to upgrade to 3.5.3 for anyone that relies on pre-cached bytecode remaining valid across maintenance releases. This patch restores the ability to import legacy bytecode generated by 3.5.0, 3.5.1 or 3.5.2, and modifies the eval loop to avoid any harmful consequences from the potentially malformed legacy bytecode. Original import patch by Petr Viktorin, eval loop patch by Serhiy Storchaka, and tests and integration by Nick Coghlan.
This commit is contained in:
parent
bef209d449
commit
93602e3af7
13 changed files with 2828 additions and 2612 deletions
|
@ -230,7 +230,7 @@ _code_type = type(_write_atomic.__code__)
|
|||
# Python 3.5b1 3330 (PEP 448: Additional Unpacking Generalizations)
|
||||
# Python 3.5b2 3340 (fix dictionary display evaluation order #11205)
|
||||
# Python 3.5b2 3350 (add GET_YIELD_FROM_ITER opcode #24400)
|
||||
# Python 3.5.2 3351 (fix BUILD_MAP_UNPACK_WITH_CALL opcode #27286)
|
||||
# Python 3.5.3 3351 (fix BUILD_MAP_UNPACK_WITH_CALL opcode #27286)
|
||||
#
|
||||
# MAGIC must change whenever the bytecode emitted by the compiler may no
|
||||
# longer be understood by older implementations of the eval loop (usually
|
||||
|
@ -242,6 +242,28 @@ _code_type = type(_write_atomic.__code__)
|
|||
MAGIC_NUMBER = (3351).to_bytes(2, 'little') + b'\r\n'
|
||||
_RAW_MAGIC_NUMBER = int.from_bytes(MAGIC_NUMBER, 'little') # For import.c
|
||||
|
||||
# Issue #29537: handle issue27286 bytecode incompatibility
|
||||
#
|
||||
# The magic number bump in Python 3.5.3 for issue27286 turned out to create
|
||||
# significant backwards compatibility problems for redistributors and
|
||||
# other folks that rely on the bytecode format remaining stable within a
|
||||
# given maintenance release series. See http://bugs.python.org/issue29514
|
||||
# for more discussion of the problems that the original change caused.
|
||||
#
|
||||
# The _BACKCOMPAT_MAGIC_NUMBER below and any other changes marked with
|
||||
# "Issue #29537" comments allow Python 3.5.4+ to load bytecode files with both
|
||||
# the original 3.5.0 magic number and those with the updated magic number used
|
||||
# since 3.5.3.
|
||||
#
|
||||
# This is expected to be a one-off change used solely to restore legacy
|
||||
# bytecode compatibility within the 3.5.x series, so it avoids any changes
|
||||
# that would prompt a rebuild of C extension modules.
|
||||
#
|
||||
if _RAW_MAGIC_NUMBER != 168627479:
|
||||
_msg = 'Magic number mismatch (the issue27286 workaround is for 3.5 only)'
|
||||
raise SystemError(_msg)
|
||||
_BACKCOMPAT_MAGIC_NUMBER = (3350).to_bytes(2, 'little') + b'\r\n'
|
||||
|
||||
_PYCACHE = '__pycache__'
|
||||
_OPT = 'opt-'
|
||||
|
||||
|
@ -446,7 +468,9 @@ def _validate_bytecode_header(data, source_stats=None, name=None, path=None):
|
|||
magic = data[:4]
|
||||
raw_timestamp = data[4:8]
|
||||
raw_size = data[8:12]
|
||||
if magic != MAGIC_NUMBER:
|
||||
if (magic != MAGIC_NUMBER
|
||||
# Issue #29537: handle issue27286 bytecode incompatibility
|
||||
and magic != _BACKCOMPAT_MAGIC_NUMBER):
|
||||
message = 'bad magic number in {!r}: {!r}'.format(name, magic)
|
||||
_verbose_message('{}', message)
|
||||
raise ImportError(message, **exc_details)
|
||||
|
|
|
@ -4,7 +4,7 @@ from ._bootstrap import module_from_spec
|
|||
from ._bootstrap import _resolve_name
|
||||
from ._bootstrap import spec_from_loader
|
||||
from ._bootstrap import _find_spec
|
||||
from ._bootstrap_external import MAGIC_NUMBER
|
||||
from ._bootstrap_external import MAGIC_NUMBER, _BACKCOMPAT_MAGIC_NUMBER
|
||||
from ._bootstrap_external import cache_from_source
|
||||
from ._bootstrap_external import decode_source
|
||||
from ._bootstrap_external import source_from_cache
|
||||
|
|
|
@ -37,7 +37,10 @@ def read_code(stream):
|
|||
import marshal
|
||||
|
||||
magic = stream.read(4)
|
||||
if magic != importlib.util.MAGIC_NUMBER:
|
||||
if (magic != importlib.util.MAGIC_NUMBER
|
||||
# Issue #29537: handle issue27286 bytecode incompatibility
|
||||
# See Lib/importlib/_bootstrap_external.py
|
||||
and magic != importlib.util._BACKCOMPAT_MAGIC_NUMBER):
|
||||
return None
|
||||
|
||||
stream.read(8) # Skip timestamp and size
|
||||
|
|
|
@ -289,7 +289,12 @@ def importfile(path):
|
|||
"""Import a Python source file or compiled file given its path."""
|
||||
magic = importlib.util.MAGIC_NUMBER
|
||||
with open(path, 'rb') as file:
|
||||
is_bytecode = magic == file.read(len(magic))
|
||||
first_bytes = file.read(len(magic))
|
||||
is_bytecode = first_bytes in (magic,
|
||||
# Issue #29537: handle issue27286
|
||||
# bytecode incompatibility
|
||||
# See Lib/importlib/_bootstrap_external.py
|
||||
importlib.util._BACKCOMPAT_MAGIC_NUMBER)
|
||||
filename = os.path.basename(path)
|
||||
name, ext = os.path.splitext(filename)
|
||||
if is_bytecode:
|
||||
|
|
|
@ -52,15 +52,15 @@ Here we add keyword arguments
|
|||
>>> f(1, 2, **{'a': -1, 'b': 5}, **{'a': 4, 'c': 6})
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
TypeError: f() got multiple values for keyword argument 'a'
|
||||
TypeError: function got multiple values for keyword argument 'a'
|
||||
>>> f(1, 2, **{'a': -1, 'b': 5}, a=4, c=6)
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
TypeError: f() got multiple values for keyword argument 'a'
|
||||
TypeError: function got multiple values for keyword argument 'a'
|
||||
>>> f(1, 2, a=3, **{'a': 4}, **{'a': 5})
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
TypeError: f() got multiple values for keyword argument 'a'
|
||||
TypeError: function got multiple values for keyword argument 'a'
|
||||
>>> f(1, 2, 3, *[4, 5], **{'a':6, 'b':7})
|
||||
(1, 2, 3, 4, 5) {'a': 6, 'b': 7}
|
||||
>>> f(1, 2, 3, x=4, y=5, *(6, 7), **{'a':8, 'b': 9})
|
||||
|
|
|
@ -603,5 +603,109 @@ class SourcelessLoaderBadBytecodeTestPEP302(SourcelessLoaderBadBytecodeTest,
|
|||
util=importlib_util)
|
||||
|
||||
|
||||
###########################################################################
|
||||
# Issue #29537: Test backwards compatibility with legacy 3.5.0/1/2 bytecode
|
||||
###########################################################################
|
||||
|
||||
class LegacyBytecodeTest:
|
||||
|
||||
def _test_legacy_magic(self, test, *, del_source=False):
|
||||
# Replace the default magic number with one copied from a pyc file
|
||||
# generated by Python 3.5.2
|
||||
with util.create_modules('_temp') as mapping:
|
||||
bc_path = self.manipulate_bytecode('_temp', mapping,
|
||||
lambda bc: b'\x16\r\r\n' + bc[4:])
|
||||
test('_temp', mapping, bc_path)
|
||||
|
||||
LegacyBytecodeTestPEP451 = BadBytecodeTestPEP451
|
||||
LegacyBytecodeTestPEP302 = BadBytecodeTestPEP302
|
||||
|
||||
# SourceLoader via both PEP 451 and 302 hooks
|
||||
|
||||
class SourceLoaderLegacyBytecodeTest(LegacyBytecodeTest):
|
||||
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.loader = cls.machinery.SourceFileLoader
|
||||
|
||||
@util.writes_bytecode_files
|
||||
def test_legacy_magic(self):
|
||||
# The magic number from 3.5.0/1/2 should be accepted as is
|
||||
def test(name, mapping, bytecode_path):
|
||||
self.import_(mapping[name], name)
|
||||
with open(bytecode_path, 'rb') as bytecode_file:
|
||||
self.assertEqual(bytecode_file.read(4),
|
||||
self.util._BACKCOMPAT_MAGIC_NUMBER)
|
||||
|
||||
self._test_legacy_magic(test)
|
||||
|
||||
|
||||
class SourceLoaderLegacyBytecodeTestPEP451(
|
||||
SourceLoaderLegacyBytecodeTest, LegacyBytecodeTestPEP451):
|
||||
pass
|
||||
|
||||
|
||||
(Frozen_SourceLegacyBytecodePEP451,
|
||||
Source_SourceLegacyBytecodePEP451
|
||||
) = util.test_both(SourceLoaderLegacyBytecodeTestPEP451, importlib=importlib,
|
||||
machinery=machinery, abc=importlib_abc,
|
||||
util=importlib_util)
|
||||
|
||||
|
||||
class SourceLoaderLegacyBytecodeTestPEP302(
|
||||
SourceLoaderLegacyBytecodeTest, LegacyBytecodeTestPEP302):
|
||||
pass
|
||||
|
||||
|
||||
(Frozen_SourceLegacyBytecodePEP302,
|
||||
Source_SourceLegacyBytecodePEP302
|
||||
) = util.test_both(SourceLoaderLegacyBytecodeTestPEP302, importlib=importlib,
|
||||
machinery=machinery, abc=importlib_abc,
|
||||
util=importlib_util)
|
||||
|
||||
# SourcelessLoader via both PEP 451 and 302 hooks
|
||||
|
||||
class SourcelessLoaderLegacyBytecodeTest(LegacyBytecodeTest):
|
||||
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.loader = cls.machinery.SourcelessFileLoader
|
||||
|
||||
|
||||
@util.writes_bytecode_files
|
||||
def test_legacy_magic(self):
|
||||
# The magic number from 3.5.0/1/2 should be accepted as is
|
||||
def test(name, mapping, bytecode_path):
|
||||
self.import_(bytecode_path, name)
|
||||
with open(bytecode_path, 'rb') as bytecode_file:
|
||||
self.assertEqual(bytecode_file.read(4),
|
||||
self.util._BACKCOMPAT_MAGIC_NUMBER)
|
||||
|
||||
self._test_legacy_magic(test)
|
||||
|
||||
class SourcelessLoaderLegacyBytecodeTestPEP451(
|
||||
SourcelessLoaderLegacyBytecodeTest, LegacyBytecodeTestPEP451):
|
||||
pass
|
||||
|
||||
(Frozen_SourcelessLegacyBytecodePEP451,
|
||||
Source_SourcelessLegacyBytecodePEP451
|
||||
) = util.test_both(SourcelessLoaderLegacyBytecodeTestPEP451, importlib=importlib,
|
||||
machinery=machinery, abc=importlib_abc,
|
||||
util=importlib_util)
|
||||
|
||||
|
||||
class SourcelessLoaderLegacyBytecodeTestPEP302(SourcelessLoaderLegacyBytecodeTest,
|
||||
LegacyBytecodeTestPEP302):
|
||||
pass
|
||||
|
||||
|
||||
(Frozen_SourcelessLegacyBytecodePEP302,
|
||||
Source_SourcelessLegacyBytecodePEP302
|
||||
) = util.test_both(SourcelessLoaderLegacyBytecodeTestPEP302, importlib=importlib,
|
||||
machinery=machinery, abc=importlib_abc,
|
||||
util=importlib_util)
|
||||
|
||||
# End of Issue #29537 legacy bytecode compatibility tests
|
||||
|
||||
if __name__ == '__main__':
|
||||
unittest.main()
|
||||
|
|
|
@ -251,7 +251,7 @@ Overridden parameters
|
|||
>>> f(x=5, **{'x': 3}, **{'x': 2})
|
||||
Traceback (most recent call last):
|
||||
...
|
||||
TypeError: f() got multiple values for keyword argument 'x'
|
||||
TypeError: function got multiple values for keyword argument 'x'
|
||||
|
||||
>>> f(**{1: 3}, **{1: 5})
|
||||
Traceback (most recent call last):
|
||||
|
|
|
@ -10,6 +10,12 @@ Release date: XXXX-XX-XX
|
|||
Core and Builtins
|
||||
-----------------
|
||||
|
||||
- Issue #29537: Restore runtime compatibility with bytecode files generated by
|
||||
CPython 3.5.0, 3.5.1 and 3.5.2, and adjust the eval loop to avoid the problems that
|
||||
could be caused by the malformed variant of the BUILD_MAP_UNPACK_WITH_CALL
|
||||
opcode that they may contain. Patch by Petr Viktorin, Serhiy Storchaka,
|
||||
and Nick Coghlan.
|
||||
|
||||
- Issue #28598: Support __rmod__ for subclasses of str being called before
|
||||
str.__mod__. Patch by Martijn Pieters.
|
||||
|
||||
|
|
|
@ -1263,6 +1263,11 @@ eq_mtime(time_t t1, time_t t2)
|
|||
return d <= 1;
|
||||
}
|
||||
|
||||
/* Issue #29537: handle issue27286 bytecode incompatibility
|
||||
* See Lib/importlib/_bootstrap_external.py for general discussion
|
||||
*/
|
||||
extern PY_UINT32_T _Py_BACKCOMPAT_MAGIC_NUMBER;
|
||||
|
||||
/* Given the contents of a .py[co] file in a buffer, unmarshal the data
|
||||
and return the code object. Return None if the magic word doesn't
|
||||
match (we do this instead of raising an exception as we fall back
|
||||
|
@ -1274,6 +1279,7 @@ unmarshal_code(PyObject *pathname, PyObject *data, time_t mtime)
|
|||
PyObject *code;
|
||||
unsigned char *buf = (unsigned char *)PyBytes_AsString(data);
|
||||
Py_ssize_t size = PyBytes_Size(data);
|
||||
PY_UINT32_T magic;
|
||||
|
||||
if (size < 12) {
|
||||
PyErr_SetString(ZipImportError,
|
||||
|
@ -1281,7 +1287,10 @@ unmarshal_code(PyObject *pathname, PyObject *data, time_t mtime)
|
|||
return NULL;
|
||||
}
|
||||
|
||||
if (get_uint32(buf) != (unsigned int)PyImport_GetMagicNumber()) {
|
||||
magic = get_uint32(buf);
|
||||
if (magic != (unsigned int)PyImport_GetMagicNumber()
|
||||
/* Issue #29537: handle issue27286 bytecode incompatibility */
|
||||
&& magic != _Py_BACKCOMPAT_MAGIC_NUMBER) {
|
||||
if (Py_VerboseFlag) {
|
||||
PySys_FormatStderr("# %R has bad magic\n",
|
||||
pathname);
|
||||
|
|
|
@ -2672,6 +2672,7 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
|
|||
if (PyErr_ExceptionMatches(PyExc_AttributeError) ||
|
||||
!PyMapping_Check(arg)) {
|
||||
int function_location = (oparg>>8) & 0xff;
|
||||
if (function_location == 1) {
|
||||
PyObject *func = (
|
||||
PEEK(function_location + num_maps));
|
||||
PyErr_Format(PyExc_TypeError,
|
||||
|
@ -2681,6 +2682,13 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
|
|||
PyEval_GetFuncDesc(func),
|
||||
arg->ob_type->tp_name);
|
||||
}
|
||||
else {
|
||||
PyErr_Format(PyExc_TypeError,
|
||||
"argument after ** "
|
||||
"must be a mapping, not %.200s",
|
||||
arg->ob_type->tp_name);
|
||||
}
|
||||
}
|
||||
Py_DECREF(sum);
|
||||
goto error;
|
||||
}
|
||||
|
@ -2689,9 +2697,10 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
|
|||
Py_ssize_t idx = 0;
|
||||
PyObject *key;
|
||||
int function_location = (oparg>>8) & 0xff;
|
||||
PyObject *func = PEEK(function_location + num_maps);
|
||||
Py_hash_t hash;
|
||||
_PySet_NextEntry(intersection, &idx, &key, &hash);
|
||||
if (function_location == 1) {
|
||||
PyObject *func = PEEK(function_location + num_maps);
|
||||
if (!PyUnicode_Check(key)) {
|
||||
PyErr_Format(PyExc_TypeError,
|
||||
"%.200s%.200s keywords must be strings",
|
||||
|
@ -2705,6 +2714,18 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
|
|||
PyEval_GetFuncDesc(func),
|
||||
key);
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (!PyUnicode_Check(key)) {
|
||||
PyErr_SetString(PyExc_TypeError,
|
||||
"keywords must be strings");
|
||||
} else {
|
||||
PyErr_Format(PyExc_TypeError,
|
||||
"function got multiple "
|
||||
"values for keyword argument '%U'",
|
||||
key);
|
||||
}
|
||||
}
|
||||
Py_DECREF(intersection);
|
||||
Py_DECREF(sum);
|
||||
goto error;
|
||||
|
@ -2716,6 +2737,7 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
|
|||
if (PyErr_ExceptionMatches(PyExc_AttributeError)) {
|
||||
if (with_call) {
|
||||
int function_location = (oparg>>8) & 0xff;
|
||||
if (function_location == 1) {
|
||||
PyObject *func = PEEK(function_location + num_maps);
|
||||
PyErr_Format(PyExc_TypeError,
|
||||
"%.200s%.200s argument after ** "
|
||||
|
@ -2724,6 +2746,13 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag)
|
|||
PyEval_GetFuncDesc(func),
|
||||
arg->ob_type->tp_name);
|
||||
}
|
||||
else {
|
||||
PyErr_Format(PyExc_TypeError,
|
||||
"argument after ** "
|
||||
"must be a mapping, not %.200s",
|
||||
arg->ob_type->tp_name);
|
||||
}
|
||||
}
|
||||
else {
|
||||
PyErr_Format(PyExc_TypeError,
|
||||
"'%.200s' object is not a mapping",
|
||||
|
|
|
@ -483,9 +483,17 @@ PyImport_Cleanup(void)
|
|||
#undef STORE_MODULE_WEAKREF
|
||||
}
|
||||
|
||||
/* Issue #29537: handle issue27286 bytecode incompatibility
|
||||
*
|
||||
* In order to avoid forcing recompilation of all extension modules, we export
|
||||
* the legacy 3.5.0 magic number here rather than putting it in a header file.
|
||||
*
|
||||
* See Lib/importlib/_bootstrap_external.py for general discussion
|
||||
*/
|
||||
PY_UINT32_T _Py_BACKCOMPAT_MAGIC_NUMBER = 168627478;
|
||||
PY_UINT32_T _Py_BACKCOMPAT_HALF_MAGIC = 3350;
|
||||
|
||||
/* Helper for pythonrun.c -- return magic number and tag. */
|
||||
|
||||
long
|
||||
PyImport_GetMagicNumber(void)
|
||||
{
|
||||
|
|
File diff suppressed because it is too large
Load diff
|
@ -259,6 +259,21 @@ PyRun_InteractiveOneFlags(FILE *fp, const char *filename_str, PyCompilerFlags *f
|
|||
}
|
||||
|
||||
|
||||
/* Issue #29537: handle issue27286 bytecode incompatibility
|
||||
* See Lib/importlib/_bootstrap_external.py for general discussion
|
||||
*/
|
||||
extern PY_UINT32_T _Py_BACKCOMPAT_HALF_MAGIC;
|
||||
static int
|
||||
_check_half_magic(unsigned int read_value, unsigned int halfmagic) {
|
||||
return (read_value == halfmagic || read_value == _Py_BACKCOMPAT_HALF_MAGIC);
|
||||
}
|
||||
|
||||
extern PY_UINT32_T _Py_BACKCOMPAT_MAGIC_NUMBER;
|
||||
static int
|
||||
_check_magic(long read_value, long magic) {
|
||||
return (read_value == magic || read_value == _Py_BACKCOMPAT_MAGIC_NUMBER);
|
||||
}
|
||||
|
||||
/* Check whether a file may be a pyc file: Look at the extension,
|
||||
the file type, and, if we may close it, at the first few bytes. */
|
||||
|
||||
|
@ -290,7 +305,7 @@ maybe_pyc_file(FILE *fp, const char* filename, const char* ext, int closeit)
|
|||
int ispyc = 0;
|
||||
if (ftell(fp) == 0) {
|
||||
if (fread(buf, 1, 2, fp) == 2 &&
|
||||
((unsigned int)buf[1]<<8 | buf[0]) == halfmagic)
|
||||
_check_half_magic(((unsigned int)buf[1]<<8 | buf[0]), halfmagic))
|
||||
ispyc = 1;
|
||||
rewind(fp);
|
||||
}
|
||||
|
@ -988,7 +1003,7 @@ run_pyc_file(FILE *fp, const char *filename, PyObject *globals,
|
|||
long PyImport_GetMagicNumber(void);
|
||||
|
||||
magic = PyMarshal_ReadLongFromFile(fp);
|
||||
if (magic != PyImport_GetMagicNumber()) {
|
||||
if (!_check_magic(magic, PyImport_GetMagicNumber())) {
|
||||
if (!PyErr_Occurred())
|
||||
PyErr_SetString(PyExc_RuntimeError,
|
||||
"Bad magic number in .pyc file");
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue