mirror of
https://github.com/python/cpython.git
synced 2025-10-06 23:21:06 +00:00
bpo-34726: Fix handling of hash-based pycs in zipimport. (GH-10327)
Current support for hash-based bytecode files in `zipimport` is rather sparse, which leads to test failures when the test suite is run with the ``SOURCE_DATE_EPOCH`` environment variable set. This teaches zipimport to handle hash-based pycs properly.
This commit is contained in:
parent
bfe1839aa9
commit
a6e956bcb0
4 changed files with 1147 additions and 1049 deletions
|
@ -259,10 +259,32 @@ class CmdLineTest(unittest.TestCase):
|
||||||
self._check_script(zip_name, run_name, zip_name, zip_name, '',
|
self._check_script(zip_name, run_name, zip_name, zip_name, '',
|
||||||
zipimport.zipimporter)
|
zipimport.zipimporter)
|
||||||
|
|
||||||
def test_zipfile_compiled(self):
|
def test_zipfile_compiled_timestamp(self):
|
||||||
with support.temp_dir() as script_dir:
|
with support.temp_dir() as script_dir:
|
||||||
script_name = _make_test_script(script_dir, '__main__')
|
script_name = _make_test_script(script_dir, '__main__')
|
||||||
compiled_name = py_compile.compile(script_name, doraise=True)
|
compiled_name = py_compile.compile(
|
||||||
|
script_name, doraise=True,
|
||||||
|
invalidation_mode=py_compile.PycInvalidationMode.TIMESTAMP)
|
||||||
|
zip_name, run_name = make_zip_script(script_dir, 'test_zip', compiled_name)
|
||||||
|
self._check_script(zip_name, run_name, zip_name, zip_name, '',
|
||||||
|
zipimport.zipimporter)
|
||||||
|
|
||||||
|
def test_zipfile_compiled_checked_hash(self):
|
||||||
|
with support.temp_dir() as script_dir:
|
||||||
|
script_name = _make_test_script(script_dir, '__main__')
|
||||||
|
compiled_name = py_compile.compile(
|
||||||
|
script_name, doraise=True,
|
||||||
|
invalidation_mode=py_compile.PycInvalidationMode.CHECKED_HASH)
|
||||||
|
zip_name, run_name = make_zip_script(script_dir, 'test_zip', compiled_name)
|
||||||
|
self._check_script(zip_name, run_name, zip_name, zip_name, '',
|
||||||
|
zipimport.zipimporter)
|
||||||
|
|
||||||
|
def test_zipfile_compiled_unchecked_hash(self):
|
||||||
|
with support.temp_dir() as script_dir:
|
||||||
|
script_name = _make_test_script(script_dir, '__main__')
|
||||||
|
compiled_name = py_compile.compile(
|
||||||
|
script_name, doraise=True,
|
||||||
|
invalidation_mode=py_compile.PycInvalidationMode.UNCHECKED_HASH)
|
||||||
zip_name, run_name = make_zip_script(script_dir, 'test_zip', compiled_name)
|
zip_name, run_name = make_zip_script(script_dir, 'test_zip', compiled_name)
|
||||||
self._check_script(zip_name, run_name, zip_name, zip_name, '',
|
self._check_script(zip_name, run_name, zip_name, zip_name, '',
|
||||||
zipimport.zipimporter)
|
zipimport.zipimporter)
|
||||||
|
|
|
@ -578,33 +578,53 @@ def _eq_mtime(t1, t2):
|
||||||
# dostime only stores even seconds, so be lenient
|
# dostime only stores even seconds, so be lenient
|
||||||
return abs(t1 - t2) <= 1
|
return abs(t1 - t2) <= 1
|
||||||
|
|
||||||
|
|
||||||
# Given the contents of a .py[co] file, unmarshal the data
|
# Given the contents of a .py[co] file, unmarshal the data
|
||||||
# and return the code object. Return None if the magic word doesn't
|
# and return the code object. Return None if the magic word doesn't
|
||||||
# match (we do this instead of raising an exception as we fall back
|
# match, or if the recorded .py[co] metadata does not match the source,
|
||||||
|
# (we do this instead of raising an exception as we fall back
|
||||||
# to .py if available and we don't want to mask other errors).
|
# to .py if available and we don't want to mask other errors).
|
||||||
def _unmarshal_code(pathname, data, mtime):
|
def _unmarshal_code(self, pathname, fullpath, fullname, data):
|
||||||
if len(data) < 16:
|
exc_details = {
|
||||||
raise ZipImportError('bad pyc data')
|
'name': fullname,
|
||||||
|
'path': fullpath,
|
||||||
|
}
|
||||||
|
|
||||||
if data[:4] != _bootstrap_external.MAGIC_NUMBER:
|
try:
|
||||||
_bootstrap._verbose_message('{!r} has bad magic', pathname)
|
flags = _bootstrap_external._classify_pyc(data, fullname, exc_details)
|
||||||
return None # signal caller to try alternative
|
except ImportError:
|
||||||
|
return None
|
||||||
flags = _unpack_uint32(data[4:8])
|
|
||||||
if flags != 0:
|
hash_based = flags & 0b1 != 0
|
||||||
# Hash-based pyc. We currently refuse to handle checked hash-based
|
if hash_based:
|
||||||
# pycs. We could validate hash-based pycs against the source, but it
|
check_source = flags & 0b10 != 0
|
||||||
# seems likely that most people putting hash-based pycs in a zipfile
|
if (_imp.check_hash_based_pycs != 'never' and
|
||||||
# will use unchecked ones.
|
(check_source or _imp.check_hash_based_pycs == 'always')):
|
||||||
if (_imp.check_hash_based_pycs != 'never' and
|
source_bytes = _get_pyc_source(self, fullpath)
|
||||||
(flags != 0x1 or _imp.check_hash_based_pycs == 'always')):
|
if source_bytes is not None:
|
||||||
|
source_hash = _imp.source_hash(
|
||||||
|
_bootstrap_external._RAW_MAGIC_NUMBER,
|
||||||
|
source_bytes,
|
||||||
|
)
|
||||||
|
|
||||||
|
try:
|
||||||
|
_boostrap_external._validate_hash_pyc(
|
||||||
|
data, source_hash, fullname, exc_details)
|
||||||
|
except ImportError:
|
||||||
|
return None
|
||||||
|
else:
|
||||||
|
source_mtime, source_size = \
|
||||||
|
_get_mtime_and_size_of_source(self, fullpath)
|
||||||
|
|
||||||
|
if source_mtime:
|
||||||
|
# We don't use _bootstrap_external._validate_timestamp_pyc
|
||||||
|
# to allow for a more lenient timestamp check.
|
||||||
|
if (not _eq_mtime(_unpack_uint32(data[8:12]), source_mtime) or
|
||||||
|
_unpack_uint32(data[12:16]) != source_size):
|
||||||
|
_bootstrap._verbose_message(
|
||||||
|
f'bytecode is stale for {fullname!r}')
|
||||||
return None
|
return None
|
||||||
elif mtime != 0 and not _eq_mtime(_unpack_uint32(data[8:12]), mtime):
|
|
||||||
_bootstrap._verbose_message('{!r} has bad mtime', pathname)
|
|
||||||
return None # signal caller to try alternative
|
|
||||||
|
|
||||||
# XXX the pyc's size field is ignored; timestamp collisions are probably
|
|
||||||
# unimportant with zip files.
|
|
||||||
code = marshal.loads(data[16:])
|
code = marshal.loads(data[16:])
|
||||||
if not isinstance(code, _code_type):
|
if not isinstance(code, _code_type):
|
||||||
raise TypeError(f'compiled module {pathname!r} is not a code object')
|
raise TypeError(f'compiled module {pathname!r} is not a code object')
|
||||||
|
@ -639,9 +659,9 @@ def _parse_dostime(d, t):
|
||||||
-1, -1, -1))
|
-1, -1, -1))
|
||||||
|
|
||||||
# Given a path to a .pyc file in the archive, return the
|
# Given a path to a .pyc file in the archive, return the
|
||||||
# modification time of the matching .py file, or 0 if no source
|
# modification time of the matching .py file and its size,
|
||||||
# is available.
|
# or (0, 0) if no source is available.
|
||||||
def _get_mtime_of_source(self, path):
|
def _get_mtime_and_size_of_source(self, path):
|
||||||
try:
|
try:
|
||||||
# strip 'c' or 'o' from *.py[co]
|
# strip 'c' or 'o' from *.py[co]
|
||||||
assert path[-1:] in ('c', 'o')
|
assert path[-1:] in ('c', 'o')
|
||||||
|
@ -651,9 +671,27 @@ def _get_mtime_of_source(self, path):
|
||||||
# with an embedded pyc time stamp
|
# with an embedded pyc time stamp
|
||||||
time = toc_entry[5]
|
time = toc_entry[5]
|
||||||
date = toc_entry[6]
|
date = toc_entry[6]
|
||||||
return _parse_dostime(date, time)
|
uncompressed_size = toc_entry[3]
|
||||||
|
return _parse_dostime(date, time), uncompressed_size
|
||||||
except (KeyError, IndexError, TypeError):
|
except (KeyError, IndexError, TypeError):
|
||||||
return 0
|
return 0, 0
|
||||||
|
|
||||||
|
|
||||||
|
# Given a path to a .pyc file in the archive, return the
|
||||||
|
# contents of the matching .py file, or None if no source
|
||||||
|
# is available.
|
||||||
|
def _get_pyc_source(self, path):
|
||||||
|
# strip 'c' or 'o' from *.py[co]
|
||||||
|
assert path[-1:] in ('c', 'o')
|
||||||
|
path = path[:-1]
|
||||||
|
|
||||||
|
try:
|
||||||
|
toc_entry = self._files[path]
|
||||||
|
except KeyError:
|
||||||
|
return None
|
||||||
|
else:
|
||||||
|
return _get_data(self.archive, toc_entry)
|
||||||
|
|
||||||
|
|
||||||
# Get the code object associated with the module specified by
|
# Get the code object associated with the module specified by
|
||||||
# 'fullname'.
|
# 'fullname'.
|
||||||
|
@ -670,8 +708,7 @@ def _get_module_code(self, fullname):
|
||||||
modpath = toc_entry[0]
|
modpath = toc_entry[0]
|
||||||
data = _get_data(self.archive, toc_entry)
|
data = _get_data(self.archive, toc_entry)
|
||||||
if isbytecode:
|
if isbytecode:
|
||||||
mtime = _get_mtime_of_source(self, fullpath)
|
code = _unmarshal_code(self, modpath, fullpath, fullname, data)
|
||||||
code = _unmarshal_code(modpath, data, mtime)
|
|
||||||
else:
|
else:
|
||||||
code = _compile_source(modpath, data)
|
code = _compile_source(modpath, data)
|
||||||
if code is None:
|
if code is None:
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
Fix handling of hash-based bytecode files in :mod:`zipimport`.
|
||||||
|
Patch by Elvis Pranskevichus.
|
File diff suppressed because it is too large
Load diff
Loading…
Add table
Add a link
Reference in a new issue