Mirror of https://github.com/python/cpython.git (synced 2025-09-26 18:29:57 +00:00)
gh-118107: Fix zipimporter ZIP64 handling. (GH-118108)
Add missing import to code that handles too large files and offsets. Use
list, not tuple, for a mutable sequence. Add tests to prevent similar
mistakes.

Co-authored-by: Gregory P. Smith [Google LLC] <greg@krypto.org>
Co-authored-by: Kirill Podoprigora <kirill.bast9@mail.ru>
This commit is contained in:
parent 698417f2f6
commit 49258efada

7 changed files with 128 additions and 3 deletions
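
The fix targets two distinct bugs in Lib/zipimport.py's ZIP64 handling: `struct` was not imported in the code path that parses oversized file entries, and `struct.unpack_from()` returns an immutable tuple where the surrounding code needs a mutable sequence it can consume values from. Below is a minimal sketch of the tuple/list bug class, with a fabricated `extra_data` blob; the `pop(0)` consumption mirrors the commit's "mutable sequence" rationale, though the exact pattern inside zipimport may differ.

import struct

# Fabricated stand-in for a ZIP64 extra block: 2-byte header ID (0x0001),
# 2-byte data length, then three 64-bit values (chosen arbitrarily).
extra_data = struct.pack("<HH3Q", 0x0001, 24, 2**32, 2**33, 2**34)

num_extra_values = (len(extra_data) - 4) // 8  # -> 3

# Before the fix: unpack_from() returns a tuple, which cannot be mutated.
values = struct.unpack_from(f"<{min(num_extra_values, 3)}Q", extra_data, offset=4)
try:
    values.pop(0)
except AttributeError as exc:
    print(exc)  # 'tuple' object has no attribute 'pop'

# After the fix: wrapping in list() gives a sequence that can be consumed.
values = list(struct.unpack_from(f"<{min(num_extra_values, 3)}Q", extra_data, offset=4))
assert values.pop(0) == 2**32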

Lib/test/test_zipimport.py:

@@ -1,8 +1,10 @@
 import sys
 import os
 import marshal
+import glob
 import importlib
 import importlib.util
+import re
 import struct
 import time
 import unittest

@@ -54,6 +56,7 @@ TESTPACK = "ziptestpackage"
 TESTPACK2 = "ziptestpackage2"
 TEMP_DIR = os.path.abspath("junk95142")
 TEMP_ZIP = os.path.abspath("junk95142.zip")
+TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), "zipimport_data")

 pyc_file = importlib.util.cache_from_source(TESTMOD + '.py')
 pyc_ext = '.pyc'

@@ -134,7 +137,9 @@ class UncompressedZipImportTestCase(ImportHooksBaseTestCase):

     def doTest(self, expected_ext, files, *modules, **kw):
         self.makeZip(files, **kw)
+        self.doTestWithPreBuiltZip(expected_ext, *modules, **kw)

+    def doTestWithPreBuiltZip(self, expected_ext, *modules, **kw):
         sys.path.insert(0, TEMP_ZIP)

         mod = importlib.import_module(".".join(modules))

@@ -810,6 +815,122 @@ class UncompressedZipImportTestCase(ImportHooksBaseTestCase):
         files = self.getZip64Files()
         self.doTest(".py", files, "f65536", comment=b"c" * ((1 << 16) - 1))

+    def testZip64LargeFile(self):
+        support.requires(
+            "largefile",
+            f"test generates files >{0xFFFFFFFF} bytes and takes a long time "
+            "to run"
+        )
+
+        # N.B.: We do alot of gymnastics below in the ZIP_STORED case to save
+        # and reconstruct a sparse zip on systems that support sparse files.
+        # Instead of creating a ~8GB zip file mainly consisting of null bytes
+        # for every run of the test, we create the zip once and save off the
+        # non-null portions of the resulting file as data blobs with offsets
+        # that allow re-creating the zip file sparsely. This drops disk space
+        # usage to ~9KB for the ZIP_STORED case and drops that test time by ~2
+        # orders of magnitude. For the ZIP_DEFLATED case, however, we bite the
+        # bullet. The resulting zip file is ~8MB of non-null data; so the sparse
+        # trick doesn't work and would result in that full ~8MB zip data file
+        # being checked in to source control.
+        parts_glob = f"sparse-zip64-c{self.compression:d}-0x*.part"
+        full_parts_glob = os.path.join(TEST_DATA_DIR, parts_glob)
+        pre_built_zip_parts = glob.glob(full_parts_glob)
+
+        self.addCleanup(os_helper.unlink, TEMP_ZIP)
+        if not pre_built_zip_parts:
+            if self.compression != ZIP_STORED:
+                support.requires(
+                    "cpu",
+                    "test requires a lot of CPU for compression."
+                )
+            self.addCleanup(os_helper.unlink, os_helper.TESTFN)
+            with open(os_helper.TESTFN, "wb") as f:
+                f.write(b"data")
+                f.write(os.linesep.encode())
+                f.seek(0xffff_ffff, os.SEEK_CUR)
+                f.write(os.linesep.encode())
+            os.utime(os_helper.TESTFN, (0.0, 0.0))
+            with ZipFile(
+                TEMP_ZIP,
+                "w",
+                compression=self.compression,
+                strict_timestamps=False
+            ) as z:
+                z.write(os_helper.TESTFN, "data1")
+                z.writestr(
+                    ZipInfo("module.py", (1980, 1, 1, 0, 0, 0)), test_src
+                )
+                z.write(os_helper.TESTFN, "data2")
+
+            # This "works" but relies on the zip format having a non-empty
+            # final page due to the trailing central directory to wind up with
+            # the correct length file.
+            def make_sparse_zip_parts(name):
+                empty_page = b"\0" * 4096
+                with open(name, "rb") as f:
+                    part = None
+                    try:
+                        while True:
+                            offset = f.tell()
+                            data = f.read(len(empty_page))
+                            if not data:
+                                break
+                            if data != empty_page:
+                                if not part:
+                                    part_fullname = os.path.join(
+                                        TEST_DATA_DIR,
+                                        f"sparse-zip64-c{self.compression:d}-"
+                                        f"{offset:#011x}.part",
+                                    )
+                                    os.makedirs(
+                                        os.path.dirname(part_fullname),
+                                        exist_ok=True
+                                    )
+                                    part = open(part_fullname, "wb")
+                                    print("Created", part_fullname)
+                                part.write(data)
+                            else:
+                                if part:
+                                    part.close()
+                                part = None
+                    finally:
+                        if part:
+                            part.close()
+
+            if self.compression == ZIP_STORED:
+                print(f"Creating sparse parts to check in into {TEST_DATA_DIR}:")
+                make_sparse_zip_parts(TEMP_ZIP)
+
+        else:
+            def extract_offset(name):
+                if m := re.search(r"-(0x[0-9a-f]{9})\.part$", name):
+                    return int(m.group(1), base=16)
+                raise ValueError(f"{name=} does not fit expected pattern.")
+            offset_parts = [(extract_offset(n), n) for n in pre_built_zip_parts]
+            with open(TEMP_ZIP, "wb") as f:
+                for offset, part_fn in sorted(offset_parts):
+                    with open(part_fn, "rb") as part:
+                        f.seek(offset, os.SEEK_SET)
+                        f.write(part.read())
+            # Confirm that the reconstructed zip file works and looks right.
+            with ZipFile(TEMP_ZIP, "r") as z:
+                self.assertEqual(
+                    z.getinfo("module.py").date_time, (1980, 1, 1, 0, 0, 0)
+                )
+                self.assertEqual(
+                    z.read("module.py"), test_src.encode(),
+                    msg=f"Recreate {full_parts_glob}, unexpected contents."
+                )
+                def assertDataEntry(name):
+                    zinfo = z.getinfo(name)
+                    self.assertEqual(zinfo.date_time, (1980, 1, 1, 0, 0, 0))
+                    self.assertGreater(zinfo.file_size, 0xffff_ffff)
+                assertDataEntry("data1")
+                assertDataEntry("data2")
+
+        self.doTestWithPreBuiltZip(".py", "module")
+

 @support.requires_zlib()
 class CompressedZipImportTestCase(UncompressedZipImportTestCase):
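
The sparse-file trick the comment in testZip64LargeFile describes relies on standard filesystem behavior: seeking past end-of-file and then writing leaves a "hole" that reads back as zero bytes but allocates no disk blocks on sparse-capable filesystems. A standalone illustration follows; the file name is arbitrary, `st_blocks` is POSIX-only, and actual allocation is filesystem dependent.

import os

# Write a few bytes, seek past the 32-bit size boundary, write a few more.
with open("sparse-demo.bin", "wb") as f:
    f.write(b"data")
    f.seek(0xffff_ffff, os.SEEK_CUR)
    f.write(b"data")

st = os.stat("sparse-demo.bin")
print(hex(st.st_size))     # 0x100000007 -- logical size is just over 4 GiB
print(st.st_blocks * 512)  # physical bytes allocated; small on sparse-capable filesystems
os.unlink("sparse-demo.bin")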

New binary files (not shown):
  Lib/test/zipimport_data/sparse-zip64-c0-0x000000000.part
  Lib/test/zipimport_data/sparse-zip64-c0-0x100000000.part
  Lib/test/zipimport_data/sparse-zip64-c0-0x200000000.part

Lib/zipimport.py:

@@ -517,8 +517,9 @@ def _read_directory(archive):
             num_extra_values = (len(extra_data) - 4) // 8
             if num_extra_values > 3:
                 raise ZipImportError(f"can't read header extra: {archive!r}", path=archive)
-            values = struct.unpack_from(f"<{min(num_extra_values, 3)}Q",
-                                        extra_data, offset=4)
+            import struct
+            values = list(struct.unpack_from(f"<{min(num_extra_values, 3)}Q",
+                                             extra_data, offset=4))

             # N.b. Here be dragons: the ordering of these is different than
             # the header fields, and it's really easy to get it wrong since
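
The "Here be dragons" comment above refers to the ZIP64 extended-information record (extra-field header ID 0x0001): per the ZIP spec, a 64-bit replacement value is present only for each central-directory field that overflowed to 0xFFFFFFFF, in the fixed order uncompressed size, compressed size, local header offset, which is not the order those fields appear in the header itself. A hedged sketch of that rule (not the actual CPython function):

import struct

MAX_UINT32 = 0xffff_ffff

def apply_zip64_extra(extra_data, file_size, data_size, file_offset):
    # A 64-bit slot exists only for each field that is 0xFFFFFFFF in the
    # central directory, in this fixed order: uncompressed size,
    # compressed size, local header offset.
    num_extra_values = (len(extra_data) - 4) // 8
    values = list(struct.unpack_from(f"<{min(num_extra_values, 3)}Q",
                                     extra_data, offset=4))
    if file_size == MAX_UINT32:    # uncompressed size overflowed
        file_size = values.pop(0)
    if data_size == MAX_UINT32:    # compressed size overflowed
        data_size = values.pop(0)
    if file_offset == MAX_UINT32:  # local header offset overflowed
        file_offset = values.pop(0)
    return file_size, data_size, file_offset

# Only the offset overflowed here, so the extra block carries a single value.
extra = struct.pack("<HHQ", 0x0001, 8, 0x1_2345_6789)
print(apply_zip64_extra(extra, 100, 50, MAX_UINT32))  # (100, 50, 4886718345)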

Makefile.pre.in:

@@ -2483,7 +2483,8 @@ TESTSUBDIRS= idlelib/idle_test \
 		test/typinganndata \
 		test/wheeldata \
 		test/xmltestdata \
-		test/xmltestdata/c14n-20
+		test/xmltestdata/c14n-20 \
+		test/zipimport_data

 COMPILEALL_OPTS=-j0

Misc/NEWS.d (new file):

@@ -0,0 +1,2 @@
+Fix :mod:`zipimport` reading of ZIP64 files with file entries that are too big or
+offset too far.