gh-118107: Fix zipimporter ZIP64 handling. (GH-118108)

Add missing import to code that handles too large files and offsets.
Use list, not tuple, for a mutable sequence.

Add tests to prevent similar mistakes.

---------

Co-authored-by: Gregory P. Smith [Google LLC] <greg@krypto.org>
Co-authored-by: Kirill Podoprigora <kirill.bast9@mail.ru>
This commit is contained in:
John Sirois 2024-05-07 02:23:27 -05:00 committed by GitHub
parent 698417f2f6
commit 49258efada
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
7 changed files with 128 additions and 3 deletions

View file

@ -1,8 +1,10 @@
import sys import sys
import os import os
import marshal import marshal
import glob
import importlib import importlib
import importlib.util import importlib.util
import re
import struct import struct
import time import time
import unittest import unittest
@ -54,6 +56,7 @@ TESTPACK = "ziptestpackage"
TESTPACK2 = "ziptestpackage2" TESTPACK2 = "ziptestpackage2"
TEMP_DIR = os.path.abspath("junk95142") TEMP_DIR = os.path.abspath("junk95142")
TEMP_ZIP = os.path.abspath("junk95142.zip") TEMP_ZIP = os.path.abspath("junk95142.zip")
TEST_DATA_DIR = os.path.join(os.path.dirname(__file__), "zipimport_data")
pyc_file = importlib.util.cache_from_source(TESTMOD + '.py') pyc_file = importlib.util.cache_from_source(TESTMOD + '.py')
pyc_ext = '.pyc' pyc_ext = '.pyc'
@ -134,7 +137,9 @@ class UncompressedZipImportTestCase(ImportHooksBaseTestCase):
def doTest(self, expected_ext, files, *modules, **kw): def doTest(self, expected_ext, files, *modules, **kw):
self.makeZip(files, **kw) self.makeZip(files, **kw)
self.doTestWithPreBuiltZip(expected_ext, *modules, **kw)
def doTestWithPreBuiltZip(self, expected_ext, *modules, **kw):
sys.path.insert(0, TEMP_ZIP) sys.path.insert(0, TEMP_ZIP)
mod = importlib.import_module(".".join(modules)) mod = importlib.import_module(".".join(modules))
@ -810,6 +815,122 @@ class UncompressedZipImportTestCase(ImportHooksBaseTestCase):
files = self.getZip64Files() files = self.getZip64Files()
self.doTest(".py", files, "f65536", comment=b"c" * ((1 << 16) - 1)) self.doTest(".py", files, "f65536", comment=b"c" * ((1 << 16) - 1))
def testZip64LargeFile(self):
support.requires(
"largefile",
f"test generates files >{0xFFFFFFFF} bytes and takes a long time "
"to run"
)
# N.B.: We do alot of gymnastics below in the ZIP_STORED case to save
# and reconstruct a sparse zip on systems that support sparse files.
# Instead of creating a ~8GB zip file mainly consisting of null bytes
# for every run of the test, we create the zip once and save off the
# non-null portions of the resulting file as data blobs with offsets
# that allow re-creating the zip file sparsely. This drops disk space
# usage to ~9KB for the ZIP_STORED case and drops that test time by ~2
# orders of magnitude. For the ZIP_DEFLATED case, however, we bite the
# bullet. The resulting zip file is ~8MB of non-null data; so the sparse
# trick doesn't work and would result in that full ~8MB zip data file
# being checked in to source control.
parts_glob = f"sparse-zip64-c{self.compression:d}-0x*.part"
full_parts_glob = os.path.join(TEST_DATA_DIR, parts_glob)
pre_built_zip_parts = glob.glob(full_parts_glob)
self.addCleanup(os_helper.unlink, TEMP_ZIP)
if not pre_built_zip_parts:
if self.compression != ZIP_STORED:
support.requires(
"cpu",
"test requires a lot of CPU for compression."
)
self.addCleanup(os_helper.unlink, os_helper.TESTFN)
with open(os_helper.TESTFN, "wb") as f:
f.write(b"data")
f.write(os.linesep.encode())
f.seek(0xffff_ffff, os.SEEK_CUR)
f.write(os.linesep.encode())
os.utime(os_helper.TESTFN, (0.0, 0.0))
with ZipFile(
TEMP_ZIP,
"w",
compression=self.compression,
strict_timestamps=False
) as z:
z.write(os_helper.TESTFN, "data1")
z.writestr(
ZipInfo("module.py", (1980, 1, 1, 0, 0, 0)), test_src
)
z.write(os_helper.TESTFN, "data2")
# This "works" but relies on the zip format having a non-empty
# final page due to the trailing central directory to wind up with
# the correct length file.
def make_sparse_zip_parts(name):
empty_page = b"\0" * 4096
with open(name, "rb") as f:
part = None
try:
while True:
offset = f.tell()
data = f.read(len(empty_page))
if not data:
break
if data != empty_page:
if not part:
part_fullname = os.path.join(
TEST_DATA_DIR,
f"sparse-zip64-c{self.compression:d}-"
f"{offset:#011x}.part",
)
os.makedirs(
os.path.dirname(part_fullname),
exist_ok=True
)
part = open(part_fullname, "wb")
print("Created", part_fullname)
part.write(data)
else:
if part:
part.close()
part = None
finally:
if part:
part.close()
if self.compression == ZIP_STORED:
print(f"Creating sparse parts to check in into {TEST_DATA_DIR}:")
make_sparse_zip_parts(TEMP_ZIP)
else:
def extract_offset(name):
if m := re.search(r"-(0x[0-9a-f]{9})\.part$", name):
return int(m.group(1), base=16)
raise ValueError(f"{name=} does not fit expected pattern.")
offset_parts = [(extract_offset(n), n) for n in pre_built_zip_parts]
with open(TEMP_ZIP, "wb") as f:
for offset, part_fn in sorted(offset_parts):
with open(part_fn, "rb") as part:
f.seek(offset, os.SEEK_SET)
f.write(part.read())
# Confirm that the reconstructed zip file works and looks right.
with ZipFile(TEMP_ZIP, "r") as z:
self.assertEqual(
z.getinfo("module.py").date_time, (1980, 1, 1, 0, 0, 0)
)
self.assertEqual(
z.read("module.py"), test_src.encode(),
msg=f"Recreate {full_parts_glob}, unexpected contents."
)
def assertDataEntry(name):
zinfo = z.getinfo(name)
self.assertEqual(zinfo.date_time, (1980, 1, 1, 0, 0, 0))
self.assertGreater(zinfo.file_size, 0xffff_ffff)
assertDataEntry("data1")
assertDataEntry("data2")
self.doTestWithPreBuiltZip(".py", "module")
@support.requires_zlib() @support.requires_zlib()
class CompressedZipImportTestCase(UncompressedZipImportTestCase): class CompressedZipImportTestCase(UncompressedZipImportTestCase):

View file

@ -517,8 +517,9 @@ def _read_directory(archive):
num_extra_values = (len(extra_data) - 4) // 8 num_extra_values = (len(extra_data) - 4) // 8
if num_extra_values > 3: if num_extra_values > 3:
raise ZipImportError(f"can't read header extra: {archive!r}", path=archive) raise ZipImportError(f"can't read header extra: {archive!r}", path=archive)
values = struct.unpack_from(f"<{min(num_extra_values, 3)}Q", import struct
extra_data, offset=4) values = list(struct.unpack_from(f"<{min(num_extra_values, 3)}Q",
extra_data, offset=4))
# N.b. Here be dragons: the ordering of these is different than # N.b. Here be dragons: the ordering of these is different than
# the header fields, and it's really easy to get it wrong since # the header fields, and it's really easy to get it wrong since

View file

@ -2483,7 +2483,8 @@ TESTSUBDIRS= idlelib/idle_test \
test/typinganndata \ test/typinganndata \
test/wheeldata \ test/wheeldata \
test/xmltestdata \ test/xmltestdata \
test/xmltestdata/c14n-20 test/xmltestdata/c14n-20 \
test/zipimport_data
COMPILEALL_OPTS=-j0 COMPILEALL_OPTS=-j0

View file

@ -0,0 +1,2 @@
Fix :mod:`zipimport` reading of ZIP64 files with file entries that are too big or
offset too far.