gh-103861: Fix Zip64 extensions not being properly applied in some cases (#103863)

Fix Zip64 extensions not being properly applied in some cases:

Fixes an issue where adding a small file to a `ZipFile`
object while forcing zip64 extensions causes an extra Zip64 record to be
added to the zip, but doesn't update the `min_version` or file sizes in
the primary central directory header.

Also fixed an edge case in checking if zip64 extensions are required:

This fixes an issue where if data requiring zip64 extensions was added
to an unseekable stream without specifying `force_zip64=True`, zip64
extensions would not be used and a RuntimeError would not be raised when
closing the file (even though the size would be known at that point).
This would result in successfully writing corrupt zip files.

Deciding if zip64 extensions are required outside of the `FileHeader`
function means that both `FileHeader` and `_ZipWriteFile` will always be
in sync. Previously, the `FileHeader` function could enable zip64
extensions without propagating that decision to the `_ZipWriteFile`
class, which would then not correctly write the data descriptor record
or check for errors on close.

If anyone is actually using `ZipInfo.FileHeader` as a public API without
explicitly passing True or False in for zip64, their own code may still be
susceptible to that kind of bug unless they make a similar change to
where the zip64 decision happens.

Fixes #103861

---------

Co-authored-by: Gregory P. Smith <greg@krypto.org>
This commit is contained in:
Carey Metcalfe 2023-05-16 01:43:44 -06:00 committed by GitHub
parent 85ec192ac4
commit 798bcaa1eb
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 172 additions and 15 deletions

View file

@ -442,7 +442,12 @@ class ZipInfo (object):
return ''.join(result)
def FileHeader(self, zip64=None):
"""Return the per-file header as a bytes object."""
"""Return the per-file header as a bytes object.
When the optional zip64 arg is None rather than a bool, we will
decide based upon the file_size and compress_size, if known,
False otherwise.
"""
dt = self.date_time
dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
@ -458,16 +463,13 @@ class ZipInfo (object):
min_version = 0
if zip64 is None:
# We always explicitly pass zip64 within this module.... This
# remains for anyone using ZipInfo.FileHeader as a public API.
zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
if zip64:
fmt = '<HHQQ'
extra = extra + struct.pack(fmt,
1, struct.calcsize(fmt)-4, file_size, compress_size)
if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
if not zip64:
raise LargeZipFile("Filesize would require ZIP64 extensions")
# File is larger than what fits into a 4 byte integer,
# fall back to the ZIP64 extension
file_size = 0xffffffff
compress_size = 0xffffffff
min_version = ZIP64_VERSION
@ -1219,6 +1221,12 @@ class _ZipWriteFile(io.BufferedIOBase):
self._zinfo.CRC = self._crc
self._zinfo.file_size = self._file_size
if not self._zip64:
if self._file_size > ZIP64_LIMIT:
raise RuntimeError("File size too large, try using force_zip64")
if self._compress_size > ZIP64_LIMIT:
raise RuntimeError("Compressed size too large, try using force_zip64")
# Write updated header info
if self._zinfo.flag_bits & _MASK_USE_DATA_DESCRIPTOR:
# Write CRC and file sizes after the file data
@ -1227,13 +1235,6 @@ class _ZipWriteFile(io.BufferedIOBase):
self._zinfo.compress_size, self._zinfo.file_size))
self._zipfile.start_dir = self._fileobj.tell()
else:
if not self._zip64:
if self._file_size > ZIP64_LIMIT:
raise RuntimeError(
'File size too large, try using force_zip64')
if self._compress_size > ZIP64_LIMIT:
raise RuntimeError(
'Compressed size too large, try using force_zip64')
# Seek backwards and write file header (which will now include
# correct CRC and file sizes)
@ -1672,8 +1673,9 @@ class ZipFile:
zinfo.external_attr = 0o600 << 16 # permissions: ?rw-------
# Compressed size can be larger than uncompressed size
zip64 = self._allowZip64 and \
(force_zip64 or zinfo.file_size * 1.05 > ZIP64_LIMIT)
zip64 = force_zip64 or (zinfo.file_size * 1.05 > ZIP64_LIMIT)
if not self._allowZip64 and zip64:
raise LargeZipFile("Filesize would require ZIP64 extensions")
if self._seekable:
self.fp.seek(self.start_dir)