mirror of
https://github.com/python/cpython.git
synced 2025-07-07 19:35:27 +00:00
gh-72680: Fix false positives when using zipfile.is_zipfile() (GH-134250)
bpo-28494: Improve zipfile.is_zipfile reliability The zipfile.is_zipfile function would only search for the EndOfZipfile section header. This failed to correctly identify non-zipfiles that contained this header. Now the zipfile.is_zipfile function verifies the first central directory entry. Changes: * Extended zipfile.is_zipfile to verify zipfile catalog * Added tests to validate failure of binary non-zipfiles * Reuse 'concat' handling for is_zipfile Co-authored-by: John Jolly <john.jolly@gmail.com>
This commit is contained in:
parent
d327159eb4
commit
1298511b41
3 changed files with 54 additions and 14 deletions
|
@ -1991,6 +1991,25 @@ class OtherTests(unittest.TestCase):
|
|||
self.assertFalse(zipfile.is_zipfile(fp))
|
||||
fp.seek(0, 0)
|
||||
self.assertFalse(zipfile.is_zipfile(fp))
|
||||
# - passing non-zipfile with ZIP header elements
|
||||
# data created using pyPNG like so:
|
||||
# d = [(ord('P'), ord('K'), 5, 6), (ord('P'), ord('K'), 6, 6)]
|
||||
# w = png.Writer(1,2,alpha=True,compression=0)
|
||||
# f = open('onepix.png', 'wb')
|
||||
# w.write(f, d)
|
||||
# w.close()
|
||||
data = (b"\x89PNG\r\n\x1a\n\x00\x00\x00\rIHDR\x00\x00\x00\x01\x00\x00"
|
||||
b"\x00\x02\x08\x06\x00\x00\x00\x99\x81\xb6'\x00\x00\x00\x15I"
|
||||
b"DATx\x01\x01\n\x00\xf5\xff\x00PK\x05\x06\x00PK\x06\x06\x07"
|
||||
b"\xac\x01N\xc6|a\r\x00\x00\x00\x00IEND\xaeB`\x82")
|
||||
# - passing a filename
|
||||
with open(TESTFN, "wb") as fp:
|
||||
fp.write(data)
|
||||
self.assertFalse(zipfile.is_zipfile(TESTFN))
|
||||
# - passing a file-like object
|
||||
fp = io.BytesIO()
|
||||
fp.write(data)
|
||||
self.assertFalse(zipfile.is_zipfile(fp))
|
||||
|
||||
def test_damaged_zipfile(self):
|
||||
"""Check that zipfiles with missing bytes at the end raise BadZipFile."""
|
||||
|
|
|
@ -234,8 +234,19 @@ class _Extra(bytes):
|
|||
|
||||
def _check_zipfile(fp):
|
||||
try:
|
||||
if _EndRecData(fp):
|
||||
return True # file has correct magic number
|
||||
endrec = _EndRecData(fp)
|
||||
if endrec:
|
||||
if endrec[_ECD_ENTRIES_TOTAL] == 0 and endrec[_ECD_SIZE] == 0 and endrec[_ECD_OFFSET] == 0:
|
||||
return True # Empty zipfiles are still zipfiles
|
||||
elif endrec[_ECD_DISK_NUMBER] == endrec[_ECD_DISK_START]:
|
||||
# Central directory is on the same disk
|
||||
fp.seek(sum(_handle_prepended_data(endrec)))
|
||||
if endrec[_ECD_SIZE] >= sizeCentralDir:
|
||||
data = fp.read(sizeCentralDir) # CD is where we expect it to be
|
||||
if len(data) == sizeCentralDir:
|
||||
centdir = struct.unpack(structCentralDir, data) # CD is the right size
|
||||
if centdir[_CD_SIGNATURE] == stringCentralDir:
|
||||
return True # First central directory entry has correct magic number
|
||||
except OSError:
|
||||
pass
|
||||
return False
|
||||
|
@ -258,6 +269,22 @@ def is_zipfile(filename):
|
|||
pass
|
||||
return result
|
||||
|
||||
def _handle_prepended_data(endrec, debug=0):
|
||||
size_cd = endrec[_ECD_SIZE] # bytes in central directory
|
||||
offset_cd = endrec[_ECD_OFFSET] # offset of central directory
|
||||
|
||||
# "concat" is zero, unless zip was concatenated to another file
|
||||
concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
|
||||
if endrec[_ECD_SIGNATURE] == stringEndArchive64:
|
||||
# If Zip64 extension structures are present, account for them
|
||||
concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
|
||||
|
||||
if debug > 2:
|
||||
inferred = concat + offset_cd
|
||||
print("given, inferred, offset", offset_cd, inferred, concat)
|
||||
|
||||
return offset_cd, concat
|
||||
|
||||
def _EndRecData64(fpin, offset, endrec):
|
||||
"""
|
||||
Read the ZIP64 end-of-archive records and use that to update endrec
|
||||
|
@ -1501,28 +1528,21 @@ class ZipFile:
|
|||
raise BadZipFile("File is not a zip file")
|
||||
if self.debug > 1:
|
||||
print(endrec)
|
||||
size_cd = endrec[_ECD_SIZE] # bytes in central directory
|
||||
offset_cd = endrec[_ECD_OFFSET] # offset of central directory
|
||||
self._comment = endrec[_ECD_COMMENT] # archive comment
|
||||
|
||||
# "concat" is zero, unless zip was concatenated to another file
|
||||
concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
|
||||
if endrec[_ECD_SIGNATURE] == stringEndArchive64:
|
||||
# If Zip64 extension structures are present, account for them
|
||||
concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
|
||||
offset_cd, concat = _handle_prepended_data(endrec, self.debug)
|
||||
|
||||
# self.start_dir: Position of start of central directory
|
||||
self.start_dir = offset_cd + concat
|
||||
|
||||
# store the offset to the beginning of data for the
|
||||
# .data_offset property
|
||||
self._data_offset = concat
|
||||
|
||||
if self.debug > 2:
|
||||
inferred = concat + offset_cd
|
||||
print("given, inferred, offset", offset_cd, inferred, concat)
|
||||
# self.start_dir: Position of start of central directory
|
||||
self.start_dir = offset_cd + concat
|
||||
if self.start_dir < 0:
|
||||
raise BadZipFile("Bad offset for central directory")
|
||||
fp.seek(self.start_dir, 0)
|
||||
size_cd = endrec[_ECD_SIZE]
|
||||
data = fp.read(size_cd)
|
||||
fp = io.BytesIO(data)
|
||||
total = 0
|
||||
|
|
|
@ -0,0 +1 @@
|
|||
Improve Zip file validation false positive rate in :func:`zipfile.is_zipfile`.
|
Loading…
Add table
Add a link
Reference in a new issue