Patch #1446489 (zipfile: support for ZIP64)

This commit is contained in:
Ronald Oussoren 2006-06-15 08:14:18 +00:00
parent 0eac11826a
commit 143cefb846
5 changed files with 665 additions and 63 deletions

View file

@ -1,7 +1,8 @@
"Read and write ZIP files."
"""
Read and write ZIP files.
"""
import struct, os, time, sys
import binascii
import binascii, cStringIO
try:
import zlib # We may need its compression method
@ -9,12 +10,22 @@ except ImportError:
zlib = None
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
"ZipInfo", "ZipFile", "PyZipFile"]
"ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
class BadZipfile(Exception):
pass
class LargeZipFile(Exception):
"""
Raised when writing a zipfile, the zipfile requires ZIP64 extensions
and those extensions are disabled.
"""
error = BadZipfile # The exception raised by this module
ZIP64_LIMIT= (1 << 31) - 1
# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
@ -27,6 +38,11 @@ structCentralDir = "<4s4B4HlLL5HLl"# 19 items, central directory, 46 bytes
stringCentralDir = "PK\001\002" # magic number for central directory
structFileHeader = "<4s2B4HlLL2H" # 12 items, file header record, 30 bytes
stringFileHeader = "PK\003\004" # magic number for file header
structEndArchive64Locator = "<4slql" # 4 items, locate Zip64 header, 20 bytes
stringEndArchive64Locator = "PK\x06\x07" # magic token for locator header
structEndArchive64 = "<4sqhhllqqqq" # 10 items, end of archive (Zip64), 56 bytes
stringEndArchive64 = "PK\x06\x06" # magic token for Zip64 header
# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
@ -75,6 +91,40 @@ def is_zipfile(filename):
pass
return False
def _EndRecData64(fpin, offset, endrec):
"""
Read the ZIP64 end-of-archive records and use that to update endrec
"""
locatorSize = struct.calcsize(structEndArchive64Locator)
fpin.seek(offset - locatorSize, 2)
data = fpin.read(locatorSize)
sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
if sig != stringEndArchive64Locator:
return endrec
if diskno != 0 or disks != 1:
raise BadZipfile("zipfiles that span multiple disks are not supported")
# Assume no 'zip64 extensible data'
endArchiveSize = struct.calcsize(structEndArchive64)
fpin.seek(offset - locatorSize - endArchiveSize, 2)
data = fpin.read(endArchiveSize)
sig, sz, create_version, read_version, disk_num, disk_dir, \
dircount, dircount2, dirsize, diroffset = \
struct.unpack(structEndArchive64, data)
if sig != stringEndArchive64:
return endrec
# Update the original endrec using data from the ZIP64 record
endrec[1] = disk_num
endrec[2] = disk_dir
endrec[3] = dircount
endrec[4] = dircount2
endrec[5] = dirsize
endrec[6] = diroffset
return endrec
def _EndRecData(fpin):
"""Return data from the "End of Central Directory" record, or None.
@ -88,6 +138,8 @@ def _EndRecData(fpin):
endrec = list(endrec)
endrec.append("") # Append the archive comment
endrec.append(filesize - 22) # Append the record start offset
if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
return _EndRecData64(fpin, -22, endrec)
return endrec
# Search the last END_BLOCK bytes of the file for the record signature.
# The comment is appended to the ZIP file and has a 16 bit length.
@ -106,25 +158,50 @@ def _EndRecData(fpin):
# Append the archive comment and start offset
endrec.append(comment)
endrec.append(filesize - END_BLOCK + start)
if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
return _EndRecData64(fpin, - END_BLOCK + start, endrec)
return endrec
return # Error, return None
class ZipInfo:
class ZipInfo (object):
"""Class with attributes describing each file in the ZIP archive."""
__slots__ = (
'orig_filename',
'filename',
'date_time',
'compress_type',
'comment',
'extra',
'create_system',
'create_version',
'extract_version',
'reserved',
'flag_bits',
'volume',
'internal_attr',
'external_attr',
'header_offset',
'CRC',
'compress_size',
'file_size',
)
def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
self.orig_filename = filename # Original file name in archive
# Terminate the file name at the first null byte. Null bytes in file
# names are used as tricks by viruses in archives.
# Terminate the file name at the first null byte. Null bytes in file
# names are used as tricks by viruses in archives.
null_byte = filename.find(chr(0))
if null_byte >= 0:
filename = filename[0:null_byte]
# This is used to ensure paths in generated ZIP files always use
# forward slashes as the directory separator, as required by the
# ZIP format specification.
if os.sep != "/":
# This is used to ensure paths in generated ZIP files always use
# forward slashes as the directory separator, as required by the
# ZIP format specification.
if os.sep != "/" and os.sep in filename:
filename = filename.replace(os.sep, "/")
self.filename = filename # Normalized file name
self.date_time = date_time # year, month, day, hour, min, sec
# Standard values:
@ -145,7 +222,6 @@ class ZipInfo:
self.external_attr = 0 # External file attributes
# Other attributes are set by class ZipFile:
# header_offset Byte offset to the file header
# file_offset Byte offset to the start of the file data
# CRC CRC-32 of the uncompressed file
# compress_size Size of the compressed file
# file_size Size of the uncompressed file
@ -162,29 +238,85 @@ class ZipInfo:
CRC = self.CRC
compress_size = self.compress_size
file_size = self.file_size
extra = self.extra
if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
# File is larger than what fits into a 4 byte integer,
# fall back to the ZIP64 extension
fmt = '<hhqq'
extra = extra + struct.pack(fmt,
1, struct.calcsize(fmt)-4, file_size, compress_size)
file_size = 0xffffffff # -1
compress_size = 0xffffffff # -1
self.extract_version = max(45, self.extract_version)
self.create_version = max(45, self.extract_version)
header = struct.pack(structFileHeader, stringFileHeader,
self.extract_version, self.reserved, self.flag_bits,
self.compress_type, dostime, dosdate, CRC,
compress_size, file_size,
len(self.filename), len(self.extra))
return header + self.filename + self.extra
len(self.filename), len(extra))
return header + self.filename + extra
def _decodeExtra(self):
# Try to decode the extra field.
extra = self.extra
unpack = struct.unpack
while extra:
tp, ln = unpack('<hh', extra[:4])
if tp == 1:
if ln >= 24:
counts = unpack('<qqq', extra[4:28])
elif ln == 16:
counts = unpack('<qq', extra[4:20])
elif ln == 8:
counts = unpack('<q', extra[4:12])
elif ln == 0:
counts = ()
else:
raise RuntimeError, "Corrupt extra field %s"%(ln,)
idx = 0
# ZIP64 extension (large files and/or large archives)
if self.file_size == -1 or self.file_size == 0xFFFFFFFFL:
self.file_size = counts[idx]
idx += 1
if self.compress_size == -1 or self.compress_size == 0xFFFFFFFFL:
self.compress_size = counts[idx]
idx += 1
if self.header_offset == -1 or self.header_offset == 0xffffffffL:
old = self.header_offset
self.header_offset = counts[idx]
idx+=1
extra = extra[ln+4:]
class ZipFile:
""" Class with methods to open, read, write, close, list zip files.
z = ZipFile(file, mode="r", compression=ZIP_STORED)
z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True)
file: Either the path to the file, or a file-like object.
If it is a path, the file will be opened and closed by ZipFile.
mode: The mode can be either read "r", write "w" or append "a".
compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
allowZip64: if True ZipFile will create files with ZIP64 extensions when
needed, otherwise it will raise an exception when this would
be necessary.
"""
fp = None # Set here since __del__ checks it
def __init__(self, file, mode="r", compression=ZIP_STORED):
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
"""Open the ZIP file with mode read "r", write "w" or append "a"."""
self._allowZip64 = allowZip64
self._didModify = False
if compression == ZIP_STORED:
pass
elif compression == ZIP_DEFLATED:
@ -250,7 +382,10 @@ class ZipFile:
offset_cd = endrec[6] # offset of central directory
self.comment = endrec[8] # archive comment
# endrec[9] is the offset of the "End of Central Dir" record
x = endrec[9] - size_cd
if endrec[9] > ZIP64_LIMIT:
x = endrec[9] - size_cd - 56 - 20
else:
x = endrec[9] - size_cd
# "concat" is zero, unless zip was concatenated to another file
concat = x - offset_cd
if self.debug > 2:
@ -258,6 +393,8 @@ class ZipFile:
# self.start_dir: Position of start of central directory
self.start_dir = offset_cd + concat
fp.seek(self.start_dir, 0)
data = fp.read(size_cd)
fp = cStringIO.StringIO(data)
total = 0
while total < size_cd:
centdir = fp.read(46)
@ -275,8 +412,7 @@ class ZipFile:
total = (total + centdir[_CD_FILENAME_LENGTH]
+ centdir[_CD_EXTRA_FIELD_LENGTH]
+ centdir[_CD_COMMENT_LENGTH])
x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] + concat
# file_offset must be computed below...
x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
(x.create_version, x.create_system, x.extract_version, x.reserved,
x.flag_bits, x.compress_type, t, d,
x.CRC, x.compress_size, x.file_size) = centdir[1:12]
@ -284,28 +420,14 @@ class ZipFile:
# Convert date/time code to (year, month, day, hour, min, sec)
x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
x._decodeExtra()
x.header_offset = x.header_offset + concat
self.filelist.append(x)
self.NameToInfo[x.filename] = x
if self.debug > 2:
print "total", total
for data in self.filelist:
fp.seek(data.header_offset, 0)
fheader = fp.read(30)
if fheader[0:4] != stringFileHeader:
raise BadZipfile, "Bad magic number for file header"
fheader = struct.unpack(structFileHeader, fheader)
# file_offset is computed here, since the extra field for
# the central directory and for the local file header
# refer to different fields, and they can have different
# lengths
data.file_offset = (data.header_offset + 30
+ fheader[_FH_FILENAME_LENGTH]
+ fheader[_FH_EXTRA_FIELD_LENGTH])
fname = fp.read(fheader[_FH_FILENAME_LENGTH])
if fname != data.orig_filename:
raise RuntimeError, \
'File name in directory "%s" and header "%s" differ.' % (
data.orig_filename, fname)
def namelist(self):
"""Return a list of file names in the archive."""
@ -334,6 +456,7 @@ class ZipFile:
except BadZipfile:
return zinfo.filename
def getinfo(self, name):
"""Return the instance of ZipInfo given 'name'."""
return self.NameToInfo[name]
@ -347,7 +470,24 @@ class ZipFile:
"Attempt to read ZIP archive that was already closed"
zinfo = self.getinfo(name)
filepos = self.fp.tell()
self.fp.seek(zinfo.file_offset, 0)
self.fp.seek(zinfo.header_offset, 0)
# Skip the file header:
fheader = self.fp.read(30)
if fheader[0:4] != stringFileHeader:
raise BadZipfile, "Bad magic number for file header"
fheader = struct.unpack(structFileHeader, fheader)
fname = self.fp.read(fheader[_FH_FILENAME_LENGTH])
if fheader[_FH_EXTRA_FIELD_LENGTH]:
self.fp.read(fheader[_FH_EXTRA_FIELD_LENGTH])
if fname != zinfo.orig_filename:
raise BadZipfile, \
'File name in directory "%s" and header "%s" differ.' % (
zinfo.orig_filename, fname)
bytes = self.fp.read(zinfo.compress_size)
self.fp.seek(filepos, 0)
if zinfo.compress_type == ZIP_STORED:
@ -388,6 +528,12 @@ class ZipFile:
if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
raise RuntimeError, \
"That compression method is not supported"
if zinfo.file_size > ZIP64_LIMIT:
if not self._allowZip64:
raise LargeZipFile("Filesize would require ZIP64 extensions")
if zinfo.header_offset > ZIP64_LIMIT:
if not self._allowZip64:
raise LargeZipFile("Zipfile size would require ZIP64 extensions")
def write(self, filename, arcname=None, compress_type=None):
"""Put the bytes from filename into the archive under the name
@ -407,16 +553,19 @@ class ZipFile:
zinfo.compress_type = self.compression
else:
zinfo.compress_type = compress_type
self._writecheck(zinfo)
fp = open(filename, "rb")
zinfo.file_size = st.st_size
zinfo.flag_bits = 0x00
zinfo.header_offset = self.fp.tell() # Start of header bytes
self._writecheck(zinfo)
self._didModify = True
fp = open(filename, "rb")
# Must overwrite CRC and sizes with correct data later
zinfo.CRC = CRC = 0
zinfo.compress_size = compress_size = 0
zinfo.file_size = file_size = 0
self.fp.write(zinfo.FileHeader())
zinfo.file_offset = self.fp.tell() # Start of file bytes
if zinfo.compress_type == ZIP_DEFLATED:
cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
zlib.DEFLATED, -15)
@ -461,8 +610,10 @@ class ZipFile:
zinfo.compress_type = self.compression
else:
zinfo = zinfo_or_arcname
self._writecheck(zinfo)
zinfo.file_size = len(bytes) # Uncompressed size
zinfo.header_offset = self.fp.tell() # Start of header bytes
self._writecheck(zinfo)
self._didModify = True
zinfo.CRC = binascii.crc32(bytes) # CRC-32 checksum
if zinfo.compress_type == ZIP_DEFLATED:
co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
@ -473,8 +624,8 @@ class ZipFile:
zinfo.compress_size = zinfo.file_size
zinfo.header_offset = self.fp.tell() # Start of header bytes
self.fp.write(zinfo.FileHeader())
zinfo.file_offset = self.fp.tell() # Start of file bytes
self.fp.write(bytes)
self.fp.flush()
if zinfo.flag_bits & 0x08:
# Write CRC and file sizes after the file data
self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
@ -491,7 +642,8 @@ class ZipFile:
records."""
if self.fp is None:
return
if self.mode in ("w", "a"): # write ending records
if self.mode in ("w", "a") and self._didModify: # write ending records
count = 0
pos1 = self.fp.tell()
for zinfo in self.filelist: # write central directory
@ -499,23 +651,72 @@ class ZipFile:
dt = zinfo.date_time
dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
extra = []
if zinfo.file_size > ZIP64_LIMIT \
or zinfo.compress_size > ZIP64_LIMIT:
extra.append(zinfo.file_size)
extra.append(zinfo.compress_size)
file_size = 0xffffffff #-1
compress_size = 0xffffffff #-1
else:
file_size = zinfo.file_size
compress_size = zinfo.compress_size
if zinfo.header_offset > ZIP64_LIMIT:
extra.append(zinfo.header_offset)
header_offset = 0xffffffff #-1
else:
header_offset = zinfo.header_offset
extra_data = zinfo.extra
if extra:
# Append a ZIP64 field to the extra's
extra_data = struct.pack(
'<hh' + 'q'*len(extra),
1, 8*len(extra), *extra) + extra_data
extract_version = max(45, zinfo.extract_version)
create_version = max(45, zinfo.create_version)
else:
extract_version = zinfo.extract_version
create_version = zinfo.create_version
centdir = struct.pack(structCentralDir,
stringCentralDir, zinfo.create_version,
zinfo.create_system, zinfo.extract_version, zinfo.reserved,
stringCentralDir, create_version,
zinfo.create_system, extract_version, zinfo.reserved,
zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
zinfo.CRC, zinfo.compress_size, zinfo.file_size,
len(zinfo.filename), len(zinfo.extra), len(zinfo.comment),
zinfo.CRC, compress_size, file_size,
len(zinfo.filename), len(extra_data), len(zinfo.comment),
0, zinfo.internal_attr, zinfo.external_attr,
zinfo.header_offset)
header_offset)
self.fp.write(centdir)
self.fp.write(zinfo.filename)
self.fp.write(zinfo.extra)
self.fp.write(extra_data)
self.fp.write(zinfo.comment)
pos2 = self.fp.tell()
# Write end-of-zip-archive record
endrec = struct.pack(structEndArchive, stringEndArchive,
0, 0, count, count, pos2 - pos1, pos1, 0)
self.fp.write(endrec)
if pos1 > ZIP64_LIMIT:
# Need to write the ZIP64 end-of-archive records
zip64endrec = struct.pack(
structEndArchive64, stringEndArchive64,
44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
self.fp.write(zip64endrec)
zip64locrec = struct.pack(
structEndArchive64Locator,
stringEndArchive64Locator, 0, pos2, 1)
self.fp.write(zip64locrec)
pos3 = self.fp.tell()
endrec = struct.pack(structEndArchive, stringEndArchive,
0, 0, count, count, pos2 - pos1, 0xffffffff, 0) # -1, 0)
self.fp.write(endrec)
else:
endrec = struct.pack(structEndArchive, stringEndArchive,
0, 0, count, count, pos2 - pos1, pos1, 0)
self.fp.write(endrec)
self.fp.flush()
if not self._filePassed:
self.fp.close()
@ -619,3 +820,80 @@ class PyZipFile(ZipFile):
if basename:
archivename = "%s/%s" % (basename, archivename)
return (fname, archivename)
def main(args = None):
import textwrap
USAGE=textwrap.dedent("""\
Usage:
zipfile.py -l zipfile.zip # Show listing of a zipfile
zipfile.py -t zipfile.zip # Test if a zipfile is valid
zipfile.py -e zipfile.zip target # Extract zipfile into target dir
zipfile.py -c zipfile.zip src ... # Create zipfile from sources
""")
if args is None:
args = sys.argv[1:]
if not args or args[0] not in ('-l', '-c', '-e', '-t'):
print USAGE
sys.exit(1)
if args[0] == '-l':
if len(args) != 2:
print USAGE
sys.exit(1)
zf = ZipFile(args[1], 'r')
zf.printdir()
zf.close()
elif args[0] == '-t':
if len(args) != 2:
print USAGE
sys.exit(1)
zf = ZipFile(args[1], 'r')
zf.testzip()
print "Done testing"
elif args[0] == '-e':
if len(args) != 3:
print USAGE
sys.exit(1)
zf = ZipFile(args[1], 'r')
out = args[2]
for path in zf.namelist():
if path.startswith('./'):
tgt = os.path.join(out, path[2:])
else:
tgt = os.path.join(out, path)
tgtdir = os.path.dirname(tgt)
if not os.path.exists(tgtdir):
os.makedirs(tgtdir)
fp = open(tgt, 'wb')
fp.write(zf.read(path))
fp.close()
zf.close()
elif args[0] == '-c':
if len(args) < 3:
print USAGE
sys.exit(1)
def addToZip(zf, path, zippath):
if os.path.isfile(path):
zf.write(path, zippath, ZIP_DEFLATED)
elif os.path.isdir(path):
for nm in os.listdir(path):
addToZip(zf,
os.path.join(path, nm), os.path.join(zippath, nm))
# else: ignore
zf = ZipFile(args[1], 'w', allowZip64=True)
for src in args[2:]:
addToZip(zf, src, os.path.basename(src))
zf.close()
if __name__ == "__main__":
main()