Patch #1446489 (zipfile: support for ZIP64)

This commit is contained in:
Ronald Oussoren 2006-06-15 08:14:18 +00:00
parent 0eac11826a
commit 143cefb846
5 changed files with 665 additions and 63 deletions

View file

@ -4,7 +4,7 @@ try:
except ImportError:
zlib = None
import zipfile, os, unittest
import zipfile, os, unittest, sys, shutil
from StringIO import StringIO
from tempfile import TemporaryFile
@ -28,14 +28,70 @@ class TestsWithSourceFile(unittest.TestCase):
zipfp = zipfile.ZipFile(f, "w", compression)
zipfp.write(TESTFN, "another"+os.extsep+"name")
zipfp.write(TESTFN, TESTFN)
zipfp.writestr("strfile", self.data)
zipfp.close()
# Read the ZIP archive
zipfp = zipfile.ZipFile(f, "r", compression)
self.assertEqual(zipfp.read(TESTFN), self.data)
self.assertEqual(zipfp.read("another"+os.extsep+"name"), self.data)
self.assertEqual(zipfp.read("strfile"), self.data)
# Print the ZIP directory
fp = StringIO()
stdout = sys.stdout
try:
sys.stdout = fp
zipfp.printdir()
finally:
sys.stdout = stdout
directory = fp.getvalue()
lines = directory.splitlines()
self.assertEquals(len(lines), 4) # Number of files + header
self.assert_('File Name' in lines[0])
self.assert_('Modified' in lines[0])
self.assert_('Size' in lines[0])
fn, date, time, size = lines[1].split()
self.assertEquals(fn, 'another.name')
# XXX: timestamp is not tested
self.assertEquals(size, str(len(self.data)))
# Check the namelist
names = zipfp.namelist()
self.assertEquals(len(names), 3)
self.assert_(TESTFN in names)
self.assert_("another"+os.extsep+"name" in names)
self.assert_("strfile" in names)
# Check infolist
infos = zipfp.infolist()
names = [ i.filename for i in infos ]
self.assertEquals(len(names), 3)
self.assert_(TESTFN in names)
self.assert_("another"+os.extsep+"name" in names)
self.assert_("strfile" in names)
for i in infos:
self.assertEquals(i.file_size, len(self.data))
# check getinfo
for nm in (TESTFN, "another"+os.extsep+"name", "strfile"):
info = zipfp.getinfo(nm)
self.assertEquals(info.filename, nm)
self.assertEquals(info.file_size, len(self.data))
# Check that testzip doesn't raise an exception
zipfp.testzip()
zipfp.close()
def testStored(self):
for f in (TESTFN2, TemporaryFile(), StringIO()):
self.zipTest(f, zipfile.ZIP_STORED)
@ -59,6 +115,197 @@ class TestsWithSourceFile(unittest.TestCase):
os.remove(TESTFN)
os.remove(TESTFN2)
class TestZip64InSmallFiles(unittest.TestCase):
    """Exercise the ZIP64 code paths of zipfile without using large files.

    setUp lowers zipfile.ZIP64_LIMIT to 5 so that the small test data already
    exceeds the limit; tearDown restores the saved value.  See test_zipfile64
    for proper tests with large archives.
    """

    def setUp(self):
        line_gen = ("Test of zipfile line %d." % i for i in range(0, 1000))
        self.data = '\n'.join(line_gen)

        # Make a source file with some lines
        fp = open(TESTFN, "wb")
        fp.write(self.data)
        fp.close()

        # Patch the limit only after everything that can fail has succeeded:
        # tearDown is not run when setUp raises, so patching first could leave
        # the module-wide limit lowered for every later test.
        self._limit = zipfile.ZIP64_LIMIT
        zipfile.ZIP64_LIMIT = 5

    def largeFileExceptionTest(self, f, compression):
        """write() must raise LargeZipFile when ZIP64 is not allowed."""
        zipfp = zipfile.ZipFile(f, "w", compression)
        self.assertRaises(zipfile.LargeZipFile,
                zipfp.write, TESTFN, "another"+os.extsep+"name")
        zipfp.close()

    def largeFileExceptionTest2(self, f, compression):
        """writestr() must raise LargeZipFile when ZIP64 is not allowed."""
        zipfp = zipfile.ZipFile(f, "w", compression)
        self.assertRaises(zipfile.LargeZipFile,
                zipfp.writestr, "another"+os.extsep+"name", self.data)
        zipfp.close()

    def testLargeFileException(self):
        for f in (TESTFN2, TemporaryFile(), StringIO()):
            self.largeFileExceptionTest(f, zipfile.ZIP_STORED)
            self.largeFileExceptionTest2(f, zipfile.ZIP_STORED)

    def zipTest(self, f, compression):
        """Write three members with allowZip64=True and verify reading them.

        f: path or file-like object that receives the archive.
        compression: zipfile.ZIP_STORED or zipfile.ZIP_DEFLATED.
        """
        # Single definition of the renamed member, so the write, read and
        # directory-listing checks cannot drift apart.
        arcname = "another" + os.extsep + "name"

        # Create the ZIP archive
        zipfp = zipfile.ZipFile(f, "w", compression, allowZip64=True)
        zipfp.write(TESTFN, arcname)
        zipfp.write(TESTFN, TESTFN)
        zipfp.writestr("strfile", self.data)
        zipfp.close()

        # Read the ZIP archive
        zipfp = zipfile.ZipFile(f, "r", compression)
        self.assertEqual(zipfp.read(TESTFN), self.data)
        self.assertEqual(zipfp.read(arcname), self.data)
        self.assertEqual(zipfp.read("strfile"), self.data)

        # Print the ZIP directory
        fp = StringIO()
        stdout = sys.stdout
        try:
            sys.stdout = fp
            zipfp.printdir()
        finally:
            sys.stdout = stdout

        directory = fp.getvalue()
        lines = directory.splitlines()
        self.assertEquals(len(lines), 4) # Number of files + header

        self.assert_('File Name' in lines[0])
        self.assert_('Modified' in lines[0])
        self.assert_('Size' in lines[0])

        fn, date, time, size = lines[1].split()
        # Compare against the name that was actually written instead of a
        # literal 'another.name', which is only right when os.extsep is '.'.
        self.assertEquals(fn, arcname)
        # XXX: timestamp is not tested
        self.assertEquals(size, str(len(self.data)))

        # Check the namelist
        names = zipfp.namelist()
        self.assertEquals(len(names), 3)
        self.assert_(TESTFN in names)
        self.assert_(arcname in names)
        self.assert_("strfile" in names)

        # Check infolist
        infos = zipfp.infolist()
        names = [ i.filename for i in infos ]
        self.assertEquals(len(names), 3)
        self.assert_(TESTFN in names)
        self.assert_(arcname in names)
        self.assert_("strfile" in names)
        for i in infos:
            self.assertEquals(i.file_size, len(self.data))

        # check getinfo
        for nm in (TESTFN, arcname, "strfile"):
            info = zipfp.getinfo(nm)
            self.assertEquals(info.filename, nm)
            self.assertEquals(info.file_size, len(self.data))

        # Check that testzip doesn't raise an exception
        zipfp.testzip()

        zipfp.close()

    def testStored(self):
        for f in (TESTFN2, TemporaryFile(), StringIO()):
            self.zipTest(f, zipfile.ZIP_STORED)

    if zlib:
        def testDeflated(self):
            for f in (TESTFN2, TemporaryFile(), StringIO()):
                self.zipTest(f, zipfile.ZIP_DEFLATED)

    def testAbsoluteArcnames(self):
        # A leading slash in the archive name is expected to be dropped.
        zipfp = zipfile.ZipFile(TESTFN2, "w", zipfile.ZIP_STORED, allowZip64=True)
        zipfp.write(TESTFN, "/absolute")
        zipfp.close()

        zipfp = zipfile.ZipFile(TESTFN2, "r", zipfile.ZIP_STORED)
        self.assertEqual(zipfp.namelist(), ["absolute"])
        zipfp.close()

    def tearDown(self):
        # Undo the limit patch from setUp and delete the files the tests made.
        zipfile.ZIP64_LIMIT = self._limit
        os.remove(TESTFN)
        os.remove(TESTFN2)
class PyZipFileTests(unittest.TestCase):
    """Tests for PyZipFile.writepy() on a module, a package and a directory."""

    def testWritePyfile(self):
        zipfp = zipfile.PyZipFile(TemporaryFile(), "w")
        fn = __file__
        if fn.endswith('.pyc') or fn.endswith('.pyo'):
            # Strip the trailing 'c'/'o' to get the name of the .py source.
            fn = fn[:-1]

        zipfp.writepy(fn)

        # Only the compiled file may end up in the archive, not the source.
        bn = os.path.basename(fn)
        self.assert_(bn not in zipfp.namelist())
        self.assert_(bn + 'o' in zipfp.namelist() or bn + 'c' in zipfp.namelist())
        zipfp.close()

        zipfp = zipfile.PyZipFile(TemporaryFile(), "w")
        fn = __file__
        if fn.endswith('.pyc') or fn.endswith('.pyo'):
            fn = fn[:-1]

        zipfp.writepy(fn, "testpackage")

        bn = "%s/%s"%("testpackage", os.path.basename(fn))
        self.assert_(bn not in zipfp.namelist())
        self.assert_(bn + 'o' in zipfp.namelist() or bn + 'c' in zipfp.namelist())
        zipfp.close()

    def testWritePythonPackage(self):
        import email
        packagedir = os.path.dirname(email.__file__)

        zipfp = zipfile.PyZipFile(TemporaryFile(), "w")
        zipfp.writepy(packagedir)

        # Check for a couple of modules at different levels of the hierarchy
        names = zipfp.namelist()
        # The name list is all that is needed; close the archive before the
        # assertions so it is released even when one of them fails.
        zipfp.close()
        self.assert_('email/__init__.pyo' in names or 'email/__init__.pyc' in names)
        self.assert_('email/mime/text.pyo' in names or 'email/mime/text.pyc' in names)

    def testWritePythonDirectory(self):
        os.mkdir(TESTFN2)
        try:
            fp = open(os.path.join(TESTFN2, "mod1.py"), "w")
            fp.write("print 42\n")
            fp.close()

            fp = open(os.path.join(TESTFN2, "mod2.py"), "w")
            fp.write("print 42 * 42\n")
            fp.close()

            fp = open(os.path.join(TESTFN2, "mod2.txt"), "w")
            fp.write("bla bla bla\n")
            fp.close()

            zipfp = zipfile.PyZipFile(TemporaryFile(), "w")
            zipfp.writepy(TESTFN2)

            names = zipfp.namelist()
            # Close before asserting so a failure does not leave it open.
            zipfp.close()
            self.assert_('mod1.pyc' in names or 'mod1.pyo' in names)
            self.assert_('mod2.pyc' in names or 'mod2.pyo' in names)
            self.assert_('mod2.txt' not in names)

        finally:
            shutil.rmtree(TESTFN2)
class OtherTests(unittest.TestCase):
def testCloseErroneousFile(self):
# This test checks that the ZipFile constructor closes the file object
@ -103,7 +350,8 @@ class OtherTests(unittest.TestCase):
self.assertRaises(RuntimeError, zipf.testzip)
def test_main():
run_unittest(TestsWithSourceFile, OtherTests)
run_unittest(TestsWithSourceFile, TestZip64InSmallFiles, OtherTests, PyZipFileTests)
#run_unittest(TestZip64InSmallFiles)
if __name__ == "__main__":
test_main()

View file

@ -0,0 +1,67 @@
# Tests of the full ZIP64 functionality of zipfile
# The test_support.requires call is the only reason for keeping this separate
# from test_zipfile
from test import test_support
test_support.requires(
'largefile',
'test requires loads of disk-space bytes and a long time to run'
)
# We can test part of the module without zlib.
try:
import zlib
except ImportError:
zlib = None
import zipfile, os, unittest
from StringIO import StringIO
from tempfile import TemporaryFile
from test.test_support import TESTFN, run_unittest
TESTFN2 = TESTFN + "2"
class TestsWithSourceFile(unittest.TestCase):
    """Round-trip an archive holding about 1.5 * 2**32 bytes of member data,
    which is written with allowZip64=True."""

    def setUp(self):
        self.data = '\n'.join(
            "Test of zipfile line %d." % i for i in range(0, 1000000))

        # Make a source file with some lines
        source = open(TESTFN, "wb")
        source.write(self.data)
        source.close()

    def zipTest(self, f, compression):
        """Write and read back enough copies of self.data to pass 2**32 bytes.

        f: path or file object that receives the archive.
        compression: zipfile.ZIP_STORED or zipfile.ZIP_DEFLATED.
        """
        # Enough members for the payload to total roughly 1.5 * 2**32 bytes.
        filecount = int(((1 << 32) / len(self.data)) * 1.5)
        member_names = ["testfn%d"%(num,) for num in range(filecount)]

        # Create the ZIP archive
        archive = zipfile.ZipFile(f, "w", compression, allowZip64=True)
        for member in member_names:
            archive.writestr(member, self.data)
        archive.close()

        # Read the ZIP archive
        archive = zipfile.ZipFile(f, "r", compression)
        for member in member_names:
            self.assertEqual(archive.read(member), self.data)
        archive.close()

    def testStored(self):
        for target in (TESTFN2, TemporaryFile()):
            self.zipTest(target, zipfile.ZIP_STORED)

    if zlib:
        def testDeflated(self):
            for target in (TESTFN2, TemporaryFile()):
                self.zipTest(target, zipfile.ZIP_DEFLATED)

    def tearDown(self):
        # Remove the source file and the on-disk archive.
        for leftover in (TESTFN, TESTFN2):
            os.remove(leftover)
def test_main():
    """Run the large-file ZIP64 test case through run_unittest."""
    test_cases = (TestsWithSourceFile,)
    run_unittest(*test_cases)
if __name__ == "__main__":
test_main()

View file

@ -1,7 +1,8 @@
"Read and write ZIP files."
"""
Read and write ZIP files.
"""
import struct, os, time, sys
import binascii
import binascii, cStringIO
try:
import zlib # We may need its compression method
@ -9,12 +10,22 @@ except ImportError:
zlib = None
__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
"ZipInfo", "ZipFile", "PyZipFile"]
"ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
class BadZipfile(Exception):
pass
class LargeZipFile(Exception):
    """Raised while writing a zipfile when the archive would need ZIP64
    extensions but those extensions have not been enabled."""
error = BadZipfile # The exception raised by this module
ZIP64_LIMIT= (1 << 31) - 1
# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
@ -27,6 +38,11 @@ structCentralDir = "<4s4B4HlLL5HLl"# 19 items, central directory, 46 bytes
stringCentralDir = "PK\001\002" # magic number for central directory
structFileHeader = "<4s2B4HlLL2H" # 12 items, file header record, 30 bytes
stringFileHeader = "PK\003\004" # magic number for file header
structEndArchive64Locator = "<4slql" # 4 items, locate Zip64 header, 20 bytes
stringEndArchive64Locator = "PK\x06\x07" # magic token for locator header
structEndArchive64 = "<4sqhhllqqqq" # 10 items, end of archive (Zip64), 56 bytes
stringEndArchive64 = "PK\x06\x06" # magic token for Zip64 header
# indexes of entries in the central directory structure
_CD_SIGNATURE = 0
@ -75,6 +91,40 @@ def is_zipfile(filename):
pass
return False
def _EndRecData64(fpin, offset, endrec):
    """Read the ZIP64 end-of-archive records and use them to update endrec.

    fpin:   open archive file object.
    offset: offset, relative to the end of the file, of the standard
            end-of-archive record; the ZIP64 locator is expected directly
            in front of it.
    endrec: list built from the standard end-of-archive record; entries
            1 to 6 are overwritten in place with the ZIP64 values.

    Returns endrec.  It is returned unchanged when no ZIP64 locator or
    ZIP64 end-of-archive record is found at the expected position, including
    when the file is too short to hold them.

    Raises BadZipfile for archives that span multiple disks.
    """
    locatorSize = struct.calcsize(structEndArchive64Locator)
    try:
        fpin.seek(offset - locatorSize, 2)
    except IOError:
        # The file is too small to contain a ZIP64 locator in front of the
        # end-of-archive record, so there is nothing to merge in.
        return endrec
    data = fpin.read(locatorSize)
    if len(data) != locatorSize:
        # Short read: unpacking would raise struct.error instead of treating
        # the archive as a plain (non-ZIP64) one.
        return endrec
    sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    endArchiveSize = struct.calcsize(structEndArchive64)
    try:
        fpin.seek(offset - locatorSize - endArchiveSize, 2)
    except IOError:
        return endrec
    data = fpin.read(endArchiveSize)
    if len(data) != endArchiveSize:
        return endrec
    sig, sz, create_version, read_version, disk_num, disk_dir, \
            dircount, dircount2, dirsize, diroffset = \
            struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[1] = disk_num
    endrec[2] = disk_dir
    endrec[3] = dircount
    endrec[4] = dircount2
    endrec[5] = dirsize
    endrec[6] = diroffset
    return endrec
def _EndRecData(fpin):
"""Return data from the "End of Central Directory" record, or None.
@ -88,6 +138,8 @@ def _EndRecData(fpin):
endrec = list(endrec)
endrec.append("") # Append the archive comment
endrec.append(filesize - 22) # Append the record start offset
if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
return _EndRecData64(fpin, -22, endrec)
return endrec
# Search the last END_BLOCK bytes of the file for the record signature.
# The comment is appended to the ZIP file and has a 16 bit length.
@ -106,25 +158,50 @@ def _EndRecData(fpin):
# Append the archive comment and start offset
endrec.append(comment)
endrec.append(filesize - END_BLOCK + start)
if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
return _EndRecData64(fpin, - END_BLOCK + start, endrec)
return endrec
return # Error, return None
class ZipInfo:
class ZipInfo (object):
"""Class with attributes describing each file in the ZIP archive."""
__slots__ = (
'orig_filename',
'filename',
'date_time',
'compress_type',
'comment',
'extra',
'create_system',
'create_version',
'extract_version',
'reserved',
'flag_bits',
'volume',
'internal_attr',
'external_attr',
'header_offset',
'CRC',
'compress_size',
'file_size',
)
def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
self.orig_filename = filename # Original file name in archive
# Terminate the file name at the first null byte. Null bytes in file
# names are used as tricks by viruses in archives.
# Terminate the file name at the first null byte. Null bytes in file
# names are used as tricks by viruses in archives.
null_byte = filename.find(chr(0))
if null_byte >= 0:
filename = filename[0:null_byte]
# This is used to ensure paths in generated ZIP files always use
# forward slashes as the directory separator, as required by the
# ZIP format specification.
if os.sep != "/":
# This is used to ensure paths in generated ZIP files always use
# forward slashes as the directory separator, as required by the
# ZIP format specification.
if os.sep != "/" and os.sep in filename:
filename = filename.replace(os.sep, "/")
self.filename = filename # Normalized file name
self.date_time = date_time # year, month, day, hour, min, sec
# Standard values:
@ -145,7 +222,6 @@ class ZipInfo:
self.external_attr = 0 # External file attributes
# Other attributes are set by class ZipFile:
# header_offset Byte offset to the file header
# file_offset Byte offset to the start of the file data
# CRC CRC-32 of the uncompressed file
# compress_size Size of the compressed file
# file_size Size of the uncompressed file
@ -162,29 +238,85 @@ class ZipInfo:
CRC = self.CRC
compress_size = self.compress_size
file_size = self.file_size
extra = self.extra
if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
# File is larger than what fits into a 4 byte integer,
# fall back to the ZIP64 extension
fmt = '<hhqq'
extra = extra + struct.pack(fmt,
1, struct.calcsize(fmt)-4, file_size, compress_size)
file_size = 0xffffffff # -1
compress_size = 0xffffffff # -1
self.extract_version = max(45, self.extract_version)
self.create_version = max(45, self.extract_version)
header = struct.pack(structFileHeader, stringFileHeader,
self.extract_version, self.reserved, self.flag_bits,
self.compress_type, dostime, dosdate, CRC,
compress_size, file_size,
len(self.filename), len(self.extra))
return header + self.filename + self.extra
len(self.filename), len(extra))
return header + self.filename + extra
def _decodeExtra(self):
# Try to decode the extra field.
extra = self.extra
unpack = struct.unpack
while extra:
tp, ln = unpack('<hh', extra[:4])
if tp == 1:
if ln >= 24:
counts = unpack('<qqq', extra[4:28])
elif ln == 16:
counts = unpack('<qq', extra[4:20])
elif ln == 8:
counts = unpack('<q', extra[4:12])
elif ln == 0:
counts = ()
else:
raise RuntimeError, "Corrupt extra field %s"%(ln,)
idx = 0
# ZIP64 extension (large files and/or large archives)
if self.file_size == -1 or self.file_size == 0xFFFFFFFFL:
self.file_size = counts[idx]
idx += 1
if self.compress_size == -1 or self.compress_size == 0xFFFFFFFFL:
self.compress_size = counts[idx]
idx += 1
if self.header_offset == -1 or self.header_offset == 0xffffffffL:
old = self.header_offset
self.header_offset = counts[idx]
idx+=1
extra = extra[ln+4:]
class ZipFile:
""" Class with methods to open, read, write, close, list zip files.
z = ZipFile(file, mode="r", compression=ZIP_STORED)
z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=True)
file: Either the path to the file, or a file-like object.
If it is a path, the file will be opened and closed by ZipFile.
mode: The mode can be either read "r", write "w" or append "a".
compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
allowZip64: if True ZipFile will create files with ZIP64 extensions when
needed, otherwise it will raise an exception when this would
be necessary.
"""
fp = None # Set here since __del__ checks it
def __init__(self, file, mode="r", compression=ZIP_STORED):
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
"""Open the ZIP file with mode read "r", write "w" or append "a"."""
self._allowZip64 = allowZip64
self._didModify = False
if compression == ZIP_STORED:
pass
elif compression == ZIP_DEFLATED:
@ -250,7 +382,10 @@ class ZipFile:
offset_cd = endrec[6] # offset of central directory
self.comment = endrec[8] # archive comment
# endrec[9] is the offset of the "End of Central Dir" record
x = endrec[9] - size_cd
if endrec[9] > ZIP64_LIMIT:
x = endrec[9] - size_cd - 56 - 20
else:
x = endrec[9] - size_cd
# "concat" is zero, unless zip was concatenated to another file
concat = x - offset_cd
if self.debug > 2:
@ -258,6 +393,8 @@ class ZipFile:
# self.start_dir: Position of start of central directory
self.start_dir = offset_cd + concat
fp.seek(self.start_dir, 0)
data = fp.read(size_cd)
fp = cStringIO.StringIO(data)
total = 0
while total < size_cd:
centdir = fp.read(46)
@ -275,8 +412,7 @@ class ZipFile:
total = (total + centdir[_CD_FILENAME_LENGTH]
+ centdir[_CD_EXTRA_FIELD_LENGTH]
+ centdir[_CD_COMMENT_LENGTH])
x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET] + concat
# file_offset must be computed below...
x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
(x.create_version, x.create_system, x.extract_version, x.reserved,
x.flag_bits, x.compress_type, t, d,
x.CRC, x.compress_size, x.file_size) = centdir[1:12]
@ -284,28 +420,14 @@ class ZipFile:
# Convert date/time code to (year, month, day, hour, min, sec)
x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
x._decodeExtra()
x.header_offset = x.header_offset + concat
self.filelist.append(x)
self.NameToInfo[x.filename] = x
if self.debug > 2:
print "total", total
for data in self.filelist:
fp.seek(data.header_offset, 0)
fheader = fp.read(30)
if fheader[0:4] != stringFileHeader:
raise BadZipfile, "Bad magic number for file header"
fheader = struct.unpack(structFileHeader, fheader)
# file_offset is computed here, since the extra field for
# the central directory and for the local file header
# refer to different fields, and they can have different
# lengths
data.file_offset = (data.header_offset + 30
+ fheader[_FH_FILENAME_LENGTH]
+ fheader[_FH_EXTRA_FIELD_LENGTH])
fname = fp.read(fheader[_FH_FILENAME_LENGTH])
if fname != data.orig_filename:
raise RuntimeError, \
'File name in directory "%s" and header "%s" differ.' % (
data.orig_filename, fname)
def namelist(self):
"""Return a list of file names in the archive."""
@ -334,6 +456,7 @@ class ZipFile:
except BadZipfile:
return zinfo.filename
def getinfo(self, name):
    """Look up 'name' in the archive and return its ZipInfo instance."""
    info = self.NameToInfo[name]
    return info
@ -347,7 +470,24 @@ class ZipFile:
"Attempt to read ZIP archive that was already closed"
zinfo = self.getinfo(name)
filepos = self.fp.tell()
self.fp.seek(zinfo.file_offset, 0)
self.fp.seek(zinfo.header_offset, 0)
# Skip the file header:
fheader = self.fp.read(30)
if fheader[0:4] != stringFileHeader:
raise BadZipfile, "Bad magic number for file header"
fheader = struct.unpack(structFileHeader, fheader)
fname = self.fp.read(fheader[_FH_FILENAME_LENGTH])
if fheader[_FH_EXTRA_FIELD_LENGTH]:
self.fp.read(fheader[_FH_EXTRA_FIELD_LENGTH])
if fname != zinfo.orig_filename:
raise BadZipfile, \
'File name in directory "%s" and header "%s" differ.' % (
zinfo.orig_filename, fname)
bytes = self.fp.read(zinfo.compress_size)
self.fp.seek(filepos, 0)
if zinfo.compress_type == ZIP_STORED:
@ -388,6 +528,12 @@ class ZipFile:
if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
raise RuntimeError, \
"That compression method is not supported"
if zinfo.file_size > ZIP64_LIMIT:
if not self._allowZip64:
raise LargeZipFile("Filesize would require ZIP64 extensions")
if zinfo.header_offset > ZIP64_LIMIT:
if not self._allowZip64:
raise LargeZipFile("Zipfile size would require ZIP64 extensions")
def write(self, filename, arcname=None, compress_type=None):
"""Put the bytes from filename into the archive under the name
@ -407,16 +553,19 @@ class ZipFile:
zinfo.compress_type = self.compression
else:
zinfo.compress_type = compress_type
self._writecheck(zinfo)
fp = open(filename, "rb")
zinfo.file_size = st.st_size
zinfo.flag_bits = 0x00
zinfo.header_offset = self.fp.tell() # Start of header bytes
self._writecheck(zinfo)
self._didModify = True
fp = open(filename, "rb")
# Must overwrite CRC and sizes with correct data later
zinfo.CRC = CRC = 0
zinfo.compress_size = compress_size = 0
zinfo.file_size = file_size = 0
self.fp.write(zinfo.FileHeader())
zinfo.file_offset = self.fp.tell() # Start of file bytes
if zinfo.compress_type == ZIP_DEFLATED:
cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
zlib.DEFLATED, -15)
@ -461,8 +610,10 @@ class ZipFile:
zinfo.compress_type = self.compression
else:
zinfo = zinfo_or_arcname
self._writecheck(zinfo)
zinfo.file_size = len(bytes) # Uncompressed size
zinfo.header_offset = self.fp.tell() # Start of header bytes
self._writecheck(zinfo)
self._didModify = True
zinfo.CRC = binascii.crc32(bytes) # CRC-32 checksum
if zinfo.compress_type == ZIP_DEFLATED:
co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
@ -473,8 +624,8 @@ class ZipFile:
zinfo.compress_size = zinfo.file_size
zinfo.header_offset = self.fp.tell() # Start of header bytes
self.fp.write(zinfo.FileHeader())
zinfo.file_offset = self.fp.tell() # Start of file bytes
self.fp.write(bytes)
self.fp.flush()
if zinfo.flag_bits & 0x08:
# Write CRC and file sizes after the file data
self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
@ -491,7 +642,8 @@ class ZipFile:
records."""
if self.fp is None:
return
if self.mode in ("w", "a"): # write ending records
if self.mode in ("w", "a") and self._didModify: # write ending records
count = 0
pos1 = self.fp.tell()
for zinfo in self.filelist: # write central directory
@ -499,23 +651,72 @@ class ZipFile:
dt = zinfo.date_time
dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
extra = []
if zinfo.file_size > ZIP64_LIMIT \
or zinfo.compress_size > ZIP64_LIMIT:
extra.append(zinfo.file_size)
extra.append(zinfo.compress_size)
file_size = 0xffffffff #-1
compress_size = 0xffffffff #-1
else:
file_size = zinfo.file_size
compress_size = zinfo.compress_size
if zinfo.header_offset > ZIP64_LIMIT:
extra.append(zinfo.header_offset)
header_offset = 0xffffffff #-1
else:
header_offset = zinfo.header_offset
extra_data = zinfo.extra
if extra:
# Append a ZIP64 field to the extra's
extra_data = struct.pack(
'<hh' + 'q'*len(extra),
1, 8*len(extra), *extra) + extra_data
extract_version = max(45, zinfo.extract_version)
create_version = max(45, zinfo.create_version)
else:
extract_version = zinfo.extract_version
create_version = zinfo.create_version
centdir = struct.pack(structCentralDir,
stringCentralDir, zinfo.create_version,
zinfo.create_system, zinfo.extract_version, zinfo.reserved,
stringCentralDir, create_version,
zinfo.create_system, extract_version, zinfo.reserved,
zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
zinfo.CRC, zinfo.compress_size, zinfo.file_size,
len(zinfo.filename), len(zinfo.extra), len(zinfo.comment),
zinfo.CRC, compress_size, file_size,
len(zinfo.filename), len(extra_data), len(zinfo.comment),
0, zinfo.internal_attr, zinfo.external_attr,
zinfo.header_offset)
header_offset)
self.fp.write(centdir)
self.fp.write(zinfo.filename)
self.fp.write(zinfo.extra)
self.fp.write(extra_data)
self.fp.write(zinfo.comment)
pos2 = self.fp.tell()
# Write end-of-zip-archive record
endrec = struct.pack(structEndArchive, stringEndArchive,
0, 0, count, count, pos2 - pos1, pos1, 0)
self.fp.write(endrec)
if pos1 > ZIP64_LIMIT:
# Need to write the ZIP64 end-of-archive records
zip64endrec = struct.pack(
structEndArchive64, stringEndArchive64,
44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
self.fp.write(zip64endrec)
zip64locrec = struct.pack(
structEndArchive64Locator,
stringEndArchive64Locator, 0, pos2, 1)
self.fp.write(zip64locrec)
pos3 = self.fp.tell()
endrec = struct.pack(structEndArchive, stringEndArchive,
0, 0, count, count, pos2 - pos1, 0xffffffff, 0) # -1, 0)
self.fp.write(endrec)
else:
endrec = struct.pack(structEndArchive, stringEndArchive,
0, 0, count, count, pos2 - pos1, pos1, 0)
self.fp.write(endrec)
self.fp.flush()
if not self._filePassed:
self.fp.close()
@ -619,3 +820,80 @@ class PyZipFile(ZipFile):
if basename:
archivename = "%s/%s" % (basename, archivename)
return (fname, archivename)
def main(args = None):
    """Command-line interface: list, test, extract or create a zipfile.

    args: list of command-line arguments without the program name; when
          None, sys.argv[1:] is used.

    The first argument selects the action (-l, -t, -e or -c).  For an
    unknown action or a wrong number of arguments the usage text is printed
    and the process exits with status 1.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
        zipfile.py -l zipfile.zip # Show listing of a zipfile
        zipfile.py -t zipfile.zip # Test if a zipfile is valid
        zipfile.py -e zipfile.zip target # Extract zipfile into target dir
        zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            # testzip() hands back the name of the first bad member (or
            # None); dropping it would report success for a corrupt archive.
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: %r" % (badfile,))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        out = args[2]
        try:
            # NOTE(review): member names are joined onto the target directory
            # unchecked, so absolute names or '..' components in an untrusted
            # archive can write outside 'out'.
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                tgtdir = os.path.dirname(tgt)
                # dirname is '' when tgt has no directory part; makedirs('')
                # would raise, and there is nothing to create in that case.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                fp = open(tgt, 'wb')
                try:
                    fp.write(zf.read(path))
                finally:
                    fp.close()
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recursively everything below a directory.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            zf.close()
if __name__ == "__main__":
main()