Make test_zipfile pass.

The zipfile module now does all I/O in binary mode using bytes.
(Maybe we should support wrapping a TextIOWrapper around it
when text mode reading is requested?)
Even the password is a bytes array now.
Had to fix py_compile.py to use bytes while I was at it.
The _struct module needed a patch to support bytes, str8 and str
for the 's' and 'p' formats.
This commit is contained in:
Guido van Rossum 2007-05-22 01:29:33 +00:00
parent 94ca1c620e
commit d6ca546091
4 changed files with 177 additions and 146 deletions

View file

@ -1,5 +1,7 @@
"""
Read and write ZIP files.
XXX references to utf-8 need further investigation.
"""
import struct, os, time, sys
import binascii, io
@ -33,15 +35,15 @@ ZIP_DEFLATED = 8
# Here are some struct module formats for reading headers
structEndArchive = "<4s4H2lH" # 9 items, end of archive, 22 bytes
stringEndArchive = "PK\005\006" # magic number for end of archive record
stringEndArchive = b"PK\005\006" # magic number for end of archive record
structCentralDir = "<4s4B4HlLL5HLl"# 19 items, central directory, 46 bytes
stringCentralDir = "PK\001\002" # magic number for central directory
stringCentralDir = b"PK\001\002" # magic number for central directory
structFileHeader = "<4s2B4HlLL2H" # 12 items, file header record, 30 bytes
stringFileHeader = "PK\003\004" # magic number for file header
stringFileHeader = b"PK\003\004" # magic number for file header
structEndArchive64Locator = "<4slql" # 4 items, locate Zip64 header, 20 bytes
stringEndArchive64Locator = "PK\x06\x07" # magic token for locator header
stringEndArchive64Locator = b"PK\x06\x07" # magic token for locator header
structEndArchive64 = "<4sqhhllqqqq" # 10 items, end of archive (Zip64), 56 bytes
stringEndArchive64 = "PK\x06\x06" # magic token for Zip64 header
stringEndArchive64 = b"PK\x06\x06" # magic token for Zip64 header
# indexes of entries in the central directory structure
@ -82,7 +84,7 @@ _FH_EXTRA_FIELD_LENGTH = 11
def is_zipfile(filename):
"""Quickly see if file is a ZIP file by checking the magic number."""
try:
fpin = open(filename, "rb")
fpin = io.open(filename, "rb")
endrec = _EndRecData(fpin)
fpin.close()
if endrec:
@ -206,8 +208,8 @@ class ZipInfo (object):
self.date_time = date_time # year, month, day, hour, min, sec
# Standard values:
self.compress_type = ZIP_STORED # Type of compression for the file
self.comment = "" # Comment for each file
self.extra = "" # ZIP extra data
self.comment = b"" # Comment for each file
self.extra = b"" # ZIP extra data
if sys.platform == 'win32':
self.create_system = 0 # System which created ZIP archive
else:
@ -257,7 +259,7 @@ class ZipInfo (object):
self.compress_type, dostime, dosdate, CRC,
compress_size, file_size,
len(self.filename), len(extra))
return header + self.filename + extra
return header + self.filename.encode("utf-8") + extra
def _decodeExtra(self):
# Try to decode the extra field.
@ -331,7 +333,7 @@ class _ZipDecrypter:
def _crc32(self, ch, crc):
"""Compute the CRC32 primitive on one byte."""
return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff]
def __init__(self, pwd):
self.key0 = 305419896
@ -344,20 +346,13 @@ class _ZipDecrypter:
self.key0 = self._crc32(c, self.key0)
self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
self.key1 = (self.key1 * 134775813 + 1) & 4294967295
self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)
def __call__(self, c):
"""Decrypt a single character."""
# XXX When this is called with a byte instead of a char, ord()
# isn't needed. Don't die in that case. In the future we should
# just leave this out, once we're always using bytes.
try:
c = ord(c)
except TypeError:
pass
assert isinstance(c, int)
k = self.key2 | 2
c = c ^ (((k * (k^1)) >> 8) & 255)
c = chr(c)
self._UpdateKeys(c)
return c
@ -370,13 +365,13 @@ class ZipExtFile:
self.fileobj = fileobj
self.decrypter = decrypt
self.bytes_read = 0
self.rawbuffer = ''
self.readbuffer = ''
self.linebuffer = ''
self.rawbuffer = b''
self.readbuffer = b''
self.linebuffer = b''
self.eof = False
self.univ_newlines = False
self.nlSeps = ("\n", )
self.lastdiscard = ''
self.nlSeps = (b"\n", )
self.lastdiscard = b''
self.compress_type = zipinfo.compress_type
self.compress_size = zipinfo.compress_size
@ -394,9 +389,9 @@ class ZipExtFile:
self.univ_newlines = univ_newlines
# pick line separator char(s) based on universal newlines flag
self.nlSeps = ("\n", )
self.nlSeps = (b"\n", )
if self.univ_newlines:
self.nlSeps = ("\r\n", "\r", "\n")
self.nlSeps = (b"\r\n", b"\r", b"\n")
def __iter__(self):
return self
@ -417,7 +412,7 @@ class ZipExtFile:
# ugly check for cases where half of an \r\n pair was
# read on the last pass, and the \r was discarded. In this
# case we just throw away the \n at the start of the buffer.
if (self.lastdiscard, self.linebuffer[0]) == ('\r','\n'):
if (self.lastdiscard, self.linebuffer[0]) == (b'\r', b'\n'):
self.linebuffer = self.linebuffer[1:]
for sep in self.nlSeps:
@ -435,7 +430,7 @@ class ZipExtFile:
if size < 0:
size = sys.maxint
elif size == 0:
return ''
return b''
# check for a newline already in buffer
nl, nllen = self._checkfornewline()
@ -461,7 +456,7 @@ class ZipExtFile:
# so return current buffer
if nl < 0:
s = self.linebuffer
self.linebuffer = ''
self.linebuffer = b''
return s
buf = self.linebuffer[:nl]
@ -470,7 +465,7 @@ class ZipExtFile:
# line is always returned with \n as newline char (except possibly
# for a final incomplete line in the file, which is handled above).
return buf + "\n"
return buf + b"\n"
def readlines(self, sizehint = -1):
"""Return a list with all (following) lines. The sizehint parameter
@ -516,18 +511,23 @@ class ZipExtFile:
# try to read from file (if necessary)
if bytesToRead > 0:
bytes = self.fileobj.read(bytesToRead)
self.bytes_read += len(bytes)
self.rawbuffer += bytes
data = self.fileobj.read(bytesToRead)
self.bytes_read += len(data)
try:
self.rawbuffer += data
except:
print(repr(self.fileobj), repr(self.rawbuffer),
repr(data))
raise
# handle contents of raw buffer
if self.rawbuffer:
newdata = self.rawbuffer
self.rawbuffer = ''
self.rawbuffer = b''
# decrypt new data if we were given an object to handle that
if newdata and self.decrypter is not None:
newdata = ''.join(map(self.decrypter, newdata))
newdata = bytes(map(self.decrypter, newdata))
# decompress newly read data if necessary
if newdata and self.compress_type == ZIP_DEFLATED:
@ -546,13 +546,13 @@ class ZipExtFile:
# return what the user asked for
if size is None or len(self.readbuffer) <= size:
bytes = self.readbuffer
self.readbuffer = ''
data = self.readbuffer
self.readbuffer = b''
else:
bytes = self.readbuffer[:size]
data = self.readbuffer[:size]
self.readbuffer = self.readbuffer[size:]
return bytes
return data
class ZipFile:
@ -593,15 +593,16 @@ class ZipFile:
# Check if we were passed a file-like object
if isinstance(file, basestring):
# No, it's a filename
self._filePassed = 0
self.filename = file
modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
try:
self.fp = open(file, modeDict[mode])
self.fp = io.open(file, modeDict[mode])
except IOError:
if mode == 'a':
mode = key = 'w'
self.fp = open(file, modeDict[mode])
self.fp = io.open(file, modeDict[mode])
else:
raise
else:
@ -661,7 +662,7 @@ class ZipFile:
self.start_dir = offset_cd + concat
fp.seek(self.start_dir, 0)
data = fp.read(size_cd)
fp = io.StringIO(data)
fp = io.BytesIO(data)
total = 0
while total < size_cd:
centdir = fp.read(46)
@ -673,7 +674,7 @@ class ZipFile:
print(centdir)
filename = fp.read(centdir[_CD_FILENAME_LENGTH])
# Create ZipInfo instance to store file information
x = ZipInfo(filename)
x = ZipInfo(str(filename))
x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
total = (total + centdir[_CD_FILENAME_LENGTH]
@ -708,12 +709,16 @@ class ZipFile:
archive."""
return self.filelist
def printdir(self):
def printdir(self, file=None):
"""Print a table of contents for the zip file."""
print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"))
if file is None:
file = sys.stdout
print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"),
file=file)
for zinfo in self.filelist:
date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time
print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size))
print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
file=file)
def testzip(self):
"""Read all the files and check the CRC."""
@ -730,6 +735,7 @@ class ZipFile:
def setpassword(self, pwd):
"""Set default password for encrypted files."""
assert isinstance(pwd, bytes)
self.pwd = pwd
def read(self, name, pwd=None):
@ -749,7 +755,7 @@ class ZipFile:
if self._filePassed:
zef_file = self.fp
else:
zef_file = open(self.filename, 'rb')
zef_file = io.open(self.filename, 'rb')
# Get info object for name
zinfo = self.getinfo(name)
@ -768,9 +774,9 @@ class ZipFile:
if fheader[_FH_EXTRA_FIELD_LENGTH]:
zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
if fname != zinfo.orig_filename:
if fname != zinfo.orig_filename.encode("utf-8"):
raise BadZipfile, \
'File name in directory "%s" and header "%s" differ.' % (
'File name in directory %r and header %r differ.' % (
zinfo.orig_filename, fname)
# check for encrypted flag & handle password
@ -790,7 +796,7 @@ class ZipFile:
# and is used to check the correctness of the password.
bytes = zef_file.read(12)
h = map(zd, bytes[0:12])
if ord(h[11]) != ((zinfo.CRC>>24)&255):
if h[11] != ((zinfo.CRC>>24) & 255):
raise RuntimeError, "Bad password for file %s" % name
# build and return a ZipExtFile
@ -852,7 +858,7 @@ class ZipFile:
self._writecheck(zinfo)
self._didModify = True
fp = open(filename, "rb")
fp = io.open(filename, "rb")
# Must overwrite CRC and sizes with correct data later
zinfo.CRC = CRC = 0
zinfo.compress_size = compress_size = 0
@ -982,7 +988,7 @@ class ZipFile:
0, zinfo.internal_attr, zinfo.external_attr,
header_offset)
self.fp.write(centdir)
self.fp.write(zinfo.filename)
self.fp.write(zinfo.filename.encode("utf-8"))
self.fp.write(extra_data)
self.fp.write(zinfo.comment)
@ -1163,7 +1169,7 @@ def main(args = None):
tgtdir = os.path.dirname(tgt)
if not os.path.exists(tgtdir):
os.makedirs(tgtdir)
fp = open(tgt, 'wb')
fp = io.open(tgt, 'wb')
fp.write(zf.read(path))
fp.close()
zf.close()