mirror of
https://github.com/python/cpython.git
synced 2025-12-04 00:30:19 +00:00
Make test_zipfile pass.
The zipfile module now does all I/O in binary mode using bytes. (Maybe we should support wrapping a TextIOWrapper around it when text mode reading is requested?) Even the password is a bytes array now. Had to fix py_compile.py to use bytes while I was at it. The _struct needed a patch to support bytes, str8 and str for the 's' and 'p' formats.
This commit is contained in:
parent
94ca1c620e
commit
d6ca546091
4 changed files with 177 additions and 146 deletions
112
Lib/zipfile.py
112
Lib/zipfile.py
|
|
@ -1,5 +1,7 @@
|
|||
"""
|
||||
Read and write ZIP files.
|
||||
|
||||
XXX references to utf-8 need further investigation.
|
||||
"""
|
||||
import struct, os, time, sys
|
||||
import binascii, io
|
||||
|
|
@ -33,15 +35,15 @@ ZIP_DEFLATED = 8
|
|||
|
||||
# Here are some struct module formats for reading headers
|
||||
structEndArchive = "<4s4H2lH" # 9 items, end of archive, 22 bytes
|
||||
stringEndArchive = "PK\005\006" # magic number for end of archive record
|
||||
stringEndArchive = b"PK\005\006" # magic number for end of archive record
|
||||
structCentralDir = "<4s4B4HlLL5HLl"# 19 items, central directory, 46 bytes
|
||||
stringCentralDir = "PK\001\002" # magic number for central directory
|
||||
stringCentralDir = b"PK\001\002" # magic number for central directory
|
||||
structFileHeader = "<4s2B4HlLL2H" # 12 items, file header record, 30 bytes
|
||||
stringFileHeader = "PK\003\004" # magic number for file header
|
||||
stringFileHeader = b"PK\003\004" # magic number for file header
|
||||
structEndArchive64Locator = "<4slql" # 4 items, locate Zip64 header, 20 bytes
|
||||
stringEndArchive64Locator = "PK\x06\x07" # magic token for locator header
|
||||
stringEndArchive64Locator = b"PK\x06\x07" # magic token for locator header
|
||||
structEndArchive64 = "<4sqhhllqqqq" # 10 items, end of archive (Zip64), 56 bytes
|
||||
stringEndArchive64 = "PK\x06\x06" # magic token for Zip64 header
|
||||
stringEndArchive64 = b"PK\x06\x06" # magic token for Zip64 header
|
||||
|
||||
|
||||
# indexes of entries in the central directory structure
|
||||
|
|
@ -82,7 +84,7 @@ _FH_EXTRA_FIELD_LENGTH = 11
|
|||
def is_zipfile(filename):
|
||||
"""Quickly see if file is a ZIP file by checking the magic number."""
|
||||
try:
|
||||
fpin = open(filename, "rb")
|
||||
fpin = io.open(filename, "rb")
|
||||
endrec = _EndRecData(fpin)
|
||||
fpin.close()
|
||||
if endrec:
|
||||
|
|
@ -206,8 +208,8 @@ class ZipInfo (object):
|
|||
self.date_time = date_time # year, month, day, hour, min, sec
|
||||
# Standard values:
|
||||
self.compress_type = ZIP_STORED # Type of compression for the file
|
||||
self.comment = "" # Comment for each file
|
||||
self.extra = "" # ZIP extra data
|
||||
self.comment = b"" # Comment for each file
|
||||
self.extra = b"" # ZIP extra data
|
||||
if sys.platform == 'win32':
|
||||
self.create_system = 0 # System which created ZIP archive
|
||||
else:
|
||||
|
|
@ -257,7 +259,7 @@ class ZipInfo (object):
|
|||
self.compress_type, dostime, dosdate, CRC,
|
||||
compress_size, file_size,
|
||||
len(self.filename), len(extra))
|
||||
return header + self.filename + extra
|
||||
return header + self.filename.encode("utf-8") + extra
|
||||
|
||||
def _decodeExtra(self):
|
||||
# Try to decode the extra field.
|
||||
|
|
@ -331,7 +333,7 @@ class _ZipDecrypter:
|
|||
|
||||
def _crc32(self, ch, crc):
|
||||
"""Compute the CRC32 primitive on one byte."""
|
||||
return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
|
||||
return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ch) & 0xff]
|
||||
|
||||
def __init__(self, pwd):
|
||||
self.key0 = 305419896
|
||||
|
|
@ -344,20 +346,13 @@ class _ZipDecrypter:
|
|||
self.key0 = self._crc32(c, self.key0)
|
||||
self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
|
||||
self.key1 = (self.key1 * 134775813 + 1) & 4294967295
|
||||
self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
|
||||
self.key2 = self._crc32((self.key1 >> 24) & 255, self.key2)
|
||||
|
||||
def __call__(self, c):
|
||||
"""Decrypt a single character."""
|
||||
# XXX When this is called with a byte instead of a char, ord()
|
||||
# isn't needed. Don't die in that case. In the future we should
|
||||
# just leave this out, once we're always using bytes.
|
||||
try:
|
||||
c = ord(c)
|
||||
except TypeError:
|
||||
pass
|
||||
assert isinstance(c, int)
|
||||
k = self.key2 | 2
|
||||
c = c ^ (((k * (k^1)) >> 8) & 255)
|
||||
c = chr(c)
|
||||
self._UpdateKeys(c)
|
||||
return c
|
||||
|
||||
|
|
@ -370,13 +365,13 @@ class ZipExtFile:
|
|||
self.fileobj = fileobj
|
||||
self.decrypter = decrypt
|
||||
self.bytes_read = 0
|
||||
self.rawbuffer = ''
|
||||
self.readbuffer = ''
|
||||
self.linebuffer = ''
|
||||
self.rawbuffer = b''
|
||||
self.readbuffer = b''
|
||||
self.linebuffer = b''
|
||||
self.eof = False
|
||||
self.univ_newlines = False
|
||||
self.nlSeps = ("\n", )
|
||||
self.lastdiscard = ''
|
||||
self.nlSeps = (b"\n", )
|
||||
self.lastdiscard = b''
|
||||
|
||||
self.compress_type = zipinfo.compress_type
|
||||
self.compress_size = zipinfo.compress_size
|
||||
|
|
@ -394,9 +389,9 @@ class ZipExtFile:
|
|||
self.univ_newlines = univ_newlines
|
||||
|
||||
# pick line separator char(s) based on universal newlines flag
|
||||
self.nlSeps = ("\n", )
|
||||
self.nlSeps = (b"\n", )
|
||||
if self.univ_newlines:
|
||||
self.nlSeps = ("\r\n", "\r", "\n")
|
||||
self.nlSeps = (b"\r\n", b"\r", b"\n")
|
||||
|
||||
def __iter__(self):
|
||||
return self
|
||||
|
|
@ -417,7 +412,7 @@ class ZipExtFile:
|
|||
# ugly check for cases where half of an \r\n pair was
|
||||
# read on the last pass, and the \r was discarded. In this
|
||||
# case we just throw away the \n at the start of the buffer.
|
||||
if (self.lastdiscard, self.linebuffer[0]) == ('\r','\n'):
|
||||
if (self.lastdiscard, self.linebuffer[0]) == (b'\r', b'\n'):
|
||||
self.linebuffer = self.linebuffer[1:]
|
||||
|
||||
for sep in self.nlSeps:
|
||||
|
|
@ -435,7 +430,7 @@ class ZipExtFile:
|
|||
if size < 0:
|
||||
size = sys.maxint
|
||||
elif size == 0:
|
||||
return ''
|
||||
return b''
|
||||
|
||||
# check for a newline already in buffer
|
||||
nl, nllen = self._checkfornewline()
|
||||
|
|
@ -461,7 +456,7 @@ class ZipExtFile:
|
|||
# so return current buffer
|
||||
if nl < 0:
|
||||
s = self.linebuffer
|
||||
self.linebuffer = ''
|
||||
self.linebuffer = b''
|
||||
return s
|
||||
|
||||
buf = self.linebuffer[:nl]
|
||||
|
|
@ -470,7 +465,7 @@ class ZipExtFile:
|
|||
|
||||
# line is always returned with \n as newline char (except possibly
|
||||
# for a final incomplete line in the file, which is handled above).
|
||||
return buf + "\n"
|
||||
return buf + b"\n"
|
||||
|
||||
def readlines(self, sizehint = -1):
|
||||
"""Return a list with all (following) lines. The sizehint parameter
|
||||
|
|
@ -516,18 +511,23 @@ class ZipExtFile:
|
|||
|
||||
# try to read from file (if necessary)
|
||||
if bytesToRead > 0:
|
||||
bytes = self.fileobj.read(bytesToRead)
|
||||
self.bytes_read += len(bytes)
|
||||
self.rawbuffer += bytes
|
||||
data = self.fileobj.read(bytesToRead)
|
||||
self.bytes_read += len(data)
|
||||
try:
|
||||
self.rawbuffer += data
|
||||
except:
|
||||
print(repr(self.fileobj), repr(self.rawbuffer),
|
||||
repr(data))
|
||||
raise
|
||||
|
||||
# handle contents of raw buffer
|
||||
if self.rawbuffer:
|
||||
newdata = self.rawbuffer
|
||||
self.rawbuffer = ''
|
||||
self.rawbuffer = b''
|
||||
|
||||
# decrypt new data if we were given an object to handle that
|
||||
if newdata and self.decrypter is not None:
|
||||
newdata = ''.join(map(self.decrypter, newdata))
|
||||
newdata = bytes(map(self.decrypter, newdata))
|
||||
|
||||
# decompress newly read data if necessary
|
||||
if newdata and self.compress_type == ZIP_DEFLATED:
|
||||
|
|
@ -546,13 +546,13 @@ class ZipExtFile:
|
|||
|
||||
# return what the user asked for
|
||||
if size is None or len(self.readbuffer) <= size:
|
||||
bytes = self.readbuffer
|
||||
self.readbuffer = ''
|
||||
data = self.readbuffer
|
||||
self.readbuffer = b''
|
||||
else:
|
||||
bytes = self.readbuffer[:size]
|
||||
data = self.readbuffer[:size]
|
||||
self.readbuffer = self.readbuffer[size:]
|
||||
|
||||
return bytes
|
||||
return data
|
||||
|
||||
|
||||
class ZipFile:
|
||||
|
|
@ -593,15 +593,16 @@ class ZipFile:
|
|||
|
||||
# Check if we were passed a file-like object
|
||||
if isinstance(file, basestring):
|
||||
# No, it's a filename
|
||||
self._filePassed = 0
|
||||
self.filename = file
|
||||
modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
|
||||
try:
|
||||
self.fp = open(file, modeDict[mode])
|
||||
self.fp = io.open(file, modeDict[mode])
|
||||
except IOError:
|
||||
if mode == 'a':
|
||||
mode = key = 'w'
|
||||
self.fp = open(file, modeDict[mode])
|
||||
self.fp = io.open(file, modeDict[mode])
|
||||
else:
|
||||
raise
|
||||
else:
|
||||
|
|
@ -661,7 +662,7 @@ class ZipFile:
|
|||
self.start_dir = offset_cd + concat
|
||||
fp.seek(self.start_dir, 0)
|
||||
data = fp.read(size_cd)
|
||||
fp = io.StringIO(data)
|
||||
fp = io.BytesIO(data)
|
||||
total = 0
|
||||
while total < size_cd:
|
||||
centdir = fp.read(46)
|
||||
|
|
@ -673,7 +674,7 @@ class ZipFile:
|
|||
print(centdir)
|
||||
filename = fp.read(centdir[_CD_FILENAME_LENGTH])
|
||||
# Create ZipInfo instance to store file information
|
||||
x = ZipInfo(filename)
|
||||
x = ZipInfo(str(filename))
|
||||
x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
|
||||
x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
|
||||
total = (total + centdir[_CD_FILENAME_LENGTH]
|
||||
|
|
@ -708,12 +709,16 @@ class ZipFile:
|
|||
archive."""
|
||||
return self.filelist
|
||||
|
||||
def printdir(self):
|
||||
def printdir(self, file=None):
|
||||
"""Print a table of contents for the zip file."""
|
||||
print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"))
|
||||
if file is None:
|
||||
file = sys.stdout
|
||||
print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"),
|
||||
file=file)
|
||||
for zinfo in self.filelist:
|
||||
date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time
|
||||
print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size))
|
||||
print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size),
|
||||
file=file)
|
||||
|
||||
def testzip(self):
|
||||
"""Read all the files and check the CRC."""
|
||||
|
|
@ -730,6 +735,7 @@ class ZipFile:
|
|||
|
||||
def setpassword(self, pwd):
|
||||
"""Set default password for encrypted files."""
|
||||
assert isinstance(pwd, bytes)
|
||||
self.pwd = pwd
|
||||
|
||||
def read(self, name, pwd=None):
|
||||
|
|
@ -749,7 +755,7 @@ class ZipFile:
|
|||
if self._filePassed:
|
||||
zef_file = self.fp
|
||||
else:
|
||||
zef_file = open(self.filename, 'rb')
|
||||
zef_file = io.open(self.filename, 'rb')
|
||||
|
||||
# Get info object for name
|
||||
zinfo = self.getinfo(name)
|
||||
|
|
@ -768,9 +774,9 @@ class ZipFile:
|
|||
if fheader[_FH_EXTRA_FIELD_LENGTH]:
|
||||
zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
|
||||
|
||||
if fname != zinfo.orig_filename:
|
||||
if fname != zinfo.orig_filename.encode("utf-8"):
|
||||
raise BadZipfile, \
|
||||
'File name in directory "%s" and header "%s" differ.' % (
|
||||
'File name in directory %r and header %r differ.' % (
|
||||
zinfo.orig_filename, fname)
|
||||
|
||||
# check for encrypted flag & handle password
|
||||
|
|
@ -790,7 +796,7 @@ class ZipFile:
|
|||
# and is used to check the correctness of the password.
|
||||
bytes = zef_file.read(12)
|
||||
h = map(zd, bytes[0:12])
|
||||
if ord(h[11]) != ((zinfo.CRC>>24)&255):
|
||||
if h[11] != ((zinfo.CRC>>24) & 255):
|
||||
raise RuntimeError, "Bad password for file %s" % name
|
||||
|
||||
# build and return a ZipExtFile
|
||||
|
|
@ -852,7 +858,7 @@ class ZipFile:
|
|||
|
||||
self._writecheck(zinfo)
|
||||
self._didModify = True
|
||||
fp = open(filename, "rb")
|
||||
fp = io.open(filename, "rb")
|
||||
# Must overwrite CRC and sizes with correct data later
|
||||
zinfo.CRC = CRC = 0
|
||||
zinfo.compress_size = compress_size = 0
|
||||
|
|
@ -982,7 +988,7 @@ class ZipFile:
|
|||
0, zinfo.internal_attr, zinfo.external_attr,
|
||||
header_offset)
|
||||
self.fp.write(centdir)
|
||||
self.fp.write(zinfo.filename)
|
||||
self.fp.write(zinfo.filename.encode("utf-8"))
|
||||
self.fp.write(extra_data)
|
||||
self.fp.write(zinfo.comment)
|
||||
|
||||
|
|
@ -1163,7 +1169,7 @@ def main(args = None):
|
|||
tgtdir = os.path.dirname(tgt)
|
||||
if not os.path.exists(tgtdir):
|
||||
os.makedirs(tgtdir)
|
||||
fp = open(tgt, 'wb')
|
||||
fp = io.open(tgt, 'wb')
|
||||
fp.write(zf.read(path))
|
||||
fp.close()
|
||||
zf.close()
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue