mirror of
https://github.com/python/cpython.git
synced 2025-11-02 11:08:57 +00:00
bpo-22908: Add seek and tell functionality to ZipExtFile (GH-4966)
This allows for nested zip files, tar files within zip files, zip files within tar files, etc. Contributed by: John Jolly
This commit is contained in:
parent
2e0ecde8d7
commit
066df4fd45
4 changed files with 121 additions and 3 deletions
|
|
@ -246,9 +246,9 @@ ZipFile Objects
|
||||||
With *mode* ``'r'`` the file-like object
|
With *mode* ``'r'`` the file-like object
|
||||||
(``ZipExtFile``) is read-only and provides the following methods:
|
(``ZipExtFile``) is read-only and provides the following methods:
|
||||||
:meth:`~io.BufferedIOBase.read`, :meth:`~io.IOBase.readline`,
|
:meth:`~io.BufferedIOBase.read`, :meth:`~io.IOBase.readline`,
|
||||||
:meth:`~io.IOBase.readlines`, :meth:`__iter__`,
|
:meth:`~io.IOBase.readlines`, :meth:`~io.IOBase.seek`,
|
||||||
:meth:`~iterator.__next__`. These objects can operate independently of
|
:meth:`~io.IOBase.tell`, :meth:`__iter__`, :meth:`~iterator.__next__`.
|
||||||
the ZipFile.
|
These objects can operate independently of the ZipFile.
|
||||||
|
|
||||||
With ``mode='w'``, a writable file handle is returned, which supports the
|
With ``mode='w'``, a writable file handle is returned, which supports the
|
||||||
:meth:`~io.BufferedIOBase.write` method. While a writable file handle is open,
|
:meth:`~io.BufferedIOBase.write` method. While a writable file handle is open,
|
||||||
|
|
|
||||||
|
|
@ -1628,6 +1628,40 @@ class OtherTests(unittest.TestCase):
|
||||||
self.assertEqual(zipf.read('baz'), msg3)
|
self.assertEqual(zipf.read('baz'), msg3)
|
||||||
self.assertEqual(zipf.namelist(), ['foo', 'bar', 'baz'])
|
self.assertEqual(zipf.namelist(), ['foo', 'bar', 'baz'])
|
||||||
|
|
||||||
|
def test_seek_tell(self):
    # Exercise seek()/tell() on a member opened for reading, first from an
    # archive stored on disk (TESTFN) and then from one held in memory.
    payload = b"Where's Bruce?"
    bruce_at = payload.find(b"Bruce")
    for archive in (TESTFN, io.BytesIO()):
        with zipfile.ZipFile(archive, "w") as zipf:
            zipf.writestr("foo.txt", payload)
        with zipfile.ZipFile(archive, "r") as zipf, \
                zipf.open("foo.txt", "r") as member:
            # (offset, whence, position expected from tell() afterwards)
            moves = (
                (bruce_at, os.SEEK_SET, bruce_at),
                (-bruce_at, os.SEEK_CUR, 0),
                (bruce_at, os.SEEK_CUR, bruce_at),
            )
            for offset, whence, expected in moves:
                member.seek(offset, whence)
                self.assertEqual(member.tell(), expected)
            # Reading after the seeks must yield the bytes at that position.
            self.assertEqual(member.read(5),
                             payload[bruce_at:bruce_at + 5])
            member.seek(0, os.SEEK_END)
            self.assertEqual(member.tell(), len(payload))
|
||||||
|
|
||||||
def tearDown(self):
|
def tearDown(self):
|
||||||
unlink(TESTFN)
|
unlink(TESTFN)
|
||||||
unlink(TESTFN2)
|
unlink(TESTFN2)
|
||||||
|
|
|
||||||
|
|
@ -696,6 +696,18 @@ class _SharedFile:
|
||||||
self._close = close
|
self._close = close
|
||||||
self._lock = lock
|
self._lock = lock
|
||||||
self._writing = writing
|
self._writing = writing
|
||||||
|
self.seekable = file.seekable
|
||||||
|
self.tell = file.tell
|
||||||
|
|
||||||
|
def seek(self, offset, whence=0):
    """Reposition this handle within the underlying file.

    *offset* and *whence* have the same meaning as for io.IOBase.seek():
    whence 0 is relative to the start, 1 to the current position and 2
    to the end of the underlying file.

    Returns the new absolute position, which is also recorded in
    ``self._pos``.

    Raises ValueError if ``self._writing()`` reports that a writing
    handle is currently open.
    """
    with self._lock:
        # The constructor stores the callable as ``self._writing``;
        # there is no public ``writing`` attribute.
        if self._writing():
            raise ValueError("Can't reposition in the ZIP file while "
                    "there is an open writing handle on it. "
                    "Close the writing handle before trying to read.")
        if whence == 1:
            # Relative seek (os.SEEK_CUR): measure from the position
            # recorded for this handle, not from wherever the underlying
            # file object currently happens to be positioned.
            self._file.seek(self._pos + offset)
        else:
            self._file.seek(offset, whence)
        self._pos = self._file.tell()
        return self._pos
|
||||||
|
|
||||||
def read(self, n=-1):
|
def read(self, n=-1):
|
||||||
with self._lock:
|
with self._lock:
|
||||||
|
|
@ -746,6 +758,9 @@ class ZipExtFile(io.BufferedIOBase):
|
||||||
# Read from compressed files in 4k blocks.
|
# Read from compressed files in 4k blocks.
|
||||||
MIN_READ_SIZE = 4096
|
MIN_READ_SIZE = 4096
|
||||||
|
|
||||||
|
# Upper bound on the number of bytes read (and discarded) per read() call during seek
|
||||||
|
MAX_SEEK_READ = 1 << 24
|
||||||
|
|
||||||
def __init__(self, fileobj, mode, zipinfo, decrypter=None,
|
def __init__(self, fileobj, mode, zipinfo, decrypter=None,
|
||||||
close_fileobj=False):
|
close_fileobj=False):
|
||||||
self._fileobj = fileobj
|
self._fileobj = fileobj
|
||||||
|
|
@ -778,6 +793,17 @@ class ZipExtFile(io.BufferedIOBase):
|
||||||
else:
|
else:
|
||||||
self._expected_crc = None
|
self._expected_crc = None
|
||||||
|
|
||||||
|
self._seekable = False
|
||||||
|
try:
|
||||||
|
if fileobj.seekable():
|
||||||
|
self._orig_compress_start = fileobj.tell()
|
||||||
|
self._orig_compress_size = zipinfo.compress_size
|
||||||
|
self._orig_file_size = zipinfo.file_size
|
||||||
|
self._orig_start_crc = self._running_crc
|
||||||
|
self._seekable = True
|
||||||
|
except AttributeError:
|
||||||
|
pass
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
result = ['<%s.%s' % (self.__class__.__module__,
|
result = ['<%s.%s' % (self.__class__.__module__,
|
||||||
self.__class__.__qualname__)]
|
self.__class__.__qualname__)]
|
||||||
|
|
@ -963,6 +989,62 @@ class ZipExtFile(io.BufferedIOBase):
|
||||||
finally:
|
finally:
|
||||||
super().close()
|
super().close()
|
||||||
|
|
||||||
|
def seekable(self):
    """Return whether seek() and tell() are supported.

    The answer is the ``_seekable`` flag computed in the constructor.

    Raises ValueError if the file has been closed.
    """
    if self.closed:
        raise ValueError("I/O operation on closed file.")
    return self._seekable
|
||||||
|
|
||||||
|
def seek(self, offset, whence=0):
    """Move the read position and return the new position.

    *whence* selects the reference point for *offset*: 0 (os.SEEK_SET)
    is the start, 1 (os.SEEK_CUR) the current position and 2
    (os.SEEK_END) the end, where the end is ``self._orig_file_size``.
    The target is clamped to the range [0, self._orig_file_size].

    A target inside the current read buffer only moves the buffer
    index.  A target ahead of the buffer is reached by reading and
    discarding data.  A target behind the buffer resets the reader to
    the state captured in the constructor and reads forward from there.

    Raises ValueError if the file is closed or *whence* is invalid, and
    io.UnsupportedOperation if the underlying stream is not seekable.
    """
    if self.closed:
        raise ValueError("seek on closed file.")
    if not self._seekable:
        raise io.UnsupportedOperation("underlying stream is not seekable")
    curr_pos = self.tell()
    if whence == 0:  # Seek from start of file
        new_pos = offset
    elif whence == 1:  # Seek from current position
        new_pos = curr_pos + offset
    elif whence == 2:  # Seek from EOF
        new_pos = self._orig_file_size + offset
    else:
        raise ValueError("whence must be os.SEEK_SET (0), "
                         "os.SEEK_CUR (1), or os.SEEK_END (2)")

    # Clamp the target instead of failing on out-of-range requests.
    new_pos = max(0, min(new_pos, self._orig_file_size))

    read_offset = new_pos - curr_pos
    buff_offset = read_offset + self._offset

    if 0 <= buff_offset < len(self._readbuffer):
        # Just move the _offset index if the new position is in the
        # _readbuffer
        self._offset = buff_offset
        read_offset = 0
    elif read_offset < 0:
        # Position is before the current position. Reset the ZipExtFile
        # to the state recorded in the constructor and read forward.
        self._fileobj.seek(self._orig_compress_start)
        self._running_crc = self._orig_start_crc
        self._compress_left = self._orig_compress_size
        self._left = self._orig_file_size
        self._readbuffer = b''
        self._offset = 0
        # This code is part of the zipfile module itself, so the helper
        # is referenced directly; no ``zipfile`` name is bound here.
        self._decompressor = _get_decompressor(self._compress_type)
        self._eof = False
        read_offset = new_pos
        # NOTE(review): a decrypter passed to the constructor is not
        # reset here; backward seeks in encrypted members probably need
        # that as well -- confirm.

    # Skip forward in bounded chunks so a long seek never needs more
    # than MAX_SEEK_READ bytes in memory at once.
    while read_offset > 0:
        read_len = min(self.MAX_SEEK_READ, read_offset)
        self.read(read_len)
        read_offset -= read_len

    return self.tell()
|
||||||
|
|
||||||
|
def tell(self):
    """Return the current read position.

    The position is derived from bookkeeping only; the underlying file
    object is not consulted.

    Raises ValueError if the file is closed, and
    io.UnsupportedOperation if the underlying stream is not seekable.
    """
    if self.closed:
        raise ValueError("tell on closed file.")
    if not self._seekable:
        raise io.UnsupportedOperation("underlying stream is not seekable")
    # Total size minus _left (presumably the bytes not yet produced by
    # read -- confirm), minus the part of the read buffer that has not
    # been handed out yet (len(_readbuffer) - _offset).
    return (self._orig_file_size - self._left
            - len(self._readbuffer) + self._offset)
|
||||||
|
|
||||||
|
|
||||||
class _ZipWriteFile(io.BufferedIOBase):
|
class _ZipWriteFile(io.BufferedIOBase):
|
||||||
def __init__(self, zf, zinfo, zip64):
|
def __init__(self, zf, zinfo, zip64):
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,2 @@
|
||||||
|
Added seek and tell to the ZipExtFile class. This only works if the file
|
||||||
|
object used to open the zipfile is seekable.
|
||||||
Loading…
Add table
Add a link
Reference in a new issue