mirror of
https://github.com/python/cpython.git
synced 2025-07-24 03:35:53 +00:00
[3.13] GH-128131: Completely support random read access of uncompressed unencrypted files in ZipFile (GH-128143) (#129091)
GH-128131: Completely support random read access of uncompressed unencrypted files in ZipFile (GH-128143)
(cherry picked from commit dda02eb7be)
Co-authored-by: 5ec1cff <56485584+5ec1cff@users.noreply.github.com>
Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
This commit is contained in:
parent
a1c48a750c
commit
03b2ecf41c
3 changed files with 88 additions and 1 deletions
|
@ -1,3 +1,4 @@
|
||||||
|
import _pyio
|
||||||
import array
|
import array
|
||||||
import contextlib
|
import contextlib
|
||||||
import importlib.util
|
import importlib.util
|
||||||
|
@ -3454,5 +3455,87 @@ class StripExtraTests(unittest.TestCase):
|
||||||
b"zzz", zipfile._Extra.strip(b"zzz", (self.ZIP64_EXTRA,)))
|
b"zzz", zipfile._Extra.strip(b"zzz", (self.ZIP64_EXTRA,)))
|
||||||
|
|
||||||
|
|
||||||
|
class StatIO(_pyio.BytesIO):
    """In-memory byte buffer which remembers the number of bytes read.

    Every call to read() adds the length of the returned data to the
    ``bytes_read`` attribute, so a test can measure how much data a
    consumer actually pulled out of the buffer.
    """

    def __init__(self, initial_bytes=None):
        """Create the buffer, optionally pre-filled with *initial_bytes*.

        *initial_bytes* is forwarded unchanged to the base class; the
        default of None keeps the original no-argument behaviour.
        """
        super().__init__(initial_bytes)
        # Running total of bytes returned by read() since construction.
        self.bytes_read = 0

    def read(self, size=-1):
        """Read up to *size* bytes and record how many were returned."""
        data = super().read(size)
        self.bytes_read += len(data)
        return data
|
||||||
|
|
||||||
|
|
||||||
|
class StoredZipExtFileRandomReadTest(unittest.TestCase):
    """Check that an uncompressed, unencrypted zip entry can be randomly
    sought and read without reading redundant bytes."""

    def test_stored_seek_and_read(self):
        payload = b'0123456789' * 2000  # 20000 bytes
        chunk_len = 100
        forward_step = 10002
        backward_step = 5003
        block_size = zipfile.ZipExtFile.MIN_READ_SIZE

        # Every seek has to jump further than ZipExtFile.MIN_READ_SIZE:
        # `ZipExtFile._read2()` reads in blocks of this size, and the
        # test needs to land outside the data that is already buffered.
        self.assertGreaterEqual(forward_step, block_size)   # forward seek
        self.assertGreaterEqual(backward_step, block_size)  # backward seek
        # A single read must fit into one block, because the byte-count
        # checks below assume that only 1 block is fetched per read.
        self.assertGreaterEqual(block_size, chunk_len)      # read() calls

        raw = StatIO()
        with zipfile.ZipFile(raw, "w",
                             compression=zipfile.ZIP_STORED) as archive:
            archive.writestr("foo.txt", payload)

        def seek_then_read(member, delta, start):
            """Seek *delta* bytes relative to *start*, read one chunk and
            verify position, content and bytes pulled from ``raw``.

            Returns the entry position after the read.
            """
            consumed_before = raw.bytes_read
            member.seek(delta, os.SEEK_CUR)
            pos = start + delta
            self.assertEqual(member.tell(), pos)
            self.assertEqual(member._left, member._compress_left)
            data = member.read(chunk_len)
            end = pos + chunk_len
            self.assertEqual(member.tell(), end)
            self.assertEqual(data, payload[pos:end])
            self.assertEqual(member._left, member._compress_left)
            # The optimized path must not rewind and re-read from the
            # start of the entry (as the unoptimized path would), so at
            # most one block may have been consumed.
            self.assertLessEqual(raw.bytes_read - consumed_before,
                                 block_size)
            return end

        # check random seek and read on a file
        with zipfile.ZipFile(raw, "r") as archive, \
                archive.open("foo.txt", "r") as member:
            # forward seek
            pos = seek_then_read(member, forward_step, 0)

            # backward seek
            seek_then_read(member, -backward_step, pos)

            # eof flags test: go to the end, come back, and read again
            member.seek(0, os.SEEK_END)
            member.seek(12345, os.SEEK_SET)
            data = member.read(chunk_len)
            self.assertEqual(data, payload[12345:12345 + chunk_len])
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
unittest.main()
|
unittest.main()
|
||||||
|
|
|
@ -1163,13 +1163,15 @@ class ZipExtFile(io.BufferedIOBase):
|
||||||
self._offset = buff_offset
|
self._offset = buff_offset
|
||||||
read_offset = 0
|
read_offset = 0
|
||||||
# Fast seek uncompressed unencrypted file
|
# Fast seek uncompressed unencrypted file
|
||||||
elif self._compress_type == ZIP_STORED and self._decrypter is None and read_offset > 0:
|
elif self._compress_type == ZIP_STORED and self._decrypter is None and read_offset != 0:
|
||||||
# disable CRC checking after first seeking - it would be invalid
|
# disable CRC checking after first seeking - it would be invalid
|
||||||
self._expected_crc = None
|
self._expected_crc = None
|
||||||
# seek actual file taking already buffered data into account
|
# seek actual file taking already buffered data into account
|
||||||
read_offset -= len(self._readbuffer) - self._offset
|
read_offset -= len(self._readbuffer) - self._offset
|
||||||
self._fileobj.seek(read_offset, os.SEEK_CUR)
|
self._fileobj.seek(read_offset, os.SEEK_CUR)
|
||||||
self._left -= read_offset
|
self._left -= read_offset
|
||||||
|
self._compress_left -= read_offset
|
||||||
|
self._eof = self._left <= 0
|
||||||
read_offset = 0
|
read_offset = 0
|
||||||
# flush read buffer
|
# flush read buffer
|
||||||
self._readbuffer = b''
|
self._readbuffer = b''
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
Completely support random access of uncompressed unencrypted read-only
|
||||||
|
zip files obtained by :meth:`ZipFile.open <zipfile.ZipFile.open>`.
|
Loading…
Add table
Add a link
Reference in a new issue