mirror of
https://github.com/python/cpython.git
synced 2025-08-03 08:34:29 +00:00
Issue #7471: Improve the performance of GzipFile's buffering mechanism,
and make it implement the `io.BufferedIOBase` ABC to allow for further speedups by wrapping it in an `io.BufferedReader`. Patch by Nir Aides.
This commit is contained in:
parent
49d709c921
commit
673ddf9907
3 changed files with 58 additions and 58 deletions
99
Lib/gzip.py
99
Lib/gzip.py
|
@ -7,6 +7,7 @@ but random access is not allowed."""
|
|||
|
||||
import struct, sys, time, os
|
||||
import zlib
|
||||
import io
|
||||
import __builtin__
|
||||
|
||||
__all__ = ["GzipFile","open"]
|
||||
|
@ -32,7 +33,7 @@ def open(filename, mode="rb", compresslevel=9):
|
|||
"""
|
||||
return GzipFile(filename, mode, compresslevel)
|
||||
|
||||
class GzipFile:
|
||||
class GzipFile(io.BufferedIOBase):
|
||||
"""The GzipFile class simulates most of the methods of a file object with
|
||||
the exception of the readinto() and truncate() methods.
|
||||
|
||||
|
@ -97,8 +98,12 @@ class GzipFile:
|
|||
self.mode = READ
|
||||
# Set flag indicating start of a new member
|
||||
self._new_member = True
|
||||
# Buffer data read from gzip file. extrastart is offset in
|
||||
# stream where buffer starts. extrasize is number of
|
||||
# bytes remaining in buffer from current stream position.
|
||||
self.extrabuf = ""
|
||||
self.extrasize = 0
|
||||
self.extrastart = 0
|
||||
self.name = filename
|
||||
# Starts small, scales exponentially
|
||||
self.min_readsize = 100
|
||||
|
@ -196,7 +201,6 @@ class GzipFile:
|
|||
if flag & FHCRC:
|
||||
self.fileobj.read(2) # Read & discard the 16-bit header CRC
|
||||
|
||||
|
||||
def write(self,data):
|
||||
if self.mode != WRITE:
|
||||
import errno
|
||||
|
@ -204,12 +208,19 @@ class GzipFile:
|
|||
|
||||
if self.fileobj is None:
|
||||
raise ValueError, "write() on closed GzipFile object"
|
||||
|
||||
# Convert data type if called by io.BufferedWriter.
|
||||
if isinstance(data, memoryview):
|
||||
data = data.tobytes()
|
||||
|
||||
if len(data) > 0:
|
||||
self.size = self.size + len(data)
|
||||
self.crc = zlib.crc32(data, self.crc) & 0xffffffffL
|
||||
self.fileobj.write( self.compress.compress(data) )
|
||||
self.offset += len(data)
|
||||
|
||||
return len(data)
|
||||
|
||||
def read(self, size=-1):
|
||||
if self.mode != READ:
|
||||
import errno
|
||||
|
@ -235,15 +246,14 @@ class GzipFile:
|
|||
if size > self.extrasize:
|
||||
size = self.extrasize
|
||||
|
||||
chunk = self.extrabuf[:size]
|
||||
self.extrabuf = self.extrabuf[size:]
|
||||
offset = self.offset - self.extrastart
|
||||
chunk = self.extrabuf[offset: offset + size]
|
||||
self.extrasize = self.extrasize - size
|
||||
|
||||
self.offset += size
|
||||
return chunk
|
||||
|
||||
def _unread(self, buf):
|
||||
self.extrabuf = buf + self.extrabuf
|
||||
self.extrasize = len(buf) + self.extrasize
|
||||
self.offset -= len(buf)
|
||||
|
||||
|
@ -299,8 +309,10 @@ class GzipFile:
|
|||
|
||||
def _add_read_data(self, data):
|
||||
self.crc = zlib.crc32(data, self.crc) & 0xffffffffL
|
||||
self.extrabuf = self.extrabuf + data
|
||||
offset = self.offset - self.extrastart
|
||||
self.extrabuf = self.extrabuf[offset:] + data
|
||||
self.extrasize = self.extrasize + len(data)
|
||||
self.extrastart = self.offset
|
||||
self.size = self.size + len(data)
|
||||
|
||||
def _read_eof(self):
|
||||
|
@ -318,6 +330,10 @@ class GzipFile:
|
|||
elif isize != (self.size & 0xffffffffL):
|
||||
raise IOError, "Incorrect length of data produced"
|
||||
|
||||
@property
|
||||
def closed(self):
|
||||
return self.fileobj is None
|
||||
|
||||
def close(self):
|
||||
if self.fileobj is None:
|
||||
return
|
||||
|
@ -333,15 +349,6 @@ class GzipFile:
|
|||
self.myfileobj.close()
|
||||
self.myfileobj = None
|
||||
|
||||
def __del__(self):
|
||||
try:
|
||||
if (self.myfileobj is None and
|
||||
self.fileobj is None):
|
||||
return
|
||||
except AttributeError:
|
||||
return
|
||||
self.close()
|
||||
|
||||
def flush(self,zlib_mode=zlib.Z_SYNC_FLUSH):
|
||||
if self.mode == WRITE:
|
||||
# Ensure the compressor's buffer is flushed
|
||||
|
@ -356,12 +363,6 @@ class GzipFile:
|
|||
"""
|
||||
return self.fileobj.fileno()
|
||||
|
||||
def isatty(self):
|
||||
return False
|
||||
|
||||
def tell(self):
|
||||
return self.offset
|
||||
|
||||
def rewind(self):
|
||||
'''Return the uncompressed stream file position indicator to the
|
||||
beginning of the file'''
|
||||
|
@ -371,8 +372,18 @@ class GzipFile:
|
|||
self._new_member = True
|
||||
self.extrabuf = ""
|
||||
self.extrasize = 0
|
||||
self.extrastart = 0
|
||||
self.offset = 0
|
||||
|
||||
def readable(self):
|
||||
return self.mode == READ
|
||||
|
||||
def writable(self):
|
||||
return self.mode == WRITE
|
||||
|
||||
def seekable(self):
|
||||
return True
|
||||
|
||||
def seek(self, offset, whence=0):
|
||||
if whence:
|
||||
if whence == 1:
|
||||
|
@ -395,8 +406,18 @@ class GzipFile:
|
|||
self.read(1024)
|
||||
self.read(count % 1024)
|
||||
|
||||
return self.offset
|
||||
|
||||
def readline(self, size=-1):
|
||||
if size < 0:
|
||||
# Shortcut common case - newline found in buffer.
|
||||
offset = self.offset - self.extrastart
|
||||
i = self.extrabuf.find('\n', offset) + 1
|
||||
if i > 0:
|
||||
self.extrasize -= i - offset
|
||||
self.offset += i - offset
|
||||
return self.extrabuf[offset: i]
|
||||
|
||||
size = sys.maxint
|
||||
readsize = self.min_readsize
|
||||
else:
|
||||
|
@ -426,42 +447,6 @@ class GzipFile:
|
|||
self.min_readsize = min(readsize, self.min_readsize * 2, 512)
|
||||
return ''.join(bufs) # Return resulting line
|
||||
|
||||
def readlines(self, sizehint=0):
|
||||
# Negative numbers result in reading all the lines
|
||||
if sizehint <= 0:
|
||||
sizehint = sys.maxint
|
||||
L = []
|
||||
while sizehint > 0:
|
||||
line = self.readline()
|
||||
if line == "":
|
||||
break
|
||||
L.append(line)
|
||||
sizehint = sizehint - len(line)
|
||||
|
||||
return L
|
||||
|
||||
def writelines(self, L):
|
||||
for line in L:
|
||||
self.write(line)
|
||||
|
||||
def __iter__(self):
|
||||
return self
|
||||
|
||||
def next(self):
|
||||
line = self.readline()
|
||||
if line:
|
||||
return line
|
||||
else:
|
||||
raise StopIteration
|
||||
|
||||
def __enter__(self):
|
||||
if self.fileobj is None:
|
||||
raise ValueError("I/O operation on closed GzipFile object")
|
||||
return self
|
||||
|
||||
def __exit__(self, *args):
|
||||
self.close()
|
||||
|
||||
|
||||
def _test():
|
||||
# Act like gzip; with -d, act like gunzip.
|
||||
|
|
|
@ -5,6 +5,7 @@
|
|||
import unittest
|
||||
from test import test_support
|
||||
import os
|
||||
import io
|
||||
import struct
|
||||
gzip = test_support.import_module('gzip')
|
||||
|
||||
|
@ -80,6 +81,16 @@ class TestGzip(unittest.TestCase):
|
|||
zgfile.close()
|
||||
self.assertEquals(contents, 'a'*201)
|
||||
|
||||
def test_buffered_reader(self):
|
||||
# Issue #7471: a GzipFile can be wrapped in a BufferedReader for
|
||||
# performance.
|
||||
self.test_write()
|
||||
|
||||
f = gzip.GzipFile(self.filename, 'rb')
|
||||
with io.BufferedReader(f) as r:
|
||||
lines = [line for line in r]
|
||||
|
||||
self.assertEqual(lines, 50 * data1.splitlines(True))
|
||||
|
||||
def test_readline(self):
|
||||
self.test_write()
|
||||
|
|
|
@ -62,7 +62,11 @@ Core and Builtins
|
|||
Library
|
||||
-------
|
||||
|
||||
- Issue #3972: httplib.HTTPConnection now accepts an optional source_address
|
||||
- Issue #7471: Improve the performance of GzipFile's buffering mechanism,
|
||||
and make it implement the `io.BufferedIOBase` ABC to allow for further
|
||||
speedups by wrapping it in an `io.BufferedReader`. Patch by Nir Aides.
|
||||
|
||||
- Issue #3972: httplib.HTTPConnection now accepts an optional source_address
|
||||
parameter to allow specifying where your connections come from.
|
||||
|
||||
- socket.create_connection now accepts an optional source_address parameter.
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue