#15546: Fix BZ2File.read1()'s handling of pathological input data.

This commit is contained in:
Nadeem Vawda 2012-08-04 15:29:28 +02:00
parent d9f38bc704
commit 8280b4ba02

View file

@@ -174,29 +174,31 @@ class BZ2File(io.BufferedIOBase):
# Fill the readahead buffer if it is empty. Returns False on EOF. # Fill the readahead buffer if it is empty. Returns False on EOF.
def _fill_buffer(self): def _fill_buffer(self):
if self._buffer: # Depending on the input data, our call to the decompressor may not
return True # return any data. In this case, try again after reading another block.
while True:
if self._buffer:
return True
if self._decompressor.unused_data: if self._decompressor.unused_data:
rawblock = self._decompressor.unused_data rawblock = self._decompressor.unused_data
else:
rawblock = self._fp.read(_BUFFER_SIZE)
if not rawblock:
if self._decompressor.eof:
self._mode = _MODE_READ_EOF
self._size = self._pos
return False
else: else:
raise EOFError("Compressed file ended before the " rawblock = self._fp.read(_BUFFER_SIZE)
"end-of-stream marker was reached")
# Continue to next stream. if not rawblock:
if self._decompressor.eof: if self._decompressor.eof:
self._decompressor = BZ2Decompressor() self._mode = _MODE_READ_EOF
self._size = self._pos
return False
else:
raise EOFError("Compressed file ended before the "
"end-of-stream marker was reached")
self._buffer = self._decompressor.decompress(rawblock) # Continue to next stream.
return True if self._decompressor.eof:
self._decompressor = BZ2Decompressor()
self._buffer = self._decompressor.decompress(rawblock)
# Read data until EOF. # Read data until EOF.
# If return_data is false, consume the data without returning it. # If return_data is false, consume the data without returning it.
@@ -256,11 +258,14 @@ class BZ2File(io.BufferedIOBase):
return self._read_block(size) return self._read_block(size)
def read1(self, size=-1): def read1(self, size=-1):
"""Read up to size uncompressed bytes with at most one read """Read up to size uncompressed bytes, while trying to avoid
from the underlying stream. making multiple reads from the underlying stream.
Returns b'' if the file is at EOF. Returns b'' if the file is at EOF.
""" """
# Usually, read1() calls _fp.read() at most once. However, sometimes
# this does not give enough data for the decompressor to make progress.
# In this case we make multiple reads, to avoid returning b"".
with self._lock: with self._lock:
self._check_can_read() self._check_can_read()
if (size == 0 or self._mode == _MODE_READ_EOF or if (size == 0 or self._mode == _MODE_READ_EOF or