mirror of
https://github.com/python/cpython.git
synced 2025-09-26 18:29:57 +00:00
#15546: Fix BZ2File.read1()'s handling of pathological input data.
This commit is contained in:
parent
d9f38bc704
commit
8280b4ba02
1 changed files with 26 additions and 21 deletions
47
Lib/bz2.py
47
Lib/bz2.py
|
@ -174,29 +174,31 @@ class BZ2File(io.BufferedIOBase):
|
||||||
|
|
||||||
# Fill the readahead buffer if it is empty. Returns False on EOF.
|
# Fill the readahead buffer if it is empty. Returns False on EOF.
|
||||||
def _fill_buffer(self):
|
def _fill_buffer(self):
|
||||||
if self._buffer:
|
# Depending on the input data, our call to the decompressor may not
|
||||||
return True
|
# return any data. In this case, try again after reading another block.
|
||||||
|
while True:
|
||||||
|
if self._buffer:
|
||||||
|
return True
|
||||||
|
|
||||||
if self._decompressor.unused_data:
|
if self._decompressor.unused_data:
|
||||||
rawblock = self._decompressor.unused_data
|
rawblock = self._decompressor.unused_data
|
||||||
else:
|
|
||||||
rawblock = self._fp.read(_BUFFER_SIZE)
|
|
||||||
|
|
||||||
if not rawblock:
|
|
||||||
if self._decompressor.eof:
|
|
||||||
self._mode = _MODE_READ_EOF
|
|
||||||
self._size = self._pos
|
|
||||||
return False
|
|
||||||
else:
|
else:
|
||||||
raise EOFError("Compressed file ended before the "
|
rawblock = self._fp.read(_BUFFER_SIZE)
|
||||||
"end-of-stream marker was reached")
|
|
||||||
|
|
||||||
# Continue to next stream.
|
if not rawblock:
|
||||||
if self._decompressor.eof:
|
if self._decompressor.eof:
|
||||||
self._decompressor = BZ2Decompressor()
|
self._mode = _MODE_READ_EOF
|
||||||
|
self._size = self._pos
|
||||||
|
return False
|
||||||
|
else:
|
||||||
|
raise EOFError("Compressed file ended before the "
|
||||||
|
"end-of-stream marker was reached")
|
||||||
|
|
||||||
self._buffer = self._decompressor.decompress(rawblock)
|
# Continue to next stream.
|
||||||
return True
|
if self._decompressor.eof:
|
||||||
|
self._decompressor = BZ2Decompressor()
|
||||||
|
|
||||||
|
self._buffer = self._decompressor.decompress(rawblock)
|
||||||
|
|
||||||
# Read data until EOF.
|
# Read data until EOF.
|
||||||
# If return_data is false, consume the data without returning it.
|
# If return_data is false, consume the data without returning it.
|
||||||
|
@ -256,11 +258,14 @@ class BZ2File(io.BufferedIOBase):
|
||||||
return self._read_block(size)
|
return self._read_block(size)
|
||||||
|
|
||||||
def read1(self, size=-1):
|
def read1(self, size=-1):
|
||||||
"""Read up to size uncompressed bytes with at most one read
|
"""Read up to size uncompressed bytes, while trying to avoid
|
||||||
from the underlying stream.
|
making multiple reads from the underlying stream.
|
||||||
|
|
||||||
Returns b'' if the file is at EOF.
|
Returns b'' if the file is at EOF.
|
||||||
"""
|
"""
|
||||||
|
# Usually, read1() calls _fp.read() at most once. However, sometimes
|
||||||
|
# this does not give enough data for the decompressor to make progress.
|
||||||
|
# In this case we make multiple reads, to avoid returning b"".
|
||||||
with self._lock:
|
with self._lock:
|
||||||
self._check_can_read()
|
self._check_can_read()
|
||||||
if (size == 0 or self._mode == _MODE_READ_EOF or
|
if (size == 0 or self._mode == _MODE_READ_EOF or
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue