mirror of
https://github.com/python/cpython.git
synced 2025-09-26 18:29:57 +00:00
Issue #11224: Improved sparse file read support (r85916) introduced a
regression in _FileInFile, which is used in the file-like objects returned by
TarFile.extractfile(). The inefficient design of the _FileInFile.read() method
causes various dramatic side effects and errors:

- The data segment of a file member is read completely into memory every(!)
  time a small block is accessed. This is not only slow but may cause
  unexpected MemoryErrors with very large files.
- Reading members from compressed tar archives is even slower because of the
  excessive backwards seeking which is done when the same data segment is read
  over and over again.
- As a backwards seek on a TarFile opened in stream mode is not possible,
  using extractfile() fails with a StreamError.
This commit is contained in:
parent
3eeee83391
commit
dd071045e7
3 changed files with 22 additions and 3 deletions
|
@ -760,9 +760,8 @@ class _FileInFile(object):
|
||||||
self.map_index = 0
|
self.map_index = 0
|
||||||
length = min(size, stop - self.position)
|
length = min(size, stop - self.position)
|
||||||
if data:
|
if data:
|
||||||
self.fileobj.seek(offset)
|
self.fileobj.seek(offset + (self.position - start))
|
||||||
block = self.fileobj.read(stop - start)
|
buf += self.fileobj.read(length)
|
||||||
buf += block[self.position - start:self.position + length]
|
|
||||||
else:
|
else:
|
||||||
buf += NUL * length
|
buf += NUL * length
|
||||||
size -= length
|
size -= length
|
||||||
|
|
|
@ -419,6 +419,22 @@ class StreamReadTest(CommonReadTest):
|
||||||
|
|
||||||
mode="r|"
|
mode="r|"
|
||||||
|
|
||||||
|
def test_read_through(self):
|
||||||
|
# Issue #11224: A poorly designed _FileInFile.read() method
|
||||||
|
# caused seeking errors with stream tar files.
|
||||||
|
for tarinfo in self.tar:
|
||||||
|
if not tarinfo.isreg():
|
||||||
|
continue
|
||||||
|
fobj = self.tar.extractfile(tarinfo)
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
buf = fobj.read(512)
|
||||||
|
except tarfile.StreamError:
|
||||||
|
self.fail("simple read-through using TarFile.extractfile() failed")
|
||||||
|
if not buf:
|
||||||
|
break
|
||||||
|
fobj.close()
|
||||||
|
|
||||||
def test_fileobj_regular_file(self):
|
def test_fileobj_regular_file(self):
|
||||||
tarinfo = self.tar.next() # get "regtype" (can't use getmember)
|
tarinfo = self.tar.next() # get "regtype" (can't use getmember)
|
||||||
fobj = self.tar.extractfile(tarinfo)
|
fobj = self.tar.extractfile(tarinfo)
|
||||||
|
|
|
@ -27,6 +27,10 @@ Core and Builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Issue #11224: Fixed a regression in tarfile that affected the file-like
|
||||||
|
objects returned by TarFile.extractfile() regarding performance, memory
|
||||||
|
consumption and failures with the stream interface.
|
||||||
|
|
||||||
- Issue #10924: Adding salt and Modular Crypt Format to crypt library.
|
- Issue #10924: Adding salt and Modular Crypt Format to crypt library.
|
||||||
Moved old C wrapper to _crypt, and added a Python wrapper with
|
Moved old C wrapper to _crypt, and added a Python wrapper with
|
||||||
enhanced salt generation and simpler API for password generation.
|
enhanced salt generation and simpler API for password generation.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue