mirror of
https://github.com/python/cpython.git
synced 2025-08-22 01:35:16 +00:00
Patch #1230446: tarfile.py: fix ExFileObject so that read() and tell()
work correctly together with readline(). (backport from rev. 53153)
This commit is contained in:
parent
60775f29de
commit
aedb92e59c
3 changed files with 180 additions and 120 deletions
285
Lib/tarfile.py
285
Lib/tarfile.py
|
@ -622,64 +622,158 @@ class _BZ2Proxy(object):
|
||||||
#------------------------
|
#------------------------
|
||||||
# Extraction file object
|
# Extraction file object
|
||||||
#------------------------
|
#------------------------
|
||||||
class ExFileObject(object):
|
class _FileInFile(object):
|
||||||
"""File-like object for reading an archive member.
|
"""A thin wrapper around an existing file object that
|
||||||
Is returned by TarFile.extractfile(). Support for
|
provides a part of its data as an individual file
|
||||||
sparse files included.
|
object.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, tarfile, tarinfo):
|
def __init__(self, fileobj, offset, size, sparse=None):
|
||||||
self.fileobj = tarfile.fileobj
|
self.fileobj = fileobj
|
||||||
self.name = tarinfo.name
|
self.offset = offset
|
||||||
self.mode = "r"
|
self.size = size
|
||||||
self.closed = False
|
self.sparse = sparse
|
||||||
self.offset = tarinfo.offset_data
|
self.position = 0
|
||||||
self.size = tarinfo.size
|
|
||||||
self.pos = 0L
|
|
||||||
self.linebuffer = ""
|
|
||||||
if tarinfo.issparse():
|
|
||||||
self.sparse = tarinfo.sparse
|
|
||||||
self.read = self._readsparse
|
|
||||||
else:
|
|
||||||
self.read = self._readnormal
|
|
||||||
|
|
||||||
def __read(self, size):
|
def tell(self):
|
||||||
"""Overloadable read method.
|
"""Return the current file position.
|
||||||
"""
|
"""
|
||||||
|
return self.position
|
||||||
|
|
||||||
|
def seek(self, position):
|
||||||
|
"""Seek to a position in the file.
|
||||||
|
"""
|
||||||
|
self.position = position
|
||||||
|
|
||||||
|
def read(self, size=None):
|
||||||
|
"""Read data from the file.
|
||||||
|
"""
|
||||||
|
if size is None:
|
||||||
|
size = self.size - self.position
|
||||||
|
else:
|
||||||
|
size = min(size, self.size - self.position)
|
||||||
|
|
||||||
|
if self.sparse is None:
|
||||||
|
return self.readnormal(size)
|
||||||
|
else:
|
||||||
|
return self.readsparse(size)
|
||||||
|
|
||||||
|
def readnormal(self, size):
|
||||||
|
"""Read operation for regular files.
|
||||||
|
"""
|
||||||
|
self.fileobj.seek(self.offset + self.position)
|
||||||
|
self.position += size
|
||||||
return self.fileobj.read(size)
|
return self.fileobj.read(size)
|
||||||
|
|
||||||
def readline(self, size=-1):
|
def readsparse(self, size):
|
||||||
"""Read a line with approx. size. If size is negative,
|
"""Read operation for sparse files.
|
||||||
read a whole line. readline() and read() must not
|
|
||||||
be mixed up (!).
|
|
||||||
"""
|
"""
|
||||||
if size < 0:
|
data = []
|
||||||
size = sys.maxint
|
while size > 0:
|
||||||
|
buf = self.readsparsesection(size)
|
||||||
|
if not buf:
|
||||||
|
break
|
||||||
|
size -= len(buf)
|
||||||
|
data.append(buf)
|
||||||
|
return "".join(data)
|
||||||
|
|
||||||
nl = self.linebuffer.find("\n")
|
def readsparsesection(self, size):
|
||||||
if nl >= 0:
|
"""Read a single section of a sparse file.
|
||||||
nl = min(nl, size)
|
"""
|
||||||
|
section = self.sparse.find(self.position)
|
||||||
|
|
||||||
|
if section is None:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
size = min(size, section.offset + section.size - self.position)
|
||||||
|
|
||||||
|
if isinstance(section, _data):
|
||||||
|
realpos = section.realpos + self.position - section.offset
|
||||||
|
self.fileobj.seek(self.offset + realpos)
|
||||||
|
self.position += size
|
||||||
|
return self.fileobj.read(size)
|
||||||
else:
|
else:
|
||||||
size -= len(self.linebuffer)
|
self.position += size
|
||||||
while (nl < 0 and size > 0):
|
return NUL * size
|
||||||
buf = self.read(min(size, 100))
|
#class _FileInFile
|
||||||
if not buf:
|
|
||||||
|
|
||||||
|
class ExFileObject(object):
|
||||||
|
"""File-like object for reading an archive member.
|
||||||
|
Is returned by TarFile.extractfile().
|
||||||
|
"""
|
||||||
|
blocksize = 1024
|
||||||
|
|
||||||
|
def __init__(self, tarfile, tarinfo):
|
||||||
|
self.fileobj = _FileInFile(tarfile.fileobj,
|
||||||
|
tarinfo.offset_data,
|
||||||
|
tarinfo.size,
|
||||||
|
getattr(tarinfo, "sparse", None))
|
||||||
|
self.name = tarinfo.name
|
||||||
|
self.mode = "r"
|
||||||
|
self.closed = False
|
||||||
|
self.size = tarinfo.size
|
||||||
|
|
||||||
|
self.position = 0
|
||||||
|
self.buffer = ""
|
||||||
|
|
||||||
|
def read(self, size=None):
|
||||||
|
"""Read at most size bytes from the file. If size is not
|
||||||
|
present or None, read all data until EOF is reached.
|
||||||
|
"""
|
||||||
|
if self.closed:
|
||||||
|
raise ValueError("I/O operation on closed file")
|
||||||
|
|
||||||
|
buf = ""
|
||||||
|
if self.buffer:
|
||||||
|
if size is None:
|
||||||
|
buf = self.buffer
|
||||||
|
self.buffer = ""
|
||||||
|
else:
|
||||||
|
buf = self.buffer[:size]
|
||||||
|
self.buffer = self.buffer[size:]
|
||||||
|
|
||||||
|
if size is None:
|
||||||
|
buf += self.fileobj.read()
|
||||||
|
else:
|
||||||
|
buf += self.fileobj.read(size - len(buf))
|
||||||
|
|
||||||
|
self.position += len(buf)
|
||||||
|
return buf
|
||||||
|
|
||||||
|
def readline(self, size=-1):
|
||||||
|
"""Read one entire line from the file. If size is present
|
||||||
|
and non-negative, return a string with at most that
|
||||||
|
size, which may be an incomplete line.
|
||||||
|
"""
|
||||||
|
if self.closed:
|
||||||
|
raise ValueError("I/O operation on closed file")
|
||||||
|
|
||||||
|
if "\n" in self.buffer:
|
||||||
|
pos = self.buffer.find("\n") + 1
|
||||||
|
else:
|
||||||
|
buffers = [self.buffer]
|
||||||
|
while True:
|
||||||
|
buf = self.fileobj.read(self.blocksize)
|
||||||
|
buffers.append(buf)
|
||||||
|
if not buf or "\n" in buf:
|
||||||
|
self.buffer = "".join(buffers)
|
||||||
|
pos = self.buffer.find("\n") + 1
|
||||||
|
if pos == 0:
|
||||||
|
# no newline found.
|
||||||
|
pos = len(self.buffer)
|
||||||
break
|
break
|
||||||
self.linebuffer += buf
|
|
||||||
size -= len(buf)
|
if size != -1:
|
||||||
nl = self.linebuffer.find("\n")
|
pos = min(size, pos)
|
||||||
if nl == -1:
|
|
||||||
s = self.linebuffer
|
buf = self.buffer[:pos]
|
||||||
self.linebuffer = ""
|
self.buffer = self.buffer[pos:]
|
||||||
return s
|
self.position += len(buf)
|
||||||
buf = self.linebuffer[:nl]
|
return buf
|
||||||
self.linebuffer = self.linebuffer[nl + 1:]
|
|
||||||
while buf[-1:] == "\r":
|
|
||||||
buf = buf[:-1]
|
|
||||||
return buf + "\n"
|
|
||||||
|
|
||||||
def readlines(self):
|
def readlines(self):
|
||||||
"""Return a list with all (following) lines.
|
"""Return a list with all remaining lines.
|
||||||
"""
|
"""
|
||||||
result = []
|
result = []
|
||||||
while True:
|
while True:
|
||||||
|
@ -688,74 +782,34 @@ class ExFileObject(object):
|
||||||
result.append(line)
|
result.append(line)
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def _readnormal(self, size=None):
|
|
||||||
"""Read operation for regular files.
|
|
||||||
"""
|
|
||||||
if self.closed:
|
|
||||||
raise ValueError("file is closed")
|
|
||||||
self.fileobj.seek(self.offset + self.pos)
|
|
||||||
bytesleft = self.size - self.pos
|
|
||||||
if size is None:
|
|
||||||
bytestoread = bytesleft
|
|
||||||
else:
|
|
||||||
bytestoread = min(size, bytesleft)
|
|
||||||
self.pos += bytestoread
|
|
||||||
return self.__read(bytestoread)
|
|
||||||
|
|
||||||
def _readsparse(self, size=None):
|
|
||||||
"""Read operation for sparse files.
|
|
||||||
"""
|
|
||||||
if self.closed:
|
|
||||||
raise ValueError("file is closed")
|
|
||||||
|
|
||||||
if size is None:
|
|
||||||
size = self.size - self.pos
|
|
||||||
|
|
||||||
data = []
|
|
||||||
while size > 0:
|
|
||||||
buf = self._readsparsesection(size)
|
|
||||||
if not buf:
|
|
||||||
break
|
|
||||||
size -= len(buf)
|
|
||||||
data.append(buf)
|
|
||||||
return "".join(data)
|
|
||||||
|
|
||||||
def _readsparsesection(self, size):
|
|
||||||
"""Read a single section of a sparse file.
|
|
||||||
"""
|
|
||||||
section = self.sparse.find(self.pos)
|
|
||||||
|
|
||||||
if section is None:
|
|
||||||
return ""
|
|
||||||
|
|
||||||
toread = min(size, section.offset + section.size - self.pos)
|
|
||||||
if isinstance(section, _data):
|
|
||||||
realpos = section.realpos + self.pos - section.offset
|
|
||||||
self.pos += toread
|
|
||||||
self.fileobj.seek(self.offset + realpos)
|
|
||||||
return self.__read(toread)
|
|
||||||
else:
|
|
||||||
self.pos += toread
|
|
||||||
return NUL * toread
|
|
||||||
|
|
||||||
def tell(self):
|
def tell(self):
|
||||||
"""Return the current file position.
|
"""Return the current file position.
|
||||||
"""
|
"""
|
||||||
return self.pos
|
if self.closed:
|
||||||
|
raise ValueError("I/O operation on closed file")
|
||||||
|
|
||||||
def seek(self, pos, whence=0):
|
return self.position
|
||||||
|
|
||||||
|
def seek(self, pos, whence=os.SEEK_SET):
|
||||||
"""Seek to a position in the file.
|
"""Seek to a position in the file.
|
||||||
"""
|
"""
|
||||||
self.linebuffer = ""
|
if self.closed:
|
||||||
if whence == 0:
|
raise ValueError("I/O operation on closed file")
|
||||||
self.pos = min(max(pos, 0), self.size)
|
|
||||||
if whence == 1:
|
if whence == os.SEEK_SET:
|
||||||
|
self.position = min(max(pos, 0), self.size)
|
||||||
|
elif whence == os.SEEK_CUR:
|
||||||
if pos < 0:
|
if pos < 0:
|
||||||
self.pos = max(self.pos + pos, 0)
|
self.position = max(self.position + pos, 0)
|
||||||
else:
|
else:
|
||||||
self.pos = min(self.pos + pos, self.size)
|
self.position = min(self.position + pos, self.size)
|
||||||
if whence == 2:
|
elif whence == os.SEEK_END:
|
||||||
self.pos = max(min(self.size + pos, self.size), 0)
|
self.position = max(min(self.size + pos, self.size), 0)
|
||||||
|
else:
|
||||||
|
raise ValueError("Invalid argument")
|
||||||
|
|
||||||
|
self.buffer = ""
|
||||||
|
self.fileobj.seek(self.position)
|
||||||
|
|
||||||
def close(self):
|
def close(self):
|
||||||
"""Close the file object.
|
"""Close the file object.
|
||||||
|
@ -763,20 +817,13 @@ class ExFileObject(object):
|
||||||
self.closed = True
|
self.closed = True
|
||||||
|
|
||||||
def __iter__(self):
|
def __iter__(self):
|
||||||
"""Get an iterator over the file object.
|
"""Get an iterator over the file's lines.
|
||||||
"""
|
"""
|
||||||
if self.closed:
|
while True:
|
||||||
raise ValueError("I/O operation on closed file")
|
line = self.readline()
|
||||||
return self
|
if not line:
|
||||||
|
break
|
||||||
def next(self):
|
yield line
|
||||||
"""Get the next item from the file iterator.
|
|
||||||
"""
|
|
||||||
result = self.readline()
|
|
||||||
if not result:
|
|
||||||
raise StopIteration
|
|
||||||
return result
|
|
||||||
|
|
||||||
#class ExFileObject
|
#class ExFileObject
|
||||||
|
|
||||||
#------------------
|
#------------------
|
||||||
|
|
|
@ -110,7 +110,7 @@ class ReadTest(BaseTest):
|
||||||
"""Test seek() method of _FileObject, incl. random reading.
|
"""Test seek() method of _FileObject, incl. random reading.
|
||||||
"""
|
"""
|
||||||
if self.sep != "|":
|
if self.sep != "|":
|
||||||
filename = "0-REGTYPE"
|
filename = "0-REGTYPE-TEXT"
|
||||||
self.tar.extract(filename, dirname())
|
self.tar.extract(filename, dirname())
|
||||||
f = open(os.path.join(dirname(), filename), "rb")
|
f = open(os.path.join(dirname(), filename), "rb")
|
||||||
data = f.read()
|
data = f.read()
|
||||||
|
@ -149,6 +149,16 @@ class ReadTest(BaseTest):
|
||||||
s2 = fobj.readlines()
|
s2 = fobj.readlines()
|
||||||
self.assert_(s1 == s2,
|
self.assert_(s1 == s2,
|
||||||
"readlines() after seek failed")
|
"readlines() after seek failed")
|
||||||
|
fobj.seek(0)
|
||||||
|
self.assert_(len(fobj.readline()) == fobj.tell(),
|
||||||
|
"tell() after readline() failed")
|
||||||
|
fobj.seek(512)
|
||||||
|
self.assert_(len(fobj.readline()) + 512 == fobj.tell(),
|
||||||
|
"tell() after seek() and readline() failed")
|
||||||
|
fobj.seek(0)
|
||||||
|
line = fobj.readline()
|
||||||
|
self.assert_(fobj.read() == data[len(line):],
|
||||||
|
"read() after readline() failed")
|
||||||
fobj.close()
|
fobj.close()
|
||||||
|
|
||||||
def test_old_dirtype(self):
|
def test_old_dirtype(self):
|
||||||
|
|
|
@ -620,6 +620,9 @@ Core and builtins
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Patch #1230446: tarfile.py: fix ExFileObject so that read() and tell()
|
||||||
|
work correctly together with readline().
|
||||||
|
|
||||||
- Correction of patch #1455898: In the mbcs decoder, set final=False
|
- Correction of patch #1455898: In the mbcs decoder, set final=False
|
||||||
for stream decoder, but final=True for the decode function.
|
for stream decoder, but final=True for the decode function.
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue