Issue #13815: TarFile.extractfile() now returns io.BufferedReader objects.

The ExFileObject class was removed, some of its code went into _FileInFile.
This commit is contained in:
Lars Gustäbel 2012-05-05 18:15:03 +02:00
parent ef5a4636d0
commit 7a919e9930
4 changed files with 80 additions and 200 deletions

View file

@ -668,6 +668,8 @@ class _FileInFile(object):
self.offset = offset
self.size = size
self.position = 0
self.name = getattr(fileobj, "name", None)
self.closed = False
if blockinfo is None:
blockinfo = [(0, size)]
@ -686,10 +688,16 @@ class _FileInFile(object):
if lastpos < self.size:
self.map.append((False, lastpos, self.size, None))
def flush(self):
pass
def readable(self):
return True
def writable(self):
return False
def seekable(self):
if not hasattr(self.fileobj, "seekable"):
# XXX gzip.GzipFile and bz2.BZ2File
return True
return self.fileobj.seekable()
def tell(self):
@ -697,10 +705,21 @@ class _FileInFile(object):
"""
return self.position
def seek(self, position):
def seek(self, position, whence=io.SEEK_SET):
"""Seek to a position in the file.
"""
self.position = position
if whence == io.SEEK_SET:
self.position = min(max(position, 0), self.size)
elif whence == io.SEEK_CUR:
if position < 0:
self.position = max(self.position + position, 0)
else:
self.position = min(self.position + position, self.size)
elif whence == io.SEEK_END:
self.position = max(min(self.size + position, self.size), 0)
else:
raise ValueError("Invalid argument")
return self.position
def read(self, size=None):
"""Read data from the file.
@ -729,146 +748,16 @@ class _FileInFile(object):
size -= length
self.position += length
return buf
#class _FileInFile
class ExFileObject(object):
"""File-like object for reading an archive member.
Is returned by TarFile.extractfile().
"""
blocksize = 1024
def __init__(self, tarfile, tarinfo):
self.fileobj = _FileInFile(tarfile.fileobj,
tarinfo.offset_data,
tarinfo.size,
tarinfo.sparse)
self.name = tarinfo.name
self.mode = "r"
self.closed = False
self.size = tarinfo.size
self.position = 0
self.buffer = b""
def readable(self):
return True
def writable(self):
return False
def seekable(self):
return self.fileobj.seekable()
def read(self, size=None):
"""Read at most size bytes from the file. If size is not
present or None, read all data until EOF is reached.
"""
if self.closed:
raise ValueError("I/O operation on closed file")
buf = b""
if self.buffer:
if size is None:
buf = self.buffer
self.buffer = b""
else:
buf = self.buffer[:size]
self.buffer = self.buffer[size:]
if size is None:
buf += self.fileobj.read()
else:
buf += self.fileobj.read(size - len(buf))
self.position += len(buf)
return buf
# XXX TextIOWrapper uses the read1() method.
read1 = read
def readline(self, size=-1):
"""Read one entire line from the file. If size is present
and non-negative, return a string with at most that
size, which may be an incomplete line.
"""
if self.closed:
raise ValueError("I/O operation on closed file")
pos = self.buffer.find(b"\n") + 1
if pos == 0:
# no newline found.
while True:
buf = self.fileobj.read(self.blocksize)
self.buffer += buf
if not buf or b"\n" in buf:
pos = self.buffer.find(b"\n") + 1
if pos == 0:
# no newline found.
pos = len(self.buffer)
break
if size != -1:
pos = min(size, pos)
buf = self.buffer[:pos]
self.buffer = self.buffer[pos:]
self.position += len(buf)
return buf
def readlines(self):
"""Return a list with all remaining lines.
"""
result = []
while True:
line = self.readline()
if not line: break
result.append(line)
return result
def tell(self):
"""Return the current file position.
"""
if self.closed:
raise ValueError("I/O operation on closed file")
return self.position
def seek(self, pos, whence=io.SEEK_SET):
"""Seek to a position in the file.
"""
if self.closed:
raise ValueError("I/O operation on closed file")
if whence == io.SEEK_SET:
self.position = min(max(pos, 0), self.size)
elif whence == io.SEEK_CUR:
if pos < 0:
self.position = max(self.position + pos, 0)
else:
self.position = min(self.position + pos, self.size)
elif whence == io.SEEK_END:
self.position = max(min(self.size + pos, self.size), 0)
else:
raise ValueError("Invalid argument")
self.buffer = b""
self.fileobj.seek(self.position)
def readinto(self, b):
buf = self.read(len(b))
b[:len(buf)] = buf
return len(buf)
def close(self):
"""Close the file object.
"""
self.closed = True
#class _FileInFile
def __iter__(self):
"""Get an iterator over the file's lines.
"""
while True:
line = self.readline()
if not line:
break
yield line
#class ExFileObject
#------------------
# Exported Classes
@ -1554,7 +1443,8 @@ class TarFile(object):
tarinfo = TarInfo # The default TarInfo class to use.
fileobject = ExFileObject # The default ExFileObject class to use.
fileobject = None # The file-object for extractfile() or
# io.BufferedReader if None.
def __init__(self, name=None, mode="r", fileobj=None, format=None,
tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
@ -2178,12 +2068,9 @@ class TarFile(object):
def extractfile(self, member):
"""Extract a member from the archive as a file object. `member' may be
a filename or a TarInfo object. If `member' is a regular file, a
file-like object is returned. If `member' is a link, a file-like
object is constructed from the link's target. If `member' is none of
the above, None is returned.
The file-like object is read-only and provides the following
methods: read(), readline(), readlines(), seek() and tell()
a filename or a TarInfo object. If `member' is a regular file or a
link, an io.BufferedReader object is returned. Otherwise, None is
returned.
"""
self._check("r")
@ -2192,13 +2079,14 @@ class TarFile(object):
else:
tarinfo = member
if tarinfo.isreg():
return self.fileobject(self, tarinfo)
elif tarinfo.type not in SUPPORTED_TYPES:
# If a member's type is unknown, it is treated as a
# regular file.
return self.fileobject(self, tarinfo)
if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
# Members with unknown types are treated as regular files.
if self.fileobject is None:
fileobj = _FileInFile(self.fileobj, tarinfo.offset_data, tarinfo.size, tarinfo.sparse)
return io.BufferedReader(fileobj)
else:
# Keep the traditional pre-3.3 API intact.
return self.fileobject(self, tarinfo)
elif tarinfo.islnk() or tarinfo.issym():
if isinstance(self.fileobj, _Stream):