mirror of
https://github.com/python/cpython.git
synced 2025-08-03 16:39:00 +00:00
Patch #918101: Add tarfile open mode r|* for auto-detection of the
stream compression; add, for symmetry reasons, r:* as a synonym of r.
This commit is contained in:
parent
409d8f2ebd
commit
78be7df9e4
4 changed files with 97 additions and 32 deletions
|
@ -32,7 +32,7 @@ Some facts and figures:
|
||||||
it defaults to \code{'r'}. Here is a full list of mode combinations:
|
it defaults to \code{'r'}. Here is a full list of mode combinations:
|
||||||
|
|
||||||
\begin{tableii}{c|l}{code}{mode}{action}
|
\begin{tableii}{c|l}{code}{mode}{action}
|
||||||
\lineii{'r'}{Open for reading with transparent compression (recommended).}
|
\lineii{'r' or 'r:*'}{Open for reading with transparent compression (recommended).}
|
||||||
\lineii{'r:'}{Open for reading exclusively without compression.}
|
\lineii{'r:'}{Open for reading exclusively without compression.}
|
||||||
\lineii{'r:gz'}{Open for reading with gzip compression.}
|
\lineii{'r:gz'}{Open for reading with gzip compression.}
|
||||||
\lineii{'r:bz2'}{Open for reading with bzip2 compression.}
|
\lineii{'r:bz2'}{Open for reading with bzip2 compression.}
|
||||||
|
@ -65,6 +65,7 @@ Some facts and figures:
|
||||||
(section~\ref{tar-examples}). The currently possible modes:
|
(section~\ref{tar-examples}). The currently possible modes:
|
||||||
|
|
||||||
\begin{tableii}{c|l}{code}{Mode}{Action}
|
\begin{tableii}{c|l}{code}{Mode}{Action}
|
||||||
|
\lineii{'r|*'}{Open a \emph{stream} of tar blocks for reading with transparent compression.}
|
||||||
\lineii{'r|'}{Open a \emph{stream} of uncompressed tar blocks for reading.}
|
\lineii{'r|'}{Open a \emph{stream} of uncompressed tar blocks for reading.}
|
||||||
\lineii{'r|gz'}{Open a gzip compressed \emph{stream} for reading.}
|
\lineii{'r|gz'}{Open a gzip compressed \emph{stream} for reading.}
|
||||||
\lineii{'r|bz2'}{Open a bzip2 compressed \emph{stream} for reading.}
|
\lineii{'r|bz2'}{Open a bzip2 compressed \emph{stream} for reading.}
|
||||||
|
|
|
@ -274,7 +274,7 @@ class _Stream:
|
||||||
_Stream is intended to be used only internally.
|
_Stream is intended to be used only internally.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, name, mode, type, fileobj, bufsize):
|
def __init__(self, name, mode, comptype, fileobj, bufsize):
|
||||||
"""Construct a _Stream object.
|
"""Construct a _Stream object.
|
||||||
"""
|
"""
|
||||||
self._extfileobj = True
|
self._extfileobj = True
|
||||||
|
@ -282,16 +282,22 @@ class _Stream:
|
||||||
fileobj = _LowLevelFile(name, mode)
|
fileobj = _LowLevelFile(name, mode)
|
||||||
self._extfileobj = False
|
self._extfileobj = False
|
||||||
|
|
||||||
self.name = name or ""
|
if comptype == '*':
|
||||||
self.mode = mode
|
# Enable transparent compression detection for the
|
||||||
self.type = type
|
# stream interface
|
||||||
self.fileobj = fileobj
|
fileobj = _StreamProxy(fileobj)
|
||||||
self.bufsize = bufsize
|
comptype = fileobj.getcomptype()
|
||||||
self.buf = ""
|
|
||||||
self.pos = 0L
|
|
||||||
self.closed = False
|
|
||||||
|
|
||||||
if type == "gz":
|
self.name = name or ""
|
||||||
|
self.mode = mode
|
||||||
|
self.comptype = comptype
|
||||||
|
self.fileobj = fileobj
|
||||||
|
self.bufsize = bufsize
|
||||||
|
self.buf = ""
|
||||||
|
self.pos = 0L
|
||||||
|
self.closed = False
|
||||||
|
|
||||||
|
if comptype == "gz":
|
||||||
try:
|
try:
|
||||||
import zlib
|
import zlib
|
||||||
except ImportError:
|
except ImportError:
|
||||||
|
@ -303,7 +309,7 @@ class _Stream:
|
||||||
else:
|
else:
|
||||||
self._init_write_gz()
|
self._init_write_gz()
|
||||||
|
|
||||||
if type == "bz2":
|
if comptype == "bz2":
|
||||||
try:
|
try:
|
||||||
import bz2
|
import bz2
|
||||||
except ImportError:
|
except ImportError:
|
||||||
|
@ -315,7 +321,7 @@ class _Stream:
|
||||||
self.cmp = bz2.BZ2Compressor()
|
self.cmp = bz2.BZ2Compressor()
|
||||||
|
|
||||||
def __del__(self):
|
def __del__(self):
|
||||||
if not self.closed:
|
if hasattr(self, "closed") and not self.closed:
|
||||||
self.close()
|
self.close()
|
||||||
|
|
||||||
def _init_write_gz(self):
|
def _init_write_gz(self):
|
||||||
|
@ -334,10 +340,10 @@ class _Stream:
|
||||||
def write(self, s):
|
def write(self, s):
|
||||||
"""Write string s to the stream.
|
"""Write string s to the stream.
|
||||||
"""
|
"""
|
||||||
if self.type == "gz":
|
if self.comptype == "gz":
|
||||||
self.crc = self.zlib.crc32(s, self.crc)
|
self.crc = self.zlib.crc32(s, self.crc)
|
||||||
self.pos += len(s)
|
self.pos += len(s)
|
||||||
if self.type != "tar":
|
if self.comptype != "tar":
|
||||||
s = self.cmp.compress(s)
|
s = self.cmp.compress(s)
|
||||||
self.__write(s)
|
self.__write(s)
|
||||||
|
|
||||||
|
@ -357,12 +363,16 @@ class _Stream:
|
||||||
if self.closed:
|
if self.closed:
|
||||||
return
|
return
|
||||||
|
|
||||||
if self.mode == "w" and self.type != "tar":
|
if self.mode == "w" and self.comptype != "tar":
|
||||||
self.buf += self.cmp.flush()
|
self.buf += self.cmp.flush()
|
||||||
|
|
||||||
if self.mode == "w" and self.buf:
|
if self.mode == "w" and self.buf:
|
||||||
|
blocks, remainder = divmod(len(self.buf), self.bufsize)
|
||||||
|
if remainder > 0:
|
||||||
|
self.buf += NUL * (self.bufsize - remainder)
|
||||||
self.fileobj.write(self.buf)
|
self.fileobj.write(self.buf)
|
||||||
self.buf = ""
|
self.buf = ""
|
||||||
if self.type == "gz":
|
if self.comptype == "gz":
|
||||||
self.fileobj.write(struct.pack("<l", self.crc))
|
self.fileobj.write(struct.pack("<l", self.crc))
|
||||||
self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
|
self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
|
||||||
|
|
||||||
|
@ -441,7 +451,7 @@ class _Stream:
|
||||||
def _read(self, size):
|
def _read(self, size):
|
||||||
"""Return size bytes from the stream.
|
"""Return size bytes from the stream.
|
||||||
"""
|
"""
|
||||||
if self.type == "tar":
|
if self.comptype == "tar":
|
||||||
return self.__read(size)
|
return self.__read(size)
|
||||||
|
|
||||||
c = len(self.dbuf)
|
c = len(self.dbuf)
|
||||||
|
@ -474,6 +484,30 @@ class _Stream:
|
||||||
return t[:size]
|
return t[:size]
|
||||||
# class _Stream
|
# class _Stream
|
||||||
|
|
||||||
|
class _StreamProxy(object):
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').

       The first BLOCKSIZE bytes of the wrapped file object are read
       eagerly so that getcomptype() can inspect the magic bytes.  That
       buffered block is handed back unchanged by the first read() call;
       every later read() goes straight to the wrapped file object.
    """

    def __init__(self, fileobj):
        """Wrap fileobj and buffer its first block for inspection.
        """
        self.fileobj = fileobj
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        """Return the buffered first block (size is ignored for this
           one call) and route all further reads to the wrapped object.
        """
        # Shadow this method on the instance so that the proxy gets out
        # of the way after the buffered block has been delivered once.
        self.read = self.fileobj.read
        return self.buf

    def getcomptype(self):
        """Return "gz", "bz2" or "tar" depending on the magic bytes
           found at the start of the buffered block.
        """
        if self.buf.startswith("\037\213\010"):
            return "gz"
        # A bzip2 stream starts with "BZh", one block-size digit
        # ('1'..'9') and then the block magic "1AY&SY".  Testing for the
        # literal "BZh91" would only recognise streams written with
        # compression level 9.
        if self.buf[0:3] == "BZh" and self.buf[4:10] == "1AY&SY":
            return "bz2"
        return "tar"

    def close(self):
        """Close the wrapped file object.
        """
        self.fileobj.close()
# class _StreamProxy
|
||||||
|
|
||||||
#------------------------
|
#------------------------
|
||||||
# Extraction file object
|
# Extraction file object
|
||||||
#------------------------
|
#------------------------
|
||||||
|
@ -879,7 +913,7 @@ class TarFile(object):
|
||||||
an appropriate TarFile class.
|
an appropriate TarFile class.
|
||||||
|
|
||||||
mode:
|
mode:
|
||||||
'r' open for reading with transparent compression
|
'r' or 'r:*' open for reading with transparent compression
|
||||||
'r:' open for reading exclusively uncompressed
|
'r:' open for reading exclusively uncompressed
|
||||||
'r:gz' open for reading with gzip compression
|
'r:gz' open for reading with gzip compression
|
||||||
'r:bz2' open for reading with bzip2 compression
|
'r:bz2' open for reading with bzip2 compression
|
||||||
|
@ -887,6 +921,8 @@ class TarFile(object):
|
||||||
'w' or 'w:' open for writing without compression
|
'w' or 'w:' open for writing without compression
|
||||||
'w:gz' open for writing with gzip compression
|
'w:gz' open for writing with gzip compression
|
||||||
'w:bz2' open for writing with bzip2 compression
|
'w:bz2' open for writing with bzip2 compression
|
||||||
|
|
||||||
|
'r|*' open a stream of tar blocks with transparent compression
|
||||||
'r|' open an uncompressed stream of tar blocks for reading
|
'r|' open an uncompressed stream of tar blocks for reading
|
||||||
'r|gz' open a gzip compressed stream of tar blocks
|
'r|gz' open a gzip compressed stream of tar blocks
|
||||||
'r|bz2' open a bzip2 compressed stream of tar blocks
|
'r|bz2' open a bzip2 compressed stream of tar blocks
|
||||||
|
@ -898,7 +934,17 @@ class TarFile(object):
|
||||||
if not name and not fileobj:
|
if not name and not fileobj:
|
||||||
raise ValueError, "nothing to open"
|
raise ValueError, "nothing to open"
|
||||||
|
|
||||||
if ":" in mode:
|
if mode in ("r", "r:*"):
|
||||||
|
# Find out which *open() is appropriate for opening the file.
|
||||||
|
for comptype in cls.OPEN_METH:
|
||||||
|
func = getattr(cls, cls.OPEN_METH[comptype])
|
||||||
|
try:
|
||||||
|
return func(name, "r", fileobj)
|
||||||
|
except (ReadError, CompressionError):
|
||||||
|
continue
|
||||||
|
raise ReadError, "file could not be opened successfully"
|
||||||
|
|
||||||
|
elif ":" in mode:
|
||||||
filemode, comptype = mode.split(":", 1)
|
filemode, comptype = mode.split(":", 1)
|
||||||
filemode = filemode or "r"
|
filemode = filemode or "r"
|
||||||
comptype = comptype or "tar"
|
comptype = comptype or "tar"
|
||||||
|
@ -924,16 +970,6 @@ class TarFile(object):
|
||||||
t._extfileobj = False
|
t._extfileobj = False
|
||||||
return t
|
return t
|
||||||
|
|
||||||
elif mode == "r":
|
|
||||||
# Find out which *open() is appropriate for opening the file.
|
|
||||||
for comptype in cls.OPEN_METH:
|
|
||||||
func = getattr(cls, cls.OPEN_METH[comptype])
|
|
||||||
try:
|
|
||||||
return func(name, "r", fileobj)
|
|
||||||
except (ReadError, CompressionError):
|
|
||||||
continue
|
|
||||||
raise ReadError, "file could not be opened successfully"
|
|
||||||
|
|
||||||
elif mode in "aw":
|
elif mode in "aw":
|
||||||
return cls.taropen(name, mode, fileobj)
|
return cls.taropen(name, mode, fileobj)
|
||||||
|
|
||||||
|
|
|
@ -181,6 +181,18 @@ class ReadStreamTest(ReadTest):
|
||||||
|
|
||||||
stream.close()
|
stream.close()
|
||||||
|
|
||||||
|
class ReadAsteriskTest(ReadTest):
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
mode = self.mode + self.sep + "*"
|
||||||
|
self.tar = tarfile.open(tarname(self.comp), mode)
|
||||||
|
|
||||||
|
class ReadStreamAsteriskTest(ReadStreamTest):
|
||||||
|
|
||||||
|
def setUp(self):
|
||||||
|
mode = self.mode + self.sep + "*"
|
||||||
|
self.tar = tarfile.open(tarname(self.comp), mode)
|
||||||
|
|
||||||
class WriteTest(BaseTest):
|
class WriteTest(BaseTest):
|
||||||
mode = 'w'
|
mode = 'w'
|
||||||
|
|
||||||
|
@ -336,6 +348,11 @@ class WriteTestGzip(WriteTest):
|
||||||
comp = "gz"
|
comp = "gz"
|
||||||
class WriteStreamTestGzip(WriteStreamTest):
|
class WriteStreamTestGzip(WriteStreamTest):
|
||||||
comp = "gz"
|
comp = "gz"
|
||||||
|
class ReadAsteriskTestGzip(ReadAsteriskTest):
|
||||||
|
comp = "gz"
|
||||||
|
class ReadStreamAsteriskTestGzip(ReadStreamAsteriskTest):
|
||||||
|
comp = "gz"
|
||||||
|
|
||||||
|
|
||||||
# Filemode test cases
|
# Filemode test cases
|
||||||
|
|
||||||
|
@ -355,6 +372,10 @@ if bz2:
|
||||||
comp = "bz2"
|
comp = "bz2"
|
||||||
class WriteStreamTestBzip2(WriteStreamTestGzip):
|
class WriteStreamTestBzip2(WriteStreamTestGzip):
|
||||||
comp = "bz2"
|
comp = "bz2"
|
||||||
|
class ReadAsteriskTestBzip2(ReadAsteriskTest):
|
||||||
|
comp = "bz2"
|
||||||
|
class ReadStreamAsteriskTestBzip2(ReadStreamAsteriskTest):
|
||||||
|
comp = "bz2"
|
||||||
|
|
||||||
# If importing gzip failed, discard the Gzip TestCases.
|
# If importing gzip failed, discard the Gzip TestCases.
|
||||||
if not gzip:
|
if not gzip:
|
||||||
|
@ -375,6 +396,8 @@ def test_main():
|
||||||
FileModeTest,
|
FileModeTest,
|
||||||
ReadTest,
|
ReadTest,
|
||||||
ReadStreamTest,
|
ReadStreamTest,
|
||||||
|
ReadAsteriskTest,
|
||||||
|
ReadStreamAsteriskTest,
|
||||||
WriteTest,
|
WriteTest,
|
||||||
WriteStreamTest,
|
WriteStreamTest,
|
||||||
WriteGNULongTest,
|
WriteGNULongTest,
|
||||||
|
@ -386,13 +409,15 @@ def test_main():
|
||||||
if gzip:
|
if gzip:
|
||||||
tests.extend([
|
tests.extend([
|
||||||
ReadTestGzip, ReadStreamTestGzip,
|
ReadTestGzip, ReadStreamTestGzip,
|
||||||
WriteTestGzip, WriteStreamTestGzip
|
WriteTestGzip, WriteStreamTestGzip,
|
||||||
|
ReadAsteriskTestGzip, ReadStreamAsteriskTestGzip
|
||||||
])
|
])
|
||||||
|
|
||||||
if bz2:
|
if bz2:
|
||||||
tests.extend([
|
tests.extend([
|
||||||
ReadTestBzip2, ReadStreamTestBzip2,
|
ReadTestBzip2, ReadStreamTestBzip2,
|
||||||
WriteTestBzip2, WriteStreamTestBzip2
|
WriteTestBzip2, WriteStreamTestBzip2,
|
||||||
|
ReadAsteriskTestBzip2, ReadStreamAsteriskTestBzip2
|
||||||
])
|
])
|
||||||
try:
|
try:
|
||||||
test_support.run_unittest(*tests)
|
test_support.run_unittest(*tests)
|
||||||
|
|
|
@ -78,6 +78,9 @@ Extension Modules
|
||||||
Library
|
Library
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
- Patch #918101: Add tarfile open mode r|* for auto-detection of the
|
||||||
|
stream compression; add, for symmetry reasons, r:* as a synonym of r.
|
||||||
|
|
||||||
- Patch #1043890: Add extractall method to tarfile.
|
- Patch #1043890: Add extractall method to tarfile.
|
||||||
|
|
||||||
- Patch #1075887: Don't require MSVC in distutils if there is nothing
|
- Patch #1075887: Don't require MSVC in distutils if there is nothing
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue