mirror of
https://github.com/python/cpython.git
synced 2025-12-11 19:40:17 +00:00
gh-102120: [TarFile] Add an iter function that doesn't cache (GH-102128)
This commit is contained in:
parent
097b7830cd
commit
50fce89d12
4 changed files with 42 additions and 7 deletions
|
|
@ -318,7 +318,7 @@ be finalized; only the internally used file object will be closed. See the
|
||||||
.. versionadded:: 3.2
|
.. versionadded:: 3.2
|
||||||
Added support for the context management protocol.
|
Added support for the context management protocol.
|
||||||
|
|
||||||
.. class:: TarFile(name=None, mode='r', fileobj=None, format=DEFAULT_FORMAT, tarinfo=TarInfo, dereference=False, ignore_zeros=False, encoding=ENCODING, errors='surrogateescape', pax_headers=None, debug=0, errorlevel=1)
|
.. class:: TarFile(name=None, mode='r', fileobj=None, format=DEFAULT_FORMAT, tarinfo=TarInfo, dereference=False, ignore_zeros=False, encoding=ENCODING, errors='surrogateescape', pax_headers=None, debug=0, errorlevel=1, stream=False)
|
||||||
|
|
||||||
All following arguments are optional and can be accessed as instance attributes
|
All following arguments are optional and can be accessed as instance attributes
|
||||||
as well.
|
as well.
|
||||||
|
|
@ -369,6 +369,9 @@ be finalized; only the internally used file object will be closed. See the
|
||||||
The *pax_headers* argument is an optional dictionary of strings which
|
The *pax_headers* argument is an optional dictionary of strings which
|
||||||
will be added as a pax global header if *format* is :const:`PAX_FORMAT`.
|
will be added as a pax global header if *format* is :const:`PAX_FORMAT`.
|
||||||
|
|
||||||
|
If *stream* is set to :const:`True`, then while reading the archive, information about files
|
||||||
|
in the archive is not cached, saving memory.
|
||||||
|
|
||||||
.. versionchanged:: 3.2
|
.. versionchanged:: 3.2
|
||||||
Use ``'surrogateescape'`` as the default for the *errors* argument.
|
Use ``'surrogateescape'`` as the default for the *errors* argument.
|
||||||
|
|
||||||
|
|
@ -378,6 +381,8 @@ be finalized; only the internally used file object will be closed. See the
|
||||||
.. versionchanged:: 3.6
|
.. versionchanged:: 3.6
|
||||||
The *name* parameter accepts a :term:`path-like object`.
|
The *name* parameter accepts a :term:`path-like object`.
|
||||||
|
|
||||||
|
.. versionchanged:: 3.13
|
||||||
|
Add the *stream* parameter.
|
||||||
|
|
||||||
.. classmethod:: TarFile.open(...)
|
.. classmethod:: TarFile.open(...)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -1633,7 +1633,7 @@ class TarFile(object):
|
||||||
def __init__(self, name=None, mode="r", fileobj=None, format=None,
|
def __init__(self, name=None, mode="r", fileobj=None, format=None,
|
||||||
tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
|
tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
|
||||||
errors="surrogateescape", pax_headers=None, debug=None,
|
errors="surrogateescape", pax_headers=None, debug=None,
|
||||||
errorlevel=None, copybufsize=None):
|
errorlevel=None, copybufsize=None, stream=False):
|
||||||
"""Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
|
"""Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
|
||||||
read from an existing archive, 'a' to append data to an existing
|
read from an existing archive, 'a' to append data to an existing
|
||||||
file or 'w' to create a new file overwriting an existing one. `mode'
|
file or 'w' to create a new file overwriting an existing one. `mode'
|
||||||
|
|
@ -1665,6 +1665,8 @@ class TarFile(object):
|
||||||
self.name = os.path.abspath(name) if name else None
|
self.name = os.path.abspath(name) if name else None
|
||||||
self.fileobj = fileobj
|
self.fileobj = fileobj
|
||||||
|
|
||||||
|
self.stream = stream
|
||||||
|
|
||||||
# Init attributes.
|
# Init attributes.
|
||||||
if format is not None:
|
if format is not None:
|
||||||
self.format = format
|
self.format = format
|
||||||
|
|
@ -2631,6 +2633,8 @@ class TarFile(object):
|
||||||
break
|
break
|
||||||
|
|
||||||
if tarinfo is not None:
|
if tarinfo is not None:
|
||||||
|
# if streaming the file we do not want to cache the tarinfo
|
||||||
|
if not self.stream:
|
||||||
self.members.append(tarinfo)
|
self.members.append(tarinfo)
|
||||||
else:
|
else:
|
||||||
self._loaded = True
|
self._loaded = True
|
||||||
|
|
@ -2682,8 +2686,9 @@ class TarFile(object):
|
||||||
|
|
||||||
def _load(self):
|
def _load(self):
|
||||||
"""Read through the entire archive file and look for readable
|
"""Read through the entire archive file and look for readable
|
||||||
members.
|
members. This should not run if the file is set to stream.
|
||||||
"""
|
"""
|
||||||
|
if not self.stream:
|
||||||
while self.next() is not None:
|
while self.next() is not None:
|
||||||
pass
|
pass
|
||||||
self._loaded = True
|
self._loaded = True
|
||||||
|
|
|
||||||
|
|
@ -100,6 +100,14 @@ class ReadTest(TarTest):
|
||||||
def tearDown(self):
|
def tearDown(self):
|
||||||
self.tar.close()
|
self.tar.close()
|
||||||
|
|
||||||
|
class StreamModeTest(ReadTest):
|
||||||
|
|
||||||
|
# Only needs to change how the tarfile is opened to set
|
||||||
|
# stream mode
|
||||||
|
def setUp(self):
|
||||||
|
self.tar = tarfile.open(self.tarname, mode=self.mode,
|
||||||
|
encoding="iso8859-1",
|
||||||
|
stream=True)
|
||||||
|
|
||||||
class UstarReadTest(ReadTest, unittest.TestCase):
|
class UstarReadTest(ReadTest, unittest.TestCase):
|
||||||
|
|
||||||
|
|
@ -852,6 +860,21 @@ class Bz2StreamReadTest(Bz2Test, StreamReadTest):
|
||||||
class LzmaStreamReadTest(LzmaTest, StreamReadTest):
|
class LzmaStreamReadTest(LzmaTest, StreamReadTest):
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
class TarStreamModeReadTest(StreamModeTest, unittest.TestCase):
|
||||||
|
|
||||||
|
def test_stream_mode_no_cache(self):
|
||||||
|
for _ in self.tar:
|
||||||
|
pass
|
||||||
|
self.assertEqual(self.tar.members, [])
|
||||||
|
|
||||||
|
class GzipStreamModeReadTest(GzipTest, TarStreamModeReadTest):
|
||||||
|
pass
|
||||||
|
|
||||||
|
class Bz2StreamModeReadTest(Bz2Test, TarStreamModeReadTest):
|
||||||
|
pass
|
||||||
|
|
||||||
|
class LzmaStreamModeReadTest(LzmaTest, TarStreamModeReadTest):
|
||||||
|
pass
|
||||||
|
|
||||||
class DetectReadTest(TarTest, unittest.TestCase):
|
class DetectReadTest(TarTest, unittest.TestCase):
|
||||||
def _testfunc_file(self, name, mode):
|
def _testfunc_file(self, name, mode):
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1,2 @@
|
||||||
|
Added a stream mode to ``tarfile`` that allows for reading
|
||||||
|
archives without caching info about the inner files.
|
||||||
Loading…
Add table
Add a link
Reference in a new issue