gh-102950: Implement PEP 706 – Filter for tarfile.extractall (#102953)

This commit is contained in:
Petr Viktorin 2023-04-24 10:58:06 +02:00 committed by GitHub
parent 36860134a9
commit af53046995
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 1786 additions and 99 deletions

View file

@@ -1245,7 +1245,7 @@ def _unpack_zipfile(filename, extract_dir):
finally:
zip.close()
def _unpack_tarfile(filename, extract_dir):
def _unpack_tarfile(filename, extract_dir, *, filter=None):
"""Unpack tar/tar.gz/tar.bz2/tar.xz `filename` to `extract_dir`
"""
import tarfile # late import for breaking circular dependency
@@ -1255,7 +1255,7 @@ def _unpack_tarfile(filename, extract_dir):
raise ReadError(
"%s is not a compressed or uncompressed tar file" % filename)
try:
tarobj.extractall(extract_dir)
tarobj.extractall(extract_dir, filter=filter)
finally:
tarobj.close()
@@ -1288,7 +1288,7 @@ def _find_unpack_format(filename):
return name
return None
def unpack_archive(filename, extract_dir=None, format=None):
def unpack_archive(filename, extract_dir=None, format=None, *, filter=None):
"""Unpack an archive.
`filename` is the name of the archive.
@@ -1302,6 +1302,9 @@ def unpack_archive(filename, extract_dir=None, format=None):
was registered for that extension.
In case none is found, a ValueError is raised.
If `filter` is given, it is passed to the underlying
extraction function.
"""
sys.audit("shutil.unpack_archive", filename, extract_dir, format)
@@ -1311,6 +1314,10 @@ def unpack_archive(filename, extract_dir=None, format=None):
extract_dir = os.fspath(extract_dir)
filename = os.fspath(filename)
if filter is None:
filter_kwargs = {}
else:
filter_kwargs = {'filter': filter}
if format is not None:
try:
format_info = _UNPACK_FORMATS[format]
@@ -1318,7 +1325,7 @@ def unpack_archive(filename, extract_dir=None, format=None):
raise ValueError("Unknown unpack format '{0}'".format(format)) from None
func = format_info[1]
func(filename, extract_dir, **dict(format_info[2]))
func(filename, extract_dir, **dict(format_info[2]), **filter_kwargs)
else:
# we need to look at the registered unpackers supported extensions
format = _find_unpack_format(filename)
@@ -1326,7 +1333,7 @@ def unpack_archive(filename, extract_dir=None, format=None):
raise ReadError("Unknown archive format '{0}'".format(filename))
func = _UNPACK_FORMATS[format][1]
kwargs = dict(_UNPACK_FORMATS[format][2])
kwargs = dict(_UNPACK_FORMATS[format][2]) | filter_kwargs
func(filename, extract_dir, **kwargs)