mirror of
https://github.com/python/cpython.git
synced 2025-11-03 03:22:27 +00:00
gh-102950: Implement PEP 706 – Filter for tarfile.extractall (#102953)
This commit is contained in:
parent
36860134a9
commit
af53046995
8 changed files with 1786 additions and 99 deletions
|
|
@ -662,7 +662,7 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules.
|
|||
Remove the archive format *name* from the list of supported formats.
|
||||
|
||||
|
||||
.. function:: unpack_archive(filename[, extract_dir[, format]])
|
||||
.. function:: unpack_archive(filename[, extract_dir[, format[, filter]]])
|
||||
|
||||
Unpack an archive. *filename* is the full path of the archive.
|
||||
|
||||
|
|
@ -676,6 +676,14 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules.
|
|||
registered for that extension. In case none is found,
|
||||
a :exc:`ValueError` is raised.
|
||||
|
||||
The keyword-only *filter* argument is passed to the underlying unpacking
|
||||
function. For zip files, *filter* is not accepted.
|
||||
For tar files, it is recommended to set it to ``'data'``,
|
||||
unless using features specific to tar and UNIX-like filesystems.
|
||||
(See :ref:`tarfile-extraction-filter` for details.)
|
||||
The ``'data'`` filter will become the default for tar files
|
||||
in Python 3.14.
|
||||
|
||||
.. audit-event:: shutil.unpack_archive filename,extract_dir,format shutil.unpack_archive
|
||||
|
||||
.. warning::
|
||||
|
|
@ -688,6 +696,9 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules.
|
|||
.. versionchanged:: 3.7
|
||||
Accepts a :term:`path-like object` for *filename* and *extract_dir*.
|
||||
|
||||
.. versionchanged:: 3.12
|
||||
Added the *filter* argument.
|
||||
|
||||
.. function:: register_unpack_format(name, extensions, function[, extra_args[, description]])
|
||||
|
||||
Registers an unpack format. *name* is the name of the format and
|
||||
|
|
@ -695,11 +706,14 @@ provided. They rely on the :mod:`zipfile` and :mod:`tarfile` modules.
|
|||
``.zip`` for Zip files.
|
||||
|
||||
*function* is the callable that will be used to unpack archives. The
|
||||
callable will receive the path of the archive, followed by the directory
|
||||
the archive must be extracted to.
|
||||
callable will receive:
|
||||
|
||||
When provided, *extra_args* is a sequence of ``(name, value)`` tuples that
|
||||
will be passed as keyword arguments to the callable.
|
||||
- the path of the archive, as a positional argument;
|
||||
- the directory the archive must be extracted to, as a positional argument;
|
||||
- possibly a *filter* keyword argument, if it was given to
|
||||
:func:`unpack_archive`;
|
||||
- additional keyword arguments, specified by *extra_args* as a sequence
|
||||
of ``(name, value)`` tuples.
|
||||
|
||||
*description* can be provided to describe the format, and will be returned
|
||||
by the :func:`get_unpack_formats` function.
|
||||
|
|
|
|||
|
|
@ -36,6 +36,13 @@ Some facts and figures:
|
|||
.. versionchanged:: 3.3
|
||||
Added support for :mod:`lzma` compression.
|
||||
|
||||
.. versionchanged:: 3.12
|
||||
Archives are extracted using a :ref:`filter <tarfile-extraction-filter>`,
|
||||
which makes it possible to either limit surprising/dangerous features,
|
||||
or to acknowledge that they are expected and the archive is fully trusted.
|
||||
By default, archives are fully trusted, but this default is deprecated
|
||||
and slated to change in Python 3.14.
|
||||
|
||||
|
||||
.. function:: open(name=None, mode='r', fileobj=None, bufsize=10240, **kwargs)
|
||||
|
||||
|
|
@ -209,6 +216,38 @@ The :mod:`tarfile` module defines the following exceptions:
|
|||
Is raised by :meth:`TarInfo.frombuf` if the buffer it gets is invalid.
|
||||
|
||||
|
||||
.. exception:: FilterError
|
||||
|
||||
Base class for exceptions raised when members are :ref:`refused <tarfile-extraction-refuse>` by
|
||||
filters.
|
||||
|
||||
.. attribute:: tarinfo
|
||||
|
||||
Information about the member that the filter refused to extract,
|
||||
as :ref:`TarInfo <tarinfo-objects>`.
|
||||
|
||||
.. exception:: AbsolutePathError
|
||||
|
||||
Raised to refuse extracting a member with an absolute path.
|
||||
|
||||
.. exception:: OutsideDestinationError
|
||||
|
||||
Raised to refuse extracting a member outside the destination directory.
|
||||
|
||||
.. exception:: SpecialFileError
|
||||
|
||||
Raised to refuse extracting a special file (e.g. a device or pipe).
|
||||
|
||||
.. exception:: AbsoluteLinkError
|
||||
|
||||
Raised to refuse extracting a symbolic link with an absolute path.
|
||||
|
||||
.. exception:: LinkOutsideDestinationError
|
||||
|
||||
Raised to refuse extracting a symbolic link pointing outside the destination
|
||||
directory.
|
||||
|
||||
|
||||
The following constants are available at the module level:
|
||||
|
||||
.. data:: ENCODING
|
||||
|
|
@ -319,11 +358,8 @@ be finalized; only the internally used file object will be closed. See the
|
|||
*debug* can be set from ``0`` (no debug messages) up to ``3`` (all debug
|
||||
messages). The messages are written to ``sys.stderr``.
|
||||
|
||||
If *errorlevel* is ``0``, all errors are ignored when using :meth:`TarFile.extract`.
|
||||
Nevertheless, they appear as error messages in the debug output, when debugging
|
||||
is enabled. If ``1``, all *fatal* errors are raised as :exc:`OSError`
|
||||
exceptions. If ``2``, all *non-fatal* errors are raised as :exc:`TarError`
|
||||
exceptions as well.
|
||||
*errorlevel* controls how extraction errors are handled,
|
||||
see :attr:`the corresponding attribute <TarFile.errorlevel>`.
|
||||
|
||||
The *encoding* and *errors* arguments define the character encoding to be
|
||||
used for reading or writing the archive and how conversion errors are going
|
||||
|
|
@ -390,7 +426,7 @@ be finalized; only the internally used file object will be closed. See the
|
|||
available.
|
||||
|
||||
|
||||
.. method:: TarFile.extractall(path=".", members=None, *, numeric_owner=False)
|
||||
.. method:: TarFile.extractall(path=".", members=None, *, numeric_owner=False, filter=None)
|
||||
|
||||
Extract all members from the archive to the current working directory or
|
||||
directory *path*. If optional *members* is given, it must be a subset of the
|
||||
|
|
@ -404,6 +440,12 @@ be finalized; only the internally used file object will be closed. See the
|
|||
are used to set the owner/group for the extracted files. Otherwise, the named
|
||||
values from the tarfile are used.
|
||||
|
||||
The *filter* argument specifies how ``members`` are modified or rejected
|
||||
before extraction.
|
||||
See :ref:`tarfile-extraction-filter` for details.
|
||||
It is recommended to set this explicitly depending on which *tar* features
|
||||
you need to support.
|
||||
|
||||
.. warning::
|
||||
|
||||
Never extract archives from untrusted sources without prior inspection.
|
||||
|
|
@ -411,14 +453,20 @@ be finalized; only the internally used file object will be closed. See the
|
|||
that have absolute filenames starting with ``"/"`` or filenames with two
|
||||
dots ``".."``.
|
||||
|
||||
Set ``filter='data'`` to prevent the most dangerous security issues,
|
||||
and read the :ref:`tarfile-extraction-filter` section for details.
|
||||
|
||||
.. versionchanged:: 3.5
|
||||
Added the *numeric_owner* parameter.
|
||||
|
||||
.. versionchanged:: 3.6
|
||||
The *path* parameter accepts a :term:`path-like object`.
|
||||
|
||||
.. versionchanged:: 3.12
|
||||
Added the *filter* parameter.
|
||||
|
||||
.. method:: TarFile.extract(member, path="", set_attrs=True, *, numeric_owner=False)
|
||||
|
||||
.. method:: TarFile.extract(member, path="", set_attrs=True, *, numeric_owner=False, filter=None)
|
||||
|
||||
Extract a member from the archive to the current working directory, using its
|
||||
full name. Its file information is extracted as accurately as possible. *member*
|
||||
|
|
@ -426,9 +474,8 @@ be finalized; only the internally used file object will be closed. See the
|
|||
directory using *path*. *path* may be a :term:`path-like object`.
|
||||
File attributes (owner, mtime, mode) are set unless *set_attrs* is false.
|
||||
|
||||
If *numeric_owner* is :const:`True`, the uid and gid numbers from the tarfile
|
||||
are used to set the owner/group for the extracted files. Otherwise, the named
|
||||
values from the tarfile are used.
|
||||
The *numeric_owner* and *filter* arguments are the same as
|
||||
for :meth:`extractall`.
|
||||
|
||||
.. note::
|
||||
|
||||
|
|
@ -439,6 +486,9 @@ be finalized; only the internally used file object will be closed. See the
|
|||
|
||||
See the warning for :meth:`extractall`.
|
||||
|
||||
Set ``filter='data'`` to prevent the most dangerous security issues,
|
||||
and read the :ref:`tarfile-extraction-filter` section for details.
|
||||
|
||||
.. versionchanged:: 3.2
|
||||
Added the *set_attrs* parameter.
|
||||
|
||||
|
|
@ -448,6 +498,9 @@ be finalized; only the internally used file object will be closed. See the
|
|||
.. versionchanged:: 3.6
|
||||
The *path* parameter accepts a :term:`path-like object`.
|
||||
|
||||
.. versionchanged:: 3.12
|
||||
Added the *filter* parameter.
|
||||
|
||||
|
||||
.. method:: TarFile.extractfile(member)
|
||||
|
||||
|
|
@ -460,6 +513,55 @@ be finalized; only the internally used file object will be closed. See the
|
|||
.. versionchanged:: 3.3
|
||||
Return an :class:`io.BufferedReader` object.
|
||||
|
||||
.. attribute:: TarFile.errorlevel
|
||||
:type: int
|
||||
|
||||
If *errorlevel* is ``0``, errors are ignored when using :meth:`TarFile.extract`
|
||||
and :meth:`TarFile.extractall`.
|
||||
Nevertheless, they appear as error messages in the debug output when
|
||||
*debug* is greater than 0.
|
||||
If ``1`` (the default), all *fatal* errors are raised as :exc:`OSError` or
|
||||
:exc:`FilterError` exceptions. If ``2``, all *non-fatal* errors are raised
|
||||
as :exc:`TarError` exceptions as well.
|
||||
|
||||
Some exceptions, e.g. ones caused by wrong argument types or data
|
||||
corruption, are always raised.
|
||||
|
||||
Custom :ref:`extraction filters <tarfile-extraction-filter>`
|
||||
should raise :exc:`FilterError` for *fatal* errors
|
||||
and :exc:`ExtractError` for *non-fatal* ones.
|
||||
|
||||
Note that when an exception is raised, the archive may be partially
|
||||
extracted. It is the user’s responsibility to clean up.
|
||||
|
||||
.. attribute:: TarFile.extraction_filter
|
||||
|
||||
.. versionadded:: 3.12
|
||||
|
||||
The :ref:`extraction filter <tarfile-extraction-filter>` used
|
||||
as a default for the *filter* argument of :meth:`~TarFile.extract`
|
||||
and :meth:`~TarFile.extractall`.
|
||||
|
||||
The attribute may be ``None`` or a callable.
|
||||
String names are not allowed for this attribute, unlike the *filter*
|
||||
argument to :meth:`~TarFile.extract`.
|
||||
|
||||
If ``extraction_filter`` is ``None`` (the default),
|
||||
calling an extraction method without a *filter* argument will raise a
|
||||
``DeprecationWarning``,
|
||||
and fall back to the :func:`fully_trusted <fully_trusted_filter>` filter,
|
||||
whose dangerous behavior matches previous versions of Python.
|
||||
|
||||
In Python 3.14+, leaving ``extraction_filter=None`` will cause
|
||||
extraction methods to use the :func:`data <data_filter>` filter by default.
|
||||
|
||||
The attribute may be set on instances or overridden in subclasses.
|
||||
It is also possible to set it on the ``TarFile`` class itself to set a
|
||||
global default, although, since it affects all uses of *tarfile*,
|
||||
it is best practice to only do so in top-level applications or
|
||||
:mod:`site configuration <site>`.
|
||||
To set a global default this way, a filter function needs to be wrapped in
|
||||
:func:`staticmethod()` to prevent injection of a ``self`` argument.
|
||||
|
||||
.. method:: TarFile.add(name, arcname=None, recursive=True, *, filter=None)
|
||||
|
||||
|
|
@ -535,8 +637,23 @@ permissions, owner etc.), it provides some useful methods to determine its type.
|
|||
It does *not* contain the file's data itself.
|
||||
|
||||
:class:`TarInfo` objects are returned by :class:`TarFile`'s methods
|
||||
:meth:`getmember`, :meth:`getmembers` and :meth:`gettarinfo`.
|
||||
:meth:`~TarFile.getmember`, :meth:`~TarFile.getmembers` and
|
||||
:meth:`~TarFile.gettarinfo`.
|
||||
|
||||
Modifying the objects returned by :meth:`~!TarFile.getmember` or
|
||||
:meth:`~!TarFile.getmembers` will affect all subsequent
|
||||
operations on the archive.
|
||||
For cases where this is unwanted, you can use :mod:`copy.copy() <copy>` or
|
||||
call the :meth:`~TarInfo.replace` method to create a modified copy in one step.
|
||||
|
||||
Several attributes can be set to ``None`` to indicate that a piece of metadata
|
||||
is unused or unknown.
|
||||
Different :class:`TarInfo` methods handle ``None`` differently:
|
||||
|
||||
- The :meth:`~TarFile.extract` or :meth:`~TarFile.extractall` methods will
|
||||
ignore the corresponding metadata, leaving it set to a default.
|
||||
- :meth:`~TarFile.addfile` will fail.
|
||||
- :meth:`~TarFile.list` will print a placeholder string.
|
||||
|
||||
.. class:: TarInfo(name="")
|
||||
|
||||
|
|
@ -569,24 +686,39 @@ A ``TarInfo`` object has the following public data attributes:
|
|||
|
||||
|
||||
.. attribute:: TarInfo.name
|
||||
:type: str
|
||||
|
||||
Name of the archive member.
|
||||
|
||||
|
||||
.. attribute:: TarInfo.size
|
||||
:type: int
|
||||
|
||||
Size in bytes.
|
||||
|
||||
|
||||
.. attribute:: TarInfo.mtime
|
||||
:type: int | float
|
||||
|
||||
Time of last modification.
|
||||
Time of last modification in seconds since the :ref:`epoch <epoch>`,
|
||||
as in :attr:`os.stat_result.st_mtime`.
|
||||
|
||||
.. versionchanged:: 3.12
|
||||
|
||||
Can be set to ``None`` for :meth:`~TarFile.extract` and
|
||||
:meth:`~TarFile.extractall`, causing extraction to skip applying this
|
||||
attribute.
|
||||
|
||||
.. attribute:: TarInfo.mode
|
||||
:type: int
|
||||
|
||||
Permission bits.
|
||||
Permission bits, as for :func:`os.chmod`.
|
||||
|
||||
.. versionchanged:: 3.12
|
||||
|
||||
Can be set to ``None`` for :meth:`~TarFile.extract` and
|
||||
:meth:`~TarFile.extractall`, causing extraction to skip applying this
|
||||
attribute.
|
||||
|
||||
.. attribute:: TarInfo.type
|
||||
|
||||
|
|
@ -598,35 +730,76 @@ A ``TarInfo`` object has the following public data attributes:
|
|||
|
||||
|
||||
.. attribute:: TarInfo.linkname
|
||||
:type: str
|
||||
|
||||
Name of the target file, which is only present in :class:`TarInfo` objects
|
||||
of type :const:`LNKTYPE` and :const:`SYMTYPE`.
|
||||
|
||||
|
||||
.. attribute:: TarInfo.uid
|
||||
:type: int
|
||||
|
||||
User ID of the user who originally stored this member.
|
||||
|
||||
.. versionchanged:: 3.12
|
||||
|
||||
Can be set to ``None`` for :meth:`~TarFile.extract` and
|
||||
:meth:`~TarFile.extractall`, causing extraction to skip applying this
|
||||
attribute.
|
||||
|
||||
.. attribute:: TarInfo.gid
|
||||
:type: int
|
||||
|
||||
Group ID of the user who originally stored this member.
|
||||
|
||||
.. versionchanged:: 3.12
|
||||
|
||||
Can be set to ``None`` for :meth:`~TarFile.extract` and
|
||||
:meth:`~TarFile.extractall`, causing extraction to skip applying this
|
||||
attribute.
|
||||
|
||||
.. attribute:: TarInfo.uname
|
||||
:type: str
|
||||
|
||||
User name.
|
||||
|
||||
.. versionchanged:: 3.12
|
||||
|
||||
Can be set to ``None`` for :meth:`~TarFile.extract` and
|
||||
:meth:`~TarFile.extractall`, causing extraction to skip applying this
|
||||
attribute.
|
||||
|
||||
.. attribute:: TarInfo.gname
|
||||
:type: str
|
||||
|
||||
Group name.
|
||||
|
||||
.. versionchanged:: 3.12
|
||||
|
||||
Can be set to ``None`` for :meth:`~TarFile.extract` and
|
||||
:meth:`~TarFile.extractall`, causing extraction to skip applying this
|
||||
attribute.
|
||||
|
||||
.. attribute:: TarInfo.pax_headers
|
||||
:type: dict
|
||||
|
||||
A dictionary containing key-value pairs of an associated pax extended header.
|
||||
|
||||
.. method:: TarInfo.replace(name=..., mtime=..., mode=..., linkname=...,
|
||||
uid=..., gid=..., uname=..., gname=...,
|
||||
deep=True)
|
||||
|
||||
.. versionadded:: 3.12
|
||||
|
||||
Return a *new* copy of the :class:`!TarInfo` object with the given attributes
|
||||
changed. For example, to return a ``TarInfo`` with the group name set to
|
||||
``'staff'``, use::
|
||||
|
||||
new_tarinfo = old_tarinfo.replace(gname='staff')
|
||||
|
||||
By default, a deep copy is made.
|
||||
If *deep* is false, the copy is shallow, i.e. ``pax_headers``
|
||||
and any custom attributes are shared with the original ``TarInfo`` object.
|
||||
|
||||
A :class:`TarInfo` object also provides some convenient query methods:
|
||||
|
||||
|
|
@ -676,9 +849,258 @@ A :class:`TarInfo` object also provides some convenient query methods:
|
|||
Return :const:`True` if it is one of character device, block device or FIFO.
|
||||
|
||||
|
||||
.. _tarfile-extraction-filter:
|
||||
|
||||
Extraction filters
|
||||
------------------
|
||||
|
||||
.. versionadded:: 3.12
|
||||
|
||||
The *tar* format is designed to capture all details of a UNIX-like filesystem,
|
||||
which makes it very powerful.
|
||||
Unfortunately, the features make it easy to create tar files that have
|
||||
unintended -- and possibly malicious -- effects when extracted.
|
||||
For example, extracting a tar file can overwrite arbitrary files in various
|
||||
ways (e.g. by using absolute paths, ``..`` path components, or symlinks that
|
||||
affect later members).
|
||||
|
||||
In most cases, the full functionality is not needed.
|
||||
Therefore, *tarfile* supports extraction filters: a mechanism to limit
|
||||
functionality, and thus mitigate some of the security issues.
|
||||
|
||||
.. seealso::
|
||||
|
||||
:pep:`706`
|
||||
Contains further motivation and rationale behind the design.
|
||||
|
||||
The *filter* argument to :meth:`TarFile.extract` or :meth:`~TarFile.extractall`
|
||||
can be:
|
||||
|
||||
* the string ``'fully_trusted'``: Honor all metadata as specified in the
|
||||
archive.
|
||||
Should be used if the user trusts the archive completely, or implements
|
||||
their own complex verification.
|
||||
|
||||
* the string ``'tar'``: Honor most *tar*-specific features (i.e. features of
|
||||
UNIX-like filesystems), but block features that are very likely to be
|
||||
surprising or malicious. See :func:`tar_filter` for details.
|
||||
|
||||
* the string ``'data'``: Ignore or block most features specific to UNIX-like
|
||||
filesystems. Intended for extracting cross-platform data archives.
|
||||
See :func:`data_filter` for details.
|
||||
|
||||
* ``None`` (default): Use :attr:`TarFile.extraction_filter`.
|
||||
|
||||
If that is also ``None`` (the default), raise a ``DeprecationWarning``,
|
||||
and fall back to the ``'fully_trusted'`` filter, whose dangerous behavior
|
||||
matches previous versions of Python.
|
||||
|
||||
In Python 3.14, the ``'data'`` filter will become the default instead.
|
||||
It's possible to switch earlier; see :attr:`TarFile.extraction_filter`.
|
||||
|
||||
* A callable which will be called for each extracted member with a
|
||||
:ref:`TarInfo <tarinfo-objects>` describing the member and the destination
|
||||
path to where the archive is extracted (i.e. the same path is used for all
|
||||
members)::
|
||||
|
||||
filter(member: TarInfo, path: str, /) -> TarInfo | None
|
||||
|
||||
The callable is called just before each member is extracted, so it can
|
||||
take the current state of the disk into account.
|
||||
It can:
|
||||
|
||||
- return a :class:`TarInfo` object which will be used instead of the metadata
|
||||
in the archive, or
|
||||
- return ``None``, in which case the member will be skipped, or
|
||||
- raise an exception to abort the operation or skip the member,
|
||||
depending on :attr:`~TarFile.errorlevel`.
|
||||
Note that when extraction is aborted, :meth:`~TarFile.extractall` may leave
|
||||
the archive partially extracted. It does not attempt to clean up.
|
||||
|
||||
Default named filters
|
||||
~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
The pre-defined, named filters are available as functions, so they can be
|
||||
reused in custom filters:
|
||||
|
||||
.. function:: fully_trusted_filter(member, path, /)
|
||||
|
||||
Return *member* unchanged.
|
||||
|
||||
This implements the ``'fully_trusted'`` filter.
|
||||
|
||||
.. function:: tar_filter(member, path, /)
|
||||
|
||||
Implements the ``'tar'`` filter.
|
||||
|
||||
- Strip leading slashes (``/`` and :attr:`os.sep`) from filenames.
|
||||
- :ref:`Refuse <tarfile-extraction-refuse>` to extract files with absolute
|
||||
paths (in case the name is absolute
|
||||
even after stripping slashes, e.g. ``C:/foo`` on Windows).
|
||||
This raises :class:`~tarfile.AbsolutePathError`.
|
||||
- :ref:`Refuse <tarfile-extraction-refuse>` to extract files whose absolute
|
||||
path (after following symlinks) would end up outside the destination.
|
||||
This raises :class:`~tarfile.OutsideDestinationError`.
|
||||
- Clear high mode bits (setuid, setgid, sticky) and group/other write bits
|
||||
(:attr:`~stat.S_IWGRP`|:attr:`~stat.S_IWOTH`).
|
||||
|
||||
Return the modified ``TarInfo`` member.
|
||||
|
||||
.. function:: data_filter(member, path, /)
|
||||
|
||||
Implements the ``'data'`` filter.
|
||||
In addition to what ``tar_filter`` does:
|
||||
|
||||
- :ref:`Refuse <tarfile-extraction-refuse>` to extract links (hard or soft)
|
||||
that link to absolute paths, or ones that link outside the destination.
|
||||
|
||||
This raises :class:`~tarfile.AbsoluteLinkError` or
|
||||
:class:`~tarfile.LinkOutsideDestinationError`.
|
||||
|
||||
Note that such files are refused even on platforms that do not support
|
||||
symbolic links.
|
||||
|
||||
- :ref:`Refuse <tarfile-extraction-refuse>` to extract device files
|
||||
(including pipes).
|
||||
This raises :class:`~tarfile.SpecialFileError`.
|
||||
|
||||
- For regular files, including hard links:
|
||||
|
||||
- Set the owner read and write permissions
|
||||
(:attr:`~stat.S_IRUSR`|:attr:`~stat.S_IWUSR`).
|
||||
- Remove the group & other executable permission
|
||||
(:attr:`~stat.S_IXGRP`|:attr:`~stat.S_IXOTH`)
|
||||
if the owner doesn’t have it (:attr:`~stat.S_IXUSR`).
|
||||
|
||||
- For other files (directories), set ``mode`` to ``None``, so
|
||||
that extraction methods skip applying permission bits.
|
||||
- Set user and group info (``uid``, ``gid``, ``uname``, ``gname``)
|
||||
to ``None``, so that extraction methods skip setting it.
|
||||
|
||||
Return the modified ``TarInfo`` member.
|
||||
|
||||
|
||||
.. _tarfile-extraction-refuse:
|
||||
|
||||
Filter errors
|
||||
~~~~~~~~~~~~~
|
||||
|
||||
When a filter refuses to extract a file, it will raise an appropriate exception,
|
||||
a subclass of :class:`~tarfile.FilterError`.
|
||||
This will abort the extraction if :attr:`TarFile.errorlevel` is 1 or more.
|
||||
With ``errorlevel=0`` the error will be logged and the member will be skipped,
|
||||
but extraction will continue.
|
||||
|
||||
|
||||
Hints for further verification
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Even with ``filter='data'``, *tarfile* is not suited for extracting untrusted
|
||||
files without prior inspection.
|
||||
Among other issues, the pre-defined filters do not prevent denial-of-service
|
||||
attacks. Users should do additional checks.
|
||||
|
||||
Here is an incomplete list of things to consider:
|
||||
|
||||
* Extract to a :func:`new temporary directory <tempfile.mkdtemp>`
|
||||
to prevent e.g. exploiting pre-existing links, and to make it easier to
|
||||
clean up after a failed extraction.
|
||||
* When working with untrusted data, use external (e.g. OS-level) limits on
|
||||
disk, memory and CPU usage.
|
||||
* Check filenames against an allow-list of characters
|
||||
(to filter out control characters, confusables, foreign path separators,
|
||||
etc.).
|
||||
* Check that filenames have expected extensions (discouraging files that
|
||||
execute when you “click on them”, or extension-less files like Windows special device names).
|
||||
* Limit the number of extracted files, total size of extracted data,
|
||||
filename length (including symlink length), and size of individual files.
|
||||
* Check for files that would be shadowed on case-insensitive filesystems.
|
||||
|
||||
Also note that:
|
||||
|
||||
* Tar files may contain multiple versions of the same file.
|
||||
Later ones are expected to overwrite any earlier ones.
|
||||
This feature is crucial to allow updating tape archives, but can be abused
|
||||
maliciously.
|
||||
* *tarfile* does not protect against issues with “live” data,
|
||||
e.g. an attacker tinkering with the destination (or source) directory while
|
||||
extraction (or archiving) is in progress.
|
||||
|
||||
|
||||
Supporting older Python versions
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
Extraction filters were added to Python 3.12, but may be backported to older
|
||||
versions as security updates.
|
||||
To check whether the feature is available, use e.g.
|
||||
``hasattr(tarfile, 'data_filter')`` rather than checking the Python version.
|
||||
|
||||
The following examples show how to support Python versions with and without
|
||||
the feature.
|
||||
Note that setting ``extraction_filter`` will affect any subsequent operations.
|
||||
|
||||
* Fully trusted archive::
|
||||
|
||||
my_tarfile.extraction_filter = (lambda member, path: member)
|
||||
my_tarfile.extractall()
|
||||
|
||||
* Use the ``'data'`` filter if available, but revert to Python 3.11 behavior
|
||||
(``'fully_trusted'``) if this feature is not available::
|
||||
|
||||
my_tarfile.extraction_filter = getattr(tarfile, 'data_filter',
|
||||
(lambda member, path: member))
|
||||
my_tarfile.extractall()
|
||||
|
||||
* Use the ``'data'`` filter; *fail* if it is not available::
|
||||
|
||||
my_tarfile.extractall(filter=tarfile.data_filter)
|
||||
|
||||
or::
|
||||
|
||||
my_tarfile.extraction_filter = tarfile.data_filter
|
||||
my_tarfile.extractall()
|
||||
|
||||
* Use the ``'data'`` filter; *warn* if it is not available::
|
||||
|
||||
if hasattr(tarfile, 'data_filter'):
|
||||
my_tarfile.extractall(filter='data')
|
||||
else:
|
||||
# remove this when no longer needed
|
||||
warn_the_user('Extracting may be unsafe; consider updating Python')
|
||||
my_tarfile.extractall()
|
||||
|
||||
|
||||
Stateful extraction filter example
|
||||
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
|
||||
|
||||
While *tarfile*'s extraction methods take a simple *filter* callable,
|
||||
custom filters may be more complex objects with an internal state.
|
||||
It may be useful to write these as context managers, to be used like this::
|
||||
|
||||
with StatefulFilter() as filter_func:
|
||||
tar.extractall(path, filter=filter_func)
|
||||
|
||||
Such a filter can be written as, for example::
|
||||
|
||||
class StatefulFilter:
|
||||
def __init__(self):
|
||||
self.file_count = 0
|
||||
|
||||
def __enter__(self):
|
||||
return self
|
||||
|
||||
def __call__(self, member, path):
|
||||
self.file_count += 1
|
||||
return member
|
||||
|
||||
def __exit__(self, *exc_info):
|
||||
print(f'{self.file_count} files extracted')
|
||||
|
||||
|
||||
.. _tarfile-commandline:
|
||||
.. program:: tarfile
|
||||
|
||||
|
||||
Command-Line Interface
|
||||
----------------------
|
||||
|
||||
|
|
@ -748,6 +1170,13 @@ Command-line options
|
|||
|
||||
Verbose output.
|
||||
|
||||
.. cmdoption:: --filter <filtername>
|
||||
|
||||
Specifies the *filter* for ``--extract``.
|
||||
See :ref:`tarfile-extraction-filter` for details.
|
||||
Only string names are accepted (that is, ``fully_trusted``, ``tar``,
|
||||
and ``data``).
|
||||
|
||||
.. _tar-examples:
|
||||
|
||||
Examples
|
||||
|
|
@ -757,7 +1186,7 @@ How to extract an entire tar archive to the current working directory::
|
|||
|
||||
import tarfile
|
||||
tar = tarfile.open("sample.tar.gz")
|
||||
tar.extractall()
|
||||
tar.extractall(filter='data')
|
||||
tar.close()
|
||||
|
||||
How to extract a subset of a tar archive with :meth:`TarFile.extractall` using
|
||||
|
|
|
|||
|
|
@ -137,6 +137,13 @@ New Features
|
|||
(Design by Pablo Galindo. Contributed by Pablo Galindo and Christian Heimes
|
||||
with contributions from Gregory P. Smith [Google] and Mark Shannon
|
||||
in :gh:`96123`.)
|
||||
* The extraction methods in :mod:`tarfile`, and :func:`shutil.unpack_archive`,
|
||||
have a new *filter* argument that allows limiting tar features that may be
|
||||
surprising or dangerous, such as creating files outside the destination
|
||||
directory.
|
||||
See :ref:`tarfile-extraction-filter` for details.
|
||||
In Python 3.14, the default will switch to ``'data'``.
|
||||
(Contributed by Petr Viktorin in :pep:`706`.)
|
||||
|
||||
|
||||
Other Language Changes
|
||||
|
|
@ -630,6 +637,10 @@ Deprecated
|
|||
* The *onerror* argument of :func:`shutil.rmtree` is deprecated and will be removed
|
||||
in Python 3.14. Use *onexc* instead. (Contributed by Irit Katriel in :gh:`102828`.)
|
||||
|
||||
* Extracting tar archives without specifying *filter* is deprecated until
|
||||
Python 3.14, when the ``'data'`` filter will become the default.
|
||||
See :ref:`tarfile-extraction-filter` for details.
|
||||
|
||||
|
||||
Pending Removal in Python 3.13
|
||||
------------------------------
|
||||
|
|
@ -992,6 +1003,10 @@ Changes in the Python API
|
|||
exception instance, rather than to a ``(typ, exc, tb)`` tuple.
|
||||
(Contributed by Irit Katriel in :gh:`103176`.)
|
||||
|
||||
* When extracting tar files using :mod:`tarfile` or
|
||||
:func:`shutil.unpack_archive`, pass the *filter* argument to limit features
|
||||
that may be surprising or dangerous.
|
||||
See :ref:`tarfile-extraction-filter` for details.
|
||||
|
||||
Build Changes
|
||||
=============
|
||||
|
|
|
|||
|
|
@ -1245,7 +1245,7 @@ def _unpack_zipfile(filename, extract_dir):
|
|||
finally:
|
||||
zip.close()
|
||||
|
||||
def _unpack_tarfile(filename, extract_dir):
|
||||
def _unpack_tarfile(filename, extract_dir, *, filter=None):
|
||||
"""Unpack tar/tar.gz/tar.bz2/tar.xz `filename` to `extract_dir`
|
||||
"""
|
||||
import tarfile # late import for breaking circular dependency
|
||||
|
|
@ -1255,7 +1255,7 @@ def _unpack_tarfile(filename, extract_dir):
|
|||
raise ReadError(
|
||||
"%s is not a compressed or uncompressed tar file" % filename)
|
||||
try:
|
||||
tarobj.extractall(extract_dir)
|
||||
tarobj.extractall(extract_dir, filter=filter)
|
||||
finally:
|
||||
tarobj.close()
|
||||
|
||||
|
|
@ -1288,7 +1288,7 @@ def _find_unpack_format(filename):
|
|||
return name
|
||||
return None
|
||||
|
||||
def unpack_archive(filename, extract_dir=None, format=None):
|
||||
def unpack_archive(filename, extract_dir=None, format=None, *, filter=None):
|
||||
"""Unpack an archive.
|
||||
|
||||
`filename` is the name of the archive.
|
||||
|
|
@ -1302,6 +1302,9 @@ def unpack_archive(filename, extract_dir=None, format=None):
|
|||
was registered for that extension.
|
||||
|
||||
In case none is found, a ValueError is raised.
|
||||
|
||||
If `filter` is given, it is passed to the underlying
|
||||
extraction function.
|
||||
"""
|
||||
sys.audit("shutil.unpack_archive", filename, extract_dir, format)
|
||||
|
||||
|
|
@ -1311,6 +1314,10 @@ def unpack_archive(filename, extract_dir=None, format=None):
|
|||
extract_dir = os.fspath(extract_dir)
|
||||
filename = os.fspath(filename)
|
||||
|
||||
if filter is None:
|
||||
filter_kwargs = {}
|
||||
else:
|
||||
filter_kwargs = {'filter': filter}
|
||||
if format is not None:
|
||||
try:
|
||||
format_info = _UNPACK_FORMATS[format]
|
||||
|
|
@ -1318,7 +1325,7 @@ def unpack_archive(filename, extract_dir=None, format=None):
|
|||
raise ValueError("Unknown unpack format '{0}'".format(format)) from None
|
||||
|
||||
func = format_info[1]
|
||||
func(filename, extract_dir, **dict(format_info[2]))
|
||||
func(filename, extract_dir, **dict(format_info[2]), **filter_kwargs)
|
||||
else:
|
||||
# we need to look at the registered unpackers supported extensions
|
||||
format = _find_unpack_format(filename)
|
||||
|
|
@ -1326,7 +1333,7 @@ def unpack_archive(filename, extract_dir=None, format=None):
|
|||
raise ReadError("Unknown archive format '{0}'".format(filename))
|
||||
|
||||
func = _UNPACK_FORMATS[format][1]
|
||||
kwargs = dict(_UNPACK_FORMATS[format][2])
|
||||
kwargs = dict(_UNPACK_FORMATS[format][2]) | filter_kwargs
|
||||
func(filename, extract_dir, **kwargs)
|
||||
|
||||
|
||||
|
|
|
|||
343
Lib/tarfile.py
343
Lib/tarfile.py
|
|
@ -46,6 +46,7 @@ import time
|
|||
import struct
|
||||
import copy
|
||||
import re
|
||||
import warnings
|
||||
|
||||
try:
|
||||
import pwd
|
||||
|
|
@ -65,7 +66,11 @@ symlink_exception = (AttributeError, NotImplementedError, OSError)
|
|||
__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError", "ReadError",
|
||||
"CompressionError", "StreamError", "ExtractError", "HeaderError",
|
||||
"ENCODING", "USTAR_FORMAT", "GNU_FORMAT", "PAX_FORMAT",
|
||||
"DEFAULT_FORMAT", "open"]
|
||||
"DEFAULT_FORMAT", "open","fully_trusted_filter", "data_filter",
|
||||
"tar_filter", "FilterError", "AbsoluteLinkError",
|
||||
"OutsideDestinationError", "SpecialFileError", "AbsolutePathError",
|
||||
"LinkOutsideDestinationError"]
|
||||
|
||||
|
||||
#---------------------------------------------------------
|
||||
# tar constants
|
||||
|
|
@ -154,6 +159,8 @@ else:
|
|||
def stn(s, length, encoding, errors):
|
||||
"""Convert a string to a null-terminated bytes object.
|
||||
"""
|
||||
if s is None:
|
||||
raise ValueError("metadata cannot contain None")
|
||||
s = s.encode(encoding, errors)
|
||||
return s[:length] + (length - len(s)) * NUL
|
||||
|
||||
|
|
@ -707,9 +714,127 @@ class ExFileObject(io.BufferedReader):
|
|||
super().__init__(fileobj)
|
||||
#class ExFileObject
|
||||
|
||||
|
||||
#-----------------------------
|
||||
# extraction filters (PEP 706)
|
||||
#-----------------------------
|
||||
|
||||
class FilterError(TarError):
    """Base class for the errors raised by the extraction filters."""
|
||||
|
||||
class AbsolutePathError(FilterError):
    """Filter error for a member whose name is an absolute path.

    The offending member is kept on the ``tarinfo`` attribute.
    """

    def __init__(self, tarinfo):
        self.tarinfo = tarinfo
        message = f'member {tarinfo.name!r} has an absolute path'
        super().__init__(message)
|
||||
|
||||
class OutsideDestinationError(FilterError):
    """Filter error for a member that would land outside the destination.

    The offending member is kept on ``tarinfo``; the resolved path that
    triggered the error is kept on the private ``_path`` attribute.
    """

    def __init__(self, tarinfo, path):
        self.tarinfo = tarinfo
        self._path = path
        message = (f'{tarinfo.name!r} would be extracted to {path!r}, '
                   'which is outside the destination')
        super().__init__(message)
|
||||
|
||||
class SpecialFileError(FilterError):
    """Filter error for a member that is a special file.

    The offending member is kept on the ``tarinfo`` attribute.
    """

    def __init__(self, tarinfo):
        self.tarinfo = tarinfo
        message = f'{tarinfo.name!r} is a special file'
        super().__init__(message)
|
||||
|
||||
class AbsoluteLinkError(FilterError):
    """Filter error for a link member whose target is an absolute path.

    The offending member is kept on the ``tarinfo`` attribute.
    """

    def __init__(self, tarinfo):
        self.tarinfo = tarinfo
        message = f'{tarinfo.name!r} is a symlink to an absolute path'
        super().__init__(message)
|
||||
|
||||
class LinkOutsideDestinationError(FilterError):
    """Filter error for a link whose target lies outside the destination.

    The offending member is kept on ``tarinfo``; the resolved link target
    that triggered the error is kept on the private ``_path`` attribute.
    """

    def __init__(self, tarinfo, path):
        self.tarinfo = tarinfo
        self._path = path
        message = (f'{tarinfo.name!r} would link to {path!r}, '
                   'which is outside the destination')
        super().__init__(message)
|
||||
|
||||
def _get_filtered_attrs(member, dest_path, for_data=True):
    """Return the attribute overrides a filter should apply to *member*.

    *member* is the TarInfo being considered and *dest_path* the extraction
    directory.  The result is a dict suitable for ``member.replace(**result)``;
    it is empty when nothing needs to change.

    Checks always performed:
      * leading ``/`` and ``os.sep`` are stripped from the name;
      * a name that is still absolute raises AbsolutePathError;
      * a name resolving outside *dest_path* raises OutsideDestinationError;
      * the mode (when not None) is masked with 0o755.

    Additional handling when *for_data* is true:
      * regular files and hard links lose their executable bits unless the
        owner may execute, and always gain owner read/write;
      * the mode of directories and symlinks is dropped (set to None);
      * any other member type raises SpecialFileError (only checked when the
        member carries a mode);
      * uid, gid, uname and gname are reset to None;
      * absolute link targets raise AbsoluteLinkError and link targets
        resolving outside *dest_path* raise LinkOutsideDestinationError.
    """
    new_attrs = {}
    name = member.name
    dest_path = os.path.realpath(dest_path)
    # Strip leading / (tar's directory separator) from filenames.
    # Include os.sep (target OS directory separator) as well.
    if name.startswith(('/', os.sep)):
        name = new_attrs['name'] = member.path.lstrip('/' + os.sep)
    if os.path.isabs(name):
        # Path is absolute even after stripping.
        # For example, 'C:/foo' on Windows.
        raise AbsolutePathError(member)
    # Ensure we stay in the destination
    target_path = os.path.realpath(os.path.join(dest_path, name))
    if os.path.commonpath([target_path, dest_path]) != dest_path:
        raise OutsideDestinationError(member, target_path)
    # Limit permissions (no high bits, and go-w)
    mode = member.mode
    if mode is not None:
        # Strip high bits & group/other write bits
        mode = mode & 0o755
        if for_data:
            # For data, handle permissions & file types
            if member.isreg() or member.islnk():
                if not mode & 0o100:
                    # Clear executable bits if not executable by user
                    mode &= ~0o111
                # Ensure owner can read & write
                mode |= 0o600
            elif member.isdir() or member.issym():
                # Ignore mode for directories & symlinks
                mode = None
            else:
                # Reject special files
                raise SpecialFileError(member)
        if mode != member.mode:
            new_attrs['mode'] = mode
    if for_data:
        # Ignore ownership for 'data'
        for attr in ('uid', 'gid', 'uname', 'gname'):
            if getattr(member, attr) is not None:
                new_attrs[attr] = None
        # Check link destination for 'data'
        if member.islnk() or member.issym():
            if os.path.isabs(member.linkname):
                raise AbsoluteLinkError(member)
            if member.issym():
                # A symlink target is interpreted relative to the directory
                # that contains the symlink, not the extraction root.
                link_target = os.path.join(dest_path,
                                           os.path.dirname(name),
                                           member.linkname)
            else:
                # A hard link target is a member name, i.e. relative to the
                # archive root.
                link_target = os.path.join(dest_path, member.linkname)
            target_path = os.path.realpath(link_target)
            if os.path.commonpath([target_path, dest_path]) != dest_path:
                raise LinkOutsideDestinationError(member, target_path)
    return new_attrs
|
||||
|
||||
def fully_trusted_filter(member, dest_path):
    """Extraction filter that performs no filtering.

    *member* is handed back untouched; *dest_path* is accepted only to
    satisfy the filter call signature and is not used.
    """
    return member
|
||||
|
||||
def tar_filter(member, dest_path):
    """Extraction filter built on _get_filtered_attrs() with for_data=False.

    Returns *member* itself when no attribute needs changing, otherwise a
    shallow copy of it with the changed attributes applied.
    """
    changes = _get_filtered_attrs(member, dest_path, False)
    if not changes:
        return member
    return member.replace(**changes, deep=False)
|
||||
|
||||
def data_filter(member, dest_path):
    """Extraction filter built on _get_filtered_attrs() with for_data=True.

    Returns *member* itself when no attribute needs changing, otherwise a
    shallow copy of it with the changed attributes applied.
    """
    changes = _get_filtered_attrs(member, dest_path, True)
    if not changes:
        return member
    return member.replace(**changes, deep=False)
|
||||
|
||||
# Filter callables addressable by string name.
_NAMED_FILTERS = dict(
    fully_trusted=fully_trusted_filter,
    tar=tar_filter,
    data=data_filter,
)
|
||||
|
||||
#------------------
|
||||
# Exported Classes
|
||||
#------------------
|
||||
|
||||
# Sentinel for replace() defaults, meaning "don't change the attribute"
|
||||
_KEEP = object()
|
||||
|
||||
class TarInfo(object):
|
||||
"""Informational class which holds the details about an
|
||||
archive member given by a tar header block.
|
||||
|
|
@ -790,12 +915,44 @@ class TarInfo(object):
|
|||
def __repr__(self):
|
||||
return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
|
||||
|
||||
def replace(self, *,
            name=_KEEP, mtime=_KEEP, mode=_KEEP, linkname=_KEEP,
            uid=_KEEP, gid=_KEEP, uname=_KEEP, gname=_KEEP,
            deep=True, _KEEP=_KEEP):
    """Return a copy of self with the given attributes replaced.

    The copy is made with copy.deepcopy() when *deep* is true and with
    copy.copy() otherwise.  Arguments left at their default are not
    touched on the copy.
    """
    duplicate = copy.deepcopy(self) if deep else copy.copy(self)
    requested = (
        ('name', name),
        ('mtime', mtime),
        ('mode', mode),
        ('linkname', linkname),
        ('uid', uid),
        ('gid', gid),
        ('uname', uname),
        ('gname', gname),
    )
    for attribute, value in requested:
        # The sentinel marks "argument not supplied"; None is a real value.
        if value is not _KEEP:
            setattr(duplicate, attribute, value)
    return duplicate
|
||||
|
||||
def get_info(self):
|
||||
"""Return the TarInfo's attributes as a dictionary.
|
||||
"""
|
||||
if self.mode is None:
|
||||
mode = None
|
||||
else:
|
||||
mode = self.mode & 0o7777
|
||||
info = {
|
||||
"name": self.name,
|
||||
"mode": self.mode & 0o7777,
|
||||
"mode": mode,
|
||||
"uid": self.uid,
|
||||
"gid": self.gid,
|
||||
"size": self.size,
|
||||
|
|
@ -818,6 +975,9 @@ class TarInfo(object):
|
|||
"""Return a tar header as a string of 512 byte blocks.
|
||||
"""
|
||||
info = self.get_info()
|
||||
for name, value in info.items():
|
||||
if value is None:
|
||||
raise ValueError("%s may not be None" % name)
|
||||
|
||||
if format == USTAR_FORMAT:
|
||||
return self.create_ustar_header(info, encoding, errors)
|
||||
|
|
@ -948,6 +1108,12 @@ class TarInfo(object):
|
|||
devmajor = stn("", 8, encoding, errors)
|
||||
devminor = stn("", 8, encoding, errors)
|
||||
|
||||
# None values in metadata should cause ValueError.
|
||||
# itn()/stn() do this for all fields except type.
|
||||
filetype = info.get("type", REGTYPE)
|
||||
if filetype is None:
|
||||
raise ValueError("TarInfo.type must not be None")
|
||||
|
||||
parts = [
|
||||
stn(info.get("name", ""), 100, encoding, errors),
|
||||
itn(info.get("mode", 0) & 0o7777, 8, format),
|
||||
|
|
@ -956,7 +1122,7 @@ class TarInfo(object):
|
|||
itn(info.get("size", 0), 12, format),
|
||||
itn(info.get("mtime", 0), 12, format),
|
||||
b" ", # checksum field
|
||||
info.get("type", REGTYPE),
|
||||
filetype,
|
||||
stn(info.get("linkname", ""), 100, encoding, errors),
|
||||
info.get("magic", POSIX_MAGIC),
|
||||
stn(info.get("uname", ""), 32, encoding, errors),
|
||||
|
|
@ -1462,6 +1628,8 @@ class TarFile(object):
|
|||
|
||||
fileobject = ExFileObject # The file-object for extractfile().
|
||||
|
||||
extraction_filter = None # The default filter for extraction.
|
||||
|
||||
def __init__(self, name=None, mode="r", fileobj=None, format=None,
|
||||
tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
|
||||
errors="surrogateescape", pax_headers=None, debug=None,
|
||||
|
|
@ -1936,6 +2104,9 @@ class TarFile(object):
|
|||
members = self
|
||||
for tarinfo in members:
|
||||
if verbose:
|
||||
if tarinfo.mode is None:
|
||||
_safe_print("??????????")
|
||||
else:
|
||||
_safe_print(stat.filemode(tarinfo.mode))
|
||||
_safe_print("%s/%s" % (tarinfo.uname or tarinfo.uid,
|
||||
tarinfo.gname or tarinfo.gid))
|
||||
|
|
@ -1944,6 +2115,9 @@ class TarFile(object):
|
|||
("%d,%d" % (tarinfo.devmajor, tarinfo.devminor)))
|
||||
else:
|
||||
_safe_print("%10d" % tarinfo.size)
|
||||
if tarinfo.mtime is None:
|
||||
_safe_print("????-??-?? ??:??:??")
|
||||
else:
|
||||
_safe_print("%d-%02d-%02d %02d:%02d:%02d" \
|
||||
% time.localtime(tarinfo.mtime)[:6])
|
||||
|
||||
|
|
@ -2032,32 +2206,63 @@ class TarFile(object):
|
|||
|
||||
self.members.append(tarinfo)
|
||||
|
||||
def extractall(self, path=".", members=None, *, numeric_owner=False):
|
||||
def _get_filter_function(self, filter):
    """Resolve *filter* to the callable used for extraction.

    *filter* may be None, a callable, or the string name of a filter in
    _NAMED_FILTERS.

    When *filter* is None, ``self.extraction_filter`` is consulted:
      * if that is None as well, a DeprecationWarning is issued and
        fully_trusted_filter is returned;
      * if it is a string, TypeError is raised;
      * otherwise it is returned as is.

    A callable *filter* is returned unchanged.  Any other value is looked
    up in _NAMED_FILTERS; an unknown name raises ValueError.
    """
    if filter is None:
        filter = self.extraction_filter
        if filter is None:
            # stacklevel=3 skips this helper and the public extract() /
            # extractall() method, so the warning is attributed to the
            # code that called the extraction method.
            warnings.warn(
                'Python 3.14 will, by default, filter extracted tar '
                + 'archives and reject files or modify their metadata. '
                + 'Use the filter argument to control this behavior.',
                DeprecationWarning, stacklevel=3)
            return fully_trusted_filter
        if isinstance(filter, str):
            raise TypeError(
                'String names are not supported for '
                + 'TarFile.extraction_filter. Use a function such as '
                + 'tarfile.data_filter directly.')
        return filter
    if callable(filter):
        return filter
    try:
        return _NAMED_FILTERS[filter]
    except KeyError:
        raise ValueError(f"filter {filter!r} not found") from None
|
||||
|
||||
def extractall(self, path=".", members=None, *, numeric_owner=False,
|
||||
filter=None):
|
||||
"""Extract all members from the archive to the current working
|
||||
directory and set owner, modification time and permissions on
|
||||
directories afterwards. `path' specifies a different directory
|
||||
to extract to. `members' is optional and must be a subset of the
|
||||
list returned by getmembers(). If `numeric_owner` is True, only
|
||||
the numbers for user/group names are used and not the names.
|
||||
|
||||
The `filter` function will be called on each member just
|
||||
before extraction.
|
||||
It can return a changed TarInfo or None to skip the member.
|
||||
String names of common filters are accepted.
|
||||
"""
|
||||
directories = []
|
||||
|
||||
filter_function = self._get_filter_function(filter)
|
||||
if members is None:
|
||||
members = self
|
||||
|
||||
for tarinfo in members:
|
||||
for member in members:
|
||||
tarinfo = self._get_extract_tarinfo(member, filter_function, path)
|
||||
if tarinfo is None:
|
||||
continue
|
||||
if tarinfo.isdir():
|
||||
# Extract directories with a safe mode.
|
||||
# For directories, delay setting attributes until later,
|
||||
# since permissions can interfere with extraction and
|
||||
# extracting contents can reset mtime.
|
||||
directories.append(tarinfo)
|
||||
tarinfo = copy.copy(tarinfo)
|
||||
tarinfo.mode = 0o700
|
||||
# Do not set_attrs directories, as we will do that further down
|
||||
self.extract(tarinfo, path, set_attrs=not tarinfo.isdir(),
|
||||
self._extract_one(tarinfo, path, set_attrs=not tarinfo.isdir(),
|
||||
numeric_owner=numeric_owner)
|
||||
|
||||
# Reverse sort directories.
|
||||
directories.sort(key=lambda a: a.name)
|
||||
directories.reverse()
|
||||
directories.sort(key=lambda a: a.name, reverse=True)
|
||||
|
||||
# Set correct owner, mtime and filemode on directories.
|
||||
for tarinfo in directories:
|
||||
|
|
@ -2067,12 +2272,10 @@ class TarFile(object):
|
|||
self.utime(tarinfo, dirpath)
|
||||
self.chmod(tarinfo, dirpath)
|
||||
except ExtractError as e:
|
||||
if self.errorlevel > 1:
|
||||
raise
|
||||
else:
|
||||
self._dbg(1, "tarfile: %s" % e)
|
||||
self._handle_nonfatal_error(e)
|
||||
|
||||
def extract(self, member, path="", set_attrs=True, *, numeric_owner=False):
|
||||
def extract(self, member, path="", set_attrs=True, *, numeric_owner=False,
|
||||
filter=None):
|
||||
"""Extract a member from the archive to the current working directory,
|
||||
using its full name. Its file information is extracted as accurately
|
||||
as possible. `member' may be a filename or a TarInfo object. You can
|
||||
|
|
@ -2080,36 +2283,71 @@ class TarFile(object):
|
|||
mtime, mode) are set unless `set_attrs' is False. If `numeric_owner`
|
||||
is True, only the numbers for user/group names are used and not
|
||||
the names.
|
||||
"""
|
||||
self._check("r")
|
||||
|
||||
The `filter` function will be called before extraction.
|
||||
It can return a changed TarInfo or None to skip the member.
|
||||
String names of common filters are accepted.
|
||||
"""
|
||||
filter_function = self._get_filter_function(filter)
|
||||
tarinfo = self._get_extract_tarinfo(member, filter_function, path)
|
||||
if tarinfo is not None:
|
||||
self._extract_one(tarinfo, path, set_attrs, numeric_owner)
|
||||
|
||||
def _get_extract_tarinfo(self, member, filter_function, path):
    """Get filtered TarInfo (or None) from member, which might be a str.

    *member* is a TarInfo or a member name (looked up with getmember()).
    *filter_function* is called as ``filter_function(tarinfo, path)``.

    Returns None when the member must be skipped: either the filter
    returned None, or it raised an error that was logged instead of
    re-raised by the error handlers.  Otherwise returns the filtered
    TarInfo; for hard links a copy is returned with ``_link_target`` set
    for makelink().
    """
    if isinstance(member, str):
        unfiltered = self.getmember(member)
    else:
        unfiltered = member

    # Start from "skip": if the filter raises and the error is swallowed,
    # the unfiltered member must NOT be extracted.
    tarinfo = None
    try:
        tarinfo = filter_function(unfiltered, path)
    except (OSError, FilterError) as e:
        self._handle_fatal_error(e)
    except ExtractError as e:
        self._handle_nonfatal_error(e)
    if tarinfo is None:
        self._dbg(2, "tarfile: Excluded %r" % unfiltered.name)
        return None
    # Prepare the link target for makelink().
    if tarinfo.islnk():
        tarinfo = copy.copy(tarinfo)
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)
    return tarinfo
|
||||
|
||||
def _extract_one(self, tarinfo, path, set_attrs, numeric_owner):
|
||||
"""Extract from filtered tarinfo to disk"""
|
||||
self._check("r")
|
||||
|
||||
try:
|
||||
self._extract_member(tarinfo, os.path.join(path, tarinfo.name),
|
||||
set_attrs=set_attrs,
|
||||
numeric_owner=numeric_owner)
|
||||
except OSError as e:
|
||||
if self.errorlevel > 0:
|
||||
raise
|
||||
else:
|
||||
if e.filename is None:
|
||||
self._dbg(1, "tarfile: %s" % e.strerror)
|
||||
else:
|
||||
self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
|
||||
self._handle_fatal_error(e)
|
||||
except ExtractError as e:
|
||||
self._handle_nonfatal_error(e)
|
||||
|
||||
def _handle_nonfatal_error(self, e):
    """Handle non-fatal error (ExtractError) according to errorlevel.

    With errorlevel above 1 the exception currently being handled is
    re-raised; otherwise *e* is only reported through _dbg().
    Must be called from inside an ``except`` block, because the re-raise
    relies on the active exception.
    """
    if self.errorlevel <= 1:
        self._dbg(1, "tarfile: %s" % e)
        return
    raise
|
||||
|
||||
def _handle_fatal_error(self, e):
    """Handle "fatal" error according to self.errorlevel.

    With errorlevel above 0 the exception currently being handled is
    re-raised (so this must be called from inside an ``except`` block).
    Otherwise *e* is only reported through _dbg(): OSErrors by their
    strerror (plus filename when set), anything else by type name and
    message.
    """
    if self.errorlevel > 0:
        raise
    if not isinstance(e, OSError):
        self._dbg(1, "tarfile: %s %s" % (type(e).__name__, e))
    elif e.filename is None:
        self._dbg(1, "tarfile: %s" % e.strerror)
    else:
        self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
|
||||
|
||||
def extractfile(self, member):
|
||||
"""Extract a member from the archive as a file object. `member' may be
|
||||
a filename or a TarInfo object. If `member' is a regular file or
|
||||
|
|
@ -2195,6 +2433,10 @@ class TarFile(object):
|
|||
"""Make a directory called targetpath.
|
||||
"""
|
||||
try:
|
||||
if tarinfo.mode is None:
|
||||
# Use the system's default mode
|
||||
os.mkdir(targetpath)
|
||||
else:
|
||||
# Use a safe mode for the directory, the real mode is set
|
||||
# later in _extract_member().
|
||||
os.mkdir(targetpath, 0o700)
|
||||
|
|
@ -2240,6 +2482,9 @@ class TarFile(object):
|
|||
raise ExtractError("special devices not supported by system")
|
||||
|
||||
mode = tarinfo.mode
|
||||
if mode is None:
|
||||
# Use mknod's default
|
||||
mode = 0o600
|
||||
if tarinfo.isblk():
|
||||
mode |= stat.S_IFBLK
|
||||
else:
|
||||
|
|
@ -2261,7 +2506,6 @@ class TarFile(object):
|
|||
os.unlink(targetpath)
|
||||
os.symlink(tarinfo.linkname, targetpath)
|
||||
else:
|
||||
# See extract().
|
||||
if os.path.exists(tarinfo._link_target):
|
||||
os.link(tarinfo._link_target, targetpath)
|
||||
else:
|
||||
|
|
@ -2286,15 +2530,19 @@ class TarFile(object):
|
|||
u = tarinfo.uid
|
||||
if not numeric_owner:
|
||||
try:
|
||||
if grp:
|
||||
if grp and tarinfo.gname:
|
||||
g = grp.getgrnam(tarinfo.gname)[2]
|
||||
except KeyError:
|
||||
pass
|
||||
try:
|
||||
if pwd:
|
||||
if pwd and tarinfo.uname:
|
||||
u = pwd.getpwnam(tarinfo.uname)[2]
|
||||
except KeyError:
|
||||
pass
|
||||
if g is None:
|
||||
g = -1
|
||||
if u is None:
|
||||
u = -1
|
||||
try:
|
||||
if tarinfo.issym() and hasattr(os, "lchown"):
|
||||
os.lchown(targetpath, u, g)
|
||||
|
|
@ -2306,6 +2554,8 @@ class TarFile(object):
|
|||
def chmod(self, tarinfo, targetpath):
|
||||
"""Set file permissions of targetpath according to tarinfo.
|
||||
"""
|
||||
if tarinfo.mode is None:
|
||||
return
|
||||
try:
|
||||
os.chmod(targetpath, tarinfo.mode)
|
||||
except OSError as e:
|
||||
|
|
@ -2314,10 +2564,13 @@ class TarFile(object):
|
|||
def utime(self, tarinfo, targetpath):
|
||||
"""Set modification time of targetpath according to tarinfo.
|
||||
"""
|
||||
mtime = tarinfo.mtime
|
||||
if mtime is None:
|
||||
return
|
||||
if not hasattr(os, 'utime'):
|
||||
return
|
||||
try:
|
||||
os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
|
||||
os.utime(targetpath, (mtime, mtime))
|
||||
except OSError as e:
|
||||
raise ExtractError("could not change modification time") from e
|
||||
|
||||
|
|
@ -2395,13 +2648,26 @@ class TarFile(object):
|
|||
members = self.getmembers()
|
||||
|
||||
# Limit the member search list up to tarinfo.
|
||||
skipping = False
|
||||
if tarinfo is not None:
|
||||
members = members[:members.index(tarinfo)]
|
||||
try:
|
||||
index = members.index(tarinfo)
|
||||
except ValueError:
|
||||
# The given starting point might be a (modified) copy.
|
||||
# We'll later skip members until we find an equivalent.
|
||||
skipping = True
|
||||
else:
|
||||
# Happy fast path
|
||||
members = members[:index]
|
||||
|
||||
if normalize:
|
||||
name = os.path.normpath(name)
|
||||
|
||||
for member in reversed(members):
|
||||
if skipping:
|
||||
if tarinfo.offset == member.offset:
|
||||
skipping = False
|
||||
continue
|
||||
if normalize:
|
||||
member_name = os.path.normpath(member.name)
|
||||
else:
|
||||
|
|
@ -2410,6 +2676,10 @@ class TarFile(object):
|
|||
if name == member_name:
|
||||
return member
|
||||
|
||||
if skipping:
|
||||
# Starting point was not found
|
||||
raise ValueError(tarinfo)
|
||||
|
||||
def _load(self):
|
||||
"""Read through the entire archive file and look for readable
|
||||
members.
|
||||
|
|
@ -2500,6 +2770,7 @@ class TarFile(object):
|
|||
#--------------------
|
||||
# exported functions
|
||||
#--------------------
|
||||
|
||||
def is_tarfile(name):
|
||||
"""Return True if name points to a tar archive that we
|
||||
are able to handle, else return False.
|
||||
|
|
@ -2528,6 +2799,10 @@ def main():
|
|||
parser = argparse.ArgumentParser(description=description)
|
||||
parser.add_argument('-v', '--verbose', action='store_true', default=False,
|
||||
help='Verbose output')
|
||||
parser.add_argument('--filter', metavar='<filtername>',
|
||||
choices=_NAMED_FILTERS,
|
||||
help='Filter for extraction')
|
||||
|
||||
group = parser.add_mutually_exclusive_group(required=True)
|
||||
group.add_argument('-l', '--list', metavar='<tarfile>',
|
||||
help='Show listing of a tarfile')
|
||||
|
|
@ -2539,8 +2814,12 @@ def main():
|
|||
help='Create tarfile from sources')
|
||||
group.add_argument('-t', '--test', metavar='<tarfile>',
|
||||
help='Test if a tarfile is valid')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.filter and args.extract is None:
|
||||
parser.exit(1, '--filter is only valid for extraction\n')
|
||||
|
||||
if args.test is not None:
|
||||
src = args.test
|
||||
if is_tarfile(src):
|
||||
|
|
@ -2571,7 +2850,7 @@ def main():
|
|||
|
||||
if is_tarfile(src):
|
||||
with TarFile.open(src, 'r:*') as tf:
|
||||
tf.extractall(path=curdir)
|
||||
tf.extractall(path=curdir, filter=args.filter)
|
||||
if args.verbose:
|
||||
if curdir == '.':
|
||||
msg = '{!r} file is extracted.'.format(src)
|
||||
|
|
|
|||
|
|
@ -33,6 +33,7 @@ except ImportError:
|
|||
from test import support
|
||||
from test.support import os_helper
|
||||
from test.support.os_helper import TESTFN, FakePath
|
||||
from test.support import warnings_helper
|
||||
|
||||
TESTFN2 = TESTFN + "2"
|
||||
TESTFN_SRC = TESTFN + "_SRC"
|
||||
|
|
@ -1841,12 +1842,14 @@ class TestArchives(BaseTest, unittest.TestCase):
|
|||
|
||||
### shutil.unpack_archive
|
||||
|
||||
def check_unpack_archive(self, format):
|
||||
self.check_unpack_archive_with_converter(format, lambda path: path)
|
||||
self.check_unpack_archive_with_converter(format, pathlib.Path)
|
||||
self.check_unpack_archive_with_converter(format, FakePath)
|
||||
def check_unpack_archive(self, format, **kwargs):
|
||||
self.check_unpack_archive_with_converter(
|
||||
format, lambda path: path, **kwargs)
|
||||
self.check_unpack_archive_with_converter(
|
||||
format, pathlib.Path, **kwargs)
|
||||
self.check_unpack_archive_with_converter(format, FakePath, **kwargs)
|
||||
|
||||
def check_unpack_archive_with_converter(self, format, converter):
|
||||
def check_unpack_archive_with_converter(self, format, converter, **kwargs):
|
||||
root_dir, base_dir = self._create_files()
|
||||
expected = rlistdir(root_dir)
|
||||
expected.remove('outer')
|
||||
|
|
@ -1856,36 +1859,48 @@ class TestArchives(BaseTest, unittest.TestCase):
|
|||
|
||||
# let's try to unpack it now
|
||||
tmpdir2 = self.mkdtemp()
|
||||
unpack_archive(converter(filename), converter(tmpdir2))
|
||||
unpack_archive(converter(filename), converter(tmpdir2), **kwargs)
|
||||
self.assertEqual(rlistdir(tmpdir2), expected)
|
||||
|
||||
# and again, this time with the format specified
|
||||
tmpdir3 = self.mkdtemp()
|
||||
unpack_archive(converter(filename), converter(tmpdir3), format=format)
|
||||
unpack_archive(converter(filename), converter(tmpdir3), format=format,
|
||||
**kwargs)
|
||||
self.assertEqual(rlistdir(tmpdir3), expected)
|
||||
|
||||
self.assertRaises(shutil.ReadError, unpack_archive, converter(TESTFN))
|
||||
self.assertRaises(ValueError, unpack_archive, converter(TESTFN), format='xxx')
|
||||
with self.assertRaises(shutil.ReadError):
|
||||
unpack_archive(converter(TESTFN), **kwargs)
|
||||
with self.assertRaises(ValueError):
|
||||
unpack_archive(converter(TESTFN), format='xxx', **kwargs)
|
||||
|
||||
def check_unpack_tarball(self, format):
    """Run the unpack checks for a tar-based *format*.

    The checks run once per named filter and then once with no filter,
    where a DeprecationWarning mentioning 'Python 3.14' is expected.
    """
    for filter_name in ('fully_trusted', 'data'):
        self.check_unpack_archive(format, filter=filter_name)
    expected_warning = ('Python 3.14', DeprecationWarning)
    with warnings_helper.check_warnings(expected_warning):
        self.check_unpack_archive(format)
|
||||
|
||||
def test_unpack_archive_tar(self):
|
||||
self.check_unpack_archive('tar')
|
||||
self.check_unpack_tarball('tar')
|
||||
|
||||
@support.requires_zlib()
|
||||
def test_unpack_archive_gztar(self):
|
||||
self.check_unpack_archive('gztar')
|
||||
self.check_unpack_tarball('gztar')
|
||||
|
||||
@support.requires_bz2()
|
||||
def test_unpack_archive_bztar(self):
|
||||
self.check_unpack_archive('bztar')
|
||||
self.check_unpack_tarball('bztar')
|
||||
|
||||
@support.requires_lzma()
|
||||
@unittest.skipIf(AIX and not _maxdataOK(), "AIX MAXDATA must be 0x20000000 or larger")
|
||||
def test_unpack_archive_xztar(self):
|
||||
self.check_unpack_archive('xztar')
|
||||
self.check_unpack_tarball('xztar')
|
||||
|
||||
@support.requires_zlib()
|
||||
def test_unpack_archive_zip(self):
|
||||
self.check_unpack_archive('zip')
|
||||
with self.assertRaises(TypeError):
|
||||
self.check_unpack_archive('zip', filter='data')
|
||||
|
||||
def test_unpack_registry(self):
|
||||
|
||||
|
|
|
|||
File diff suppressed because it is too large
Load diff
|
|
@ -0,0 +1,4 @@
|
|||
The extraction methods in :mod:`tarfile`, and :func:`shutil.unpack_archive`,
|
||||
have a new *filter* argument that allows limiting tar features that may be
|
||||
surprising or dangerous, such as creating files outside the destination
|
||||
directory. See :ref:`tarfile-extraction-filter` for details.
|
||||
Loading…
Add table
Add a link
Reference in a new issue