bpo-30693: zip+tarfile: sort directory listing (#2263)

tarfile and zipfile now sort directory listings when recursing, so that the
tar and zip archives they generate are more reproducible.

See also https://reproducible-builds.org/docs/stable-inputs/ on that topic.
This commit is contained in:
Bernhard M. Wiedemann 2018-01-31 11:17:10 +01:00 committed by Ned Deily
parent 209108bd69
commit 57750be4ad
7 changed files with 39 additions and 6 deletions

View file

@ -451,7 +451,8 @@ be finalized; only the internally used file object will be closed. See the
(directory, fifo, symbolic link, etc.). If given, *arcname* specifies an (directory, fifo, symbolic link, etc.). If given, *arcname* specifies an
alternative name for the file in the archive. Directories are added alternative name for the file in the archive. Directories are added
recursively by default. This can be avoided by setting *recursive* to recursively by default. This can be avoided by setting *recursive* to
:const:`False`. If *filter* is given, it :const:`False`. Recursion adds entries in sorted order.
If *filter* is given, it
should be a function that takes a :class:`TarInfo` object argument and should be a function that takes a :class:`TarInfo` object argument and
returns the changed :class:`TarInfo` object. If it instead returns returns the changed :class:`TarInfo` object. If it instead returns
:const:`None` the :class:`TarInfo` object will be excluded from the :const:`None` the :class:`TarInfo` object will be excluded from the
@ -460,6 +461,9 @@ be finalized; only the internally used file object will be closed. See the
.. versionchanged:: 3.2 .. versionchanged:: 3.2
Added the *filter* parameter. Added the *filter* parameter.
.. versionchanged:: 3.7
Recursion adds entries in sorted order.
.. method:: TarFile.addfile(tarinfo, fileobj=None) .. method:: TarFile.addfile(tarinfo, fileobj=None)

View file

@ -491,7 +491,7 @@ The :class:`PyZipFile` constructor takes the same parameters as the
:file:`\*.pyc` are added at the top level. If the directory is a :file:`\*.pyc` are added at the top level. If the directory is a
package directory, then all :file:`\*.pyc` are added under the package package directory, then all :file:`\*.pyc` are added under the package
name as a file path, and if any subdirectories are package directories, name as a file path, and if any subdirectories are package directories,
all of these are added recursively. all of these are added recursively in sorted order.
*basename* is intended for internal use only. *basename* is intended for internal use only.
@ -524,6 +524,9 @@ The :class:`PyZipFile` constructor takes the same parameters as the
.. versionchanged:: 3.6.2 .. versionchanged:: 3.6.2
The *pathname* parameter accepts a :term:`path-like object`. The *pathname* parameter accepts a :term:`path-like object`.
.. versionchanged:: 3.7
Recursion sorts directory entries.
.. _zipinfo-objects: .. _zipinfo-objects:

View file

@ -1943,7 +1943,7 @@ class TarFile(object):
elif tarinfo.isdir(): elif tarinfo.isdir():
self.addfile(tarinfo) self.addfile(tarinfo)
if recursive: if recursive:
for f in os.listdir(name): for f in sorted(os.listdir(name)):
self.add(os.path.join(name, f), os.path.join(arcname, f), self.add(os.path.join(name, f), os.path.join(arcname, f),
recursive, filter=filter) recursive, filter=filter)

View file

@ -1129,6 +1129,30 @@ class WriteTest(WriteTestBase, unittest.TestCase):
finally: finally:
support.rmdir(path) support.rmdir(path)
def test_ordered_recursion(self):
    """Check that TarFile.add() stores a directory's entries in sorted order.

    os.listdir is replaced by a fake that reports the two files in reverse
    order, so the final assertion only holds if add() sorts the listing
    itself.
    """
    path = os.path.join(TEMPDIR, "directory")
    os.mkdir(path)
    open(os.path.join(path, "1"), "a").close()
    open(os.path.join(path, "2"), "a").close()
    try:
        with tarfile.open(tmpname, self.mode) as tar:
            # Patch os.listdir only while add() runs, so the fake listing
            # cannot leak into opening the archive or into the clean-up
            # code in the finally clause below.
            with unittest.mock.patch('os.listdir') as mock_listdir:
                mock_listdir.return_value = ["2", "1"]
                tar.add(path)
            # Compare only the last path component of each member name.
            paths = [os.path.split(m.name)[-1] for m in tar.getmembers()]
            self.assertEqual(paths, ["directory", "1", "2"])
    finally:
        support.unlink(os.path.join(path, "1"))
        support.unlink(os.path.join(path, "2"))
        support.rmdir(path)
def test_gettarinfo_pathlike_name(self): def test_gettarinfo_pathlike_name(self):
with tarfile.open(tmpname, self.mode) as tar: with tarfile.open(tmpname, self.mode) as tar:
path = pathlib.Path(TEMPDIR) / "file" path = pathlib.Path(TEMPDIR) / "file"

View file

@ -1940,7 +1940,7 @@ class PyZipFile(ZipFile):
if self.debug: if self.debug:
print("Adding", arcname) print("Adding", arcname)
self.write(fname, arcname) self.write(fname, arcname)
dirlist = os.listdir(pathname) dirlist = sorted(os.listdir(pathname))
dirlist.remove("__init__.py") dirlist.remove("__init__.py")
# Add all *.py files and package subdirectories # Add all *.py files and package subdirectories
for filename in dirlist: for filename in dirlist:
@ -1965,7 +1965,7 @@ class PyZipFile(ZipFile):
# This is NOT a package directory, add its files at top level # This is NOT a package directory, add its files at top level
if self.debug: if self.debug:
print("Adding files from directory", pathname) print("Adding files from directory", pathname)
for filename in os.listdir(pathname): for filename in sorted(os.listdir(pathname)):
path = os.path.join(pathname, filename) path = os.path.join(pathname, filename)
root, ext = os.path.splitext(filename) root, ext = os.path.splitext(filename)
if ext == ".py": if ext == ".py":
@ -2116,7 +2116,7 @@ def main(args=None):
elif os.path.isdir(path): elif os.path.isdir(path):
if zippath: if zippath:
zf.write(path, zippath) zf.write(path, zippath)
for nm in os.listdir(path): for nm in sorted(os.listdir(path)):
addToZip(zf, addToZip(zf,
os.path.join(path, nm), os.path.join(zippath, nm)) os.path.join(path, nm), os.path.join(zippath, nm))
# else: ignore # else: ignore

View file

@ -0,0 +1 @@
The zipfile module (the PyZipFile class and the command-line interface) now recurses directories in sorted order, making the archives it creates more reproducible.

View file

@ -0,0 +1 @@
TarFile.add now adds directory entries in sorted order when recursing, making the archives it creates more reproducible.