From c410f381bf66c48d84812e19e3ba7c2878511a3e Mon Sep 17 00:00:00 2001 From: "Miss Islington (bot)" <31488909+miss-islington@users.noreply.github.com> Date: Sat, 24 Aug 2019 09:03:52 -0700 Subject: [PATCH] bpo-37772: fix zipfile.Path.iterdir() outputs (GH-15170) (#15461) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * fix Path._add_implied_dirs to include all implied directories * fix Path._add_implied_dirs to include all implied directories * Optimize code by using sets instead of lists * 📜🤖 Added by blurb_it. * fix Path._add_implied_dirs to include all implied directories * Optimize code by using sets instead of lists * 📜🤖 Added by blurb_it. * Add tests to zipfile.Path.iterdir() fix * Update test for zipfile.Path.iterdir() * remove whitespace from test file * Rewrite NEWS blurb to describe the user-facing impact and avoid implementation details. * remove redundant [] within set comprehension * Update to use unique_everseen to maintain order and other suggestions in review * remove whitespace and add back add_dirs in tests * Add new standalone function parents using posixpath to get parents of a directory * removing whitespace (sorry) * Remove import pathlib from zipfile.py * Rewrite _parents as a slice on a generator of the ancestry of a path. * Remove check for '.' and '/', now that parents no longer returns those. * Separate calculation of implied dirs from adding those * Re-use _implied_dirs in tests for generating zipfile with dir entries. * Replace three fixtures (abcde, abcdef, abde) with one representative example alpharep. * Simplify implementation of _implied_dirs by collapsing the generation of parent directories for each name. (cherry picked from commit a4e2991bdc993b60b6457c8a38d6e4a1fc845781) Co-authored-by: shireenrao --- Lib/test/test_zipfile.py | 109 ++++++++++-------- Lib/zipfile.py | 77 ++++++++++++- .../2019-08-07-23-48-09.bpo-37772.hLCvdn.rst | 1 + 3 files changed, 135 insertions(+), 52 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2019-08-07-23-48-09.bpo-37772.hLCvdn.rst diff --git a/Lib/test/test_zipfile.py b/Lib/test/test_zipfile.py index 0c8ffcdbf14..8e437e5cb2b 100644 --- a/Lib/test/test_zipfile.py +++ b/Lib/test/test_zipfile.py @@ -2397,37 +2397,49 @@ class CommandLineTest(unittest.TestCase): consume = tuple -def add_dirs(zipfile): +def add_dirs(zf): """ - Given a writable zipfile, inject directory entries for + Given a writable zip file zf, inject directory entries for any directories implied by the presence of children. """ - names = zipfile.namelist() - consume( - zipfile.writestr(name + "/", b"") - for name in map(posixpath.dirname, names) - if name and name + "/" not in names - ) - return zipfile + for name in zipfile.Path._implied_dirs(zf.namelist()): + zf.writestr(name, b"") + return zf -def build_abcde_files(): +def build_alpharep_fixture(): """ Create a zip file with this structure: . ├── a.txt - └── b - ├── c.txt - └── d - └── e.txt + ├── b + │ ├── c.txt + │ ├── d + │ │ └── e.txt + │ └── f.txt + └── g + └── h + └── i.txt + + This fixture has the following key characteristics: + + - a file at the root (a) + - a file two levels deep (b/d/e) + - multiple files in a directory (b/c, b/f) + - a directory containing only a directory (g/h) + + "alpha" because it uses alphabet + "rep" because it's a representative example """ data = io.BytesIO() zf = zipfile.ZipFile(data, "w") zf.writestr("a.txt", b"content of a") zf.writestr("b/c.txt", b"content of c") zf.writestr("b/d/e.txt", b"content of e") - zf.filename = "abcde.zip" + zf.writestr("b/f.txt", b"content of f") + zf.writestr("g/h/i.txt", b"content of i") + zf.filename = "alpharep.zip" return zf @@ -2436,60 +2448,64 @@ class TestPath(unittest.TestCase): self.fixtures = contextlib.ExitStack() self.addCleanup(self.fixtures.close) - def zipfile_abcde(self): + def zipfile_alpharep(self): with self.subTest(): - yield build_abcde_files() + yield build_alpharep_fixture() with self.subTest(): - yield add_dirs(build_abcde_files()) + yield add_dirs(build_alpharep_fixture()) def zipfile_ondisk(self): tmpdir = pathlib.Path(self.fixtures.enter_context(temp_dir())) - for zipfile_abcde in self.zipfile_abcde(): - buffer = zipfile_abcde.fp - zipfile_abcde.close() - path = tmpdir / zipfile_abcde.filename + for alpharep in self.zipfile_alpharep(): + buffer = alpharep.fp + alpharep.close() + path = tmpdir / alpharep.filename with path.open("wb") as strm: strm.write(buffer.getvalue()) yield path - def test_iterdir_istype(self): - for zipfile_abcde in self.zipfile_abcde(): - root = zipfile.Path(zipfile_abcde) + def test_iterdir_and_types(self): + for alpharep in self.zipfile_alpharep(): + root = zipfile.Path(alpharep) assert root.is_dir() - a, b = root.iterdir() + a, b, g = root.iterdir() assert a.is_file() assert b.is_dir() - c, d = b.iterdir() - assert c.is_file() + assert g.is_dir() + c, f, d = b.iterdir() + assert c.is_file() and f.is_file() e, = d.iterdir() assert e.is_file() + h, = g.iterdir() + i, = h.iterdir() + assert i.is_file() def test_open(self): - for zipfile_abcde in self.zipfile_abcde(): - root = zipfile.Path(zipfile_abcde) - a, b = root.iterdir() + for alpharep in self.zipfile_alpharep(): + root = zipfile.Path(alpharep) + a, b, g = root.iterdir() with a.open() as strm: data = strm.read() assert data == b"content of a" def test_read(self): - for zipfile_abcde in self.zipfile_abcde(): - root = zipfile.Path(zipfile_abcde) - a, b = root.iterdir() + for alpharep in self.zipfile_alpharep(): + root = zipfile.Path(alpharep) + a, b, g = root.iterdir() assert a.read_text() == "content of a" assert a.read_bytes() == b"content of a" def test_joinpath(self): - for zipfile_abcde in self.zipfile_abcde(): - root = zipfile.Path(zipfile_abcde) + for alpharep in self.zipfile_alpharep(): + root = zipfile.Path(alpharep) a = root.joinpath("a") assert a.is_file() e = root.joinpath("b").joinpath("d").joinpath("e.txt") assert e.read_text() == "content of e" def test_traverse_truediv(self): - for zipfile_abcde in self.zipfile_abcde(): - root = zipfile.Path(zipfile_abcde) + for alpharep in self.zipfile_alpharep(): + root = zipfile.Path(alpharep) a = root / "a" assert a.is_file() e = root / "b" / "d" / "e.txt" @@ -2504,26 +2520,27 @@ class TestPath(unittest.TestCase): zipfile.Path(pathlike) def test_traverse_pathlike(self): - for zipfile_abcde in self.zipfile_abcde(): - root = zipfile.Path(zipfile_abcde) + for alpharep in self.zipfile_alpharep(): + root = zipfile.Path(alpharep) root / pathlib.Path("a") def test_parent(self): - for zipfile_abcde in self.zipfile_abcde(): - root = zipfile.Path(zipfile_abcde) + for alpharep in self.zipfile_alpharep(): + root = zipfile.Path(alpharep) assert (root / 'a').parent.at == '' assert (root / 'a' / 'b').parent.at == 'a/' def test_dir_parent(self): - for zipfile_abcde in self.zipfile_abcde(): - root = zipfile.Path(zipfile_abcde) + for alpharep in self.zipfile_alpharep(): + root = zipfile.Path(alpharep) assert (root / 'b').parent.at == '' assert (root / 'b/').parent.at == '' def test_missing_dir_parent(self): - for zipfile_abcde in self.zipfile_abcde(): - root = zipfile.Path(zipfile_abcde) + for alpharep in self.zipfile_alpharep(): + root = zipfile.Path(alpharep) assert (root / 'missing dir/').parent.at == '' + if __name__ == "__main__": unittest.main() diff --git a/Lib/zipfile.py b/Lib/zipfile.py index 3c1f1235034..dfd09079501 100644 --- a/Lib/zipfile.py +++ b/Lib/zipfile.py @@ -7,6 +7,7 @@ import binascii import functools import importlib.util import io +import itertools import os import posixpath import shutil @@ -2104,6 +2105,65 @@ class PyZipFile(ZipFile): return (fname, archivename) +def _unique_everseen(iterable, key=None): + "List unique elements, preserving order. Remember all elements ever seen." + # unique_everseen('AAAABBBCCDAABBB') --> A B C D + # unique_everseen('ABBCcAD', str.lower) --> A B C D + seen = set() + seen_add = seen.add + if key is None: + for element in itertools.filterfalse(seen.__contains__, iterable): + seen_add(element) + yield element + else: + for element in iterable: + k = key(element) + if k not in seen: + seen_add(k) + yield element + + +def _parents(path): + """ + Given a path with elements separated by + posixpath.sep, generate all parents of that path. + + >>> list(_parents('b/d')) + ['b'] + >>> list(_parents('/b/d/')) + ['/b'] + >>> list(_parents('b/d/f/')) + ['b/d', 'b'] + >>> list(_parents('b')) + [] + >>> list(_parents('')) + [] + """ + return itertools.islice(_ancestry(path), 1, None) + + +def _ancestry(path): + """ + Given a path with elements separated by + posixpath.sep, generate all elements of that path + + >>> list(_ancestry('b/d')) + ['b/d', 'b'] + >>> list(_ancestry('/b/d/')) + ['/b/d', '/b'] + >>> list(_ancestry('b/d/f/')) + ['b/d/f', 'b/d', 'b'] + >>> list(_ancestry('b')) + ['b'] + >>> list(_ancestry('')) + [] + """ + path = path.rstrip(posixpath.sep) + while path and path != posixpath.sep: + yield path + path, tail = posixpath.split(path) + + class Path: """ A pathlib-compatible interface for zip files. @@ -2227,12 +2287,17 @@ class Path: __truediv__ = joinpath @staticmethod - def _add_implied_dirs(names): - return names + [ - name + "/" - for name in map(posixpath.dirname, names) - if name and name + "/" not in names - ] + def _implied_dirs(names): + return _unique_everseen( + parent + "/" + for name in names + for parent in _parents(name) + if parent + "/" not in names + ) + + @classmethod + def _add_implied_dirs(cls, names): + return names + list(cls._implied_dirs(names)) @property def parent(self): diff --git a/Misc/NEWS.d/next/Library/2019-08-07-23-48-09.bpo-37772.hLCvdn.rst b/Misc/NEWS.d/next/Library/2019-08-07-23-48-09.bpo-37772.hLCvdn.rst new file mode 100644 index 00000000000..f9ec6a33b07 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-08-07-23-48-09.bpo-37772.hLCvdn.rst @@ -0,0 +1 @@ +In ``zipfile.Path``, when adding implicit dirs, ensure that ancestral directories are added and that duplicates are excluded.