bpo-37772: fix zipfile.Path.iterdir() outputs (GH-15170) (#15461)

* fix Path._add_implied_dirs to include all implied directories

* fix Path._add_implied_dirs to include all implied directories

* Optimize code by using sets instead of lists

* 📜🤖 Added by blurb_it.

* fix Path._add_implied_dirs to include all implied directories

* Optimize code by using sets instead of lists

* 📜🤖 Added by blurb_it.

* Add tests to zipfile.Path.iterdir() fix

* Update test for zipfile.Path.iterdir()

* remove whitespace from test file

* Rewrite NEWS blurb to describe the user-facing impact and avoid implementation details.

* remove redundant [] within set comprehension

* Update to use unique_everseen to maintain order and other suggestions in review

* remove whitespace and add back add_dirs in tests

* Add new standalone function parents using posixpath to get parents of a directory

* removing whitespace (sorry)

* Remove import pathlib from zipfile.py

* Rewrite _parents as a slice on a generator of the ancestry of a path.

* Remove check for '.' and '/', now that parents no longer returns those.

* Separate calculation of implied dirs from adding those

* Re-use _implied_dirs in tests for generating zipfile with dir entries.

* Replace three fixtures (abcde, abcdef, abde) with one representative example alpharep.

* Simplify implementation of _implied_dirs by collapsing the generation of parent directories for each name.
(cherry picked from commit a4e2991bdc)

Co-authored-by: shireenrao <shireenrao@gmail.com>
This commit is contained in:
Miss Islington (bot) 2019-08-24 09:03:52 -07:00 committed by Jason R. Coombs
parent ed146b52a3
commit c410f381bf
3 changed files with 135 additions and 52 deletions

View file

@ -7,6 +7,7 @@ import binascii
import functools
import importlib.util
import io
import itertools
import os
import posixpath
import shutil
@ -2104,6 +2105,65 @@ class PyZipFile(ZipFile):
return (fname, archivename)
def _unique_everseen(iterable, key=None):
"List unique elements, preserving order. Remember all elements ever seen."
# unique_everseen('AAAABBBCCDAABBB') --> A B C D
# unique_everseen('ABBCcAD', str.lower) --> A B C D
seen = set()
seen_add = seen.add
if key is None:
for element in itertools.filterfalse(seen.__contains__, iterable):
seen_add(element)
yield element
else:
for element in iterable:
k = key(element)
if k not in seen:
seen_add(k)
yield element
def _parents(path):
"""
Given a path with elements separated by
posixpath.sep, generate all parents of that path.
>>> list(_parents('b/d'))
['b']
>>> list(_parents('/b/d/'))
['/b']
>>> list(_parents('b/d/f/'))
['b/d', 'b']
>>> list(_parents('b'))
[]
>>> list(_parents(''))
[]
"""
return itertools.islice(_ancestry(path), 1, None)
def _ancestry(path):
"""
Given a path with elements separated by
posixpath.sep, generate all elements of that path
>>> list(_ancestry('b/d'))
['b/d', 'b']
>>> list(_ancestry('/b/d/'))
['/b/d', '/b']
>>> list(_ancestry('b/d/f/'))
['b/d/f', 'b/d', 'b']
>>> list(_ancestry('b'))
['b']
>>> list(_ancestry(''))
[]
"""
path = path.rstrip(posixpath.sep)
while path and path != posixpath.sep:
yield path
path, tail = posixpath.split(path)
class Path:
"""
A pathlib-compatible interface for zip files.
@ -2227,12 +2287,17 @@ class Path:
__truediv__ = joinpath
@staticmethod
def _add_implied_dirs(names):
return names + [
name + "/"
for name in map(posixpath.dirname, names)
if name and name + "/" not in names
]
def _implied_dirs(names):
return _unique_everseen(
parent + "/"
for name in names
for parent in _parents(name)
if parent + "/" not in names
)
@classmethod
def _add_implied_dirs(cls, names):
return names + list(cls._implied_dirs(names))
@property
def parent(self):