bpo-39595: Improve zipfile.Path performance (#18406)

* Improve zipfile.Path performance on zipfiles with a large number of entries.

* 📜🤖 Added by blurb_it.

* Add bpo to blurb

* Sync with importlib_metadata 1.5 (6fe70ca)

* Update blurb.

* Remove compatibility code

* Add stubs module, omitted from earlier commit

Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
This commit is contained in:
Jason R. Coombs 2020-02-11 21:58:47 -05:00 committed by GitHub
parent e6be9b59a9
commit e5bd73632e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 254 additions and 68 deletions

View file

@ -2724,16 +2724,71 @@ class CommandLineTest(unittest.TestCase):
self.assertEqual(f.read(), zf.read(zi))
class TestExecutablePrependedZip(unittest.TestCase):
"""Test our ability to open zip files with an executable prepended."""
def setUp(self):
self.exe_zip = findfile('exe_with_zip', subdir='ziptestdata')
self.exe_zip64 = findfile('exe_with_z64', subdir='ziptestdata')
def _test_zip_works(self, name):
# bpo28494 sanity check: ensure is_zipfile works on these.
self.assertTrue(zipfile.is_zipfile(name),
f'is_zipfile failed on {name}')
# Ensure we can operate on these via ZipFile.
with zipfile.ZipFile(name) as zipfp:
for n in zipfp.namelist():
data = zipfp.read(n)
self.assertIn(b'FAVORITE_NUMBER', data)
def test_read_zip_with_exe_prepended(self):
self._test_zip_works(self.exe_zip)
def test_read_zip64_with_exe_prepended(self):
self._test_zip_works(self.exe_zip64)
@unittest.skipUnless(sys.executable, 'sys.executable required.')
@unittest.skipUnless(os.access('/bin/bash', os.X_OK),
'Test relies on #!/bin/bash working.')
def test_execute_zip2(self):
output = subprocess.check_output([self.exe_zip, sys.executable])
self.assertIn(b'number in executable: 5', output)
@unittest.skipUnless(sys.executable, 'sys.executable required.')
@unittest.skipUnless(os.access('/bin/bash', os.X_OK),
'Test relies on #!/bin/bash working.')
def test_execute_zip64(self):
output = subprocess.check_output([self.exe_zip64, sys.executable])
self.assertIn(b'number in executable: 5', output)
# Poor man's technique to consume a (smallish) iterable.
consume = tuple
# from jaraco.itertools 5.0
class jaraco:
class itertools:
class Counter:
def __init__(self, i):
self.count = 0
self._orig_iter = iter(i)
def __iter__(self):
return self
def __next__(self):
result = next(self._orig_iter)
self.count += 1
return result
def add_dirs(zf):
"""
Given a writable zip file zf, inject directory entries for
any directories implied by the presence of children.
"""
for name in zipfile.Path._implied_dirs(zf.namelist()):
for name in zipfile.CompleteDirs._implied_dirs(zf.namelist()):
zf.writestr(name, b"")
return zf
@ -2774,44 +2829,6 @@ def build_alpharep_fixture():
return zf
class TestExecutablePrependedZip(unittest.TestCase):
"""Test our ability to open zip files with an executable prepended."""
def setUp(self):
self.exe_zip = findfile('exe_with_zip', subdir='ziptestdata')
self.exe_zip64 = findfile('exe_with_z64', subdir='ziptestdata')
def _test_zip_works(self, name):
# bpo-28494 sanity check: ensure is_zipfile works on these.
self.assertTrue(zipfile.is_zipfile(name),
f'is_zipfile failed on {name}')
# Ensure we can operate on these via ZipFile.
with zipfile.ZipFile(name) as zipfp:
for n in zipfp.namelist():
data = zipfp.read(n)
self.assertIn(b'FAVORITE_NUMBER', data)
def test_read_zip_with_exe_prepended(self):
self._test_zip_works(self.exe_zip)
def test_read_zip64_with_exe_prepended(self):
self._test_zip_works(self.exe_zip64)
@unittest.skipUnless(sys.executable, 'sys.executable required.')
@unittest.skipUnless(os.access('/bin/bash', os.X_OK),
'Test relies on #!/bin/bash working.')
def test_execute_zip2(self):
output = subprocess.check_output([self.exe_zip, sys.executable])
self.assertIn(b'number in executable: 5', output)
@unittest.skipUnless(sys.executable, 'sys.executable required.')
@unittest.skipUnless(os.access('/bin/bash', os.X_OK),
'Test relies on #!/bin/bash working.')
def test_execute_zip64(self):
output = subprocess.check_output([self.exe_zip64, sys.executable])
self.assertIn(b'number in executable: 5', output)
class TestPath(unittest.TestCase):
def setUp(self):
self.fixtures = contextlib.ExitStack()
@ -2849,6 +2866,14 @@ class TestPath(unittest.TestCase):
i, = h.iterdir()
assert i.is_file()
def test_subdir_is_dir(self):
for alpharep in self.zipfile_alpharep():
root = zipfile.Path(alpharep)
assert (root / 'b').is_dir()
assert (root / 'b/').is_dir()
assert (root / 'g').is_dir()
assert (root / 'g/').is_dir()
def test_open(self):
for alpharep in self.zipfile_alpharep():
root = zipfile.Path(alpharep)
@ -2910,6 +2935,45 @@ class TestPath(unittest.TestCase):
root = zipfile.Path(alpharep)
assert (root / 'missing dir/').parent.at == ''
def test_mutability(self):
"""
If the underlying zipfile is changed, the Path object should
reflect that change.
"""
for alpharep in self.zipfile_alpharep():
root = zipfile.Path(alpharep)
a, b, g = root.iterdir()
alpharep.writestr('foo.txt', 'foo')
alpharep.writestr('bar/baz.txt', 'baz')
assert any(
child.name == 'foo.txt'
for child in root.iterdir())
assert (root / 'foo.txt').read_text() == 'foo'
baz, = (root / 'bar').iterdir()
assert baz.read_text() == 'baz'
HUGE_ZIPFILE_NUM_ENTRIES = 2 ** 13
def huge_zipfile(self):
"""Create a read-only zipfile with a huge number of entries entries."""
strm = io.BytesIO()
zf = zipfile.ZipFile(strm, "w")
for entry in map(str, range(self.HUGE_ZIPFILE_NUM_ENTRIES)):
zf.writestr(entry, entry)
zf.mode = 'r'
return zf
def test_joinpath_constant_time(self):
"""
Ensure joinpath on items in zipfile is linear time.
"""
root = zipfile.Path(self.huge_zipfile())
entries = jaraco.itertools.Counter(root.iterdir())
for entry in entries:
entry.joinpath('suffix')
# Check the file iterated all items
assert entries.count == self.HUGE_ZIPFILE_NUM_ENTRIES
if __name__ == "__main__":
unittest.main()