mirror of
https://github.com/python/cpython.git
synced 2025-07-24 11:44:31 +00:00
bpo-39595: Improve zipfile.Path performance (#18406)
* Improve zipfile.Path performance on zipfiles with a large number of entries. * 📜🤖 Added by blurb_it. * Add bpo to blurb * Sync with importlib_metadata 1.5 (6fe70ca) * Update blurb. * Remove compatibility code * Add stubs module, omitted from earlier commit Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
This commit is contained in:
parent
e6be9b59a9
commit
e5bd73632e
7 changed files with 254 additions and 68 deletions
|
@ -2724,16 +2724,71 @@ class CommandLineTest(unittest.TestCase):
|
|||
self.assertEqual(f.read(), zf.read(zi))
|
||||
|
||||
|
||||
class TestExecutablePrependedZip(unittest.TestCase):
    """Check that zip files with an executable prepended can be opened."""

    def setUp(self):
        # Resolve both fixture files from the 'ziptestdata' directory.
        self.exe_zip, self.exe_zip64 = (
            findfile(fixture, subdir='ziptestdata')
            for fixture in ('exe_with_zip', 'exe_with_z64')
        )

    def _test_zip_works(self, name):
        # bpo28494 sanity check: ensure is_zipfile works on these.
        recognised = zipfile.is_zipfile(name)
        self.assertTrue(recognised, f'is_zipfile failed on {name}')
        # Ensure we can operate on these via ZipFile: every member must be
        # readable and contain the marker bytes.
        with zipfile.ZipFile(name) as archive:
            for member in archive.namelist():
                self.assertIn(b'FAVORITE_NUMBER', archive.read(member))

    def test_read_zip_with_exe_prepended(self):
        self._test_zip_works(self.exe_zip)

    def test_read_zip64_with_exe_prepended(self):
        self._test_zip_works(self.exe_zip64)

    @unittest.skipUnless(sys.executable, 'sys.executable required.')
    @unittest.skipUnless(os.access('/bin/bash', os.X_OK),
                         'Test relies on #!/bin/bash working.')
    def test_execute_zip2(self):
        # Run the fixture itself, handing it the current interpreter path.
        command = [self.exe_zip, sys.executable]
        self.assertIn(b'number in executable: 5',
                      subprocess.check_output(command))

    @unittest.skipUnless(sys.executable, 'sys.executable required.')
    @unittest.skipUnless(os.access('/bin/bash', os.X_OK),
                         'Test relies on #!/bin/bash working.')
    def test_execute_zip64(self):
        # Same check as test_execute_zip2, against the second fixture.
        command = [self.exe_zip64, sys.executable]
        self.assertIn(b'number in executable: 5',
                      subprocess.check_output(command))
|
||||
|
||||
|
||||
# Poor man's technique to consume a (smallish) iterable: building a tuple
# forces the whole iterable to be evaluated.
consume = tuple
|
||||
|
||||
|
||||
# from jaraco.itertools 5.0
class jaraco:
    """Namespace class mimicking the ``jaraco`` package layout."""

    class itertools:
        """Namespace class mimicking ``jaraco.itertools``."""

        class Counter:
            """Iterator wrapper that tallies the items it has produced.

            ``count`` holds the number of items yielded so far.
            """

            def __init__(self, i):
                self.count = 0
                self._orig_iter = iter(i)

            def __iter__(self):
                # The wrapper is its own iterator.
                return self

            def __next__(self):
                # Fetch first: if the wrapped iterator is exhausted,
                # StopIteration propagates and the tally is left untouched.
                item = next(self._orig_iter)
                self.count += 1
                return item
|
||||
|
||||
|
||||
def add_dirs(zf):
    """
    Given a writable zip file zf, inject directory entries for
    any directories implied by the presence of children.

    Each implied directory is written as an empty member. The same
    zf object is returned so the call can be chained.
    """
    # Only the CompleteDirs helper is used to compute the implied
    # directory names; the names are derived from the current namelist.
    for name in zipfile.CompleteDirs._implied_dirs(zf.namelist()):
        zf.writestr(name, b"")
    return zf
|
||||
|
||||
|
@ -2774,44 +2829,6 @@ def build_alpharep_fixture():
|
|||
return zf
|
||||
|
||||
|
||||
class TestExecutablePrependedZip(unittest.TestCase):
    """Test our ability to open zip files with an executable prepended."""

    def setUp(self):
        # Resolve the two fixture files from the 'ziptestdata' directory.
        self.exe_zip = findfile('exe_with_zip', subdir='ziptestdata')
        self.exe_zip64 = findfile('exe_with_z64', subdir='ziptestdata')

    def _test_zip_works(self, name):
        # bpo-28494 sanity check: ensure is_zipfile works on these.
        self.assertTrue(zipfile.is_zipfile(name),
                        f'is_zipfile failed on {name}')
        # Ensure we can operate on these via ZipFile.
        with zipfile.ZipFile(name) as zipfp:
            for n in zipfp.namelist():
                data = zipfp.read(n)
                # Every member is expected to contain this marker.
                self.assertIn(b'FAVORITE_NUMBER', data)

    def test_read_zip_with_exe_prepended(self):
        self._test_zip_works(self.exe_zip)

    def test_read_zip64_with_exe_prepended(self):
        self._test_zip_works(self.exe_zip64)

    @unittest.skipUnless(sys.executable, 'sys.executable required.')
    @unittest.skipUnless(os.access('/bin/bash', os.X_OK),
                         'Test relies on #!/bin/bash working.')
    def test_execute_zip2(self):
        # Execute the fixture itself, passing the interpreter path as its
        # only argument, and check the text it prints.
        output = subprocess.check_output([self.exe_zip, sys.executable])
        self.assertIn(b'number in executable: 5', output)

    @unittest.skipUnless(sys.executable, 'sys.executable required.')
    @unittest.skipUnless(os.access('/bin/bash', os.X_OK),
                         'Test relies on #!/bin/bash working.')
    def test_execute_zip64(self):
        # Same check as test_execute_zip2, against the second fixture.
        output = subprocess.check_output([self.exe_zip64, sys.executable])
        self.assertIn(b'number in executable: 5', output)
|
||||
|
||||
|
||||
class TestPath(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.fixtures = contextlib.ExitStack()
|
||||
|
@ -2849,6 +2866,14 @@ class TestPath(unittest.TestCase):
|
|||
i, = h.iterdir()
|
||||
assert i.is_file()
|
||||
|
||||
def test_subdir_is_dir(self):
    # 'b' and 'g' must report as directories whether or not the joined
    # name carries a trailing slash.
    for alpharep in self.zipfile_alpharep():
        root = zipfile.Path(alpharep)
        for child in ('b', 'b/', 'g', 'g/'):
            assert (root / child).is_dir()
|
||||
|
||||
def test_open(self):
|
||||
for alpharep in self.zipfile_alpharep():
|
||||
root = zipfile.Path(alpharep)
|
||||
|
@ -2910,6 +2935,45 @@ class TestPath(unittest.TestCase):
|
|||
root = zipfile.Path(alpharep)
|
||||
assert (root / 'missing dir/').parent.at == ''
|
||||
|
||||
def test_mutability(self):
    """
    Entries written to the underlying zipfile after a Path was
    created must be visible through that Path.
    """
    for alpharep in self.zipfile_alpharep():
        root = zipfile.Path(alpharep)
        # Unpacking doubles as a check that the root starts out with
        # exactly three children.
        a, b, g = root.iterdir()
        alpharep.writestr('foo.txt', 'foo')
        alpharep.writestr('bar/baz.txt', 'baz')
        child_names = [child.name for child in root.iterdir()]
        assert 'foo.txt' in child_names
        assert (root / 'foo.txt').read_text() == 'foo'
        # The new 'bar' directory must hold exactly one entry.
        [baz] = (root / 'bar').iterdir()
        assert baz.read_text() == 'baz'
|
||||
|
||||
# Number of entries huge_zipfile() writes into its archive (8192).
HUGE_ZIPFILE_NUM_ENTRIES = 2 ** 13
|
||||
|
||||
def huge_zipfile(self):
    """Create a read-only zipfile with a huge number of entries.

    The archive lives in memory and holds HUGE_ZIPFILE_NUM_ENTRIES
    members named '0', '1', ...; each member's content is its own name.
    """
    stream = io.BytesIO()
    archive = zipfile.ZipFile(stream, "w")
    for index in range(self.HUGE_ZIPFILE_NUM_ENTRIES):
        label = str(index)
        archive.writestr(label, label)
    # Flip the mode attribute on the still-open archive rather than
    # closing and reopening it.
    archive.mode = 'r'
    return archive
|
||||
|
||||
def test_joinpath_constant_time(self):
    """
    Call joinpath on every entry of a huge zipfile and confirm
    that the iteration visited all of them.
    """
    # NOTE(review): the name suggests each joinpath call should be cheap
    # so the whole walk stays linear; the only assertion here is the count.
    archive = self.huge_zipfile()
    counted = jaraco.itertools.Counter(zipfile.Path(archive).iterdir())
    for member in counted:
        member.joinpath('suffix')
    # Check the file iterated all items
    assert counted.count == self.HUGE_ZIPFILE_NUM_ENTRIES
|
||||
|
||||
|
||||
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue