mirror of
https://github.com/python/cpython.git
synced 2025-11-03 03:22:27 +00:00
gh-103200: Fix performance issues with zipimport.invalidate_caches() (GH-103208)
Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com> Co-authored-by: Brett Cannon <brett@python.org>
This commit is contained in:
parent
6e6a4cd523
commit
1fb9bd222b
3 changed files with 67 additions and 25 deletions
|
|
@ -520,10 +520,10 @@ class UncompressedZipImportTestCase(ImportHooksBaseTestCase):
|
||||||
z.writestr(zinfo, data)
|
z.writestr(zinfo, data)
|
||||||
|
|
||||||
zi = zipimport.zipimporter(TEMP_ZIP)
|
zi = zipimport.zipimporter(TEMP_ZIP)
|
||||||
self.assertEqual(zi._files.keys(), files.keys())
|
self.assertEqual(zi._get_files().keys(), files.keys())
|
||||||
# Check that the file information remains accurate after reloading
|
# Check that the file information remains accurate after reloading
|
||||||
zi.invalidate_caches()
|
zi.invalidate_caches()
|
||||||
self.assertEqual(zi._files.keys(), files.keys())
|
self.assertEqual(zi._get_files().keys(), files.keys())
|
||||||
# Add a new file to the ZIP archive
|
# Add a new file to the ZIP archive
|
||||||
newfile = {"spam2" + pyc_ext: (NOW, test_pyc)}
|
newfile = {"spam2" + pyc_ext: (NOW, test_pyc)}
|
||||||
files.update(newfile)
|
files.update(newfile)
|
||||||
|
|
@ -535,17 +535,54 @@ class UncompressedZipImportTestCase(ImportHooksBaseTestCase):
|
||||||
z.writestr(zinfo, data)
|
z.writestr(zinfo, data)
|
||||||
# Check that we can detect the new file after invalidating the cache
|
# Check that we can detect the new file after invalidating the cache
|
||||||
zi.invalidate_caches()
|
zi.invalidate_caches()
|
||||||
self.assertEqual(zi._files.keys(), files.keys())
|
self.assertEqual(zi._get_files().keys(), files.keys())
|
||||||
spec = zi.find_spec('spam2')
|
spec = zi.find_spec('spam2')
|
||||||
self.assertIsNotNone(spec)
|
self.assertIsNotNone(spec)
|
||||||
self.assertIsInstance(spec.loader, zipimport.zipimporter)
|
self.assertIsInstance(spec.loader, zipimport.zipimporter)
|
||||||
# Check that the cached data is removed if the file is deleted
|
# Check that the cached data is removed if the file is deleted
|
||||||
os.remove(TEMP_ZIP)
|
os.remove(TEMP_ZIP)
|
||||||
zi.invalidate_caches()
|
zi.invalidate_caches()
|
||||||
self.assertFalse(zi._files)
|
self.assertFalse(zi._get_files())
|
||||||
self.assertIsNone(zipimport._zip_directory_cache.get(zi.archive))
|
self.assertIsNone(zipimport._zip_directory_cache.get(zi.archive))
|
||||||
self.assertIsNone(zi.find_spec("name_does_not_matter"))
|
self.assertIsNone(zi.find_spec("name_does_not_matter"))
|
||||||
|
|
||||||
|
def testInvalidateCachesWithMultipleZipimports(self):
|
||||||
|
packdir = TESTPACK + os.sep
|
||||||
|
packdir2 = packdir + TESTPACK2 + os.sep
|
||||||
|
files = {packdir + "__init__" + pyc_ext: (NOW, test_pyc),
|
||||||
|
packdir2 + "__init__" + pyc_ext: (NOW, test_pyc),
|
||||||
|
packdir2 + TESTMOD + pyc_ext: (NOW, test_pyc),
|
||||||
|
"spam" + pyc_ext: (NOW, test_pyc)}
|
||||||
|
self.addCleanup(os_helper.unlink, TEMP_ZIP)
|
||||||
|
with ZipFile(TEMP_ZIP, "w") as z:
|
||||||
|
for name, (mtime, data) in files.items():
|
||||||
|
zinfo = ZipInfo(name, time.localtime(mtime))
|
||||||
|
zinfo.compress_type = self.compression
|
||||||
|
zinfo.comment = b"spam"
|
||||||
|
z.writestr(zinfo, data)
|
||||||
|
|
||||||
|
zi = zipimport.zipimporter(TEMP_ZIP)
|
||||||
|
self.assertEqual(zi._get_files().keys(), files.keys())
|
||||||
|
# Zipimporter for the same path.
|
||||||
|
zi2 = zipimport.zipimporter(TEMP_ZIP)
|
||||||
|
self.assertEqual(zi2._get_files().keys(), files.keys())
|
||||||
|
# Add a new file to the ZIP archive to make the cache wrong.
|
||||||
|
newfile = {"spam2" + pyc_ext: (NOW, test_pyc)}
|
||||||
|
files.update(newfile)
|
||||||
|
with ZipFile(TEMP_ZIP, "a") as z:
|
||||||
|
for name, (mtime, data) in newfile.items():
|
||||||
|
zinfo = ZipInfo(name, time.localtime(mtime))
|
||||||
|
zinfo.compress_type = self.compression
|
||||||
|
zinfo.comment = b"spam"
|
||||||
|
z.writestr(zinfo, data)
|
||||||
|
# Invalidate the cache of the first zipimporter.
|
||||||
|
zi.invalidate_caches()
|
||||||
|
# Check that the second zipimporter detects the new file and isn't using a stale cache.
|
||||||
|
self.assertEqual(zi2._get_files().keys(), files.keys())
|
||||||
|
spec = zi2.find_spec('spam2')
|
||||||
|
self.assertIsNotNone(spec)
|
||||||
|
self.assertIsInstance(spec.loader, zipimport.zipimporter)
|
||||||
|
|
||||||
def testZipImporterMethodsInSubDirectory(self):
|
def testZipImporterMethodsInSubDirectory(self):
|
||||||
packdir = TESTPACK + os.sep
|
packdir = TESTPACK + os.sep
|
||||||
packdir2 = packdir + TESTPACK2 + os.sep
|
packdir2 = packdir + TESTPACK2 + os.sep
|
||||||
|
|
|
||||||
|
|
@ -88,12 +88,8 @@ class zipimporter(_bootstrap_external._LoaderBasics):
|
||||||
raise ZipImportError('not a Zip file', path=path)
|
raise ZipImportError('not a Zip file', path=path)
|
||||||
break
|
break
|
||||||
|
|
||||||
try:
|
if path not in _zip_directory_cache:
|
||||||
files = _zip_directory_cache[path]
|
_zip_directory_cache[path] = _read_directory(path)
|
||||||
except KeyError:
|
|
||||||
files = _read_directory(path)
|
|
||||||
_zip_directory_cache[path] = files
|
|
||||||
self._files = files
|
|
||||||
self.archive = path
|
self.archive = path
|
||||||
# a prefix directory following the ZIP file path.
|
# a prefix directory following the ZIP file path.
|
||||||
self.prefix = _bootstrap_external._path_join(*prefix[::-1])
|
self.prefix = _bootstrap_external._path_join(*prefix[::-1])
|
||||||
|
|
@ -152,7 +148,7 @@ class zipimporter(_bootstrap_external._LoaderBasics):
|
||||||
key = pathname[len(self.archive + path_sep):]
|
key = pathname[len(self.archive + path_sep):]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
toc_entry = self._files[key]
|
toc_entry = self._get_files()[key]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
raise OSError(0, '', key)
|
raise OSError(0, '', key)
|
||||||
return _get_data(self.archive, toc_entry)
|
return _get_data(self.archive, toc_entry)
|
||||||
|
|
@ -189,7 +185,7 @@ class zipimporter(_bootstrap_external._LoaderBasics):
|
||||||
fullpath = f'{path}.py'
|
fullpath = f'{path}.py'
|
||||||
|
|
||||||
try:
|
try:
|
||||||
toc_entry = self._files[fullpath]
|
toc_entry = self._get_files()[fullpath]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
# we have the module, but no source
|
# we have the module, but no source
|
||||||
return None
|
return None
|
||||||
|
|
@ -268,14 +264,22 @@ class zipimporter(_bootstrap_external._LoaderBasics):
|
||||||
return ZipReader(self, fullname)
|
return ZipReader(self, fullname)
|
||||||
|
|
||||||
|
|
||||||
def invalidate_caches(self):
|
def _get_files(self):
|
||||||
"""Reload the file data of the archive path."""
|
"""Return the files within the archive path."""
|
||||||
try:
|
try:
|
||||||
self._files = _read_directory(self.archive)
|
files = _zip_directory_cache[self.archive]
|
||||||
_zip_directory_cache[self.archive] = self._files
|
except KeyError:
|
||||||
except ZipImportError:
|
try:
|
||||||
_zip_directory_cache.pop(self.archive, None)
|
files = _zip_directory_cache[self.archive] = _read_directory(self.archive)
|
||||||
self._files = {}
|
except ZipImportError:
|
||||||
|
files = {}
|
||||||
|
|
||||||
|
return files
|
||||||
|
|
||||||
|
|
||||||
|
def invalidate_caches(self):
|
||||||
|
"""Invalidates the cache of file data of the archive path."""
|
||||||
|
_zip_directory_cache.pop(self.archive, None)
|
||||||
|
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
|
|
@ -305,15 +309,15 @@ def _is_dir(self, path):
|
||||||
# of a namespace package. We test by seeing if the name, with an
|
# of a namespace package. We test by seeing if the name, with an
|
||||||
# appended path separator, exists.
|
# appended path separator, exists.
|
||||||
dirpath = path + path_sep
|
dirpath = path + path_sep
|
||||||
# If dirpath is present in self._files, we have a directory.
|
# If dirpath is present in self._get_files(), we have a directory.
|
||||||
return dirpath in self._files
|
return dirpath in self._get_files()
|
||||||
|
|
||||||
# Return some information about a module.
|
# Return some information about a module.
|
||||||
def _get_module_info(self, fullname):
|
def _get_module_info(self, fullname):
|
||||||
path = _get_module_path(self, fullname)
|
path = _get_module_path(self, fullname)
|
||||||
for suffix, isbytecode, ispackage in _zip_searchorder:
|
for suffix, isbytecode, ispackage in _zip_searchorder:
|
||||||
fullpath = path + suffix
|
fullpath = path + suffix
|
||||||
if fullpath in self._files:
|
if fullpath in self._get_files():
|
||||||
return ispackage
|
return ispackage
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
@ -656,7 +660,7 @@ def _get_mtime_and_size_of_source(self, path):
|
||||||
# strip 'c' or 'o' from *.py[co]
|
# strip 'c' or 'o' from *.py[co]
|
||||||
assert path[-1:] in ('c', 'o')
|
assert path[-1:] in ('c', 'o')
|
||||||
path = path[:-1]
|
path = path[:-1]
|
||||||
toc_entry = self._files[path]
|
toc_entry = self._get_files()[path]
|
||||||
# fetch the time stamp of the .py file for comparison
|
# fetch the time stamp of the .py file for comparison
|
||||||
# with an embedded pyc time stamp
|
# with an embedded pyc time stamp
|
||||||
time = toc_entry[5]
|
time = toc_entry[5]
|
||||||
|
|
@ -676,7 +680,7 @@ def _get_pyc_source(self, path):
|
||||||
path = path[:-1]
|
path = path[:-1]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
toc_entry = self._files[path]
|
toc_entry = self._get_files()[path]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
return None
|
return None
|
||||||
else:
|
else:
|
||||||
|
|
@ -692,7 +696,7 @@ def _get_module_code(self, fullname):
|
||||||
fullpath = path + suffix
|
fullpath = path + suffix
|
||||||
_bootstrap._verbose_message('trying {}{}{}', self.archive, path_sep, fullpath, verbosity=2)
|
_bootstrap._verbose_message('trying {}{}{}', self.archive, path_sep, fullpath, verbosity=2)
|
||||||
try:
|
try:
|
||||||
toc_entry = self._files[fullpath]
|
toc_entry = self._get_files()[fullpath]
|
||||||
except KeyError:
|
except KeyError:
|
||||||
pass
|
pass
|
||||||
else:
|
else:
|
||||||
|
|
|
||||||
|
|
@ -0,0 +1 @@
|
||||||
|
Fix cache repopulation semantics of zipimport.invalidate_caches(). The cache is now repopulated upon retrieving files with an invalid cache, not when the cache is invalidated.
|
||||||
Loading…
Add table
Add a link
Reference in a new issue