gh-103200: Fix performance issues with zipimport.invalidate_caches() (GH-103208)

Co-authored-by: blurb-it[bot] <43283697+blurb-it[bot]@users.noreply.github.com>
Co-authored-by: Brett Cannon <brett@python.org>
This commit is contained in:
Desmond Cheong 2023-07-07 15:02:13 -07:00 committed by GitHub
parent 6e6a4cd523
commit 1fb9bd222b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 67 additions and 25 deletions

View file

@@ -88,12 +88,8 @@ class zipimporter(_bootstrap_external._LoaderBasics):
raise ZipImportError('not a Zip file', path=path)
break
try:
files = _zip_directory_cache[path]
except KeyError:
files = _read_directory(path)
_zip_directory_cache[path] = files
self._files = files
if path not in _zip_directory_cache:
_zip_directory_cache[path] = _read_directory(path)
self.archive = path
# a prefix directory following the ZIP file path.
self.prefix = _bootstrap_external._path_join(*prefix[::-1])
@@ -152,7 +148,7 @@ class zipimporter(_bootstrap_external._LoaderBasics):
key = pathname[len(self.archive + path_sep):]
try:
toc_entry = self._files[key]
toc_entry = self._get_files()[key]
except KeyError:
raise OSError(0, '', key)
return _get_data(self.archive, toc_entry)
@@ -189,7 +185,7 @@ class zipimporter(_bootstrap_external._LoaderBasics):
fullpath = f'{path}.py'
try:
toc_entry = self._files[fullpath]
toc_entry = self._get_files()[fullpath]
except KeyError:
# we have the module, but no source
return None
@@ -268,14 +264,22 @@ class zipimporter(_bootstrap_external._LoaderBasics):
return ZipReader(self, fullname)
def invalidate_caches(self):
"""Reload the file data of the archive path."""
def _get_files(self):
"""Return the files within the archive path."""
try:
self._files = _read_directory(self.archive)
_zip_directory_cache[self.archive] = self._files
except ZipImportError:
_zip_directory_cache.pop(self.archive, None)
self._files = {}
files = _zip_directory_cache[self.archive]
except KeyError:
try:
files = _zip_directory_cache[self.archive] = _read_directory(self.archive)
except ZipImportError:
files = {}
return files
def invalidate_caches(self):
"""Invalidates the cache of file data of the archive path."""
_zip_directory_cache.pop(self.archive, None)
def __repr__(self):
@@ -305,15 +309,15 @@ def _is_dir(self, path):
# of a namespace package. We test by seeing if the name, with an
# appended path separator, exists.
dirpath = path + path_sep
# If dirpath is present in self._files, we have a directory.
return dirpath in self._files
# If dirpath is present in self._get_files(), we have a directory.
return dirpath in self._get_files()
# Return some information about a module.
def _get_module_info(self, fullname):
path = _get_module_path(self, fullname)
for suffix, isbytecode, ispackage in _zip_searchorder:
fullpath = path + suffix
if fullpath in self._files:
if fullpath in self._get_files():
return ispackage
return None
@@ -656,7 +660,7 @@ def _get_mtime_and_size_of_source(self, path):
# strip 'c' or 'o' from *.py[co]
assert path[-1:] in ('c', 'o')
path = path[:-1]
toc_entry = self._files[path]
toc_entry = self._get_files()[path]
# fetch the time stamp of the .py file for comparison
# with an embedded pyc time stamp
time = toc_entry[5]
@@ -676,7 +680,7 @@ def _get_pyc_source(self, path):
path = path[:-1]
try:
toc_entry = self._files[path]
toc_entry = self._get_files()[path]
except KeyError:
return None
else:
@@ -692,7 +696,7 @@ def _get_module_code(self, fullname):
fullpath = path + suffix
_bootstrap._verbose_message('trying {}{}{}', self.archive, path_sep, fullpath, verbosity=2)
try:
toc_entry = self._files[fullpath]
toc_entry = self._get_files()[fullpath]
except KeyError:
pass
else: