GH-110109: Speed up pathlib._PathBase.resolve() (#110412)

- Add fast path to `_split_stack()`
- Skip unnecessary resolution of the current directory when a relative
  path is given to `resolve()`
- Remove stat and target caches, which slow down most `resolve()` calls in
  practice.
- Slightly refactor code for clarity.
This commit is contained in:
Barney Gale 2023-11-17 16:58:17 +00:00 committed by GitHub
parent 25538c72d1
commit 9fb0f2dfee
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@ -1182,6 +1182,8 @@ class _PathBase(PurePath):
uppermost parent of the path (equivalent to path.parents[-1]), and uppermost parent of the path (equivalent to path.parents[-1]), and
*parts* is a reversed list of parts following the anchor. *parts* is a reversed list of parts following the anchor.
""" """
if not self._tail:
return self, []
return self._from_parsed_parts(self.drive, self.root, []), self._tail[::-1] return self._from_parsed_parts(self.drive, self.root, []), self._tail[::-1]
def resolve(self, strict=False): def resolve(self, strict=False):
@ -1191,18 +1193,16 @@ class _PathBase(PurePath):
""" """
if self._resolving: if self._resolving:
return self return self
path, parts = self._split_stack()
try: try:
path = self.absolute() path = path.absolute()
except UnsupportedOperation: except UnsupportedOperation:
path = self pass
# If the user has *not* overridden the `readlink()` method, then symlinks are unsupported # If the user has *not* overridden the `readlink()` method, then symlinks are unsupported
# and (in non-strict mode) we can improve performance by not calling `stat()`. # and (in non-strict mode) we can improve performance by not calling `stat()`.
querying = strict or getattr(self.readlink, '_supported', True) querying = strict or getattr(self.readlink, '_supported', True)
link_count = 0 link_count = 0
stat_cache = {}
target_cache = {}
path, parts = path._split_stack()
while parts: while parts:
part = parts.pop() part = parts.pop()
if part == '..': if part == '..':
@ -1214,40 +1214,35 @@ class _PathBase(PurePath):
# Delete '..' segment and its predecessor # Delete '..' segment and its predecessor
path = path.parent path = path.parent
continue continue
# Join the current part onto the path. next_path = path._make_child_relpath(part)
path_parent = path
path = path._make_child_relpath(part)
if querying and part != '..': if querying and part != '..':
path._resolving = True next_path._resolving = True
try: try:
st = stat_cache.get(path) st = next_path.stat(follow_symlinks=False)
if st is None:
st = stat_cache[path] = path.stat(follow_symlinks=False)
if S_ISLNK(st.st_mode): if S_ISLNK(st.st_mode):
# Like Linux and macOS, raise OSError(errno.ELOOP) if too many symlinks are # Like Linux and macOS, raise OSError(errno.ELOOP) if too many symlinks are
# encountered during resolution. # encountered during resolution.
link_count += 1 link_count += 1
if link_count >= _MAX_SYMLINKS: if link_count >= _MAX_SYMLINKS:
raise OSError(ELOOP, "Too many symbolic links in path", str(path)) raise OSError(ELOOP, "Too many symbolic links in path", str(self))
target = target_cache.get(path) target, target_parts = next_path.readlink()._split_stack()
if target is None:
target = target_cache[path] = path.readlink()
target, target_parts = target._split_stack()
# If the symlink target is absolute (like '/etc/hosts'), set the current # If the symlink target is absolute (like '/etc/hosts'), set the current
# path to its uppermost parent (like '/'). If not, the symlink target is # path to its uppermost parent (like '/').
# relative to the symlink parent, which we recorded earlier. if target.root:
path = target if target.root else path_parent path = target
# Add the symlink target's reversed tail parts (like ['hosts', 'etc']) to # Add the symlink target's reversed tail parts (like ['hosts', 'etc']) to
# the stack of unresolved path parts. # the stack of unresolved path parts.
parts.extend(target_parts) parts.extend(target_parts)
continue
elif parts and not S_ISDIR(st.st_mode): elif parts and not S_ISDIR(st.st_mode):
raise NotADirectoryError(ENOTDIR, "Not a directory", str(path)) raise NotADirectoryError(ENOTDIR, "Not a directory", str(self))
except OSError: except OSError:
if strict: if strict:
raise raise
else: else:
querying = False querying = False
path._resolving = False next_path._resolving = False
path = next_path
return path return path
def symlink_to(self, target, target_is_directory=False): def symlink_to(self, target, target_is_directory=False):