mirror of
https://github.com/python/cpython.git
synced 2025-08-02 16:13:13 +00:00
GH-101362: Omit path anchor from pathlib.PurePath()._parts (GH-102476)
Improve performance of path construction by skipping the addition of the path anchor (`drive + root`) to the internal `_parts` list. Rename this attribute to `_tail` for clarity.
This commit is contained in:
parent
0a675f4bb5
commit
2c673d5e93
3 changed files with 108 additions and 67 deletions
171
Lib/pathlib.py
171
Lib/pathlib.py
|
@ -210,20 +210,17 @@ class _RecursiveWildcardSelector(_Selector):
|
||||||
class _PathParents(Sequence):
|
class _PathParents(Sequence):
|
||||||
"""This object provides sequence-like access to the logical ancestors
|
"""This object provides sequence-like access to the logical ancestors
|
||||||
of a path. Don't try to construct it yourself."""
|
of a path. Don't try to construct it yourself."""
|
||||||
__slots__ = ('_pathcls', '_drv', '_root', '_parts')
|
__slots__ = ('_pathcls', '_drv', '_root', '_tail')
|
||||||
|
|
||||||
def __init__(self, path):
|
def __init__(self, path):
|
||||||
# We don't store the instance to avoid reference cycles
|
# We don't store the instance to avoid reference cycles
|
||||||
self._pathcls = type(path)
|
self._pathcls = type(path)
|
||||||
self._drv = path.drive
|
self._drv = path.drive
|
||||||
self._root = path.root
|
self._root = path.root
|
||||||
self._parts = path._parts
|
self._tail = path._tail
|
||||||
|
|
||||||
def __len__(self):
|
def __len__(self):
|
||||||
if self._drv or self._root:
|
return len(self._tail)
|
||||||
return len(self._parts) - 1
|
|
||||||
else:
|
|
||||||
return len(self._parts)
|
|
||||||
|
|
||||||
def __getitem__(self, idx):
|
def __getitem__(self, idx):
|
||||||
if isinstance(idx, slice):
|
if isinstance(idx, slice):
|
||||||
|
@ -234,7 +231,7 @@ class _PathParents(Sequence):
|
||||||
if idx < 0:
|
if idx < 0:
|
||||||
idx += len(self)
|
idx += len(self)
|
||||||
return self._pathcls._from_parsed_parts(self._drv, self._root,
|
return self._pathcls._from_parsed_parts(self._drv, self._root,
|
||||||
self._parts[:-idx - 1])
|
self._tail[:-idx - 1])
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return "<{}.parents>".format(self._pathcls.__name__)
|
return "<{}.parents>".format(self._pathcls.__name__)
|
||||||
|
@ -249,9 +246,41 @@ class PurePath(object):
|
||||||
PureWindowsPath object. You can also instantiate either of these classes
|
PureWindowsPath object. You can also instantiate either of these classes
|
||||||
directly, regardless of your system.
|
directly, regardless of your system.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
__slots__ = (
|
__slots__ = (
|
||||||
'_raw_path', '_drv', '_root', '_parts_cached',
|
# The `_raw_path` slot stores an unnormalized string path. This is set
|
||||||
'_str', '_hash', '_parts_tuple', '_parts_normcase_cached',
|
# in the `__init__()` method.
|
||||||
|
'_raw_path',
|
||||||
|
|
||||||
|
# The `_drv`, `_root` and `_tail_cached` slots store parsed and
|
||||||
|
# normalized parts of the path. They are set when any of the `drive`,
|
||||||
|
# `root` or `_tail` properties are accessed for the first time. The
|
||||||
|
# three-part division corresponds to the result of
|
||||||
|
# `os.path.splitroot()`, except that the tail is further split on path
|
||||||
|
# separators (i.e. it is a list of strings), and that the root and
|
||||||
|
# tail are normalized.
|
||||||
|
'_drv', '_root', '_tail_cached',
|
||||||
|
|
||||||
|
# The `_str` slot stores the string representation of the path,
|
||||||
|
# computed from the drive, root and tail when `__str__()` is called
|
||||||
|
# for the first time. It's used to implement `_str_normcase`.
|
||||||
|
'_str',
|
||||||
|
|
||||||
|
# The `_str_normcase_cached` slot stores the string path with
|
||||||
|
# normalized case. It is set when the `_str_normcase` property is
|
||||||
|
# accessed for the first time. It's used to implement `__eq__()`,
|
||||||
|
# `__hash__()`, and `_parts_normcase`.
|
||||||
|
'_str_normcase_cached',
|
||||||
|
|
||||||
|
# The `_parts_normcase_cached` slot stores the case-normalized
|
||||||
|
# string path after splitting on path separators. It's set when the
|
||||||
|
# `_parts_normcase` property is accessed for the first time. It's used
|
||||||
|
# to implement comparison methods like `__lt__()`.
|
||||||
|
'_parts_normcase_cached',
|
||||||
|
|
||||||
|
# The `_hash` slot stores the hash of the case-normalized string
|
||||||
|
# path. It's set when `__hash__()` is called for the first time.
|
||||||
|
'_hash',
|
||||||
)
|
)
|
||||||
_flavour = os.path
|
_flavour = os.path
|
||||||
|
|
||||||
|
@ -277,10 +306,7 @@ class PurePath(object):
|
||||||
path = os.fspath(args[0])
|
path = os.fspath(args[0])
|
||||||
else:
|
else:
|
||||||
path = self._flavour.join(*args)
|
path = self._flavour.join(*args)
|
||||||
if isinstance(path, str):
|
if not isinstance(path, str):
|
||||||
# Force-cast str subclasses to str (issue #21127)
|
|
||||||
path = str(path)
|
|
||||||
else:
|
|
||||||
raise TypeError(
|
raise TypeError(
|
||||||
"argument should be a str or an os.PathLike "
|
"argument should be a str or an os.PathLike "
|
||||||
"object where __fspath__ returns a str, "
|
"object where __fspath__ returns a str, "
|
||||||
|
@ -299,33 +325,32 @@ class PurePath(object):
|
||||||
if drv.startswith(sep):
|
if drv.startswith(sep):
|
||||||
# pathlib assumes that UNC paths always have a root.
|
# pathlib assumes that UNC paths always have a root.
|
||||||
root = sep
|
root = sep
|
||||||
unfiltered_parsed = [drv + root] + rel.split(sep)
|
parsed = [sys.intern(str(x)) for x in rel.split(sep) if x and x != '.']
|
||||||
parsed = [sys.intern(x) for x in unfiltered_parsed if x and x != '.']
|
|
||||||
return drv, root, parsed
|
return drv, root, parsed
|
||||||
|
|
||||||
def _load_parts(self):
|
def _load_parts(self):
|
||||||
drv, root, parts = self._parse_path(self._raw_path)
|
drv, root, tail = self._parse_path(self._raw_path)
|
||||||
self._drv = drv
|
self._drv = drv
|
||||||
self._root = root
|
self._root = root
|
||||||
self._parts_cached = parts
|
self._tail_cached = tail
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _from_parsed_parts(cls, drv, root, parts):
|
def _from_parsed_parts(cls, drv, root, tail):
|
||||||
path = cls._format_parsed_parts(drv, root, parts)
|
path = cls._format_parsed_parts(drv, root, tail)
|
||||||
self = cls(path)
|
self = cls(path)
|
||||||
self._str = path or '.'
|
self._str = path or '.'
|
||||||
self._drv = drv
|
self._drv = drv
|
||||||
self._root = root
|
self._root = root
|
||||||
self._parts_cached = parts
|
self._tail_cached = tail
|
||||||
return self
|
return self
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _format_parsed_parts(cls, drv, root, parts):
|
def _format_parsed_parts(cls, drv, root, tail):
|
||||||
if drv or root:
|
if drv or root:
|
||||||
return drv + root + cls._flavour.sep.join(parts[1:])
|
return drv + root + cls._flavour.sep.join(tail)
|
||||||
elif parts and cls._flavour.splitdrive(parts[0])[0]:
|
elif tail and cls._flavour.splitdrive(tail[0])[0]:
|
||||||
parts = ['.'] + parts
|
tail = ['.'] + tail
|
||||||
return cls._flavour.sep.join(parts)
|
return cls._flavour.sep.join(tail)
|
||||||
|
|
||||||
def __str__(self):
|
def __str__(self):
|
||||||
"""Return the string representation of the path, suitable for
|
"""Return the string representation of the path, suitable for
|
||||||
|
@ -334,7 +359,7 @@ class PurePath(object):
|
||||||
return self._str
|
return self._str
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
self._str = self._format_parsed_parts(self.drive, self.root,
|
self._str = self._format_parsed_parts(self.drive, self.root,
|
||||||
self._parts) or '.'
|
self._tail) or '.'
|
||||||
return self._str
|
return self._str
|
||||||
|
|
||||||
def __fspath__(self):
|
def __fspath__(self):
|
||||||
|
@ -374,25 +399,34 @@ class PurePath(object):
|
||||||
path = str(self)
|
path = str(self)
|
||||||
return prefix + urlquote_from_bytes(os.fsencode(path))
|
return prefix + urlquote_from_bytes(os.fsencode(path))
|
||||||
|
|
||||||
|
@property
|
||||||
|
def _str_normcase(self):
|
||||||
|
# String with normalized case, for hashing and equality checks
|
||||||
|
try:
|
||||||
|
return self._str_normcase_cached
|
||||||
|
except AttributeError:
|
||||||
|
self._str_normcase_cached = self._flavour.normcase(str(self))
|
||||||
|
return self._str_normcase_cached
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def _parts_normcase(self):
|
def _parts_normcase(self):
|
||||||
# Cached parts with normalized case, for hashing and comparison.
|
# Cached parts with normalized case, for comparisons.
|
||||||
try:
|
try:
|
||||||
return self._parts_normcase_cached
|
return self._parts_normcase_cached
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
self._parts_normcase_cached = [self._flavour.normcase(p) for p in self._parts]
|
self._parts_normcase_cached = self._str_normcase.split(self._flavour.sep)
|
||||||
return self._parts_normcase_cached
|
return self._parts_normcase_cached
|
||||||
|
|
||||||
def __eq__(self, other):
|
def __eq__(self, other):
|
||||||
if not isinstance(other, PurePath):
|
if not isinstance(other, PurePath):
|
||||||
return NotImplemented
|
return NotImplemented
|
||||||
return self._parts_normcase == other._parts_normcase and self._flavour is other._flavour
|
return self._str_normcase == other._str_normcase and self._flavour is other._flavour
|
||||||
|
|
||||||
def __hash__(self):
|
def __hash__(self):
|
||||||
try:
|
try:
|
||||||
return self._hash
|
return self._hash
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
self._hash = hash(tuple(self._parts_normcase))
|
self._hash = hash(self._str_normcase)
|
||||||
return self._hash
|
return self._hash
|
||||||
|
|
||||||
def __lt__(self, other):
|
def __lt__(self, other):
|
||||||
|
@ -434,12 +468,12 @@ class PurePath(object):
|
||||||
return self._root
|
return self._root
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def _parts(self):
|
def _tail(self):
|
||||||
try:
|
try:
|
||||||
return self._parts_cached
|
return self._tail_cached
|
||||||
except AttributeError:
|
except AttributeError:
|
||||||
self._load_parts()
|
self._load_parts()
|
||||||
return self._parts_cached
|
return self._tail_cached
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def anchor(self):
|
def anchor(self):
|
||||||
|
@ -450,10 +484,10 @@ class PurePath(object):
|
||||||
@property
|
@property
|
||||||
def name(self):
|
def name(self):
|
||||||
"""The final path component, if any."""
|
"""The final path component, if any."""
|
||||||
parts = self._parts
|
tail = self._tail
|
||||||
if len(parts) == (1 if (self.drive or self.root) else 0):
|
if not tail:
|
||||||
return ''
|
return ''
|
||||||
return parts[-1]
|
return tail[-1]
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def suffix(self):
|
def suffix(self):
|
||||||
|
@ -501,7 +535,7 @@ class PurePath(object):
|
||||||
if drv or root or not tail or f.sep in tail or (f.altsep and f.altsep in tail):
|
if drv or root or not tail or f.sep in tail or (f.altsep and f.altsep in tail):
|
||||||
raise ValueError("Invalid name %r" % (name))
|
raise ValueError("Invalid name %r" % (name))
|
||||||
return self._from_parsed_parts(self.drive, self.root,
|
return self._from_parsed_parts(self.drive, self.root,
|
||||||
self._parts[:-1] + [name])
|
self._tail[:-1] + [name])
|
||||||
|
|
||||||
def with_stem(self, stem):
|
def with_stem(self, stem):
|
||||||
"""Return a new path with the stem changed."""
|
"""Return a new path with the stem changed."""
|
||||||
|
@ -526,7 +560,7 @@ class PurePath(object):
|
||||||
else:
|
else:
|
||||||
name = name[:-len(old_suffix)] + suffix
|
name = name[:-len(old_suffix)] + suffix
|
||||||
return self._from_parsed_parts(self.drive, self.root,
|
return self._from_parsed_parts(self.drive, self.root,
|
||||||
self._parts[:-1] + [name])
|
self._tail[:-1] + [name])
|
||||||
|
|
||||||
def relative_to(self, other, /, *_deprecated, walk_up=False):
|
def relative_to(self, other, /, *_deprecated, walk_up=False):
|
||||||
"""Return the relative path to another path identified by the passed
|
"""Return the relative path to another path identified by the passed
|
||||||
|
@ -551,7 +585,7 @@ class PurePath(object):
|
||||||
raise ValueError(f"{str(self)!r} and {str(other)!r} have different anchors")
|
raise ValueError(f"{str(self)!r} and {str(other)!r} have different anchors")
|
||||||
if step and not walk_up:
|
if step and not walk_up:
|
||||||
raise ValueError(f"{str(self)!r} is not in the subpath of {str(other)!r}")
|
raise ValueError(f"{str(self)!r} is not in the subpath of {str(other)!r}")
|
||||||
parts = ('..',) * step + self.parts[len(path.parts):]
|
parts = ['..'] * step + self._tail[len(path._tail):]
|
||||||
return path_cls(*parts)
|
return path_cls(*parts)
|
||||||
|
|
||||||
def is_relative_to(self, other, /, *_deprecated):
|
def is_relative_to(self, other, /, *_deprecated):
|
||||||
|
@ -570,13 +604,10 @@ class PurePath(object):
|
||||||
def parts(self):
|
def parts(self):
|
||||||
"""An object providing sequence-like access to the
|
"""An object providing sequence-like access to the
|
||||||
components in the filesystem path."""
|
components in the filesystem path."""
|
||||||
# We cache the tuple to avoid building a new one each time .parts
|
if self.drive or self.root:
|
||||||
# is accessed. XXX is this necessary?
|
return (self.drive + self.root,) + tuple(self._tail)
|
||||||
try:
|
else:
|
||||||
return self._parts_tuple
|
return tuple(self._tail)
|
||||||
except AttributeError:
|
|
||||||
self._parts_tuple = tuple(self._parts)
|
|
||||||
return self._parts_tuple
|
|
||||||
|
|
||||||
def joinpath(self, *args):
|
def joinpath(self, *args):
|
||||||
"""Combine this path with one or several arguments, and return a
|
"""Combine this path with one or several arguments, and return a
|
||||||
|
@ -603,10 +634,10 @@ class PurePath(object):
|
||||||
"""The logical parent of the path."""
|
"""The logical parent of the path."""
|
||||||
drv = self.drive
|
drv = self.drive
|
||||||
root = self.root
|
root = self.root
|
||||||
parts = self._parts
|
tail = self._tail
|
||||||
if len(parts) == 1 and (drv or root):
|
if not tail:
|
||||||
return self
|
return self
|
||||||
return self._from_parsed_parts(drv, root, parts[:-1])
|
return self._from_parsed_parts(drv, root, tail[:-1])
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def parents(self):
|
def parents(self):
|
||||||
|
@ -624,29 +655,29 @@ class PurePath(object):
|
||||||
def is_reserved(self):
|
def is_reserved(self):
|
||||||
"""Return True if the path contains one of the special names reserved
|
"""Return True if the path contains one of the special names reserved
|
||||||
by the system, if any."""
|
by the system, if any."""
|
||||||
if self._flavour is posixpath or not self._parts:
|
if self._flavour is posixpath or not self._tail:
|
||||||
return False
|
return False
|
||||||
|
|
||||||
# NOTE: the rules for reserved names seem somewhat complicated
|
# NOTE: the rules for reserved names seem somewhat complicated
|
||||||
# (e.g. r"..\NUL" is reserved but not r"foo\NUL" if "foo" does not
|
# (e.g. r"..\NUL" is reserved but not r"foo\NUL" if "foo" does not
|
||||||
# exist). We err on the side of caution and return True for paths
|
# exist). We err on the side of caution and return True for paths
|
||||||
# which are not considered reserved by Windows.
|
# which are not considered reserved by Windows.
|
||||||
if self._parts[0].startswith('\\\\'):
|
if self.drive.startswith('\\\\'):
|
||||||
# UNC paths are never reserved.
|
# UNC paths are never reserved.
|
||||||
return False
|
return False
|
||||||
name = self._parts[-1].partition('.')[0].partition(':')[0].rstrip(' ')
|
name = self._tail[-1].partition('.')[0].partition(':')[0].rstrip(' ')
|
||||||
return name.upper() in _WIN_RESERVED_NAMES
|
return name.upper() in _WIN_RESERVED_NAMES
|
||||||
|
|
||||||
def match(self, path_pattern):
|
def match(self, path_pattern):
|
||||||
"""
|
"""
|
||||||
Return True if this path matches the given pattern.
|
Return True if this path matches the given pattern.
|
||||||
"""
|
"""
|
||||||
path_pattern = self._flavour.normcase(path_pattern)
|
pat = type(self)(path_pattern)
|
||||||
drv, root, pat_parts = self._parse_path(path_pattern)
|
if not pat.parts:
|
||||||
if not pat_parts:
|
|
||||||
raise ValueError("empty pattern")
|
raise ValueError("empty pattern")
|
||||||
|
pat_parts = pat._parts_normcase
|
||||||
parts = self._parts_normcase
|
parts = self._parts_normcase
|
||||||
if drv or root:
|
if pat.drive or pat.root:
|
||||||
if len(pat_parts) != len(parts):
|
if len(pat_parts) != len(parts):
|
||||||
return False
|
return False
|
||||||
elif len(pat_parts) > len(parts):
|
elif len(pat_parts) > len(parts):
|
||||||
|
@ -707,11 +738,21 @@ class Path(PurePath):
|
||||||
cls = WindowsPath if os.name == 'nt' else PosixPath
|
cls = WindowsPath if os.name == 'nt' else PosixPath
|
||||||
return object.__new__(cls)
|
return object.__new__(cls)
|
||||||
|
|
||||||
def _make_child_relpath(self, part):
|
def _make_child_relpath(self, name):
|
||||||
# This is an optimization used for dir walking. `part` must be
|
path_str = str(self)
|
||||||
# a single part relative to this path.
|
tail = self._tail
|
||||||
parts = self._parts + [part]
|
if tail:
|
||||||
return self._from_parsed_parts(self.drive, self.root, parts)
|
path_str = f'{path_str}{self._flavour.sep}{name}'
|
||||||
|
elif path_str != '.':
|
||||||
|
path_str = f'{path_str}{name}'
|
||||||
|
else:
|
||||||
|
path_str = name
|
||||||
|
path = type(self)(path_str)
|
||||||
|
path._str = path_str
|
||||||
|
path._drv = self.drive
|
||||||
|
path._root = self.root
|
||||||
|
path._tail_cached = tail + [name]
|
||||||
|
return path
|
||||||
|
|
||||||
def __enter__(self):
|
def __enter__(self):
|
||||||
# In previous versions of pathlib, __exit__() marked this path as
|
# In previous versions of pathlib, __exit__() marked this path as
|
||||||
|
@ -1196,12 +1237,12 @@ class Path(PurePath):
|
||||||
(as returned by os.path.expanduser)
|
(as returned by os.path.expanduser)
|
||||||
"""
|
"""
|
||||||
if (not (self.drive or self.root) and
|
if (not (self.drive or self.root) and
|
||||||
self._parts and self._parts[0][:1] == '~'):
|
self._tail and self._tail[0][:1] == '~'):
|
||||||
homedir = self._flavour.expanduser(self._parts[0])
|
homedir = self._flavour.expanduser(self._tail[0])
|
||||||
if homedir[:1] == "~":
|
if homedir[:1] == "~":
|
||||||
raise RuntimeError("Could not determine home directory.")
|
raise RuntimeError("Could not determine home directory.")
|
||||||
drv, root, parts = self._parse_path(homedir)
|
drv, root, tail = self._parse_path(homedir)
|
||||||
return self._from_parsed_parts(drv, root, parts + self._parts[1:])
|
return self._from_parsed_parts(drv, root, tail + self._tail[1:])
|
||||||
|
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
|
|
@ -346,8 +346,6 @@ class _BasePurePathTest(object):
|
||||||
p = P('a/b')
|
p = P('a/b')
|
||||||
parts = p.parts
|
parts = p.parts
|
||||||
self.assertEqual(parts, ('a', 'b'))
|
self.assertEqual(parts, ('a', 'b'))
|
||||||
# The object gets reused.
|
|
||||||
self.assertIs(parts, p.parts)
|
|
||||||
# When the path is absolute, the anchor is a separate part.
|
# When the path is absolute, the anchor is a separate part.
|
||||||
p = P('/a/b')
|
p = P('/a/b')
|
||||||
parts = p.parts
|
parts = p.parts
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
Speed up :class:`pathlib.Path` construction by omitting the path anchor from
|
||||||
|
the internal list of path parts.
|
Loading…
Add table
Add a link
Reference in a new issue