Add pathlib._abc.PathModuleBase (#113893)

Path modules provide a subset of the `os.path` API, specifically those
functions needed to provide `PurePathBase` functionality. Each
`PurePathBase` subclass references its path module via a `pathmod` class
attribute.

This commit adds a new `PathModuleBase` class, which provides abstract
methods that unconditionally raise `UnsupportedOperation`. An instance of
this class is assigned to `PurePathBase.pathmod`, replacing `posixpath`.
As a result, `PurePathBase` is no longer POSIX-y by default, and
all its methods raise `UnsupportedOperation` courtesy of `pathmod`.

Users who subclass `PurePathBase` or `PathBase` should choose the path
syntax by setting `pathmod` to `posixpath`, `ntpath`, `os.path`, or their
own subclass of `PathModuleBase`, as circumstances demand.
This commit is contained in:
Barney Gale 2024-01-14 21:49:53 +00:00 committed by GitHub
parent c2808431b3
commit ca6cf56330
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 182 additions and 59 deletions

View file

@ -33,6 +33,15 @@ __all__ = [
] ]
# Names that Windows treats as reserved. The Windows path naming reference is
# at https://learn.microsoft.com/en-gb/windows/win32/fileio/naming-a-file .
_WIN_RESERVED_NAMES = frozenset(
    ('CON', 'PRN', 'AUX', 'NUL', 'CONIN$', 'CONOUT$')
).union(
    # COM and LPT are suffixed with 1-9 or a superscript 1, 2 or 3.
    (f'COM{c}' for c in '123456789\xb9\xb2\xb3'),
    (f'LPT{c}' for c in '123456789\xb9\xb2\xb3'),
)
class _PathParents(Sequence): class _PathParents(Sequence):
"""This object provides sequence-like access to the logical ancestors """This object provides sequence-like access to the logical ancestors
of a path. Don't try to construct it yourself.""" of a path. Don't try to construct it yourself."""
@ -76,6 +85,10 @@ class PurePath(_abc.PurePathBase):
""" """
__slots__ = ( __slots__ = (
# The `_raw_paths` slot stores unnormalized string paths. This is set
# in the `__init__()` method.
'_raw_paths',
# The `_drv`, `_root` and `_tail_cached` slots store parsed and # The `_drv`, `_root` and `_tail_cached` slots store parsed and
# normalized parts of the path. They are set when any of the `drive`, # normalized parts of the path. They are set when any of the `drive`,
# `root` or `_tail` properties are accessed for the first time. The # `root` or `_tail` properties are accessed for the first time. The
@ -141,6 +154,26 @@ class PurePath(_abc.PurePathBase):
# Avoid calling super().__init__, as an optimisation # Avoid calling super().__init__, as an optimisation
self._raw_paths = paths self._raw_paths = paths
def joinpath(self, *pathsegments):
    """Return a new path built from this path followed by each of the given
    segments. If every segment is relative the result is a subpath of this
    one; an anchored segment yields a completely different path.
    """
    # This path object itself is passed as the leading segment.
    segments = (self, *pathsegments)
    return self.with_segments(*segments)
def __truediv__(self, key):
    # Implements `self / key`. A TypeError from with_segments() is turned
    # into NotImplemented so Python can try the reflected operation.
    try:
        joined = self.with_segments(self, key)
    except TypeError:
        joined = NotImplemented
    return joined
def __rtruediv__(self, key):
    # Implements `key / self`: the other operand becomes the leading
    # segment. A TypeError from with_segments() maps to NotImplemented.
    try:
        joined = self.with_segments(key, self)
    except TypeError:
        joined = NotImplemented
    return joined
def __reduce__(self): def __reduce__(self):
# Using the parts tuple helps share interned path parts # Using the parts tuple helps share interned path parts
# when pickling related paths. # when pickling related paths.
@ -386,6 +419,33 @@ class PurePath(_abc.PurePathBase):
other = self.with_segments(other) other = self.with_segments(other)
return other == self or other in self.parents return other == self or other in self.parents
def is_absolute(self):
    """Return whether the path is absolute, i.e. it has a root and (where
    the path syntax uses drives) a drive as well."""
    if self.pathmod is not posixpath:
        # General case: defer to the path module.
        return self.pathmod.isabs(self)
    # POSIX fast path: inspect the raw segments directly; a single segment
    # starting with a slash is enough.
    return any(path.startswith('/') for path in self._raw_paths)
def is_reserved(self):
    """Return True if the path's final component is one of the names the
    system reserves; always False when the path module is not ntpath."""
    if self.pathmod is not ntpath:
        return False
    if not self.name:
        return False
    # NOTE: the real rules for reserved names appear fairly involved
    # (e.g. r"..\NUL" is reserved but r"foo\NUL" is not when "foo" does
    # not exist). To stay on the safe side this may answer True for some
    # paths that Windows would not actually treat as reserved.
    if self.drive.startswith('\\\\'):
        # A UNC path is never reserved.
        return False
    # Keep only the text before the first '.', then before the first ':',
    # and ignore trailing spaces before the case-insensitive lookup.
    stem = self.name.partition('.')[0]
    device = stem.partition(':')[0]
    return device.rstrip(' ').upper() in _WIN_RESERVED_NAMES
def as_uri(self): def as_uri(self):
"""Return the path as a URI.""" """Return the path as a URI."""
if not self.is_absolute(): if not self.is_absolute():

View file

@ -12,7 +12,6 @@ resemble pathlib's PurePath and Path respectively.
""" """
import functools import functools
import posixpath
from errno import ENOENT, ENOTDIR, EBADF, ELOOP, EINVAL from errno import ENOENT, ENOTDIR, EBADF, ELOOP, EINVAL
from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO
@ -20,14 +19,6 @@ from stat import S_ISDIR, S_ISLNK, S_ISREG, S_ISSOCK, S_ISBLK, S_ISCHR, S_ISFIFO
# Internals # Internals
# #
# Names that Windows treats as reserved. For the Windows path naming rules see
# https://learn.microsoft.com/en-gb/windows/win32/fileio/naming-a-file .
_WIN_RESERVED_NAMES = frozenset({
    'CON', 'PRN', 'AUX', 'NUL', 'CONIN$', 'CONOUT$',
    # COM and LPT take a suffix of 1-9 or a superscript 1, 2 or 3.
    *(f'COM{c}' for c in '123456789\xb9\xb2\xb3'),
    *(f'LPT{c}' for c in '123456789\xb9\xb2\xb3'),
})
_WINERROR_NOT_READY = 21 # drive exists but is not accessible _WINERROR_NOT_READY = 21 # drive exists but is not accessible
_WINERROR_INVALID_NAME = 123 # fix for bpo-35306 _WINERROR_INVALID_NAME = 123 # fix for bpo-35306
_WINERROR_CANT_RESOLVE_FILENAME = 1921 # broken symlink pointing to itself _WINERROR_CANT_RESOLVE_FILENAME = 1921 # broken symlink pointing to itself
@ -144,6 +135,53 @@ class UnsupportedOperation(NotImplementedError):
pass pass
class PathModuleBase:
    """Abstract base for path modules, the objects that perform low-level
    path manipulation.

    A path module supplies the subset of the os.path API that PurePathBase
    needs for its functionality. A PurePathBase subclass refers to its path
    module through the 'pathmod' class attribute.

    Nothing is implemented here: every member raises UnsupportedOperation.
    """

    @classmethod
    def _unsupported(cls, attr):
        # Shared failure path; the message names the class and the member.
        message = f"{cls.__name__}.{attr} is unsupported"
        raise UnsupportedOperation(message)

    @property
    def sep(self):
        """The character that separates path components."""
        self._unsupported('sep')

    def join(self, path, *paths):
        """Join one or more path segments."""
        self._unsupported('join()')

    def split(self, path):
        """Split the path into a (head, tail) pair: *head* is everything
        before the final path separator and *tail* is everything after it.
        Either part may be empty.
        """
        self._unsupported('split()')

    def splitroot(self, path):
        """Split the path into a (drive, root, tail) 3-tuple: *drive* is a
        device name or mount point, *root* is a string of separators after
        the drive, and *tail* is everything after the root. Any part may be
        empty."""
        self._unsupported('splitroot()')

    def normcase(self, path):
        """Normalize the case of the path."""
        self._unsupported('normcase()')

    def isabs(self, path):
        """Return whether the path is absolute, i.e. unaffected by the
        current directory or drive."""
        self._unsupported('isabs()')
class PurePathBase: class PurePathBase:
"""Base class for pure path objects. """Base class for pure path objects.
@ -154,19 +192,19 @@ class PurePathBase:
""" """
__slots__ = ( __slots__ = (
# The `_raw_paths` slot stores unnormalized string paths. This is set # The `_raw_path` slot stores a joined string path. This is set in the
# in the `__init__()` method. # `__init__()` method.
'_raw_paths', '_raw_path',
# The '_resolving' slot stores a boolean indicating whether the path # The '_resolving' slot stores a boolean indicating whether the path
# is being processed by `PathBase.resolve()`. This prevents duplicate # is being processed by `PathBase.resolve()`. This prevents duplicate
# work from occurring when `resolve()` calls `stat()` or `readlink()`. # work from occurring when `resolve()` calls `stat()` or `readlink()`.
'_resolving', '_resolving',
) )
pathmod = posixpath pathmod = PathModuleBase()
def __init__(self, *paths): def __init__(self, path, *paths):
self._raw_paths = paths self._raw_path = self.pathmod.join(path, *paths) if paths else path
self._resolving = False self._resolving = False
def with_segments(self, *pathsegments): def with_segments(self, *pathsegments):
@ -176,11 +214,6 @@ class PurePathBase:
""" """
return type(self)(*pathsegments) return type(self)(*pathsegments)
@property
def _raw_path(self):
    """The raw path segments joined by the path module, without any
    normalization."""
    join = self.pathmod.join
    return join(*self._raw_paths)
def __str__(self): def __str__(self):
"""Return the string representation of the path, suitable for """Return the string representation of the path, suitable for
passing to system calls.""" passing to system calls."""
@ -194,7 +227,7 @@ class PurePathBase:
@property @property
def drive(self): def drive(self):
"""The drive prefix (letter or UNC path), if any.""" """The drive prefix (letter or UNC path), if any."""
return self.pathmod.splitdrive(self._raw_path)[0] return self.pathmod.splitroot(self._raw_path)[0]
@property @property
def root(self): def root(self):
@ -210,7 +243,7 @@ class PurePathBase:
@property @property
def name(self): def name(self):
"""The final path component, if any.""" """The final path component, if any."""
return self.pathmod.basename(self._raw_path) return self.pathmod.split(self._raw_path)[1]
@property @property
def suffix(self): def suffix(self):
@ -251,10 +284,10 @@ class PurePathBase:
def with_name(self, name): def with_name(self, name):
"""Return a new path with the file name changed.""" """Return a new path with the file name changed."""
dirname = self.pathmod.dirname split = self.pathmod.split
if dirname(name): if split(name)[0]:
raise ValueError(f"Invalid name {name!r}") raise ValueError(f"Invalid name {name!r}")
return self.with_segments(dirname(self._raw_path), name) return self.with_segments(split(self._raw_path)[0], name)
def with_stem(self, stem): def with_stem(self, stem):
"""Return a new path with the stem changed.""" """Return a new path with the stem changed."""
@ -336,17 +369,17 @@ class PurePathBase:
paths) or a totally different path (if one of the arguments is paths) or a totally different path (if one of the arguments is
anchored). anchored).
""" """
return self.with_segments(*self._raw_paths, *pathsegments) return self.with_segments(self._raw_path, *pathsegments)
def __truediv__(self, key): def __truediv__(self, key):
try: try:
return self.joinpath(key) return self.with_segments(self._raw_path, key)
except TypeError: except TypeError:
return NotImplemented return NotImplemented
def __rtruediv__(self, key): def __rtruediv__(self, key):
try: try:
return self.with_segments(key, *self._raw_paths) return self.with_segments(key, self._raw_path)
except TypeError: except TypeError:
return NotImplemented return NotImplemented
@ -371,7 +404,7 @@ class PurePathBase:
def parent(self): def parent(self):
"""The logical parent of the path.""" """The logical parent of the path."""
path = self._raw_path path = self._raw_path
parent = self.pathmod.dirname(path) parent = self.pathmod.split(path)[0]
if path != parent: if path != parent:
parent = self.with_segments(parent) parent = self.with_segments(parent)
parent._resolving = self._resolving parent._resolving = self._resolving
@ -381,43 +414,20 @@ class PurePathBase:
@property @property
def parents(self): def parents(self):
"""A sequence of this path's logical parents.""" """A sequence of this path's logical parents."""
dirname = self.pathmod.dirname split = self.pathmod.split
path = self._raw_path path = self._raw_path
parent = dirname(path) parent = split(path)[0]
parents = [] parents = []
while path != parent: while path != parent:
parents.append(self.with_segments(parent)) parents.append(self.with_segments(parent))
path = parent path = parent
parent = dirname(path) parent = split(path)[0]
return tuple(parents) return tuple(parents)
def is_absolute(self): def is_absolute(self):
"""True if the path is absolute (has both a root and, if applicable, """True if the path is absolute (has both a root and, if applicable,
a drive).""" a drive)."""
if self.pathmod is posixpath: return self.pathmod.isabs(self._raw_path)
# Optimization: work with raw paths on POSIX.
for path in self._raw_paths:
if path.startswith('/'):
return True
return False
else:
return self.pathmod.isabs(self._raw_path)
def is_reserved(self):
    """Return True if the path's final component is one of the names the
    system reserves; always False when the path module is posixpath."""
    if self.pathmod is posixpath:
        return False
    if not self.name:
        return False
    # NOTE: the real rules for reserved names appear fairly involved
    # (e.g. r"..\NUL" is reserved but r"foo\NUL" is not when "foo" does
    # not exist). To stay on the safe side this may answer True for some
    # paths that Windows would not actually treat as reserved.
    if self.drive.startswith('\\\\'):
        # A UNC path is never reserved.
        return False
    # Keep only the text before the first '.', then before the first ':',
    # and ignore trailing spaces before the case-insensitive lookup.
    stem = self.name.partition('.')[0]
    device = stem.partition(':')[0]
    return device.rstrip(' ').upper() in _WIN_RESERVED_NAMES
def match(self, path_pattern, *, case_sensitive=None): def match(self, path_pattern, *, case_sensitive=None):
""" """
@ -726,7 +736,7 @@ class PathBase(PurePathBase):
raise ValueError("Unacceptable pattern: {!r}".format(pattern)) raise ValueError("Unacceptable pattern: {!r}".format(pattern))
pattern_parts = list(path_pattern.parts) pattern_parts = list(path_pattern.parts)
if not self.pathmod.basename(pattern): if not self.pathmod.split(pattern)[1]:
# GH-65238: pathlib doesn't preserve trailing slash. Add it back. # GH-65238: pathlib doesn't preserve trailing slash. Add it back.
pattern_parts.append('') pattern_parts.append('')

View file

@ -1151,6 +1151,7 @@ class PathTest(test_pathlib_abc.DummyPathTest, PurePathTest):
def test_matches_pathbase_api(self): def test_matches_pathbase_api(self):
our_names = {name for name in dir(self.cls) if name[0] != '_'} our_names = {name for name in dir(self.cls) if name[0] != '_'}
our_names.remove('is_reserved') # only present in PurePath
path_names = {name for name in dir(pathlib._abc.PathBase) if name[0] != '_'} path_names = {name for name in dir(pathlib._abc.PathBase) if name[0] != '_'}
self.assertEqual(our_names, path_names) self.assertEqual(our_names, path_names)
for attr_name in our_names: for attr_name in our_names:

View file

@ -5,7 +5,7 @@ import errno
import stat import stat
import unittest import unittest
from pathlib._abc import UnsupportedOperation, PurePathBase, PathBase from pathlib._abc import UnsupportedOperation, PathModuleBase, PurePathBase, PathBase
import posixpath import posixpath
from test.support.os_helper import TESTFN from test.support.os_helper import TESTFN
@ -17,6 +17,20 @@ class UnsupportedOperationTest(unittest.TestCase):
self.assertTrue(isinstance(UnsupportedOperation(), NotImplementedError)) self.assertTrue(isinstance(UnsupportedOperation(), NotImplementedError))
class PathModuleBaseTest(unittest.TestCase):
    cls = PathModuleBase

    def test_unsupported_operation(self):
        # Every member of the base path module must raise
        # UnsupportedOperation: the `sep` property on access, and each
        # method when called with a path argument.
        pathmod = self.cls()
        unsupported = UnsupportedOperation
        with self.assertRaises(unsupported):
            pathmod.sep
        for method_name in ('join', 'split', 'splitroot', 'normcase', 'isabs'):
            with self.assertRaises(unsupported):
                getattr(pathmod, method_name)('foo')
# #
# Tests for the pure classes. # Tests for the pure classes.
# #
@ -25,6 +39,42 @@ class UnsupportedOperationTest(unittest.TestCase):
class PurePathBaseTest(unittest.TestCase): class PurePathBaseTest(unittest.TestCase):
cls = PurePathBase cls = PurePathBase
def test_unsupported_operation_pure(self):
    # Each pure-path operation exercised below is expected to raise
    # UnsupportedOperation on an instance of self.cls.
    path = self.cls('foo')
    unsupported = UnsupportedOperation

    # Properties: merely reading them must raise.
    property_names = (
        'drive', 'root', 'anchor', 'parts', 'parent', 'parents',
        'name', 'stem', 'suffix', 'suffixes',
    )
    for property_name in property_names:
        with self.assertRaises(unsupported):
            getattr(path, property_name)

    # The division operator, in both operand orders.
    with self.assertRaises(unsupported):
        path / 'bar'
    with self.assertRaises(unsupported):
        'bar' / path

    # Methods, each invoked with the listed positional arguments.
    method_calls = (
        ('joinpath', ('bar',)),
        ('with_name', ('bar',)),
        ('with_stem', ('bar',)),
        ('with_suffix', ('.txt',)),
        ('relative_to', ('',)),
        ('is_relative_to', ('',)),
        ('is_absolute', ()),
        ('match', ('*',)),
    )
    for method_name, args in method_calls:
        with self.assertRaises(unsupported):
            getattr(path, method_name)(*args)
def test_magic_methods(self): def test_magic_methods(self):
P = self.cls P = self.cls
self.assertFalse(hasattr(P, '__fspath__')) self.assertFalse(hasattr(P, '__fspath__'))
@ -39,11 +89,12 @@ class PurePathBaseTest(unittest.TestCase):
self.assertIs(P.__ge__, object.__ge__) self.assertIs(P.__ge__, object.__ge__)
def test_pathmod(self): def test_pathmod(self):
self.assertIs(self.cls.pathmod, posixpath) self.assertIsInstance(self.cls.pathmod, PathModuleBase)
class DummyPurePath(PurePathBase): class DummyPurePath(PurePathBase):
__slots__ = () __slots__ = ()
pathmod = posixpath
def __eq__(self, other): def __eq__(self, other):
if not isinstance(other, DummyPurePath): if not isinstance(other, DummyPurePath):
@ -669,6 +720,7 @@ class DummyPath(PathBase):
memory. memory.
""" """
__slots__ = () __slots__ = ()
pathmod = posixpath
_files = {} _files = {}
_directories = {} _directories = {}