GH-82805: Fix handling of single-dot file extensions in pathlib (#118952)

pathlib now treats "`.`" as a valid file extension (suffix). This brings
it in line with `os.path.splitext()`.

In the (private) pathlib ABCs, we add a new `ParserBase.splitext()` method
that splits a path into a `(root, ext)` pair, like `os.path.splitext()`.
This method is called by `PurePathBase.stem`, `suffix`, etc. In a future
version of pathlib, we might make these base classes public, and so users
will be able to define their own `splitext()` method to control file
extension splitting.

In `pathlib.PurePath` we add optimised `stem`, `suffix` and `suffixes`
properties that don't use `splitext()`, which avoids computing the path
base name twice.
This commit is contained in:
Barney Gale 2024-05-25 21:01:36 +01:00 committed by GitHub
parent 0c5ebe13e9
commit e418fc3a6e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 101 additions and 35 deletions

View file

@ -449,6 +449,10 @@ Pure paths provide the following methods and properties:
This is commonly called the file extension. This is commonly called the file extension.
.. versionchanged:: 3.14
A single dot ("``.``") is considered a valid suffix.
.. attribute:: PurePath.suffixes .. attribute:: PurePath.suffixes
A list of the path's suffixes, often called file extensions:: A list of the path's suffixes, often called file extensions::
@ -460,6 +464,10 @@ Pure paths provide the following methods and properties:
>>> PurePosixPath('my/library').suffixes >>> PurePosixPath('my/library').suffixes
[] []
.. versionchanged:: 3.14
A single dot ("``.``") is considered a valid suffix.
.. attribute:: PurePath.stem .. attribute:: PurePath.stem
@ -713,6 +721,11 @@ Pure paths provide the following methods and properties:
>>> p.with_suffix('') >>> p.with_suffix('')
PureWindowsPath('README') PureWindowsPath('README')
.. versionchanged:: 3.14
A single dot ("``.``") is considered a valid suffix. In previous
versions, :exc:`ValueError` is raised if a single dot is supplied.
.. method:: PurePath.with_segments(*pathsegments) .. method:: PurePath.with_segments(*pathsegments)

View file

@ -68,6 +68,12 @@ class ParserBase:
drive. Either part may be empty.""" drive. Either part may be empty."""
raise UnsupportedOperation(self._unsupported_msg('splitdrive()')) raise UnsupportedOperation(self._unsupported_msg('splitdrive()'))
def splitext(self, path):
    """Split the path into a pair (root, ext), where *ext* is empty or
    begins with a period and contains at most one period,
    and *root* is everything before the extension.

    This base implementation always raises UnsupportedOperation.
    """
    raise UnsupportedOperation(self._unsupported_msg('splitext()'))
def normcase(self, path): def normcase(self, path):
"""Normalize the case of the path.""" """Normalize the case of the path."""
raise UnsupportedOperation(self._unsupported_msg('normcase()')) raise UnsupportedOperation(self._unsupported_msg('normcase()'))
@ -151,12 +157,7 @@ class PurePathBase:
This includes the leading period. For example: '.txt' This includes the leading period. For example: '.txt'
""" """
name = self.name return self.parser.splitext(self.name)[1]
i = name.rfind('.')
if 0 < i < len(name) - 1:
return name[i:]
else:
return ''
@property @property
def suffixes(self): def suffixes(self):
@ -165,21 +166,18 @@ class PurePathBase:
These include the leading periods. For example: ['.tar', '.gz'] These include the leading periods. For example: ['.tar', '.gz']
""" """
name = self.name split = self.parser.splitext
if name.endswith('.'): stem, suffix = split(self.name)
return [] suffixes = []
name = name.lstrip('.') while suffix:
return ['.' + suffix for suffix in name.split('.')[1:]] suffixes.append(suffix)
stem, suffix = split(stem)
return suffixes[::-1]
@property @property
def stem(self): def stem(self):
"""The final path component, minus its last suffix.""" """The final path component, minus its last suffix."""
name = self.name return self.parser.splitext(self.name)[0]
i = name.rfind('.')
if 0 < i < len(name) - 1:
return name[:i]
else:
return name
def with_name(self, name): def with_name(self, name):
"""Return a new path with the file name changed.""" """Return a new path with the file name changed."""
@ -208,7 +206,7 @@ class PurePathBase:
if not stem: if not stem:
# If the stem is empty, we can't make the suffix non-empty. # If the stem is empty, we can't make the suffix non-empty.
raise ValueError(f"{self!r} has an empty name") raise ValueError(f"{self!r} has an empty name")
elif suffix and not (suffix.startswith('.') and len(suffix) > 1): elif suffix and not suffix.startswith('.'):
raise ValueError(f"Invalid suffix {suffix!r}") raise ValueError(f"Invalid suffix {suffix!r}")
else: else:
return self.with_name(stem + suffix) return self.with_name(stem + suffix)

View file

@ -361,6 +361,40 @@ class PurePath(PurePathBase):
tail[-1] = name tail[-1] = name
return self._from_parsed_parts(self.drive, self.root, tail) return self._from_parsed_parts(self.drive, self.root, tail)
@property
def stem(self):
    """The final path component, minus its last suffix.

    Leading dots never start a suffix, so the stem of '.hgrc' is
    '.hgrc'. A single trailing dot counts as a suffix, so the stem of
    'trailing.dot.' is 'trailing.dot'.
    """
    name = self.name
    i = name.rfind('.')
    if i != -1:
        stem = name[:i]
        # Stem must contain at least one non-dot character; otherwise
        # the dot found belongs to a run of leading dots and the whole
        # name is returned unchanged.
        if stem.lstrip('.'):
            return stem
    return name
@property
def suffix(self):
    """
    The final component's last suffix, if any.

    This includes the leading period. For example: '.txt'

    A single trailing dot is a valid suffix ('trailing.dot.' gives '.'),
    while leading dots never start one ('.hgrc' gives '').
    """
    # Strip leading dots first so that a dot-file name such as '.hgrc'
    # is not mistaken for an empty stem followed by a suffix.
    name = self.name.lstrip('.')
    i = name.rfind('.')
    if i != -1:
        return name[i:]
    return ''
@property
def suffixes(self):
    """
    A list of the final component's suffixes, if any.

    These include the leading periods. For example: ['.tar', '.gz']

    Leading dots are ignored, and every remaining dot starts a suffix,
    so 'trailing.dot.' gives ['.dot', '.'].
    """
    # After dropping leading dots, the first split piece is the stem's
    # head; each later piece (possibly empty) is one suffix.
    return ['.' + ext for ext in self.name.lstrip('.').split('.')[1:]]
def relative_to(self, other, *, walk_up=False): def relative_to(self, other, *, walk_up=False):
"""Return the relative path to another path identified by the passed """Return the relative path to another path identified by the passed
arguments. If the operation is not possible (because this is not arguments. If the operation is not possible (because this is not

View file

@ -50,6 +50,7 @@ class ParserBaseTest(unittest.TestCase):
self.assertRaises(e, m.join, 'foo') self.assertRaises(e, m.join, 'foo')
self.assertRaises(e, m.split, 'foo') self.assertRaises(e, m.split, 'foo')
self.assertRaises(e, m.splitdrive, 'foo') self.assertRaises(e, m.splitdrive, 'foo')
self.assertRaises(e, m.splitext, 'foo')
self.assertRaises(e, m.normcase, 'foo') self.assertRaises(e, m.normcase, 'foo')
self.assertRaises(e, m.isabs, 'foo') self.assertRaises(e, m.isabs, 'foo')
@ -789,8 +790,12 @@ class DummyPurePathTest(unittest.TestCase):
self.assertEqual(P('/a/.hg.rc').suffix, '.rc') self.assertEqual(P('/a/.hg.rc').suffix, '.rc')
self.assertEqual(P('a/b.tar.gz').suffix, '.gz') self.assertEqual(P('a/b.tar.gz').suffix, '.gz')
self.assertEqual(P('/a/b.tar.gz').suffix, '.gz') self.assertEqual(P('/a/b.tar.gz').suffix, '.gz')
self.assertEqual(P('a/Some name. Ending with a dot.').suffix, '') self.assertEqual(P('a/trailing.dot.').suffix, '.')
self.assertEqual(P('/a/Some name. Ending with a dot.').suffix, '') self.assertEqual(P('/a/trailing.dot.').suffix, '.')
self.assertEqual(P('a/..d.o.t..').suffix, '.')
self.assertEqual(P('a/inn.er..dots').suffix, '.dots')
self.assertEqual(P('photo').suffix, '')
self.assertEqual(P('photo.jpg').suffix, '.jpg')
@needs_windows @needs_windows
def test_suffix_windows(self): def test_suffix_windows(self):
@ -807,8 +812,8 @@ class DummyPurePathTest(unittest.TestCase):
self.assertEqual(P('c:/a/.hg.rc').suffix, '.rc') self.assertEqual(P('c:/a/.hg.rc').suffix, '.rc')
self.assertEqual(P('c:a/b.tar.gz').suffix, '.gz') self.assertEqual(P('c:a/b.tar.gz').suffix, '.gz')
self.assertEqual(P('c:/a/b.tar.gz').suffix, '.gz') self.assertEqual(P('c:/a/b.tar.gz').suffix, '.gz')
self.assertEqual(P('c:a/Some name. Ending with a dot.').suffix, '') self.assertEqual(P('c:a/trailing.dot.').suffix, '.')
self.assertEqual(P('c:/a/Some name. Ending with a dot.').suffix, '') self.assertEqual(P('c:/a/trailing.dot.').suffix, '.')
self.assertEqual(P('//My.py/Share.php').suffix, '') self.assertEqual(P('//My.py/Share.php').suffix, '')
self.assertEqual(P('//My.py/Share.php/a/b').suffix, '') self.assertEqual(P('//My.py/Share.php/a/b').suffix, '')
@ -828,8 +833,12 @@ class DummyPurePathTest(unittest.TestCase):
self.assertEqual(P('/a/.hg.rc').suffixes, ['.rc']) self.assertEqual(P('/a/.hg.rc').suffixes, ['.rc'])
self.assertEqual(P('a/b.tar.gz').suffixes, ['.tar', '.gz']) self.assertEqual(P('a/b.tar.gz').suffixes, ['.tar', '.gz'])
self.assertEqual(P('/a/b.tar.gz').suffixes, ['.tar', '.gz']) self.assertEqual(P('/a/b.tar.gz').suffixes, ['.tar', '.gz'])
self.assertEqual(P('a/Some name. Ending with a dot.').suffixes, []) self.assertEqual(P('a/trailing.dot.').suffixes, ['.dot', '.'])
self.assertEqual(P('/a/Some name. Ending with a dot.').suffixes, []) self.assertEqual(P('/a/trailing.dot.').suffixes, ['.dot', '.'])
self.assertEqual(P('a/..d.o.t..').suffixes, ['.o', '.t', '.', '.'])
self.assertEqual(P('a/inn.er..dots').suffixes, ['.er', '.', '.dots'])
self.assertEqual(P('photo').suffixes, [])
self.assertEqual(P('photo.jpg').suffixes, ['.jpg'])
@needs_windows @needs_windows
def test_suffixes_windows(self): def test_suffixes_windows(self):
@ -848,8 +857,8 @@ class DummyPurePathTest(unittest.TestCase):
self.assertEqual(P('c:/a/b.tar.gz').suffixes, ['.tar', '.gz']) self.assertEqual(P('c:/a/b.tar.gz').suffixes, ['.tar', '.gz'])
self.assertEqual(P('//My.py/Share.php').suffixes, []) self.assertEqual(P('//My.py/Share.php').suffixes, [])
self.assertEqual(P('//My.py/Share.php/a/b').suffixes, []) self.assertEqual(P('//My.py/Share.php/a/b').suffixes, [])
self.assertEqual(P('c:a/Some name. Ending with a dot.').suffixes, []) self.assertEqual(P('c:a/trailing.dot.').suffixes, ['.dot', '.'])
self.assertEqual(P('c:/a/Some name. Ending with a dot.').suffixes, []) self.assertEqual(P('c:/a/trailing.dot.').suffixes, ['.dot', '.'])
def test_stem_empty(self): def test_stem_empty(self):
P = self.cls P = self.cls
@ -865,8 +874,11 @@ class DummyPurePathTest(unittest.TestCase):
self.assertEqual(P('a/.hgrc').stem, '.hgrc') self.assertEqual(P('a/.hgrc').stem, '.hgrc')
self.assertEqual(P('a/.hg.rc').stem, '.hg') self.assertEqual(P('a/.hg.rc').stem, '.hg')
self.assertEqual(P('a/b.tar.gz').stem, 'b.tar') self.assertEqual(P('a/b.tar.gz').stem, 'b.tar')
self.assertEqual(P('a/Some name. Ending with a dot.').stem, self.assertEqual(P('a/trailing.dot.').stem, 'trailing.dot')
'Some name. Ending with a dot.') self.assertEqual(P('a/..d.o.t..').stem, '..d.o.t.')
self.assertEqual(P('a/inn.er..dots').stem, 'inn.er.')
self.assertEqual(P('photo').stem, 'photo')
self.assertEqual(P('photo.jpg').stem, 'photo')
@needs_windows @needs_windows
def test_stem_windows(self): def test_stem_windows(self):
@ -880,8 +892,8 @@ class DummyPurePathTest(unittest.TestCase):
self.assertEqual(P('c:a/.hgrc').stem, '.hgrc') self.assertEqual(P('c:a/.hgrc').stem, '.hgrc')
self.assertEqual(P('c:a/.hg.rc').stem, '.hg') self.assertEqual(P('c:a/.hg.rc').stem, '.hg')
self.assertEqual(P('c:a/b.tar.gz').stem, 'b.tar') self.assertEqual(P('c:a/b.tar.gz').stem, 'b.tar')
self.assertEqual(P('c:a/Some name. Ending with a dot.').stem, self.assertEqual(P('c:a/trailing.dot.').stem, 'trailing.dot')
'Some name. Ending with a dot.')
def test_with_name_common(self): def test_with_name_common(self):
P = self.cls P = self.cls
self.assertEqual(P('a/b').with_name('d.xml'), P('a/d.xml')) self.assertEqual(P('a/b').with_name('d.xml'), P('a/d.xml'))
@ -929,16 +941,16 @@ class DummyPurePathTest(unittest.TestCase):
self.assertEqual(P('a/b.py').with_stem('d'), P('a/d.py')) self.assertEqual(P('a/b.py').with_stem('d'), P('a/d.py'))
self.assertEqual(P('/a/b.py').with_stem('d'), P('/a/d.py')) self.assertEqual(P('/a/b.py').with_stem('d'), P('/a/d.py'))
self.assertEqual(P('/a/b.tar.gz').with_stem('d'), P('/a/d.gz')) self.assertEqual(P('/a/b.tar.gz').with_stem('d'), P('/a/d.gz'))
self.assertEqual(P('a/Dot ending.').with_stem('d'), P('a/d')) self.assertEqual(P('a/Dot ending.').with_stem('d'), P('a/d.'))
self.assertEqual(P('/a/Dot ending.').with_stem('d'), P('/a/d')) self.assertEqual(P('/a/Dot ending.').with_stem('d'), P('/a/d.'))
@needs_windows @needs_windows
def test_with_stem_windows(self): def test_with_stem_windows(self):
P = self.cls P = self.cls
self.assertEqual(P('c:a/b').with_stem('d'), P('c:a/d')) self.assertEqual(P('c:a/b').with_stem('d'), P('c:a/d'))
self.assertEqual(P('c:/a/b').with_stem('d'), P('c:/a/d')) self.assertEqual(P('c:/a/b').with_stem('d'), P('c:/a/d'))
self.assertEqual(P('c:a/Dot ending.').with_stem('d'), P('c:a/d')) self.assertEqual(P('c:a/Dot ending.').with_stem('d'), P('c:a/d.'))
self.assertEqual(P('c:/a/Dot ending.').with_stem('d'), P('c:/a/d')) self.assertEqual(P('c:/a/Dot ending.').with_stem('d'), P('c:/a/d.'))
self.assertRaises(ValueError, P('c:').with_stem, 'd') self.assertRaises(ValueError, P('c:').with_stem, 'd')
self.assertRaises(ValueError, P('c:/').with_stem, 'd') self.assertRaises(ValueError, P('c:/').with_stem, 'd')
self.assertRaises(ValueError, P('//My/Share').with_stem, 'd') self.assertRaises(ValueError, P('//My/Share').with_stem, 'd')
@ -974,6 +986,11 @@ class DummyPurePathTest(unittest.TestCase):
# Stripping suffix. # Stripping suffix.
self.assertEqual(P('a/b.py').with_suffix(''), P('a/b')) self.assertEqual(P('a/b.py').with_suffix(''), P('a/b'))
self.assertEqual(P('/a/b').with_suffix(''), P('/a/b')) self.assertEqual(P('/a/b').with_suffix(''), P('/a/b'))
# Single dot
self.assertEqual(P('a/b').with_suffix('.'), P('a/b.'))
self.assertEqual(P('/a/b').with_suffix('.'), P('/a/b.'))
self.assertEqual(P('a/b.py').with_suffix('.'), P('a/b.'))
self.assertEqual(P('/a/b.py').with_suffix('.'), P('/a/b.'))
@needs_windows @needs_windows
def test_with_suffix_windows(self): def test_with_suffix_windows(self):
@ -1012,7 +1029,6 @@ class DummyPurePathTest(unittest.TestCase):
# Invalid suffix. # Invalid suffix.
self.assertRaises(ValueError, P('a/b').with_suffix, 'gz') self.assertRaises(ValueError, P('a/b').with_suffix, 'gz')
self.assertRaises(ValueError, P('a/b').with_suffix, '/') self.assertRaises(ValueError, P('a/b').with_suffix, '/')
self.assertRaises(ValueError, P('a/b').with_suffix, '.')
self.assertRaises(ValueError, P('a/b').with_suffix, '/.gz') self.assertRaises(ValueError, P('a/b').with_suffix, '/.gz')
self.assertRaises(ValueError, P('a/b').with_suffix, 'c/d') self.assertRaises(ValueError, P('a/b').with_suffix, 'c/d')
self.assertRaises(ValueError, P('a/b').with_suffix, '.c/.d') self.assertRaises(ValueError, P('a/b').with_suffix, '.c/.d')

View file

@ -0,0 +1,5 @@
Support single-dot file extensions in :attr:`pathlib.PurePath.suffix` and
related attributes and methods. For example, the
:attr:`~pathlib.PurePath.suffixes` of ``PurePath('foo.bar.')`` are now
``['.bar', '.']`` rather than ``[]``. This brings file extension splitting
in line with :func:`os.path.splitext`.