GH-82805: Fix handling of single-dot file extensions in pathlib (#118952)

pathlib now treats "`.`" as a valid file extension (suffix). This brings
it in line with `os.path.splitext()`.

In the (private) pathlib ABCs, we add a new `ParserBase.splitext()` method
that splits a path into a `(root, ext)` pair, like `os.path.splitext()`.
This method is called by `PurePathBase.stem`, `suffix`, etc. If a future
version of pathlib makes these base classes public, users will be able to
define their own `splitext()` method to control file extension
splitting.

In `pathlib.PurePath` we add optimised `stem`, `suffix` and `suffixes`
properties that don't use `splitext()`, which avoids computing the path
base name twice.
This commit is contained in:
Barney Gale 2024-05-25 21:01:36 +01:00 committed by GitHub
parent 0c5ebe13e9
commit e418fc3a6e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 101 additions and 35 deletions

View file

@ -449,6 +449,10 @@ Pure paths provide the following methods and properties:
This is commonly called the file extension.
.. versionchanged:: 3.14
A single dot ("``.``") is considered a valid suffix.
.. attribute:: PurePath.suffixes
A list of the path's suffixes, often called file extensions::
@ -460,6 +464,10 @@ Pure paths provide the following methods and properties:
>>> PurePosixPath('my/library').suffixes
[]
.. versionchanged:: 3.14
A single dot ("``.``") is considered a valid suffix.
.. attribute:: PurePath.stem
@ -713,6 +721,11 @@ Pure paths provide the following methods and properties:
>>> p.with_suffix('')
PureWindowsPath('README')
.. versionchanged:: 3.14
A single dot ("``.``") is considered a valid suffix. In previous
versions, :exc:`ValueError` was raised if a single dot was supplied.
.. method:: PurePath.with_segments(*pathsegments)

View file

@ -68,6 +68,12 @@ class ParserBase:
drive. Either part may be empty."""
raise UnsupportedOperation(self._unsupported_msg('splitdrive()'))
def splitext(self, path):
"""Split the path into a pair (root, ext), where *ext* is empty or
begins with a period and contains at most one period,
and *root* is everything before the extension."""
raise UnsupportedOperation(self._unsupported_msg('splitext()'))
def normcase(self, path):
"""Normalize the case of the path."""
raise UnsupportedOperation(self._unsupported_msg('normcase()'))
@ -151,12 +157,7 @@ class PurePathBase:
This includes the leading period. For example: '.txt'
"""
name = self.name
i = name.rfind('.')
if 0 < i < len(name) - 1:
return name[i:]
else:
return ''
return self.parser.splitext(self.name)[1]
@property
def suffixes(self):
@ -165,21 +166,18 @@ class PurePathBase:
These include the leading periods. For example: ['.tar', '.gz']
"""
name = self.name
if name.endswith('.'):
return []
name = name.lstrip('.')
return ['.' + suffix for suffix in name.split('.')[1:]]
split = self.parser.splitext
stem, suffix = split(self.name)
suffixes = []
while suffix:
suffixes.append(suffix)
stem, suffix = split(stem)
return suffixes[::-1]
@property
def stem(self):
"""The final path component, minus its last suffix."""
name = self.name
i = name.rfind('.')
if 0 < i < len(name) - 1:
return name[:i]
else:
return name
return self.parser.splitext(self.name)[0]
def with_name(self, name):
"""Return a new path with the file name changed."""
@ -208,7 +206,7 @@ class PurePathBase:
if not stem:
# If the stem is empty, we can't make the suffix non-empty.
raise ValueError(f"{self!r} has an empty name")
elif suffix and not (suffix.startswith('.') and len(suffix) > 1):
elif suffix and not suffix.startswith('.'):
raise ValueError(f"Invalid suffix {suffix!r}")
else:
return self.with_name(stem + suffix)

View file

@ -361,6 +361,40 @@ class PurePath(PurePathBase):
tail[-1] = name
return self._from_parsed_parts(self.drive, self.root, tail)
@property
def stem(self):
"""The final path component, minus its last suffix."""
name = self.name
i = name.rfind('.')
if i != -1:
stem = name[:i]
# Stem must contain at least one non-dot character.
if stem.lstrip('.'):
return stem
return name
@property
def suffix(self):
"""
The final component's last suffix, if any.
This includes the leading period. For example: '.txt'
"""
name = self.name.lstrip('.')
i = name.rfind('.')
if i != -1:
return name[i:]
return ''
@property
def suffixes(self):
"""
A list of the final component's suffixes, if any.
These include the leading periods. For example: ['.tar', '.gz']
"""
return ['.' + ext for ext in self.name.lstrip('.').split('.')[1:]]
def relative_to(self, other, *, walk_up=False):
"""Return the relative path to another path identified by the passed
arguments. If the operation is not possible (because this is not

View file

@ -50,6 +50,7 @@ class ParserBaseTest(unittest.TestCase):
self.assertRaises(e, m.join, 'foo')
self.assertRaises(e, m.split, 'foo')
self.assertRaises(e, m.splitdrive, 'foo')
self.assertRaises(e, m.splitext, 'foo')
self.assertRaises(e, m.normcase, 'foo')
self.assertRaises(e, m.isabs, 'foo')
@ -789,8 +790,12 @@ class DummyPurePathTest(unittest.TestCase):
self.assertEqual(P('/a/.hg.rc').suffix, '.rc')
self.assertEqual(P('a/b.tar.gz').suffix, '.gz')
self.assertEqual(P('/a/b.tar.gz').suffix, '.gz')
self.assertEqual(P('a/Some name. Ending with a dot.').suffix, '')
self.assertEqual(P('/a/Some name. Ending with a dot.').suffix, '')
self.assertEqual(P('a/trailing.dot.').suffix, '.')
self.assertEqual(P('/a/trailing.dot.').suffix, '.')
self.assertEqual(P('a/..d.o.t..').suffix, '.')
self.assertEqual(P('a/inn.er..dots').suffix, '.dots')
self.assertEqual(P('photo').suffix, '')
self.assertEqual(P('photo.jpg').suffix, '.jpg')
@needs_windows
def test_suffix_windows(self):
@ -807,8 +812,8 @@ class DummyPurePathTest(unittest.TestCase):
self.assertEqual(P('c:/a/.hg.rc').suffix, '.rc')
self.assertEqual(P('c:a/b.tar.gz').suffix, '.gz')
self.assertEqual(P('c:/a/b.tar.gz').suffix, '.gz')
self.assertEqual(P('c:a/Some name. Ending with a dot.').suffix, '')
self.assertEqual(P('c:/a/Some name. Ending with a dot.').suffix, '')
self.assertEqual(P('c:a/trailing.dot.').suffix, '.')
self.assertEqual(P('c:/a/trailing.dot.').suffix, '.')
self.assertEqual(P('//My.py/Share.php').suffix, '')
self.assertEqual(P('//My.py/Share.php/a/b').suffix, '')
@ -828,8 +833,12 @@ class DummyPurePathTest(unittest.TestCase):
self.assertEqual(P('/a/.hg.rc').suffixes, ['.rc'])
self.assertEqual(P('a/b.tar.gz').suffixes, ['.tar', '.gz'])
self.assertEqual(P('/a/b.tar.gz').suffixes, ['.tar', '.gz'])
self.assertEqual(P('a/Some name. Ending with a dot.').suffixes, [])
self.assertEqual(P('/a/Some name. Ending with a dot.').suffixes, [])
self.assertEqual(P('a/trailing.dot.').suffixes, ['.dot', '.'])
self.assertEqual(P('/a/trailing.dot.').suffixes, ['.dot', '.'])
self.assertEqual(P('a/..d.o.t..').suffixes, ['.o', '.t', '.', '.'])
self.assertEqual(P('a/inn.er..dots').suffixes, ['.er', '.', '.dots'])
self.assertEqual(P('photo').suffixes, [])
self.assertEqual(P('photo.jpg').suffixes, ['.jpg'])
@needs_windows
def test_suffixes_windows(self):
@ -848,8 +857,8 @@ class DummyPurePathTest(unittest.TestCase):
self.assertEqual(P('c:/a/b.tar.gz').suffixes, ['.tar', '.gz'])
self.assertEqual(P('//My.py/Share.php').suffixes, [])
self.assertEqual(P('//My.py/Share.php/a/b').suffixes, [])
self.assertEqual(P('c:a/Some name. Ending with a dot.').suffixes, [])
self.assertEqual(P('c:/a/Some name. Ending with a dot.').suffixes, [])
self.assertEqual(P('c:a/trailing.dot.').suffixes, ['.dot', '.'])
self.assertEqual(P('c:/a/trailing.dot.').suffixes, ['.dot', '.'])
def test_stem_empty(self):
P = self.cls
@ -865,8 +874,11 @@ class DummyPurePathTest(unittest.TestCase):
self.assertEqual(P('a/.hgrc').stem, '.hgrc')
self.assertEqual(P('a/.hg.rc').stem, '.hg')
self.assertEqual(P('a/b.tar.gz').stem, 'b.tar')
self.assertEqual(P('a/Some name. Ending with a dot.').stem,
'Some name. Ending with a dot.')
self.assertEqual(P('a/trailing.dot.').stem, 'trailing.dot')
self.assertEqual(P('a/..d.o.t..').stem, '..d.o.t.')
self.assertEqual(P('a/inn.er..dots').stem, 'inn.er.')
self.assertEqual(P('photo').stem, 'photo')
self.assertEqual(P('photo.jpg').stem, 'photo')
@needs_windows
def test_stem_windows(self):
@ -880,8 +892,8 @@ class DummyPurePathTest(unittest.TestCase):
self.assertEqual(P('c:a/.hgrc').stem, '.hgrc')
self.assertEqual(P('c:a/.hg.rc').stem, '.hg')
self.assertEqual(P('c:a/b.tar.gz').stem, 'b.tar')
self.assertEqual(P('c:a/Some name. Ending with a dot.').stem,
'Some name. Ending with a dot.')
self.assertEqual(P('c:a/trailing.dot.').stem, 'trailing.dot')
def test_with_name_common(self):
P = self.cls
self.assertEqual(P('a/b').with_name('d.xml'), P('a/d.xml'))
@ -929,16 +941,16 @@ class DummyPurePathTest(unittest.TestCase):
self.assertEqual(P('a/b.py').with_stem('d'), P('a/d.py'))
self.assertEqual(P('/a/b.py').with_stem('d'), P('/a/d.py'))
self.assertEqual(P('/a/b.tar.gz').with_stem('d'), P('/a/d.gz'))
self.assertEqual(P('a/Dot ending.').with_stem('d'), P('a/d'))
self.assertEqual(P('/a/Dot ending.').with_stem('d'), P('/a/d'))
self.assertEqual(P('a/Dot ending.').with_stem('d'), P('a/d.'))
self.assertEqual(P('/a/Dot ending.').with_stem('d'), P('/a/d.'))
@needs_windows
def test_with_stem_windows(self):
P = self.cls
self.assertEqual(P('c:a/b').with_stem('d'), P('c:a/d'))
self.assertEqual(P('c:/a/b').with_stem('d'), P('c:/a/d'))
self.assertEqual(P('c:a/Dot ending.').with_stem('d'), P('c:a/d'))
self.assertEqual(P('c:/a/Dot ending.').with_stem('d'), P('c:/a/d'))
self.assertEqual(P('c:a/Dot ending.').with_stem('d'), P('c:a/d.'))
self.assertEqual(P('c:/a/Dot ending.').with_stem('d'), P('c:/a/d.'))
self.assertRaises(ValueError, P('c:').with_stem, 'd')
self.assertRaises(ValueError, P('c:/').with_stem, 'd')
self.assertRaises(ValueError, P('//My/Share').with_stem, 'd')
@ -974,6 +986,11 @@ class DummyPurePathTest(unittest.TestCase):
# Stripping suffix.
self.assertEqual(P('a/b.py').with_suffix(''), P('a/b'))
self.assertEqual(P('/a/b').with_suffix(''), P('/a/b'))
# Single dot
self.assertEqual(P('a/b').with_suffix('.'), P('a/b.'))
self.assertEqual(P('/a/b').with_suffix('.'), P('/a/b.'))
self.assertEqual(P('a/b.py').with_suffix('.'), P('a/b.'))
self.assertEqual(P('/a/b.py').with_suffix('.'), P('/a/b.'))
@needs_windows
def test_with_suffix_windows(self):
@ -1012,7 +1029,6 @@ class DummyPurePathTest(unittest.TestCase):
# Invalid suffix.
self.assertRaises(ValueError, P('a/b').with_suffix, 'gz')
self.assertRaises(ValueError, P('a/b').with_suffix, '/')
self.assertRaises(ValueError, P('a/b').with_suffix, '.')
self.assertRaises(ValueError, P('a/b').with_suffix, '/.gz')
self.assertRaises(ValueError, P('a/b').with_suffix, 'c/d')
self.assertRaises(ValueError, P('a/b').with_suffix, '.c/.d')

View file

@ -0,0 +1,5 @@
Support single-dot file extensions in :attr:`pathlib.PurePath.suffix` and
related attributes and methods. For example, the
:attr:`~pathlib.PurePath.suffixes` of ``PurePath('foo.bar.')`` are now
``['.bar', '.']`` rather than ``[]``. This brings file extension splitting
in line with :func:`os.path.splitext`.