mirror of
https://github.com/python/cpython.git
synced 2025-08-03 16:39:00 +00:00
GH-73435: Add pathlib.PurePath.full_match()
(#114350)
In 49f90ba
we added support for the recursive wildcard `**` in
`pathlib.PurePath.match()`. This should allow arbitrary prefix and suffix
matching, like `p.match('foo/**')` or `p.match('**/foo')`, but there's a
problem: for relative patterns only, `match()` implicitly inserts a `**`
token on the left-hand side, causing all patterns to match from the right.
As a result, it's impossible to match relative patterns from the left:
`PurePath('foo/bar').match('bar/**')` is true!
This commit reverts the changes to `match()`, and instead adds a new
`full_match()` method that:
- Allows empty patterns
- Supports the recursive wildcard `**`
- Matches the *entire* path when given a relative pattern
This commit is contained in:
parent
841eacd076
commit
b69548a0f5
6 changed files with 158 additions and 75 deletions
|
@ -147,8 +147,9 @@ The :mod:`glob` module defines the following functions:
|
|||
|
||||
.. seealso::
|
||||
|
||||
:meth:`pathlib.PurePath.match` and :meth:`pathlib.Path.glob` methods,
|
||||
which call this function to implement pattern matching and globbing.
|
||||
:meth:`pathlib.PurePath.full_match` and :meth:`pathlib.Path.glob`
|
||||
methods, which call this function to implement pattern matching and
|
||||
globbing.
|
||||
|
||||
.. versionadded:: 3.13
|
||||
|
||||
|
|
|
@ -559,13 +559,41 @@ Pure paths provide the following methods and properties:
|
|||
PureWindowsPath('c:/Program Files')
|
||||
|
||||
|
||||
.. method:: PurePath.match(pattern, *, case_sensitive=None)
|
||||
.. method:: PurePath.full_match(pattern, *, case_sensitive=None)
|
||||
|
||||
Match this path against the provided glob-style pattern. Return ``True``
|
||||
if matching is successful, ``False`` otherwise.
|
||||
if matching is successful, ``False`` otherwise. For example::
|
||||
|
||||
If *pattern* is relative, the path can be either relative or absolute,
|
||||
and matching is done from the right::
|
||||
>>> PurePath('a/b.py').full_match('a/*.py')
|
||||
True
|
||||
>>> PurePath('a/b.py').full_match('*.py')
|
||||
False
|
||||
>>> PurePath('/a/b/c.py').full_match('/a/**')
|
||||
True
|
||||
>>> PurePath('/a/b/c.py').full_match('**/*.py')
|
||||
True
|
||||
|
||||
As with other methods, case-sensitivity follows platform defaults::
|
||||
|
||||
>>> PurePosixPath('b.py').full_match('*.PY')
|
||||
False
|
||||
>>> PureWindowsPath('b.py').full_match('*.PY')
|
||||
True
|
||||
|
||||
Set *case_sensitive* to ``True`` or ``False`` to override this behaviour.
|
||||
|
||||
.. versionadded:: 3.13
|
||||
|
||||
|
||||
.. method:: PurePath.match(pattern, *, case_sensitive=None)
|
||||
|
||||
Match this path against the provided non-recursive glob-style pattern.
|
||||
Return ``True`` if matching is successful, ``False`` otherwise.
|
||||
|
||||
This method is similar to :meth:`~PurePath.full_match`, but empty patterns
|
||||
aren't allowed (:exc:`ValueError` is raised), the recursive wildcard
|
||||
"``**``" isn't supported (it acts like non-recursive "``*``"), and if a
|
||||
relative pattern is provided, then matching is done from the right::
|
||||
|
||||
>>> PurePath('a/b.py').match('*.py')
|
||||
True
|
||||
|
@ -574,40 +602,12 @@ Pure paths provide the following methods and properties:
|
|||
>>> PurePath('/a/b/c.py').match('a/*.py')
|
||||
False
|
||||
|
||||
If *pattern* is absolute, the path must be absolute, and the whole path
|
||||
must match::
|
||||
|
||||
>>> PurePath('/a.py').match('/*.py')
|
||||
True
|
||||
>>> PurePath('a/b.py').match('/*.py')
|
||||
False
|
||||
|
||||
The *pattern* may be another path object; this speeds up matching the same
|
||||
pattern against multiple files::
|
||||
|
||||
>>> pattern = PurePath('*.py')
|
||||
>>> PurePath('a/b.py').match(pattern)
|
||||
True
|
||||
|
||||
.. versionchanged:: 3.12
|
||||
Accepts an object implementing the :class:`os.PathLike` interface.
|
||||
|
||||
As with other methods, case-sensitivity follows platform defaults::
|
||||
|
||||
>>> PurePosixPath('b.py').match('*.PY')
|
||||
False
|
||||
>>> PureWindowsPath('b.py').match('*.PY')
|
||||
True
|
||||
|
||||
Set *case_sensitive* to ``True`` or ``False`` to override this behaviour.
|
||||
The *pattern* parameter accepts a :term:`path-like object`.
|
||||
|
||||
.. versionchanged:: 3.12
|
||||
The *case_sensitive* parameter was added.
|
||||
|
||||
.. versionchanged:: 3.13
|
||||
Support for the recursive wildcard "``**``" was added. In previous
|
||||
versions, it acted like the non-recursive wildcard "``*``".
|
||||
|
||||
|
||||
.. method:: PurePath.relative_to(other, walk_up=False)
|
||||
|
||||
|
|
|
@ -336,7 +336,8 @@ pathlib
|
|||
object from a 'file' URI (``file:/``).
|
||||
(Contributed by Barney Gale in :gh:`107465`.)
|
||||
|
||||
* Add support for recursive wildcards in :meth:`pathlib.PurePath.match`.
|
||||
* Add :meth:`pathlib.PurePath.full_match` for matching paths with
|
||||
shell-style wildcards, including the recursive wildcard "``**``".
|
||||
(Contributed by Barney Gale in :gh:`73435`.)
|
||||
|
||||
* Add *follow_symlinks* keyword-only argument to :meth:`pathlib.Path.glob`,
|
||||
|
|
|
@ -490,6 +490,13 @@ class PurePath(_abc.PurePathBase):
|
|||
parts.reverse()
|
||||
return parts
|
||||
|
||||
@property
|
||||
def _pattern_str(self):
|
||||
"""The path expressed as a string, for use in pattern-matching."""
|
||||
# The string representation of an empty path is a single dot ('.'). Empty
|
||||
# paths shouldn't match wildcards, so we change it to the empty string.
|
||||
path_str = str(self)
|
||||
return '' if path_str == '.' else path_str
|
||||
|
||||
# Subclassing os.PathLike makes isinstance() checks slower,
|
||||
# which in turn makes Path construction slower. Register instead!
|
||||
|
|
|
@ -47,8 +47,8 @@ def _is_case_sensitive(pathmod):
|
|||
re = glob = None
|
||||
|
||||
|
||||
@functools.lru_cache(maxsize=256)
|
||||
def _compile_pattern(pat, sep, case_sensitive):
|
||||
@functools.lru_cache(maxsize=512)
|
||||
def _compile_pattern(pat, sep, case_sensitive, recursive=True):
|
||||
"""Compile given glob pattern to a re.Pattern object (observing case
|
||||
sensitivity)."""
|
||||
global re, glob
|
||||
|
@ -56,10 +56,7 @@ def _compile_pattern(pat, sep, case_sensitive):
|
|||
import re, glob
|
||||
|
||||
flags = re.NOFLAG if case_sensitive else re.IGNORECASE
|
||||
regex = glob.translate(pat, recursive=True, include_hidden=True, seps=sep)
|
||||
# The string representation of an empty path is a single dot ('.'). Empty
|
||||
# paths shouldn't match wildcards, so we consume it with an atomic group.
|
||||
regex = r'(\.\Z)?+' + regex
|
||||
regex = glob.translate(pat, recursive=recursive, include_hidden=True, seps=sep)
|
||||
return re.compile(regex, flags=flags).match
|
||||
|
||||
|
||||
|
@ -441,23 +438,48 @@ class PurePathBase:
|
|||
raise NotImplementedError("Non-relative patterns are unsupported")
|
||||
return parts
|
||||
|
||||
@property
|
||||
def _pattern_str(self):
|
||||
"""The path expressed as a string, for use in pattern-matching."""
|
||||
return str(self)
|
||||
|
||||
def match(self, path_pattern, *, case_sensitive=None):
|
||||
"""
|
||||
Return True if this path matches the given pattern.
|
||||
Return True if this path matches the given pattern. If the pattern is
|
||||
relative, matching is done from the right; otherwise, the entire path
|
||||
is matched. The recursive wildcard '**' is *not* supported by this
|
||||
method.
|
||||
"""
|
||||
if not isinstance(path_pattern, PurePathBase):
|
||||
path_pattern = self.with_segments(path_pattern)
|
||||
if case_sensitive is None:
|
||||
case_sensitive = _is_case_sensitive(self.pathmod)
|
||||
sep = path_pattern.pathmod.sep
|
||||
if path_pattern.anchor:
|
||||
pattern_str = str(path_pattern)
|
||||
elif path_pattern.parts:
|
||||
pattern_str = str('**' / path_pattern)
|
||||
else:
|
||||
path_parts = self.parts[::-1]
|
||||
pattern_parts = path_pattern.parts[::-1]
|
||||
if not pattern_parts:
|
||||
raise ValueError("empty pattern")
|
||||
match = _compile_pattern(pattern_str, sep, case_sensitive)
|
||||
return match(str(self)) is not None
|
||||
if len(path_parts) < len(pattern_parts):
|
||||
return False
|
||||
if len(path_parts) > len(pattern_parts) and path_pattern.anchor:
|
||||
return False
|
||||
for path_part, pattern_part in zip(path_parts, pattern_parts):
|
||||
match = _compile_pattern(pattern_part, sep, case_sensitive, recursive=False)
|
||||
if match(path_part) is None:
|
||||
return False
|
||||
return True
|
||||
|
||||
def full_match(self, pattern, *, case_sensitive=None):
|
||||
"""
|
||||
Return True if this path matches the given glob-style pattern. The
|
||||
pattern is matched against the entire path.
|
||||
"""
|
||||
if not isinstance(pattern, PurePathBase):
|
||||
pattern = self.with_segments(pattern)
|
||||
if case_sensitive is None:
|
||||
case_sensitive = _is_case_sensitive(self.pathmod)
|
||||
match = _compile_pattern(pattern._pattern_str, pattern.pathmod.sep, case_sensitive)
|
||||
return match(self._pattern_str) is not None
|
||||
|
||||
|
||||
|
||||
|
@ -781,8 +803,8 @@ class PathBase(PurePathBase):
|
|||
if filter_paths:
|
||||
# Filter out paths that don't match pattern.
|
||||
prefix_len = len(str(self._make_child_relpath('_'))) - 1
|
||||
match = _compile_pattern(str(pattern), sep, case_sensitive)
|
||||
paths = (path for path in paths if match(str(path), prefix_len))
|
||||
match = _compile_pattern(pattern._pattern_str, sep, case_sensitive)
|
||||
paths = (path for path in paths if match(path._pattern_str, prefix_len))
|
||||
return paths
|
||||
|
||||
def rglob(self, pattern, *, case_sensitive=None, follow_symlinks=None):
|
||||
|
|
|
@ -249,29 +249,8 @@ class DummyPurePathTest(unittest.TestCase):
|
|||
self.assertFalse(P('/ab.py').match('/a/*.py'))
|
||||
self.assertFalse(P('/a/b/c.py').match('/a/*.py'))
|
||||
# Multi-part glob-style pattern.
|
||||
self.assertTrue(P('a').match('**'))
|
||||
self.assertTrue(P('c.py').match('**'))
|
||||
self.assertTrue(P('a/b/c.py').match('**'))
|
||||
self.assertTrue(P('/a/b/c.py').match('**'))
|
||||
self.assertTrue(P('/a/b/c.py').match('/**'))
|
||||
self.assertTrue(P('/a/b/c.py').match('/a/**'))
|
||||
self.assertTrue(P('/a/b/c.py').match('**/*.py'))
|
||||
self.assertTrue(P('/a/b/c.py').match('/**/*.py'))
|
||||
self.assertFalse(P('/a/b/c.py').match('/**/*.py'))
|
||||
self.assertTrue(P('/a/b/c.py').match('/a/**/*.py'))
|
||||
self.assertTrue(P('/a/b/c.py').match('/a/b/**/*.py'))
|
||||
self.assertTrue(P('/a/b/c.py').match('/**/**/**/**/*.py'))
|
||||
self.assertFalse(P('c.py').match('**/a.py'))
|
||||
self.assertFalse(P('c.py').match('c/**'))
|
||||
self.assertFalse(P('a/b/c.py').match('**/a'))
|
||||
self.assertFalse(P('a/b/c.py').match('**/a/b'))
|
||||
self.assertFalse(P('a/b/c.py').match('**/a/b/c'))
|
||||
self.assertFalse(P('a/b/c.py').match('**/a/b/c.'))
|
||||
self.assertFalse(P('a/b/c.py').match('**/a/b/c./**'))
|
||||
self.assertFalse(P('a/b/c.py').match('**/a/b/c./**'))
|
||||
self.assertFalse(P('a/b/c.py').match('/a/b/c.py/**'))
|
||||
self.assertFalse(P('a/b/c.py').match('/**/a/b/c.py'))
|
||||
self.assertRaises(ValueError, P('a').match, '**a/b/c')
|
||||
self.assertRaises(ValueError, P('a').match, 'a/b/c**')
|
||||
# Case-sensitive flag
|
||||
self.assertFalse(P('A.py').match('a.PY', case_sensitive=True))
|
||||
self.assertTrue(P('A.py').match('a.PY', case_sensitive=False))
|
||||
|
@ -279,9 +258,82 @@ class DummyPurePathTest(unittest.TestCase):
|
|||
self.assertTrue(P('/a/b/c.py').match('/A/*/*.Py', case_sensitive=False))
|
||||
# Matching against empty path
|
||||
self.assertFalse(P('').match('*'))
|
||||
self.assertTrue(P('').match('**'))
|
||||
self.assertFalse(P('').match('**'))
|
||||
self.assertFalse(P('').match('**/*'))
|
||||
|
||||
def test_full_match_common(self):
|
||||
P = self.cls
|
||||
# Simple relative pattern.
|
||||
self.assertTrue(P('b.py').full_match('b.py'))
|
||||
self.assertFalse(P('a/b.py').full_match('b.py'))
|
||||
self.assertFalse(P('/a/b.py').full_match('b.py'))
|
||||
self.assertFalse(P('a.py').full_match('b.py'))
|
||||
self.assertFalse(P('b/py').full_match('b.py'))
|
||||
self.assertFalse(P('/a.py').full_match('b.py'))
|
||||
self.assertFalse(P('b.py/c').full_match('b.py'))
|
||||
# Wildcard relative pattern.
|
||||
self.assertTrue(P('b.py').full_match('*.py'))
|
||||
self.assertFalse(P('a/b.py').full_match('*.py'))
|
||||
self.assertFalse(P('/a/b.py').full_match('*.py'))
|
||||
self.assertFalse(P('b.pyc').full_match('*.py'))
|
||||
self.assertFalse(P('b./py').full_match('*.py'))
|
||||
self.assertFalse(P('b.py/c').full_match('*.py'))
|
||||
# Multi-part relative pattern.
|
||||
self.assertTrue(P('ab/c.py').full_match('a*/*.py'))
|
||||
self.assertFalse(P('/d/ab/c.py').full_match('a*/*.py'))
|
||||
self.assertFalse(P('a.py').full_match('a*/*.py'))
|
||||
self.assertFalse(P('/dab/c.py').full_match('a*/*.py'))
|
||||
self.assertFalse(P('ab/c.py/d').full_match('a*/*.py'))
|
||||
# Absolute pattern.
|
||||
self.assertTrue(P('/b.py').full_match('/*.py'))
|
||||
self.assertFalse(P('b.py').full_match('/*.py'))
|
||||
self.assertFalse(P('a/b.py').full_match('/*.py'))
|
||||
self.assertFalse(P('/a/b.py').full_match('/*.py'))
|
||||
# Multi-part absolute pattern.
|
||||
self.assertTrue(P('/a/b.py').full_match('/a/*.py'))
|
||||
self.assertFalse(P('/ab.py').full_match('/a/*.py'))
|
||||
self.assertFalse(P('/a/b/c.py').full_match('/a/*.py'))
|
||||
# Multi-part glob-style pattern.
|
||||
self.assertTrue(P('a').full_match('**'))
|
||||
self.assertTrue(P('c.py').full_match('**'))
|
||||
self.assertTrue(P('a/b/c.py').full_match('**'))
|
||||
self.assertTrue(P('/a/b/c.py').full_match('**'))
|
||||
self.assertTrue(P('/a/b/c.py').full_match('/**'))
|
||||
self.assertTrue(P('/a/b/c.py').full_match('/a/**'))
|
||||
self.assertTrue(P('/a/b/c.py').full_match('**/*.py'))
|
||||
self.assertTrue(P('/a/b/c.py').full_match('/**/*.py'))
|
||||
self.assertTrue(P('/a/b/c.py').full_match('/a/**/*.py'))
|
||||
self.assertTrue(P('/a/b/c.py').full_match('/a/b/**/*.py'))
|
||||
self.assertTrue(P('/a/b/c.py').full_match('/**/**/**/**/*.py'))
|
||||
self.assertFalse(P('c.py').full_match('**/a.py'))
|
||||
self.assertFalse(P('c.py').full_match('c/**'))
|
||||
self.assertFalse(P('a/b/c.py').full_match('**/a'))
|
||||
self.assertFalse(P('a/b/c.py').full_match('**/a/b'))
|
||||
self.assertFalse(P('a/b/c.py').full_match('**/a/b/c'))
|
||||
self.assertFalse(P('a/b/c.py').full_match('**/a/b/c.'))
|
||||
self.assertFalse(P('a/b/c.py').full_match('**/a/b/c./**'))
|
||||
self.assertFalse(P('a/b/c.py').full_match('**/a/b/c./**'))
|
||||
self.assertFalse(P('a/b/c.py').full_match('/a/b/c.py/**'))
|
||||
self.assertFalse(P('a/b/c.py').full_match('/**/a/b/c.py'))
|
||||
self.assertRaises(ValueError, P('a').full_match, '**a/b/c')
|
||||
self.assertRaises(ValueError, P('a').full_match, 'a/b/c**')
|
||||
# Case-sensitive flag
|
||||
self.assertFalse(P('A.py').full_match('a.PY', case_sensitive=True))
|
||||
self.assertTrue(P('A.py').full_match('a.PY', case_sensitive=False))
|
||||
self.assertFalse(P('c:/a/B.Py').full_match('C:/A/*.pY', case_sensitive=True))
|
||||
self.assertTrue(P('/a/b/c.py').full_match('/A/*/*.Py', case_sensitive=False))
|
||||
# Matching against empty path
|
||||
self.assertFalse(P('').full_match('*'))
|
||||
self.assertTrue(P('').full_match('**'))
|
||||
self.assertFalse(P('').full_match('**/*'))
|
||||
# Matching with empty pattern
|
||||
self.assertTrue(P('').full_match(''))
|
||||
self.assertTrue(P('.').full_match('.'))
|
||||
self.assertFalse(P('/').full_match(''))
|
||||
self.assertFalse(P('/').full_match('.'))
|
||||
self.assertFalse(P('foo').full_match(''))
|
||||
self.assertFalse(P('foo').full_match('.'))
|
||||
|
||||
def test_parts_common(self):
|
||||
# `parts` returns a tuple.
|
||||
sep = self.sep
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue