mirror of
https://github.com/python/cpython.git
synced 2025-07-18 00:35:17 +00:00
GH-73435: Implement recursive wildcards in pathlib.PurePath.match()
(#101398)
`PurePath.match()` now handles the `**` wildcard as in `Path.glob()`, i.e. it matches any number of path segments. We now compile a `re.Pattern` object for the entire pattern. This is made more difficult by `fnmatch` not treating directory separators as special when evaluating wildcards (`*`, `?`, etc.), and so we arrange the path parts onto separate *lines* in a string, and ensure we don't set `re.DOTALL`. Co-authored-by: Hugo van Kemenade <hugovk@users.noreply.github.com> Co-authored-by: Alex Waygood <Alex.Waygood@Gmail.com>
This commit is contained in:
parent
4c770617c0
commit
49f90ba1ea
5 changed files with 123 additions and 15 deletions
|
@ -569,6 +569,13 @@ Pure paths provide the following methods and properties:
|
||||||
>>> PurePath('a/b.py').match('/*.py')
|
>>> PurePath('a/b.py').match('/*.py')
|
||||||
False
|
False
|
||||||
|
|
||||||
|
The *pattern* may be another path object; this speeds up matching the same
|
||||||
|
pattern against multiple files::
|
||||||
|
|
||||||
|
>>> pattern = PurePath('*.py')
|
||||||
|
>>> PurePath('a/b.py').match(pattern)
|
||||||
|
True
|
||||||
|
|
||||||
As with other methods, case-sensitivity follows platform defaults::
|
As with other methods, case-sensitivity follows platform defaults::
|
||||||
|
|
||||||
>>> PurePosixPath('b.py').match('*.PY')
|
>>> PurePosixPath('b.py').match('*.PY')
|
||||||
|
@ -581,6 +588,10 @@ Pure paths provide the following methods and properties:
|
||||||
.. versionadded:: 3.12
|
.. versionadded:: 3.12
|
||||||
The *case_sensitive* argument.
|
The *case_sensitive* argument.
|
||||||
|
|
||||||
|
.. versionchanged:: 3.13
|
||||||
|
Support for the recursive wildcard "``**``" was added. In previous
|
||||||
|
versions, it acted like the non-recursive wildcard "``*``".
|
||||||
|
|
||||||
|
|
||||||
.. method:: PurePath.relative_to(other, walk_up=False)
|
.. method:: PurePath.relative_to(other, walk_up=False)
|
||||||
|
|
||||||
|
|
|
@ -90,6 +90,9 @@ Improved Modules
|
||||||
pathlib
|
pathlib
|
||||||
-------
|
-------
|
||||||
|
|
||||||
|
* Add support for recursive wildcards in :meth:`pathlib.PurePath.match`.
|
||||||
|
(Contributed by Barney Gale in :gh:`73435`.)
|
||||||
|
|
||||||
* Add *follow_symlinks* keyword-only argument to :meth:`pathlib.Path.glob` and
|
* Add *follow_symlinks* keyword-only argument to :meth:`pathlib.Path.glob` and
|
||||||
:meth:`~pathlib.Path.rglob`.
|
:meth:`~pathlib.Path.rglob`.
|
||||||
(Contributed by Barney Gale in :gh:`77609`.)
|
(Contributed by Barney Gale in :gh:`77609`.)
|
||||||
|
|
|
@ -54,6 +54,7 @@ def _ignore_error(exception):
|
||||||
getattr(exception, 'winerror', None) in _IGNORED_WINERRORS)
|
getattr(exception, 'winerror', None) in _IGNORED_WINERRORS)
|
||||||
|
|
||||||
|
|
||||||
|
@functools.cache
|
||||||
def _is_case_sensitive(flavour):
|
def _is_case_sensitive(flavour):
|
||||||
return flavour.normcase('Aa') == 'Aa'
|
return flavour.normcase('Aa') == 'Aa'
|
||||||
|
|
||||||
|
@ -61,6 +62,22 @@ def _is_case_sensitive(flavour):
|
||||||
# Globbing helpers
|
# Globbing helpers
|
||||||
#
|
#
|
||||||
|
|
||||||
|
|
||||||
|
# fnmatch.translate() returns a regular expression that includes a prefix and
|
||||||
|
# a suffix, which enable matching newlines and ensure the end of the string is
|
||||||
|
# matched, respectively. These features are undesirable for our implementation
|
||||||
|
# of PurePath.match(), which represents path separators as newlines and joins
|
||||||
|
# pattern segments together. As a workaround, we define a slice object that
|
||||||
|
# can remove the prefix and suffix from any translate() result. See the
|
||||||
|
# _compile_pattern_lines() function for more details.
|
||||||
|
_FNMATCH_PREFIX, _FNMATCH_SUFFIX = fnmatch.translate('_').split('_')
|
||||||
|
_FNMATCH_SLICE = slice(len(_FNMATCH_PREFIX), -len(_FNMATCH_SUFFIX))
|
||||||
|
_SWAP_SEP_AND_NEWLINE = {
|
||||||
|
'/': str.maketrans({'/': '\n', '\n': '/'}),
|
||||||
|
'\\': str.maketrans({'\\': '\n', '\n': '\\'}),
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
@functools.lru_cache()
|
@functools.lru_cache()
|
||||||
def _make_selector(pattern_parts, flavour, case_sensitive):
|
def _make_selector(pattern_parts, flavour, case_sensitive):
|
||||||
pat = pattern_parts[0]
|
pat = pattern_parts[0]
|
||||||
|
@ -92,6 +109,51 @@ def _compile_pattern(pat, case_sensitive):
|
||||||
return re.compile(fnmatch.translate(pat), flags).match
|
return re.compile(fnmatch.translate(pat), flags).match
|
||||||
|
|
||||||
|
|
||||||
|
@functools.lru_cache()
|
||||||
|
def _compile_pattern_lines(pattern_lines, case_sensitive):
|
||||||
|
"""Compile the given pattern lines to an `re.Pattern` object.
|
||||||
|
|
||||||
|
The *pattern_lines* argument is a glob-style pattern (e.g. '**/*.py') with
|
||||||
|
its path separators and newlines swapped (e.g. '**\n*.py'). By using
|
||||||
|
newlines to separate path components, and not setting `re.DOTALL`, we
|
||||||
|
ensure that the `*` wildcard cannot match path separators.
|
||||||
|
|
||||||
|
The returned `re.Pattern` object may have its `match()` method called to
|
||||||
|
match a complete pattern, or `search()` to match from the right. The
|
||||||
|
argument supplied to these methods must also have its path separators and
|
||||||
|
newlines swapped.
|
||||||
|
"""
|
||||||
|
|
||||||
|
# Match the start of the path, or just after a path separator
|
||||||
|
parts = ['^']
|
||||||
|
for part in pattern_lines.splitlines(keepends=True):
|
||||||
|
if part == '**\n':
|
||||||
|
# '**/' component: we use '[\s\S]' rather than '.' so that path
|
||||||
|
# separators (i.e. newlines) are matched. The trailing '^' ensures
|
||||||
|
# we terminate after a path separator (i.e. on a new line).
|
||||||
|
part = r'[\s\S]*^'
|
||||||
|
elif part == '**':
|
||||||
|
# '**' component.
|
||||||
|
part = r'[\s\S]*'
|
||||||
|
elif '**' in part:
|
||||||
|
raise ValueError("Invalid pattern: '**' can only be an entire path component")
|
||||||
|
else:
|
||||||
|
# Any other component: pass to fnmatch.translate(). We slice off
|
||||||
|
# the common prefix and suffix added by translate() to ensure that
|
||||||
|
# re.DOTALL is not set, and the end of the string is not matched,
|
||||||
|
# respectively. With DOTALL not set, '*' wildcards will not match
|
||||||
|
# path separators, because the '.' characters in the pattern will
|
||||||
|
# not match newlines.
|
||||||
|
part = fnmatch.translate(part)[_FNMATCH_SLICE]
|
||||||
|
parts.append(part)
|
||||||
|
# Match the end of the path, always.
|
||||||
|
parts.append(r'\Z')
|
||||||
|
flags = re.MULTILINE
|
||||||
|
if not case_sensitive:
|
||||||
|
flags |= re.IGNORECASE
|
||||||
|
return re.compile(''.join(parts), flags=flags)
|
||||||
|
|
||||||
|
|
||||||
class _Selector:
|
class _Selector:
|
||||||
"""A selector matches a specific glob pattern part against the children
|
"""A selector matches a specific glob pattern part against the children
|
||||||
of a given path."""
|
of a given path."""
|
||||||
|
@ -276,6 +338,10 @@ class PurePath:
|
||||||
# to implement comparison methods like `__lt__()`.
|
# to implement comparison methods like `__lt__()`.
|
||||||
'_parts_normcase_cached',
|
'_parts_normcase_cached',
|
||||||
|
|
||||||
|
# The `_lines_cached` slot stores the string path with path separators
|
||||||
|
# and newlines swapped. This is used to implement `match()`.
|
||||||
|
'_lines_cached',
|
||||||
|
|
||||||
# The `_hash` slot stores the hash of the case-normalized string
|
# The `_hash` slot stores the hash of the case-normalized string
|
||||||
# path. It's set when `__hash__()` is called for the first time.
|
# path. It's set when `__hash__()` is called for the first time.
|
||||||
'_hash',
|
'_hash',
|
||||||
|
@ -441,6 +507,16 @@ class PurePath:
|
||||||
self._parts_normcase_cached = self._str_normcase.split(self._flavour.sep)
|
self._parts_normcase_cached = self._str_normcase.split(self._flavour.sep)
|
||||||
return self._parts_normcase_cached
|
return self._parts_normcase_cached
|
||||||
|
|
||||||
|
@property
|
||||||
|
def _lines(self):
|
||||||
|
# Path with separators and newlines swapped, for pattern matching.
|
||||||
|
try:
|
||||||
|
return self._lines_cached
|
||||||
|
except AttributeError:
|
||||||
|
trans = _SWAP_SEP_AND_NEWLINE[self._flavour.sep]
|
||||||
|
self._lines_cached = str(self).translate(trans)
|
||||||
|
return self._lines_cached
|
||||||
|
|
||||||
def __eq__(self, other):
|
def __eq__(self, other):
|
||||||
if not isinstance(other, PurePath):
|
if not isinstance(other, PurePath):
|
||||||
return NotImplemented
|
return NotImplemented
|
||||||
|
@ -697,23 +773,18 @@ class PurePath:
|
||||||
"""
|
"""
|
||||||
Return True if this path matches the given pattern.
|
Return True if this path matches the given pattern.
|
||||||
"""
|
"""
|
||||||
|
if not isinstance(path_pattern, PurePath):
|
||||||
|
path_pattern = self.with_segments(path_pattern)
|
||||||
if case_sensitive is None:
|
if case_sensitive is None:
|
||||||
case_sensitive = _is_case_sensitive(self._flavour)
|
case_sensitive = _is_case_sensitive(self._flavour)
|
||||||
pat = self.with_segments(path_pattern)
|
pattern = _compile_pattern_lines(path_pattern._lines, case_sensitive)
|
||||||
if not pat.parts:
|
if path_pattern.drive or path_pattern.root:
|
||||||
|
return pattern.match(self._lines) is not None
|
||||||
|
elif path_pattern._tail:
|
||||||
|
return pattern.search(self._lines) is not None
|
||||||
|
else:
|
||||||
raise ValueError("empty pattern")
|
raise ValueError("empty pattern")
|
||||||
pat_parts = pat.parts
|
|
||||||
parts = self.parts
|
|
||||||
if pat.drive or pat.root:
|
|
||||||
if len(pat_parts) != len(parts):
|
|
||||||
return False
|
|
||||||
elif len(pat_parts) > len(parts):
|
|
||||||
return False
|
|
||||||
for part, pat in zip(reversed(parts), reversed(pat_parts)):
|
|
||||||
match = _compile_pattern(pat, case_sensitive)
|
|
||||||
if not match(part):
|
|
||||||
return False
|
|
||||||
return True
|
|
||||||
|
|
||||||
# Subclassing os.PathLike makes isinstance() checks slower,
|
# Subclassing os.PathLike makes isinstance() checks slower,
|
||||||
# which in turn makes Path construction slower. Register instead!
|
# which in turn makes Path construction slower. Register instead!
|
||||||
|
|
|
@ -310,8 +310,30 @@ class _BasePurePathTest(object):
|
||||||
self.assertFalse(P('/ab.py').match('/a/*.py'))
|
self.assertFalse(P('/ab.py').match('/a/*.py'))
|
||||||
self.assertFalse(P('/a/b/c.py').match('/a/*.py'))
|
self.assertFalse(P('/a/b/c.py').match('/a/*.py'))
|
||||||
# Multi-part glob-style pattern.
|
# Multi-part glob-style pattern.
|
||||||
self.assertFalse(P('/a/b/c.py').match('/**/*.py'))
|
self.assertTrue(P('a').match('**'))
|
||||||
|
self.assertTrue(P('c.py').match('**'))
|
||||||
|
self.assertTrue(P('a/b/c.py').match('**'))
|
||||||
|
self.assertTrue(P('/a/b/c.py').match('**'))
|
||||||
|
self.assertTrue(P('/a/b/c.py').match('/**'))
|
||||||
|
self.assertTrue(P('/a/b/c.py').match('**/'))
|
||||||
|
self.assertTrue(P('/a/b/c.py').match('/a/**'))
|
||||||
|
self.assertTrue(P('/a/b/c.py').match('**/*.py'))
|
||||||
|
self.assertTrue(P('/a/b/c.py').match('/**/*.py'))
|
||||||
self.assertTrue(P('/a/b/c.py').match('/a/**/*.py'))
|
self.assertTrue(P('/a/b/c.py').match('/a/**/*.py'))
|
||||||
|
self.assertTrue(P('/a/b/c.py').match('/a/b/**/*.py'))
|
||||||
|
self.assertTrue(P('/a/b/c.py').match('/**/**/**/**/*.py'))
|
||||||
|
self.assertFalse(P('c.py').match('**/a.py'))
|
||||||
|
self.assertFalse(P('c.py').match('c/**'))
|
||||||
|
self.assertFalse(P('a/b/c.py').match('**/a'))
|
||||||
|
self.assertFalse(P('a/b/c.py').match('**/a/b'))
|
||||||
|
self.assertFalse(P('a/b/c.py').match('**/a/b/c'))
|
||||||
|
self.assertFalse(P('a/b/c.py').match('**/a/b/c.'))
|
||||||
|
self.assertFalse(P('a/b/c.py').match('**/a/b/c./**'))
|
||||||
|
self.assertFalse(P('a/b/c.py').match('**/a/b/c./**'))
|
||||||
|
self.assertFalse(P('a/b/c.py').match('/a/b/c.py/**'))
|
||||||
|
self.assertFalse(P('a/b/c.py').match('/**/a/b/c.py'))
|
||||||
|
self.assertRaises(ValueError, P('a').match, '**a/b/c')
|
||||||
|
self.assertRaises(ValueError, P('a').match, 'a/b/c**')
|
||||||
# Case-sensitive flag
|
# Case-sensitive flag
|
||||||
self.assertFalse(P('A.py').match('a.PY', case_sensitive=True))
|
self.assertFalse(P('A.py').match('a.PY', case_sensitive=True))
|
||||||
self.assertTrue(P('A.py').match('a.PY', case_sensitive=False))
|
self.assertTrue(P('A.py').match('a.PY', case_sensitive=False))
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
Add support for recursive wildcards in :meth:`pathlib.PurePath.match`.
|
Loading…
Add table
Add a link
Reference in a new issue