GH-81079: Add case_sensitive argument to pathlib.Path.glob() (GH-102710)

This argument allows case-sensitive matching to be enabled on Windows, and
case-insensitive matching to be enabled on POSIX.

Co-authored-by: Steve Dower <steve.dower@microsoft.com>
This commit is contained in:
Barney Gale 2023-05-04 17:44:36 +01:00 committed by GitHub
parent 09b7695f12
commit 8100be5535
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 51 additions and 17 deletions

View file

@ -855,7 +855,7 @@ call fails (for example because the path doesn't exist).
.. versionadded:: 3.5 .. versionadded:: 3.5
.. method:: Path.glob(pattern) .. method:: Path.glob(pattern, *, case_sensitive=None)
Glob the given relative *pattern* in the directory represented by this path, Glob the given relative *pattern* in the directory represented by this path,
yielding all matching files (of any kind):: yielding all matching files (of any kind)::
@ -876,6 +876,11 @@ call fails (for example because the path doesn't exist).
PosixPath('setup.py'), PosixPath('setup.py'),
PosixPath('test_pathlib.py')] PosixPath('test_pathlib.py')]
By default, or when the *case_sensitive* keyword-only argument is set to
``None``, this method matches paths using platform-specific casing rules:
typically, case-sensitive on POSIX, and case-insensitive on Windows.
Set *case_sensitive* to ``True`` or ``False`` to override this behaviour.
.. note:: .. note::
Using the "``**``" pattern in large directory trees may consume Using the "``**``" pattern in large directory trees may consume
an inordinate amount of time. an inordinate amount of time.
@ -886,6 +891,9 @@ call fails (for example because the path doesn't exist).
Return only directories if *pattern* ends with a pathname components Return only directories if *pattern* ends with a pathname components
separator (:data:`~os.sep` or :data:`~os.altsep`). separator (:data:`~os.sep` or :data:`~os.altsep`).
.. versionadded:: 3.12
The *case_sensitive* argument.
.. method:: Path.group() .. method:: Path.group()
Return the name of the group owning the file. :exc:`KeyError` is raised Return the name of the group owning the file. :exc:`KeyError` is raised
@ -1271,7 +1279,7 @@ call fails (for example because the path doesn't exist).
.. versionadded:: 3.6 .. versionadded:: 3.6
The *strict* argument (pre-3.6 behavior is strict). The *strict* argument (pre-3.6 behavior is strict).
.. method:: Path.rglob(pattern) .. method:: Path.rglob(pattern, *, case_sensitive=None)
Glob the given relative *pattern* recursively. This is like calling Glob the given relative *pattern* recursively. This is like calling
:func:`Path.glob` with "``**/``" added in front of the *pattern*, where :func:`Path.glob` with "``**/``" added in front of the *pattern*, where
@ -1284,12 +1292,20 @@ call fails (for example because the path doesn't exist).
PosixPath('setup.py'), PosixPath('setup.py'),
PosixPath('test_pathlib.py')] PosixPath('test_pathlib.py')]
By default, or when the *case_sensitive* keyword-only argument is set to
``None``, this method matches paths using platform-specific casing rules:
typically, case-sensitive on POSIX, and case-insensitive on Windows.
Set *case_sensitive* to ``True`` or ``False`` to override this behaviour.
.. audit-event:: pathlib.Path.rglob self,pattern pathlib.Path.rglob .. audit-event:: pathlib.Path.rglob self,pattern pathlib.Path.rglob
.. versionchanged:: 3.11 .. versionchanged:: 3.11
Return only directories if *pattern* ends with a pathname components Return only directories if *pattern* ends with a pathname components
separator (:data:`~os.sep` or :data:`~os.altsep`). separator (:data:`~os.sep` or :data:`~os.altsep`).
.. versionadded:: 3.12
The *case_sensitive* argument.
.. method:: Path.rmdir() .. method:: Path.rmdir()
Remove this directory. The directory must be empty. Remove this directory. The directory must be empty.

View file

@ -62,7 +62,7 @@ def _is_case_sensitive(flavour):
# #
@functools.lru_cache() @functools.lru_cache()
def _make_selector(pattern_parts, flavour): def _make_selector(pattern_parts, flavour, case_sensitive):
pat = pattern_parts[0] pat = pattern_parts[0]
child_parts = pattern_parts[1:] child_parts = pattern_parts[1:]
if not pat: if not pat:
@ -75,17 +75,17 @@ def _make_selector(pattern_parts, flavour):
raise ValueError("Invalid pattern: '**' can only be an entire path component") raise ValueError("Invalid pattern: '**' can only be an entire path component")
else: else:
cls = _WildcardSelector cls = _WildcardSelector
return cls(pat, child_parts, flavour) return cls(pat, child_parts, flavour, case_sensitive)
class _Selector: class _Selector:
"""A selector matches a specific glob pattern part against the children """A selector matches a specific glob pattern part against the children
of a given path.""" of a given path."""
def __init__(self, child_parts, flavour): def __init__(self, child_parts, flavour, case_sensitive):
self.child_parts = child_parts self.child_parts = child_parts
if child_parts: if child_parts:
self.successor = _make_selector(child_parts, flavour) self.successor = _make_selector(child_parts, flavour, case_sensitive)
self.dironly = True self.dironly = True
else: else:
self.successor = _TerminatingSelector() self.successor = _TerminatingSelector()
@ -108,8 +108,9 @@ class _TerminatingSelector:
class _ParentSelector(_Selector): class _ParentSelector(_Selector):
def __init__(self, name, child_parts, flavour):
_Selector.__init__(self, child_parts, flavour) def __init__(self, name, child_parts, flavour, case_sensitive):
_Selector.__init__(self, child_parts, flavour, case_sensitive)
def _select_from(self, parent_path, scandir): def _select_from(self, parent_path, scandir):
path = parent_path._make_child_relpath('..') path = parent_path._make_child_relpath('..')
@ -119,10 +120,13 @@ class _ParentSelector(_Selector):
class _WildcardSelector(_Selector): class _WildcardSelector(_Selector):
def __init__(self, pat, child_parts, flavour): def __init__(self, pat, child_parts, flavour, case_sensitive):
flags = re.NOFLAG if _is_case_sensitive(flavour) else re.IGNORECASE _Selector.__init__(self, child_parts, flavour, case_sensitive)
if case_sensitive is None:
# TODO: evaluate case-sensitivity of each directory in _select_from()
case_sensitive = _is_case_sensitive(flavour)
flags = re.NOFLAG if case_sensitive else re.IGNORECASE
self.match = re.compile(fnmatch.translate(pat), flags=flags).fullmatch self.match = re.compile(fnmatch.translate(pat), flags=flags).fullmatch
_Selector.__init__(self, child_parts, flavour)
def _select_from(self, parent_path, scandir): def _select_from(self, parent_path, scandir):
try: try:
@ -153,8 +157,8 @@ class _WildcardSelector(_Selector):
class _RecursiveWildcardSelector(_Selector): class _RecursiveWildcardSelector(_Selector):
def __init__(self, pat, child_parts, flavour): def __init__(self, pat, child_parts, flavour, case_sensitive):
_Selector.__init__(self, child_parts, flavour) _Selector.__init__(self, child_parts, flavour, case_sensitive)
def _iterate_directories(self, parent_path, scandir): def _iterate_directories(self, parent_path, scandir):
yield parent_path yield parent_path
@ -819,7 +823,7 @@ class Path(PurePath):
# includes scandir(), which is used to implement glob(). # includes scandir(), which is used to implement glob().
return os.scandir(self) return os.scandir(self)
def glob(self, pattern): def glob(self, pattern, *, case_sensitive=None):
"""Iterate over this subtree and yield all existing files (of any """Iterate over this subtree and yield all existing files (of any
kind, including directories) matching the given relative pattern. kind, including directories) matching the given relative pattern.
""" """
@ -831,11 +835,11 @@ class Path(PurePath):
raise NotImplementedError("Non-relative patterns are unsupported") raise NotImplementedError("Non-relative patterns are unsupported")
if pattern[-1] in (self._flavour.sep, self._flavour.altsep): if pattern[-1] in (self._flavour.sep, self._flavour.altsep):
pattern_parts.append('') pattern_parts.append('')
selector = _make_selector(tuple(pattern_parts), self._flavour) selector = _make_selector(tuple(pattern_parts), self._flavour, case_sensitive)
for p in selector.select_from(self): for p in selector.select_from(self):
yield p yield p
def rglob(self, pattern): def rglob(self, pattern, *, case_sensitive=None):
"""Recursively yield all existing files (of any kind, including """Recursively yield all existing files (of any kind, including
directories) matching the given relative pattern, anywhere in directories) matching the given relative pattern, anywhere in
this subtree. this subtree.
@ -846,7 +850,7 @@ class Path(PurePath):
raise NotImplementedError("Non-relative patterns are unsupported") raise NotImplementedError("Non-relative patterns are unsupported")
if pattern and pattern[-1] in (self._flavour.sep, self._flavour.altsep): if pattern and pattern[-1] in (self._flavour.sep, self._flavour.altsep):
pattern_parts.append('') pattern_parts.append('')
selector = _make_selector(("**",) + tuple(pattern_parts), self._flavour) selector = _make_selector(("**",) + tuple(pattern_parts), self._flavour, case_sensitive)
for p in selector.select_from(self): for p in selector.select_from(self):
yield p yield p

View file

@ -1816,6 +1816,18 @@ class _BasePathTest(object):
else: else:
_check(p.glob("*/"), ["dirA", "dirB", "dirC", "dirE", "linkB"]) _check(p.glob("*/"), ["dirA", "dirB", "dirC", "dirE", "linkB"])
def test_glob_case_sensitive(self):
P = self.cls
def _check(path, pattern, case_sensitive, expected):
actual = {str(q) for q in path.glob(pattern, case_sensitive=case_sensitive)}
expected = {str(P(BASE, q)) for q in expected}
self.assertEqual(actual, expected)
path = P(BASE)
_check(path, "DIRB/FILE*", True, [])
_check(path, "DIRB/FILE*", False, ["dirB/fileB"])
_check(path, "dirb/file*", True, [])
_check(path, "dirb/file*", False, ["dirB/fileB"])
def test_rglob_common(self): def test_rglob_common(self):
def _check(glob, expected): def _check(glob, expected):
self.assertEqual(set(glob), { P(BASE, q) for q in expected }) self.assertEqual(set(glob), { P(BASE, q) for q in expected })

View file

@ -0,0 +1,2 @@
Add *case_sensitive* keyword-only argument to :meth:`pathlib.Path.glob` and
:meth:`~pathlib.Path.rglob`.