mirror of
https://github.com/python/cpython.git
synced 2025-12-09 02:35:14 +00:00
GH-102613: Fast recursive globbing in pathlib.Path.glob() (GH-104512)
This commit introduces a 'walk-and-match' strategy for handling glob patterns that include a non-terminal `**` wildcard, such as `**/*.py`. For this example, the previous implementation recursively walked directories using `os.scandir()` when it expanded the `**` component, and then **scanned those same directories again** when it expanded the `*.py` component. This is wasteful. In the new implementation, any components following a `**` wildcard are used to build a `re.Pattern` object, which is used to filter the results of the recursive walk. A pattern like `**/*.py` uses half the number of `os.scandir()` calls; a pattern like `**/*/*.py` a third, etc. This new algorithm does not apply if either: 1. The *follow_symlinks* argument is set to `None` (its default), or 2. The pattern contains `..` components. In these cases we fall back to the old implementation. This commit also replaces selector classes with selector functions. These generators directly yield results rather than calling through to their successors. A new internal `Path._glob()` method takes care to chain these generators together, which simplifies the lazy algorithm and slightly improves performance. It should also be easier to understand and maintain.
This commit is contained in:
parent
2587b9f64e
commit
24af45172f
4 changed files with 161 additions and 138 deletions
|
|
@ -1898,6 +1898,16 @@ class _BasePathTest(object):
|
|||
_check(p, "*B/*", ["dirB/fileB", "dirB/linkD", "linkB/fileB", "linkB/linkD"])
|
||||
_check(p, "*/fileB", ["dirB/fileB", "linkB/fileB"])
|
||||
_check(p, "*/", ["dirA", "dirB", "dirC", "dirE", "linkB"])
|
||||
_check(p, "dir*/*/..", ["dirC/dirD/..", "dirA/linkC/.."])
|
||||
_check(p, "dir*/**/", ["dirA", "dirA/linkC", "dirA/linkC/linkD", "dirB", "dirB/linkD",
|
||||
"dirC", "dirC/dirD", "dirE"])
|
||||
_check(p, "dir*/**/..", ["dirA/..", "dirA/linkC/..", "dirB/..",
|
||||
"dirC/..", "dirC/dirD/..", "dirE/.."])
|
||||
_check(p, "dir*/*/**/", ["dirA/linkC", "dirA/linkC/linkD", "dirB/linkD", "dirC/dirD"])
|
||||
_check(p, "dir*/*/**/..", ["dirA/linkC/..", "dirC/dirD/.."])
|
||||
_check(p, "dir*/**/fileC", ["dirC/fileC"])
|
||||
_check(p, "dir*/*/../dirD/**/", ["dirC/dirD/../dirD"])
|
||||
_check(p, "*/dirD/**/", ["dirC/dirD"])
|
||||
|
||||
@os_helper.skip_unless_symlink
|
||||
def test_glob_no_follow_symlinks_common(self):
|
||||
|
|
@ -1912,6 +1922,14 @@ class _BasePathTest(object):
|
|||
_check(p, "*B/*", ["dirB/fileB", "dirB/linkD"])
|
||||
_check(p, "*/fileB", ["dirB/fileB"])
|
||||
_check(p, "*/", ["dirA", "dirB", "dirC", "dirE"])
|
||||
_check(p, "dir*/*/..", ["dirC/dirD/.."])
|
||||
_check(p, "dir*/**/", ["dirA", "dirB", "dirC", "dirC/dirD", "dirE"])
|
||||
_check(p, "dir*/**/..", ["dirA/..", "dirB/..", "dirC/..", "dirC/dirD/..", "dirE/.."])
|
||||
_check(p, "dir*/*/**/", ["dirC/dirD"])
|
||||
_check(p, "dir*/*/**/..", ["dirC/dirD/.."])
|
||||
_check(p, "dir*/**/fileC", ["dirC/fileC"])
|
||||
_check(p, "dir*/*/../dirD/**/", ["dirC/dirD/../dirD"])
|
||||
_check(p, "*/dirD/**/", ["dirC/dirD"])
|
||||
|
||||
def test_rglob_common(self):
|
||||
def _check(glob, expected):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue