GH-115060: Speed up pathlib.Path.glob() by not scanning literal parts (#117732)

Don't bother calling `os.scandir()` to scan for literal pattern segments,
like `foo` in `foo/*.py`. Instead, append the segment(s) as-is and call
through to the next selector with `exists=False`, which signals that the
path might not exist. Subsequent selectors will call `os.scandir()` or
`os.lstat()` to filter out missing paths as needed.
This commit is contained in:
Barney Gale 2024-04-12 22:19:21 +01:00 committed by GitHub
parent 069de14cb9
commit 0eb52f5f26
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 42 additions and 11 deletions

View file

@ -331,9 +331,10 @@ class _Globber:
"""Class providing shell-style pattern matching and globbing.
"""
def __init__(self, sep, case_sensitive, recursive=False):
def __init__(self, sep, case_sensitive, case_pedantic=False, recursive=False):
self.sep = sep
self.case_sensitive = case_sensitive
self.case_pedantic = case_pedantic
self.recursive = recursive
# Low-level methods
@ -373,6 +374,8 @@ class _Globber:
selector = self.recursive_selector
elif part in _special_parts:
selector = self.special_selector
elif not self.case_pedantic and magic_check.search(part) is None:
selector = self.literal_selector
else:
selector = self.wildcard_selector
return selector(part, parts)
@ -387,6 +390,23 @@ class _Globber:
return select_next(path, exists)
return select_special
def literal_selector(self, part, parts):
"""Returns a function that selects a literal descendant of a path.
"""
# Optimization: consume and join any subsequent literal parts here,
# rather than leaving them for the next selector. This reduces the
# number of string concatenation operations and calls to add_slash().
while parts and magic_check.search(parts[-1]) is None:
part += self.sep + parts.pop()
select_next = self.selector(parts)
def select_literal(path, exists=False):
path = self.concat_path(self.add_slash(path), part)
return select_next(path, exists=False)
return select_literal
def wildcard_selector(self, part, parts):
"""Returns a function that selects direct children of a given path,
filtering by pattern.