GH-115060: Speed up pathlib.Path.glob() by not scanning literal parts (#117732)

Don't bother calling `os.scandir()` to scan for literal pattern segments,
like `foo` in `foo/*.py`. Instead, append the segment(s) as-is and call
through to the next selector with `exists=False`, which signals that the
path might not exist. Subsequent selectors will call `os.scandir()` or
`os.lstat()` to filter out missing paths as needed.
This commit is contained in:
Barney Gale 2024-04-12 22:19:21 +01:00 committed by GitHub
parent 069de14cb9
commit 0eb52f5f26
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 42 additions and 11 deletions

View file

@@ -686,8 +686,14 @@ class PathBase(PurePathBase):
def _glob_selector(self, parts, case_sensitive, recurse_symlinks):
    """Return a selector for the glob pattern segments in *parts*.

    *case_sensitive* may be None, in which case the value is taken from
    ``_is_case_sensitive(self.parser)``; any other value is passed
    through unchanged as the caller's explicit choice.

    *recurse_symlinks* selects the ``recursive`` argument given to the
    globber: True when truthy, ``glob._no_recurse_symlinks`` otherwise.

    The globber is created exactly once via ``self._globber`` and the
    result of its ``selector(parts)`` call is returned.
    """
    if case_sensitive is None:
        case_sensitive = _is_case_sensitive(self.parser)
        case_pedantic = False
    else:
        # The user has expressed a case sensitivity choice, but we don't
        # know the case sensitivity of the underlying filesystem, so we
        # must use scandir() for everything, including non-wildcard parts.
        case_pedantic = True
    recursive = True if recurse_symlinks else glob._no_recurse_symlinks
    # Single construction with the four-argument signature; the older
    # three-argument call is gone because its result was immediately
    # overwritten and it omitted case_pedantic.
    globber = self._globber(
        self.parser.sep, case_sensitive, case_pedantic, recursive)
    return globber.selector(parts)
def glob(self, pattern, *, case_sensitive=None, recurse_symlinks=True):