GH-115060: Speed up pathlib.Path.glob() by omitting initial stat() (#117831)

Since 6258844c, paths that might not exist can be fed into pathlib's
globbing implementation, which will call `os.scandir()` / `os.lstat()` only
when strictly necessary. This allows us to drop an initial `self.is_dir()`
call, which saves a `stat()`.

Co-authored-by: Shantanu <12621235+hauntsaninja@users.noreply.github.com>
This commit is contained in:
Barney Gale 2024-04-14 00:08:03 +01:00 committed by GitHub
parent 3095d02642
commit a74f117dab
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 20 additions and 10 deletions

View file

@ -1004,10 +1004,6 @@ call fails (for example because the path doesn't exist).
.. seealso::
:ref:`pathlib-pattern-language` documentation.
This method calls :meth:`Path.is_dir` on the top-level directory and
propagates any :exc:`OSError` exception that is raised. Subsequent
:exc:`OSError` exceptions from scanning directories are suppressed.
By default, or when the *case_sensitive* keyword-only argument is set to
``None``, this method matches paths using platform-specific casing rules:
typically, case-sensitive on POSIX, and case-insensitive on Windows.
@ -1028,6 +1024,11 @@ call fails (for example because the path doesn't exist).
.. versionchanged:: 3.13
The *pattern* parameter accepts a :term:`path-like object`.
.. versionchanged:: 3.13
Any :exc:`OSError` exceptions raised from scanning the filesystem are
suppressed. In previous versions, such exceptions were suppressed in many
cases, but not all.
.. method:: Path.rglob(pattern, *, case_sensitive=None, recurse_symlinks=False)

View file

@ -607,11 +607,9 @@ class Path(_abc.PathBase, PurePath):
if raw[-1] in (self.parser.sep, self.parser.altsep):
# GH-65238: pathlib doesn't preserve trailing slash. Add it back.
parts.append('')
if not self.is_dir():
return iter([])
select = self._glob_selector(parts[::-1], case_sensitive, recurse_symlinks)
root = str(self)
paths = select(root, exists=True)
paths = select(root)
# Normalize results
if root == '.':

View file

@ -705,10 +705,8 @@ class PathBase(PurePathBase):
anchor, parts = pattern._stack
if anchor:
raise NotImplementedError("Non-relative patterns are unsupported")
if not self.is_dir():
return iter([])
select = self._glob_selector(parts, case_sensitive, recurse_symlinks)
return select(self, exists=True)
return select(self)
def rglob(self, pattern, *, case_sensitive=None, recurse_symlinks=True):
"""Recursively yield all existing files (of any kind, including

View file

@ -1263,6 +1263,13 @@ class PathTest(test_pathlib_abc.DummyPathTest, PurePathTest):
self.assertEqual(
set(P('.').glob('**/*/*')), {P("dirD/fileD")})
def test_glob_inaccessible(self):
P = self.cls
p = P(self.base, "mydir1", "mydir2")
p.mkdir(parents=True)
p.parent.chmod(0)
self.assertEqual(set(p.glob('*')), set())
def test_rglob_pathlike(self):
P = self.cls
p = P(self.base, "dirC")

View file

@ -8,6 +8,7 @@ import unittest
from pathlib._abc import UnsupportedOperation, ParserBase, PurePathBase, PathBase
import posixpath
from test.support import is_wasi
from test.support.os_helper import TESTFN
@ -1920,6 +1921,8 @@ class DummyPathTest(DummyPurePathTest):
}
self.assertEqual(given, {p / x for x in expect})
# See https://github.com/WebAssembly/wasi-filesystem/issues/26
@unittest.skipIf(is_wasi, "WASI resolution of '..' parts doesn't match POSIX")
def test_glob_dotdot(self):
# ".." is not special in globs.
P = self.cls

View file

@ -0,0 +1,3 @@
Speed up :meth:`pathlib.Path.glob` by omitting an initial
:meth:`~pathlib.Path.is_dir` call. As a result of this change,
:meth:`~pathlib.Path.glob` can no longer raise :exc:`OSError`.