GH-125413: pathlib ABCs: use scandir() to speed up walk() (#126262)

Use the new `PathBase.scandir()` method in `PathBase.walk()`, which greatly
reduces the number of `PathBase.stat()` calls needed when walking.

There are no user-facing changes, because the pathlib ABCs are still
private and `Path.walk()` doesn't use the implementation in its superclass.
This commit is contained in:
Barney Gale 2024-11-01 18:52:00 +00:00 committed by GitHub
parent 68a51e0178
commit 37651cfbce
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 14 additions and 12 deletions

View file

@@ -693,16 +693,18 @@ class PathBase(PurePathBase):
if not top_down: if not top_down:
paths.append((path, dirnames, filenames)) paths.append((path, dirnames, filenames))
try: try:
for child in path.iterdir(): with path.scandir() as entries:
try: for entry in entries:
if child.is_dir(follow_symlinks=follow_symlinks): name = entry.name
if not top_down: try:
paths.append(child) if entry.is_dir(follow_symlinks=follow_symlinks):
dirnames.append(child.name) if not top_down:
else: paths.append(path.joinpath(name))
filenames.append(child.name) dirnames.append(name)
except OSError: else:
filenames.append(child.name) filenames.append(name)
except OSError:
filenames.append(name)
except OSError as error: except OSError as error:
if on_error is not None: if on_error is not None:
on_error(error) on_error(error)

View file

@@ -1951,7 +1951,7 @@ class DummyPathTest(DummyPurePathTest):
if self.can_symlink: if self.can_symlink:
# Add some symlinks # Add some symlinks
source.joinpath('linkC').symlink_to('fileC') source.joinpath('linkC').symlink_to('fileC')
source.joinpath('linkD').symlink_to('dirD') source.joinpath('linkD').symlink_to('dirD', target_is_directory=True)
# Perform the copy # Perform the copy
target = base / 'copyC' target = base / 'copyC'
@@ -2969,7 +2969,7 @@ class DummyPathTest(DummyPurePathTest):
f.write(f"I'm {path} and proud of it. Blame test_pathlib.\n") f.write(f"I'm {path} and proud of it. Blame test_pathlib.\n")
if self.can_symlink: if self.can_symlink:
self.link_path.symlink_to(t2_path) self.link_path.symlink_to(t2_path, target_is_directory=True)
broken_link_path.symlink_to('broken') broken_link_path.symlink_to('broken')
broken_link2_path.symlink_to(self.cls('tmp3', 'broken')) broken_link2_path.symlink_to(self.cls('tmp3', 'broken'))
self.sub2_tree = (self.sub2_path, [], ["broken_link", "broken_link2", "link", "tmp3"]) self.sub2_tree = (self.sub2_path, [], ["broken_link", "broken_link2", "link", "tmp3"])