From 1e610fb05fa4ba61a759b68461f1a9aed07622fc Mon Sep 17 00:00:00 2001 From: Barney Gale Date: Sat, 20 Jan 2024 03:06:00 +0000 Subject: [PATCH] GH-113225: Speed up `pathlib.Path.walk(top_down=False)` (#113693) Use `_make_child_entry()` rather than `_make_child_relpath()` to retrieve path objects for directories to visit. This saves the allocation of one path object per directory in user subclasses of `PathBase`, and avoids a second loop. This trick does not apply when walking top-down, because users can affect the walk by modifying *dirnames* in-place. A side effect of this change is that, in bottom-up mode, subdirectories of each directory are visited in reverse order, and that this order doesn't match that of the names in *dirnames*. I suspect this is fine as the order is arbitrary anyway. --- Lib/pathlib/_abc.py | 9 +++++---- .../2024-01-04-20-58-17.gh-issue-113225.-nyJM4.rst | 2 ++ 2 files changed, 7 insertions(+), 4 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2024-01-04-20-58-17.gh-issue-113225.-nyJM4.rst diff --git a/Lib/pathlib/_abc.py b/Lib/pathlib/_abc.py index e5eeb4afce2..553e1a39906 100644 --- a/Lib/pathlib/_abc.py +++ b/Lib/pathlib/_abc.py @@ -820,6 +820,8 @@ class PathBase(PurePathBase): with scandir_obj as scandir_it: dirnames = [] filenames = [] + if not top_down: + paths.append((path, dirnames, filenames)) for entry in scandir_it: try: is_dir = entry.is_dir(follow_symlinks=follow_symlinks) @@ -828,16 +830,15 @@ class PathBase(PurePathBase): is_dir = False if is_dir: + if not top_down: + paths.append(path._make_child_entry(entry)) dirnames.append(entry.name) else: filenames.append(entry.name) if top_down: yield path, dirnames, filenames - else: - paths.append((path, dirnames, filenames)) - - paths += [path._make_child_relpath(d) for d in reversed(dirnames)] + paths += [path._make_child_relpath(d) for d in reversed(dirnames)] def absolute(self): """Return an absolute version of this path diff --git a/Misc/NEWS.d/next/Library/2024-01-04-20-58-17.gh-issue-113225.-nyJM4.rst b/Misc/NEWS.d/next/Library/2024-01-04-20-58-17.gh-issue-113225.-nyJM4.rst new file mode 100644 index 00000000000..0c07f42fd06 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-01-04-20-58-17.gh-issue-113225.-nyJM4.rst @@ -0,0 +1,2 @@ +Speed up :meth:`pathlib.Path.walk` by using :attr:`os.DirEntry.path` where +possible.