GH-89727: Fix pathlib.Path.walk RecursionError on deep trees (GH-100282)

Use a stack to implement `pathlib.Path.walk()` iteratively instead of recursively to avoid hitting recursion limits on deeply nested trees.

Co-authored-by: Barney Gale <barney.gale@gmail.com>
Co-authored-by: Brett Cannon <brett@python.org>
This commit is contained in:
Stanislav Zmiev 2023-03-22 18:45:25 +04:00 committed by GitHub
parent af9c34f6ef
commit 713df2c534
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 49 additions and 33 deletions

View file

@ -1197,45 +1197,47 @@ class Path(PurePath):
def walk(self, top_down=True, on_error=None, follow_symlinks=False):
    """Walk the directory tree from this directory, similar to os.walk().

    Yield ``(dirpath, dirnames, filenames)`` tuples, where *dirpath* is a
    path object and *dirnames* / *filenames* are lists of entry names.

    The traversal is iterative (an explicit stack instead of recursive
    generator calls), so arbitrarily deep trees do not hit the
    interpreter's recursion limit.

    top_down: when true, a directory is yielded before its
        subdirectories, and the caller may edit *dirnames* in place to
        prune or reorder the descent.  When false, a directory is yielded
        only after all of its subdirectories.
    on_error: optional callable invoked with the OSError raised while
        opening a directory for scanning; that directory is then skipped.
    follow_symlinks: forwarded to ``entry.is_dir()`` when classifying
        each directory entry.
    """
    sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks)

    # The stack holds two kinds of items: path objects that still need to
    # be scanned, and (only in bottom-up mode) already-built result tuples
    # that are waiting for their subtree to be finished.
    paths = [self]

    while paths:
        path = paths.pop()
        if isinstance(path, tuple):
            # Deferred bottom-up result: everything pushed after it (its
            # subtree) has been processed, so it can be emitted now.
            yield path
            continue

        # We may not have read permission for self, in which case we can't
        # get a list of the files the directory contains. os.walk()
        # always suppressed the exception in that instance, rather than
        # blow up for a minor reason when (say) a thousand readable
        # directories are still left to visit. That logic is copied here.
        try:
            scandir_it = path._scandir()
        except OSError as error:
            if on_error is not None:
                on_error(error)
            continue

        with scandir_it:
            dirnames = []
            filenames = []
            for entry in scandir_it:
                try:
                    is_dir = entry.is_dir(follow_symlinks=follow_symlinks)
                except OSError:
                    # Carried over from os.path.isdir().
                    is_dir = False

                if is_dir:
                    dirnames.append(entry.name)
                else:
                    filenames.append(entry.name)

        if top_down:
            yield path, dirnames, filenames
        else:
            # Park the result beneath the children pushed below; it is
            # popped (and yielded) only after they have all been handled.
            paths.append((path, dirnames, filenames))

        # Pushed in reverse so that pop() visits children in listing order.
        # In top-down mode this runs after the yield, so in-place edits the
        # caller made to dirnames are honoured.
        paths += [path._make_child_relpath(d) for d in reversed(dirnames)]
class PosixPath(Path, PurePosixPath):