GH-89727: Fix pathlib.Path.walk RecursionError on deep trees (GH-100282)

Use a stack to implement `pathlib.Path.walk()` iteratively instead of recursively to avoid hitting recursion limits on deeply nested trees.

Co-authored-by: Barney Gale <barney.gale@gmail.com>
Co-authored-by: Brett Cannon <brett@python.org>
This commit is contained in:
Stanislav Zmiev 2023-03-22 18:45:25 +04:00 committed by GitHub
parent af9c34f6ef
commit 713df2c534
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 49 additions and 33 deletions

View file

@ -1197,20 +1197,25 @@ class Path(PurePath):
def walk(self, top_down=True, on_error=None, follow_symlinks=False):
"""Walk the directory tree from this directory, similar to os.walk()."""
sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks)
return self._walk(top_down, on_error, follow_symlinks)
paths = [self]
while paths:
path = paths.pop()
if isinstance(path, tuple):
yield path
continue
def _walk(self, top_down, on_error, follow_symlinks):
# We may not have read permission for self, in which case we can't
# get a list of the files the directory contains. os.walk
# always suppressed the exception then, rather than blow up for a
# minor reason when (say) a thousand readable directories are still
# left to visit. That logic is copied here.
# get a list of the files the directory contains. os.walk()
# always suppressed the exception in that instance, rather than
# blow up for a minor reason when (say) a thousand readable
# directories are still left to visit. That logic is copied here.
try:
scandir_it = self._scandir()
scandir_it = path._scandir()
except OSError as error:
if on_error is not None:
on_error(error)
return
continue
with scandir_it:
dirnames = []
@ -1228,14 +1233,11 @@ class Path(PurePath):
filenames.append(entry.name)
if top_down:
yield self, dirnames, filenames
yield path, dirnames, filenames
else:
paths.append((path, dirnames, filenames))
for dirname in dirnames:
dirpath = self._make_child_relpath(dirname)
yield from dirpath._walk(top_down, on_error, follow_symlinks)
if not top_down:
yield self, dirnames, filenames
paths += [path._make_child_relpath(d) for d in reversed(dirnames)]
class PosixPath(Path, PurePosixPath):

View file

@ -13,6 +13,7 @@ import unittest
from unittest import mock
from test.support import import_helper
from test.support import set_recursion_limit
from test.support import is_emscripten, is_wasi
from test.support import os_helper
from test.support.os_helper import TESTFN, FakePath
@ -2793,6 +2794,18 @@ class WalkTests(unittest.TestCase):
self.assertEqual(next(it), expected)
path = path / 'd'
def test_walk_above_recursion_limit(self):
    # Regression test: walk() must cope with a directory chain that is
    # deeper than the interpreter's recursion limit. The limit is lowered
    # only while walking, and the chain is built 10 levels deeper than it.
    limit = 40
    depth = limit + 10
    root = pathlib.Path(os_helper.TESTFN, 'deep')
    segments = ['d'] * depth
    # Create root/d/d/.../d in one call, before the limit is lowered.
    pathlib.Path(root, *segments).mkdir(parents=True)
    with set_recursion_limit(limit):
        # walk() is a generator, so each traversal has to be consumed in
        # full; do it once per ordering (top-down, then bottom-up).
        for top_down in (True, False):
            list(root.walk(top_down=top_down))
class PathTest(_BasePathTest, unittest.TestCase):
cls = pathlib.Path

View file

@ -0,0 +1 @@
Fix pathlib.Path.walk RecursionError on deep directory trees by rewriting it using iteration instead of recursion.