GH-89727: Fix pathlib.Path.walk RecursionError on deep trees (GH-100282)

Use a stack to implement `pathlib.Path.walk()` iteratively instead of recursively to avoid hitting recursion limits on deeply nested trees.

Co-authored-by: Barney Gale <barney.gale@gmail.com>
Co-authored-by: Brett Cannon <brett@python.org>
This commit is contained in:
Stanislav Zmiev 2023-03-22 18:45:25 +04:00 committed by GitHub
parent af9c34f6ef
commit 713df2c534
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 49 additions and 33 deletions

View file

@ -1197,45 +1197,47 @@ class Path(PurePath):
def walk(self, top_down=True, on_error=None, follow_symlinks=False): def walk(self, top_down=True, on_error=None, follow_symlinks=False):
"""Walk the directory tree from this directory, similar to os.walk().""" """Walk the directory tree from this directory, similar to os.walk()."""
sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks) sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks)
return self._walk(top_down, on_error, follow_symlinks) paths = [self]
def _walk(self, top_down, on_error, follow_symlinks): while paths:
# We may not have read permission for self, in which case we can't path = paths.pop()
# get a list of the files the directory contains. os.walk if isinstance(path, tuple):
# always suppressed the exception then, rather than blow up for a yield path
# minor reason when (say) a thousand readable directories are still continue
# left to visit. That logic is copied here.
try:
scandir_it = self._scandir()
except OSError as error:
if on_error is not None:
on_error(error)
return
with scandir_it: # We may not have read permission for self, in which case we can't
dirnames = [] # get a list of the files the directory contains. os.walk()
filenames = [] # always suppressed the exception in that instance, rather than
for entry in scandir_it: # blow up for a minor reason when (say) a thousand readable
try: # directories are still left to visit. That logic is copied here.
is_dir = entry.is_dir(follow_symlinks=follow_symlinks) try:
except OSError: scandir_it = path._scandir()
# Carried over from os.path.isdir(). except OSError as error:
is_dir = False if on_error is not None:
on_error(error)
continue
if is_dir: with scandir_it:
dirnames.append(entry.name) dirnames = []
else: filenames = []
filenames.append(entry.name) for entry in scandir_it:
try:
is_dir = entry.is_dir(follow_symlinks=follow_symlinks)
except OSError:
# Carried over from os.path.isdir().
is_dir = False
if top_down: if is_dir:
yield self, dirnames, filenames dirnames.append(entry.name)
else:
filenames.append(entry.name)
for dirname in dirnames: if top_down:
dirpath = self._make_child_relpath(dirname) yield path, dirnames, filenames
yield from dirpath._walk(top_down, on_error, follow_symlinks) else:
paths.append((path, dirnames, filenames))
if not top_down: paths += [path._make_child_relpath(d) for d in reversed(dirnames)]
yield self, dirnames, filenames
class PosixPath(Path, PurePosixPath): class PosixPath(Path, PurePosixPath):

View file

@ -13,6 +13,7 @@ import unittest
from unittest import mock from unittest import mock
from test.support import import_helper from test.support import import_helper
from test.support import set_recursion_limit
from test.support import is_emscripten, is_wasi from test.support import is_emscripten, is_wasi
from test.support import os_helper from test.support import os_helper
from test.support.os_helper import TESTFN, FakePath from test.support.os_helper import TESTFN, FakePath
@ -2793,6 +2794,18 @@ class WalkTests(unittest.TestCase):
self.assertEqual(next(it), expected) self.assertEqual(next(it), expected)
path = path / 'd' path = path / 'd'
def test_walk_above_recursion_limit(self):
# Regression test for GH-89727: walk() must cope with a directory tree
# nested deeper than the interpreter's recursion limit.
recursion_limit = 40
# directory_depth > recursion_limit
directory_depth = recursion_limit + 10
base = pathlib.Path(os_helper.TESTFN, 'deep')
# Create TESTFN/deep/d/d/.../d with directory_depth nested 'd' levels.
path = pathlib.Path(base, *(['d'] * directory_depth))
path.mkdir(parents=True)
with set_recursion_limit(recursion_limit):
# No assertions: consuming both the top-down and the bottom-up walk
# without an exception being raised is the whole check.
list(base.walk())
list(base.walk(top_down=False))
class PathTest(_BasePathTest, unittest.TestCase): class PathTest(_BasePathTest, unittest.TestCase):
cls = pathlib.Path cls = pathlib.Path

View file

@ -0,0 +1 @@
Fix RecursionError raised by pathlib.Path.walk on deeply nested directory trees by rewriting the method to use iteration instead of recursion.