mirror of
https://github.com/python/cpython.git
synced 2025-07-24 11:44:31 +00:00
GH-89727: Fix pathlib.Path.walk RecursionError on deep trees (GH-100282)
Use a stack to implement `pathlib.Path.walk()` iteratively instead of recursively, so that walking a deeply nested directory tree no longer hits the interpreter's recursion limit.

Co-authored-by: Barney Gale <barney.gale@gmail.com>
Co-authored-by: Brett Cannon <brett@python.org>
This commit is contained in:
parent
af9c34f6ef
commit
713df2c534
3 changed files with 49 additions and 33 deletions
|
@ -1197,45 +1197,47 @@ class Path(PurePath):
|
||||||
def walk(self, top_down=True, on_error=None, follow_symlinks=False):
|
def walk(self, top_down=True, on_error=None, follow_symlinks=False):
|
||||||
"""Walk the directory tree from this directory, similar to os.walk()."""
|
"""Walk the directory tree from this directory, similar to os.walk()."""
|
||||||
sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks)
|
sys.audit("pathlib.Path.walk", self, on_error, follow_symlinks)
|
||||||
return self._walk(top_down, on_error, follow_symlinks)
|
paths = [self]
|
||||||
|
|
||||||
def _walk(self, top_down, on_error, follow_symlinks):
|
while paths:
|
||||||
# We may not have read permission for self, in which case we can't
|
path = paths.pop()
|
||||||
# get a list of the files the directory contains. os.walk
|
if isinstance(path, tuple):
|
||||||
# always suppressed the exception then, rather than blow up for a
|
yield path
|
||||||
# minor reason when (say) a thousand readable directories are still
|
continue
|
||||||
# left to visit. That logic is copied here.
|
|
||||||
try:
|
|
||||||
scandir_it = self._scandir()
|
|
||||||
except OSError as error:
|
|
||||||
if on_error is not None:
|
|
||||||
on_error(error)
|
|
||||||
return
|
|
||||||
|
|
||||||
with scandir_it:
|
# We may not have read permission for self, in which case we can't
|
||||||
dirnames = []
|
# get a list of the files the directory contains. os.walk()
|
||||||
filenames = []
|
# always suppressed the exception in that instance, rather than
|
||||||
for entry in scandir_it:
|
# blow up for a minor reason when (say) a thousand readable
|
||||||
try:
|
# directories are still left to visit. That logic is copied here.
|
||||||
is_dir = entry.is_dir(follow_symlinks=follow_symlinks)
|
try:
|
||||||
except OSError:
|
scandir_it = path._scandir()
|
||||||
# Carried over from os.path.isdir().
|
except OSError as error:
|
||||||
is_dir = False
|
if on_error is not None:
|
||||||
|
on_error(error)
|
||||||
|
continue
|
||||||
|
|
||||||
if is_dir:
|
with scandir_it:
|
||||||
dirnames.append(entry.name)
|
dirnames = []
|
||||||
else:
|
filenames = []
|
||||||
filenames.append(entry.name)
|
for entry in scandir_it:
|
||||||
|
try:
|
||||||
|
is_dir = entry.is_dir(follow_symlinks=follow_symlinks)
|
||||||
|
except OSError:
|
||||||
|
# Carried over from os.path.isdir().
|
||||||
|
is_dir = False
|
||||||
|
|
||||||
if top_down:
|
if is_dir:
|
||||||
yield self, dirnames, filenames
|
dirnames.append(entry.name)
|
||||||
|
else:
|
||||||
|
filenames.append(entry.name)
|
||||||
|
|
||||||
for dirname in dirnames:
|
if top_down:
|
||||||
dirpath = self._make_child_relpath(dirname)
|
yield path, dirnames, filenames
|
||||||
yield from dirpath._walk(top_down, on_error, follow_symlinks)
|
else:
|
||||||
|
paths.append((path, dirnames, filenames))
|
||||||
|
|
||||||
if not top_down:
|
paths += [path._make_child_relpath(d) for d in reversed(dirnames)]
|
||||||
yield self, dirnames, filenames
|
|
||||||
|
|
||||||
|
|
||||||
class PosixPath(Path, PurePosixPath):
|
class PosixPath(Path, PurePosixPath):
|
||||||
|
|
|
@ -13,6 +13,7 @@ import unittest
|
||||||
from unittest import mock
|
from unittest import mock
|
||||||
|
|
||||||
from test.support import import_helper
|
from test.support import import_helper
|
||||||
|
from test.support import set_recursion_limit
|
||||||
from test.support import is_emscripten, is_wasi
|
from test.support import is_emscripten, is_wasi
|
||||||
from test.support import os_helper
|
from test.support import os_helper
|
||||||
from test.support.os_helper import TESTFN, FakePath
|
from test.support.os_helper import TESTFN, FakePath
|
||||||
|
@ -2793,6 +2794,18 @@ class WalkTests(unittest.TestCase):
|
||||||
self.assertEqual(next(it), expected)
|
self.assertEqual(next(it), expected)
|
||||||
path = path / 'd'
|
path = path / 'd'
|
||||||
|
|
||||||
|
def test_walk_above_recursion_limit(self):
|
||||||
|
recursion_limit = 40
|
||||||
|
# directory_depth > recursion_limit
|
||||||
|
directory_depth = recursion_limit + 10
|
||||||
|
base = pathlib.Path(os_helper.TESTFN, 'deep')
|
||||||
|
path = pathlib.Path(base, *(['d'] * directory_depth))
|
||||||
|
path.mkdir(parents=True)
|
||||||
|
|
||||||
|
with set_recursion_limit(recursion_limit):
|
||||||
|
list(base.walk())
|
||||||
|
list(base.walk(top_down=False))
|
||||||
|
|
||||||
|
|
||||||
class PathTest(_BasePathTest, unittest.TestCase):
|
class PathTest(_BasePathTest, unittest.TestCase):
|
||||||
cls = pathlib.Path
|
cls = pathlib.Path
|
||||||
|
|
|
@ -0,0 +1 @@
|
||||||
|
Fix a RecursionError raised by pathlib.Path.walk() on deeply nested directory trees by rewriting the method to use iteration instead of recursion.
|
Loading…
Add table
Add a link
Reference in a new issue