gh-101765: Fix SystemError / segmentation fault in iter __reduce__ when internal access of builtins.__dict__ exhausts the iterator (#101769)

2025-08-04 00:48:58 +00:00 · 2023-02-24 18:02:04 -05:00 · 2023-02-24 18:02:04 -05:00 · 54dfa14c5a
commit 54dfa14c5a
parent 89b4c12053
9 changed files with 148 additions and 23 deletions
--- a/Lib/test/test_iter.py
+++ b/Lib/test/test_iter.py
@ -7,6 +7,9 @@ from test.support.os_helper import TESTFN, unlink
 from test.support import check_free_after_iterating, ALWAYS_EQ, NEVER_EQ
 import pickle
 import collections.abc
+import functools
+import contextlib
+import builtins

 # Test result of triple loop (too big to inline)
 TRIPLETS = [(0, 0, 0), (0, 0, 1), (0, 0, 2),
@ -91,6 +94,12 @@ class CallableIterClass:
            raise IndexError # Emergency stop
        return i

+class EmptyIterClass:  # test helper: reports length 0 and never returns an item
+    def __len__(self):
+        return 0
+    def __getitem__(self, i):
+        raise StopIteration  # raised for every index, whatever `i` is
+
 # Main test suite

 class TestCase(unittest.TestCase):
@ -238,6 +247,78 @@ class TestCase(unittest.TestCase):
        self.assertEqual(list(empit), [5, 6])
        self.assertEqual(list(a), [0, 1, 2, 3, 4, 5, 6])

+    def test_reduce_mutating_builtins_iter(self):
+        # Reproducer for issue #101765, where an iterator's `__reduce__`
+        # call could lead to a segfault or SystemError depending on the
+        # order of C argument evaluation, which is undefined
+
+        # Alias the live builtins dict and save the functions we replace
+        builtins_dict = builtins.__dict__
+        orig = {"iter": iter, "reversed": reversed}
+
+        def run(builtin_name, item, sentinel=None):
+            it = iter(item) if sentinel is None else iter(item, sentinel)
+
+            class CustomStr:
+                def __init__(self, name, iterator):
+                    self.name = name
+                    self.iterator = iterator
+                def __hash__(self):
+                    return hash(self.name)  # same hash as the plain str name
+                def __eq__(self, other):
+                    # Exhaust the iterator as a side effect of comparison,
+                    # possibly changing its `it_seq` pointer to NULL.
+                    # The `__reduce__` call should correctly get
+                    # the pointers after this call
+                    list(self.iterator)
+                    return other == self.name
+
+            # del is required here: assigning while the old str key is
+            # still present would prematurely call __eq__ through
+            # the hash collision with that key
+            del builtins_dict[builtin_name]
+            builtins_dict[CustomStr(builtin_name, it)] = orig[builtin_name]
+
+            return it.__reduce__()
+
+        types = [  # each tuple is unpacked as the arguments to run_iter
+            (EmptyIterClass(),),
+            (bytes(8),),
+            (bytearray(8),),
+            ((1, 2, 3),),
+            (lambda: 0, 0),  # (item, sentinel) pair -> iter(item, sentinel)
+            (tuple[int],)  # GenericAlias
+        ]
+
+        try:
+            run_iter = functools.partial(run, "iter")
+            # The returned value of `__reduce__` should not only be valid
+            # but also *empty*, as `it` was exhausted during `__eq__`,
+            # e.g. "xyz" returns (iter, ("",))
+            self.assertEqual(run_iter("xyz"), (orig["iter"], ("",)))
+            self.assertEqual(run_iter([1, 2, 3]), (orig["iter"], ([],)))
+
+            # _PyEval_GetBuiltin is also called for `reversed` in a branch of
+            # listiter_reduce_general
+            self.assertEqual(
+                run("reversed", orig["reversed"](list(range(8)))),
+                (iter, ([],))  # NOTE(review): bare `iter`; siblings use orig["iter"] - confirm intended
+            )
+
+            for case in types:
+                self.assertEqual(run_iter(*case), (orig["iter"], ((),)))
+        finally:
+            # Restore original builtins
+            for key, func in orig.items():
+                # Need to suppress KeyError in case
+                # a failed test deleted the key without setting anything
+                with contextlib.suppress(KeyError):
+                    # del is required here
+                    # to not invoke our custom __eq__ from
+                    # the hash collision with the old key
+                    del builtins_dict[key]
+                builtins_dict[key] = func
+
    # Test a new_style class with __iter__ but no next() method
    def test_new_style_iter_class(self):
        class IterClass(object):