gh-132762: Fix underallocation bug in dict.fromkeys() (gh-133627)

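The function `dict_set_fromkeys()` adds elements of a set to an existing
dictionary. The size of the expanded dictionary was estimated with
`PySet_GET_SIZE(iterable)`, which did not take into account the size of the
existing dictionary. The resize target is now the larger of that set-based
estimate and the dictionary's current `DK_LOG_SIZE(mp->ma_keys)`.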
This commit is contained in:
Angela Liss 2025-05-08 13:13:11 -04:00 committed by GitHub
parent 2d82ab761a
commit 421ba589d0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 24 additions and 5 deletions

View file

@ -338,17 +338,34 @@ class DictTest(unittest.TestCase):
self.assertRaises(Exc, baddict2.fromkeys, [1])
# test fast path for dictionary inputs
res = dict(zip(range(6), [0]*6))
d = dict(zip(range(6), range(6)))
self.assertEqual(dict.fromkeys(d, 0), dict(zip(range(6), [0]*6)))
self.assertEqual(dict.fromkeys(d, 0), res)
# test fast path for set inputs
d = set(range(6))
self.assertEqual(dict.fromkeys(d, 0), res)
# test slow path for other iterable inputs
d = list(range(6))
self.assertEqual(dict.fromkeys(d, 0), res)
# test fast path when object's constructor returns large non-empty dict
class baddict3(dict):
def __new__(cls):
return d
d = {i : i for i in range(10)}
d = {i : i for i in range(1000)}
res = d.copy()
res.update(a=None, b=None, c=None)
self.assertEqual(baddict3.fromkeys({"a", "b", "c"}), res)
# test slow path when object is a proper subclass of dict
class baddict4(dict):
def __init__(self):
dict.__init__(self, d)
d = {i : i for i in range(1000)}
res = d.copy()
res.update(a=None, b=None, c=None)
self.assertEqual(baddict4.fromkeys({"a", "b", "c"}), res)
def test_copy(self):
d = {1: 1, 2: 2, 3: 3}
self.assertIsNot(d.copy(), d)

View file

@ -0,0 +1 @@
:meth:`~dict.fromkeys` no longer loops forever when adding a small set of keys to a large base dict. Patch by Angela Liss.

View file

@ -3178,9 +3178,10 @@ dict_set_fromkeys(PyInterpreterState *interp, PyDictObject *mp,
Py_ssize_t pos = 0;
PyObject *key;
Py_hash_t hash;
if (dictresize(interp, mp,
estimate_log2_keysize(PySet_GET_SIZE(iterable)), 0)) {
uint8_t new_size = Py_MAX(
estimate_log2_keysize(PySet_GET_SIZE(iterable)),
DK_LOG_SIZE(mp->ma_keys));
if (dictresize(interp, mp, new_size, 0)) {
Py_DECREF(mp);
return NULL;
}