bpo-31179: Make dict.copy() up to 5.5 times faster. (#3067)

This commit is contained in:
Yury Selivanov 2018-01-22 11:54:41 -05:00 committed by GitHub
parent a4b1bb4801
commit b0a7a037b8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 122 additions and 2 deletions

View file

@ -615,6 +615,52 @@ new_dict_with_shared_keys(PyDictKeysObject *keys)
return new_dict(keys, values);
}
static PyObject *
clone_combined_dict(PyDictObject *orig)
{
assert(PyDict_CheckExact(orig));
assert(orig->ma_values == NULL);
assert(orig->ma_keys->dk_refcnt == 1);
Py_ssize_t keys_size = _PyDict_KeysSize(orig->ma_keys);
PyDictKeysObject *keys = PyObject_Malloc(keys_size);
if (keys == NULL) {
PyErr_NoMemory();
return NULL;
}
memcpy(keys, orig->ma_keys, keys_size);
/* After copying key/value pairs, we need to incref all
keys and values and they are about to be co-owned by a
new dict object. */
PyDictKeyEntry *ep0 = DK_ENTRIES(keys);
Py_ssize_t n = keys->dk_nentries;
for (Py_ssize_t i = 0; i < n; i++) {
PyDictKeyEntry *entry = &ep0[i];
PyObject *value = entry->me_value;
if (value != NULL) {
Py_INCREF(value);
Py_INCREF(entry->me_key);
}
}
PyDictObject *new = (PyDictObject *)new_dict(keys, NULL);
if (new == NULL) {
/* In case of an error, `new_dict()` takes care of
cleaning up `keys`. */
return NULL;
}
new->ma_used = orig->ma_used;
assert(_PyDict_CheckConsistency(new));
if (_PyObject_GC_IS_TRACKED(orig)) {
/* Maintain tracking. */
_PyObject_GC_TRACK(new);
}
return (PyObject *)new;
}
PyObject *
PyDict_New(void)
{
@ -2484,7 +2530,13 @@ PyDict_Copy(PyObject *o)
PyErr_BadInternalCall();
return NULL;
}
mp = (PyDictObject *)o;
if (mp->ma_used == 0) {
/* The dict is empty; just return a new dict. */
return PyDict_New();
}
if (_PyDict_HasSplitTable(mp)) {
PyDictObject *split_copy;
Py_ssize_t size = USABLE_FRACTION(DK_SIZE(mp->ma_keys));
@ -2510,6 +2562,27 @@ PyDict_Copy(PyObject *o)
_PyObject_GC_TRACK(split_copy);
return (PyObject *)split_copy;
}
if (PyDict_CheckExact(mp) && mp->ma_values == NULL &&
(mp->ma_used >= (mp->ma_keys->dk_nentries * 2) / 3))
{
/* Use fast-copy if:
(1) 'mp' is an instance of a subclassed dict; and
(2) 'mp' is not a split-dict; and
(3) if 'mp' is non-compact ('del' operation does not resize dicts),
do fast-copy only if it has at most 1/3 non-used keys.
The last condition (3) is important to guard against a pathalogical
case when a large dict is almost emptied with multiple del/pop
operations and copied after that. In cases like this, we defer to
PyDict_Merge, which produces a compacted copy.
*/
return clone_combined_dict(mp);
}
copy = PyDict_New();
if (copy == NULL)
return NULL;