Further reduce the cost of hash collisions by inspecting an additional nearby entry.

This commit is contained in:
Raymond Hettinger 2013-08-31 21:27:08 -07:00
parent 34567ec94b
commit 95c0d67581

View file

@@ -65,10 +65,11 @@ chaining would be substantial (100% with typical malloc overhead).
 The initial probe index is computed as hash mod the table size. Subsequent
 probe indices are computed as explained in Objects/dictobject.c.
-To improve cache locality, each probe is done in pairs.
-After the probe is examined, an adjacent entry is then examined as well.
-The likelihood is that an adjacent entry is in the same cache line and
-can be examined more cheaply than another probe elsewhere in memory.
+To improve cache locality, each probe inspects nearby entries before
+moving on to probes elsewhere in memory. Depending on alignment and the
+size of a cache line, the nearby entries are cheaper to inspect than
+other probes elsewhere in memory. This probe strategy reduces the cost
+of hash collisions.
 All arithmetic on hash should ignore overflow.
@@ -130,6 +131,26 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
if (entry->key == dummy && freeslot == NULL) if (entry->key == dummy && freeslot == NULL)
freeslot = entry; freeslot = entry;
entry = &table[j ^ 2];
if (entry->key == NULL)
break;
if (entry->key == key)
return entry;
if (entry->hash == hash && entry->key != dummy) {
PyObject *startkey = entry->key;
Py_INCREF(startkey);
cmp = PyObject_RichCompareBool(startkey, key, Py_EQ);
Py_DECREF(startkey);
if (cmp < 0)
return NULL;
if (table != so->table || entry->key != startkey)
return set_lookkey(so, key, hash);
if (cmp > 0)
return entry;
}
if (entry->key == dummy && freeslot == NULL)
freeslot = entry;
i = i * 5 + perturb + 1; i = i * 5 + perturb + 1;
j = i & mask; j = i & mask;
perturb >>= PERTURB_SHIFT; perturb >>= PERTURB_SHIFT;
@@ -190,6 +211,17 @@ set_lookkey_unicode(PySetObject *so, PyObject *key, Py_hash_t hash)
if (entry->key == dummy && freeslot == NULL) if (entry->key == dummy && freeslot == NULL)
freeslot = entry; freeslot = entry;
entry = &table[j ^ 2];
if (entry->key == NULL)
break;
if (entry->key == key
|| (entry->hash == hash
&& entry->key != dummy
&& unicode_eq(entry->key, key)))
return entry;
if (entry->key == dummy && freeslot == NULL)
freeslot = entry;
i = i * 5 + perturb + 1; i = i * 5 + perturb + 1;
j = i & mask; j = i & mask;
perturb >>= PERTURB_SHIFT; perturb >>= PERTURB_SHIFT;
@@ -256,6 +288,9 @@ set_insert_clean(PySetObject *so, PyObject *key, Py_hash_t hash)
if (entry->key == NULL) if (entry->key == NULL)
break; break;
entry = &table[j ^ 1]; entry = &table[j ^ 1];
if (entry->key == NULL)
break;
entry = &table[j ^ 2];
if (entry->key == NULL) if (entry->key == NULL)
break; break;
i = i * 5 + perturb + 1; i = i * 5 + perturb + 1;