Move insertion resize logic into set_insert_key().

Simplifies the code a little bit and does the resize check
only when a new key is added (giving a small speed up in
the case where the key already exists).

Fixes possible bug in set_merge() where the set_insert_key()
call relies on a big resize at the start to make enough room
for the keys but is vulnerable to a comparision callback that
could cause the table to shrink in the middle of the merge.

Also, changed the resize threshold from two-thirds of the
mask+1 to just two-thirds.  The plus one offset gave no
real benefit (afterall, the two-thirds mark is just a
heuristic and isn't a precise cut-off).
This commit is contained in:
Raymond Hettinger 2015-07-03 17:21:17 -07:00
parent df418b67ab
commit 15f0869609

View file

@ -125,6 +125,8 @@ set_lookkey(PySetObject *so, PyObject *key, Py_hash_t hash)
}
}
static int set_table_resize(PySetObject *, Py_ssize_t);
static int
set_insert_key(PySetObject *so, PyObject *key, Py_hash_t hash)
{
@ -139,7 +141,7 @@ set_insert_key(PySetObject *so, PyObject *key, Py_hash_t hash)
entry = &table[i];
if (entry->key == NULL)
goto found_null_first;
goto found_unused;
freeslot = NULL;
perturb = hash;
@ -173,7 +175,7 @@ set_insert_key(PySetObject *so, PyObject *key, Py_hash_t hash)
for (j = 0 ; j < LINEAR_PROBES ; j++) {
entry++;
if (entry->hash == 0 && entry->key == NULL)
goto found_null;
goto found_unused_or_dummy;
if (entry->hash == hash) {
PyObject *startkey = entry->key;
assert(startkey != dummy);
@ -204,30 +206,27 @@ set_insert_key(PySetObject *so, PyObject *key, Py_hash_t hash)
entry = &table[i];
if (entry->key == NULL)
goto found_null;
goto found_unused_or_dummy;
}
found_null_first:
found_unused_or_dummy:
if (freeslot == NULL)
goto found_unused;
Py_INCREF(key);
so->used++;
freeslot->key = key;
freeslot->hash = hash;
return 0;
found_unused:
Py_INCREF(key);
so->fill++;
so->used++;
entry->key = key;
entry->hash = hash;
if ((size_t)so->fill*3 < mask*2)
return 0;
found_null:
Py_INCREF(key);
if (freeslot == NULL) {
/* UNUSED */
so->fill++;
} else {
/* DUMMY */
entry = freeslot;
}
so->used++;
entry->key = key;
entry->hash = hash;
return 0;
return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
found_active:
return 0;
@ -366,28 +365,15 @@ set_table_resize(PySetObject *so, Py_ssize_t minused)
return 0;
}
/* CAUTION: set_add_key/entry() must guarantee it won't resize the table */
static int
set_add_entry(PySetObject *so, setentry *entry)
{
Py_ssize_t n_used;
PyObject *key = entry->key;
Py_hash_t hash = entry->hash;
assert(so->fill <= so->mask); /* at least one empty slot */
n_used = so->used;
if (set_insert_key(so, key, hash))
return -1;
if (!(so->used > n_used && so->fill*3 >= (so->mask+1)*2))
return 0;
return set_table_resize(so, so->used>50000 ? so->used*2 : so->used*4);
return set_insert_key(so, entry->key, entry->hash);
}
static int
set_add_key(PySetObject *so, PyObject *key)
{
setentry entry;
Py_hash_t hash;
if (!PyUnicode_CheckExact(key) ||
@ -396,9 +382,7 @@ set_add_key(PySetObject *so, PyObject *key)
if (hash == -1)
return -1;
}
entry.key = key;
entry.hash = hash;
return set_add_entry(so, &entry);
return set_insert_key(so, key, hash);
}
#define DISCARD_NOTFOUND 0
@ -631,7 +615,7 @@ set_merge(PySetObject *so, PyObject *otherset)
* incrementally resizing as we insert new keys. Expect
* that there will be no (or few) overlapping keys.
*/
if ((so->fill + other->used)*3 >= (so->mask+1)*2) {
if ((so->fill + other->used)*3 >= so->mask*2) {
if (set_table_resize(so, (so->used + other->used)*2) != 0)
return -1;
}
@ -979,16 +963,12 @@ set_update_internal(PySetObject *so, PyObject *other)
*/
if (dictsize == -1)
return -1;
if ((so->fill + dictsize)*3 >= (so->mask+1)*2) {
if ((so->fill + dictsize)*3 >= so->mask*2) {
if (set_table_resize(so, (so->used + dictsize)*2) != 0)
return -1;
}
while (_PyDict_Next(other, &pos, &key, &value, &hash)) {
setentry an_entry;
an_entry.hash = hash;
an_entry.key = key;
if (set_add_entry(so, &an_entry))
if (set_insert_key(so, key, hash))
return -1;
}
return 0;
@ -1583,17 +1563,16 @@ set_difference(PySetObject *so, PyObject *other)
if (PyDict_CheckExact(other)) {
while (set_next(so, &pos, &entry)) {
setentry entrycopy;
PyObject *key = entry->key;
Py_hash_t hash = entry->hash;
int rv;
entrycopy.hash = entry->hash;
entrycopy.key = entry->key;
rv = _PyDict_Contains(other, entry->key, entry->hash);
rv = _PyDict_Contains(other, key, hash);
if (rv < 0) {
Py_DECREF(result);
return NULL;
}
if (!rv) {
if (set_add_entry((PySetObject *)result, &entrycopy)) {
if (set_insert_key((PySetObject *)result, key, hash)) {
Py_DECREF(result);
return NULL;
}