mirror of
https://github.com/python/cpython.git
synced 2025-08-04 00:48:58 +00:00
bpo-34751: improved hash function for tuples (GH-9471)
This commit is contained in:
parent
53125a53f4
commit
aeb1be5868
3 changed files with 142 additions and 42 deletions
|
@ -333,39 +333,60 @@ error:
|
|||
return NULL;
|
||||
}
|
||||
|
||||
/* The addend 82520 was selected from range(0, 1000000) for
|
||||
generating the greatest number of prime multipliers for tuples
|
||||
up to length eight:
|
||||
|
||||
1082527, 1165049, 1082531, 1165057, 1247581, 1330103, 1082533,
|
||||
1330111, 1412633, 1165069, 1247599, 1495177, 1577699
|
||||
/* Hash for tuples. This is a slightly simplified version of the xxHash
|
||||
non-cryptographic hash:
|
||||
- we do not use any parallelism, there is only 1 accumulator.
|
||||
- we drop the final mixing since this is just a permutation of the
|
||||
output space: it does not help against collisions.
|
||||
- at the end, we mangle the length with a single constant.
|
||||
For the xxHash specification, see
|
||||
https://github.com/Cyan4973/xxHash/blob/master/doc/xxhash_spec.md
|
||||
|
||||
Tests have shown that caching the hash value is not worthwhile, see
|
||||
issue #9685.
|
||||
Below are the official constants from the xxHash specification. Optimizing
|
||||
compilers should emit a single "rotate" instruction for the
|
||||
_PyHASH_XXROTATE() expansion. If that doesn't happen for some important
|
||||
platform, the macro could be changed to expand to a platform-specific rotate
|
||||
spelling instead.
|
||||
*/
|
||||
/* Constants and rotate helper for the xxHash-style tuple hash.

   The branch is selected by SIZEOF_PY_UHASH_T: the first set of
   definitions is used when Py_uhash_t is wider than 4 bytes, the second
   otherwise.

   _PyHASH_XXROTATE(x) rotates x left (by 31 or 13 bits respectively).
   The two shift counts in each expansion add up to the selected width
   (64 or 32 bits), so x must be an unsigned value of exactly that width.
   The argument is parenthesized so that a compound expression such as
   `a | b` or `a + b` is rotated as a whole instead of being split by
   operator precedence.  The argument is evaluated twice, so it must not
   have side effects. */
#if SIZEOF_PY_UHASH_T > 4
#define _PyHASH_XXPRIME_1 ((Py_uhash_t)11400714785074694791ULL)
#define _PyHASH_XXPRIME_2 ((Py_uhash_t)14029467366897019727ULL)
#define _PyHASH_XXPRIME_5 ((Py_uhash_t)2870177450012600261ULL)
#define _PyHASH_XXROTATE(x) (((x) << 31) | ((x) >> 33))  /* Rotate left 31 bits */
#else
#define _PyHASH_XXPRIME_1 ((Py_uhash_t)2654435761UL)
#define _PyHASH_XXPRIME_2 ((Py_uhash_t)2246822519UL)
#define _PyHASH_XXPRIME_5 ((Py_uhash_t)374761393UL)
#define _PyHASH_XXROTATE(x) (((x) << 13) | ((x) >> 19))  /* Rotate left 13 bits */
#endif
|
||||
|
||||
/* Tests have shown that caching the hash value is not worthwhile, see
|
||||
https://bugs.python.org/issue9685 */
|
||||
static Py_hash_t
|
||||
tuplehash(PyTupleObject *v)
|
||||
{
|
||||
Py_uhash_t x; /* Unsigned for defined overflow behavior. */
|
||||
Py_hash_t y;
|
||||
Py_ssize_t len = Py_SIZE(v);
|
||||
PyObject **p;
|
||||
Py_uhash_t mult = _PyHASH_MULTIPLIER;
|
||||
x = 0x345678UL;
|
||||
p = v->ob_item;
|
||||
while (--len >= 0) {
|
||||
y = PyObject_Hash(*p++);
|
||||
if (y == -1)
|
||||
Py_ssize_t i, len = Py_SIZE(v);
|
||||
PyObject **item = v->ob_item;
|
||||
|
||||
Py_uhash_t acc = _PyHASH_XXPRIME_5;
|
||||
for (i = 0; i < len; i++) {
|
||||
Py_uhash_t lane = PyObject_Hash(item[i]);
|
||||
if (lane == (Py_uhash_t)-1) {
|
||||
return -1;
|
||||
x = (x ^ y) * mult;
|
||||
/* the cast might truncate len; that doesn't change hash stability */
|
||||
mult += (Py_hash_t)(82520UL + len + len);
|
||||
}
|
||||
acc += lane * _PyHASH_XXPRIME_2;
|
||||
acc = _PyHASH_XXROTATE(acc);
|
||||
acc *= _PyHASH_XXPRIME_1;
|
||||
}
|
||||
x += 97531UL;
|
||||
if (x == (Py_uhash_t)-1)
|
||||
x = -2;
|
||||
return x;
|
||||
|
||||
/* Add input length, mangled to keep the historical value of hash(()). */
|
||||
acc += len ^ (_PyHASH_XXPRIME_5 ^ 3527539UL);
|
||||
|
||||
if (acc == (Py_uhash_t)-1) {
|
||||
return 1546275796;
|
||||
}
|
||||
return acc;
|
||||
}
|
||||
|
||||
static Py_ssize_t
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue