gh-131525: Cache the result of tuple_hash (#131529)

* gh-131525: Cache the result of tuple_hash

* Fix debug builds

* Add blurb

* Fix formatting

* Pre-compute empty tuple singleton

* Mostly set the cache within tuple_alloc

* Fixes for TSAN

* Pre-compute empty tuple singleton

* Fix for 32-bit platforms

* Assert that op != NULL in _PyTuple_RESET_HASH_CACHE

* Use FT_ATOMIC_STORE_SSIZE_RELAXED macro

* Update Include/internal/pycore_tuple.h

Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>

* Fix alignment

* atomic load

* Update Objects/tupleobject.c

Co-authored-by: Chris Eibl <138194463+chris-eibl@users.noreply.github.com>

---------

Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com>
Co-authored-by: Chris Eibl <138194463+chris-eibl@users.noreply.github.com>
Michael Droettboom, 2025-03-27 09:57:06 -04:00 (committed by GitHub)
parent cf5e438c02
commit 8614f86b71
102 changed files with 1218 additions and 183 deletions

Objects/tupleobject.c

@@ -45,6 +45,7 @@ tuple_alloc(Py_ssize_t size)
     if (index < PyTuple_MAXSAVESIZE) {
         PyTupleObject *op = _Py_FREELIST_POP(PyTupleObject, tuples[index]);
         if (op != NULL) {
+            _PyTuple_RESET_HASH_CACHE(op);
             return op;
         }
     }
@@ -53,7 +54,11 @@ tuple_alloc(Py_ssize_t size)
                 sizeof(PyObject *))) / sizeof(PyObject *)) {
         return (PyTupleObject *)PyErr_NoMemory();
     }
-    return PyObject_GC_NewVar(PyTupleObject, &PyTuple_Type, size);
+    PyTupleObject *result = PyObject_GC_NewVar(PyTupleObject, &PyTuple_Type, size);
+    if (result != NULL) {
+        _PyTuple_RESET_HASH_CACHE(result);
+    }
+    return result;
 }
 
 // The empty tuple singleton is not tracked by the GC.
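The _PyTuple_RESET_HASH_CACHE() helper used in both allocation paths above is defined in Include/internal/pycore_tuple.h (that file's hunk is not shown here). Judging from the commit-message bullets ("Assert that op != NULL in _PyTuple_RESET_HASH_CACHE", "Use FT_ATOMIC_STORE_SSIZE_RELAXED macro"), it presumably reduces to a sketch like the following, storing the -1 "not yet computed" sentinel with a relaxed atomic write so free-threaded builds avoid data races:

    /* Sketch inferred from the commit-message bullets; the actual
       definition lives in Include/internal/pycore_tuple.h. */
    static inline void
    _PyTuple_RESET_HASH_CACHE(PyTupleObject *op)
    {
        assert(op != NULL);
        FT_ATOMIC_STORE_SSIZE_RELAXED(op->ob_hash, -1);
    }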
@@ -296,50 +301,41 @@ error:
    For the xxHash specification, see
    https://github.com/Cyan4973/xxHash/blob/master/doc/xxhash_spec.md
 
-   Below are the official constants from the xxHash specification. Optimizing
-   compilers should emit a single "rotate" instruction for the
-   _PyHASH_XXROTATE() expansion. If that doesn't happen for some important
-   platform, the macro could be changed to expand to a platform-specific rotate
-   spelling instead.
+   The constants for the hash function are defined in pycore_tuple.h.
 */
 
-#if SIZEOF_PY_UHASH_T > 4
-#define _PyHASH_XXPRIME_1 ((Py_uhash_t)11400714785074694791ULL)
-#define _PyHASH_XXPRIME_2 ((Py_uhash_t)14029467366897019727ULL)
-#define _PyHASH_XXPRIME_5 ((Py_uhash_t)2870177450012600261ULL)
-#define _PyHASH_XXROTATE(x) ((x << 31) | (x >> 33))  /* Rotate left 31 bits */
-#else
-#define _PyHASH_XXPRIME_1 ((Py_uhash_t)2654435761UL)
-#define _PyHASH_XXPRIME_2 ((Py_uhash_t)2246822519UL)
-#define _PyHASH_XXPRIME_5 ((Py_uhash_t)374761393UL)
-#define _PyHASH_XXROTATE(x) ((x << 13) | (x >> 19))  /* Rotate left 13 bits */
-#endif
-
-/* Tests have shown that it's not worth to cache the hash value, see
-   https://bugs.python.org/issue9685 */
-
 static Py_hash_t
 tuple_hash(PyObject *op)
 {
     PyTupleObject *v = _PyTuple_CAST(op);
+
+    Py_uhash_t acc = FT_ATOMIC_LOAD_SSIZE_RELAXED(v->ob_hash);
+    if (acc != (Py_uhash_t)-1) {
+        return acc;
+    }
+
     Py_ssize_t len = Py_SIZE(v);
     PyObject **item = v->ob_item;
-    Py_uhash_t acc = _PyHASH_XXPRIME_5;
+    acc = _PyTuple_HASH_XXPRIME_5;
     for (Py_ssize_t i = 0; i < len; i++) {
         Py_uhash_t lane = PyObject_Hash(item[i]);
         if (lane == (Py_uhash_t)-1) {
             return -1;
         }
-        acc += lane * _PyHASH_XXPRIME_2;
-        acc = _PyHASH_XXROTATE(acc);
-        acc *= _PyHASH_XXPRIME_1;
+        acc += lane * _PyTuple_HASH_XXPRIME_2;
+        acc = _PyTuple_HASH_XXROTATE(acc);
+        acc *= _PyTuple_HASH_XXPRIME_1;
     }
 
     /* Add input length, mangled to keep the historical value of hash(()). */
-    acc += len ^ (_PyHASH_XXPRIME_5 ^ 3527539UL);
+    acc += len ^ (_PyTuple_HASH_XXPRIME_5 ^ 3527539UL);
 
     if (acc == (Py_uhash_t)-1) {
-        return 1546275796;
+        acc = 1546275796;
     }
+
+    FT_ATOMIC_STORE_SSIZE_RELAXED(v->ob_hash, acc);
+
     return acc;
 }
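For readers outside the CPython tree, here is a minimal, self-contained sketch of the same lazy-caching pattern in plain C11 atomics (every name below is a placeholder, not a CPython API). The race between concurrent callers is benign: each writer stores the same deterministic value, so relaxed ordering suffices and the worst case is a redundant recomputation. That is why relaxed loads and stores are enough here and why the change is clean under TSAN, per the "Fixes for TSAN" bullet above.

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    #define UNCACHED ((intptr_t)-1)   /* sentinel: hash not computed yet */

    typedef struct {
        const char *data;
        _Atomic intptr_t hash;        /* starts at UNCACHED */
    } cached_obj;

    /* Stand-in for the real hash function (FNV-1a over bytes). */
    static intptr_t
    compute_hash(const char *s)
    {
        uint64_t h = 14695981039346656037ULL;
        for (; *s != '\0'; s++) {
            h = (h ^ (unsigned char)*s) * 1099511628211ULL;
        }
        intptr_t r = (intptr_t)h;
        return r == UNCACHED ? UNCACHED - 1 : r;  /* keep the sentinel free */
    }

    static intptr_t
    obj_hash(cached_obj *o)
    {
        intptr_t h = atomic_load_explicit(&o->hash, memory_order_relaxed);
        if (h != UNCACHED) {
            return h;                              /* fast path: cache hit */
        }
        h = compute_hash(o->data);                 /* slow path: compute... */
        atomic_store_explicit(&o->hash, h, memory_order_relaxed);
        return h;                                  /* ...then publish */
    }

    int
    main(void)
    {
        cached_obj o = { "spam", UNCACHED };
        /* The second call returns the cached value without recomputing. */
        printf("%lld %lld\n", (long long)obj_hash(&o), (long long)obj_hash(&o));
        return 0;
    }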
@@ -764,6 +760,8 @@ tuple_subtype_new(PyTypeObject *type, PyObject *iterable)
     }
     Py_DECREF(tmp);
 
+    _PyTuple_RESET_HASH_CACHE(newobj);
+
     // Don't track if a subclass tp_alloc is PyType_GenericAlloc()
     if (!_PyObject_GC_IS_TRACKED(newobj)) {
         _PyObject_GC_TRACK(newobj);
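The effect is observable through the public embedding API. The harness below is not part of this diff, only an illustration: both calls return the same value either way, but with this change the second PyObject_Hash() call is answered from the cached ob_hash field instead of rehashing every item.

    #include <Python.h>
    #include <stdio.h>

    int
    main(void)
    {
        Py_Initialize();

        PyObject *one = PyLong_FromLong(1);
        PyObject *two = PyLong_FromLong(2);
        PyObject *t = (one && two) ? PyTuple_Pack(2, one, two) : NULL;
        Py_XDECREF(one);
        Py_XDECREF(two);
        if (t == NULL) {
            PyErr_Print();
            return 1;
        }

        Py_hash_t first = PyObject_Hash(t);   /* computes and caches the hash */
        Py_hash_t second = PyObject_Hash(t);  /* served from the cache */
        printf("%zd %zd\n", (Py_ssize_t)first, (Py_ssize_t)second);

        Py_DECREF(t);
        Py_FinalizeEx();
        return first == second ? 0 : 1;
    }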