bpo-43475: Fix worst case collision behavior for NaN instances (GH-25493)

This commit is contained in:
Raymond Hettinger 2021-04-22 08:34:57 -07:00 committed by GitHub
parent accea7dc2b
commit a07da09ad5
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
10 changed files with 25 additions and 21 deletions

View file

@ -692,10 +692,9 @@ Here are the rules in detail:
as ``-hash(-x)``. If the resulting hash is ``-1``, replace it with as ``-hash(-x)``. If the resulting hash is ``-1``, replace it with
``-2``. ``-2``.
- The particular values ``sys.hash_info.inf``, ``-sys.hash_info.inf`` - The particular values ``sys.hash_info.inf`` and ``-sys.hash_info.inf``
and ``sys.hash_info.nan`` are used as hash values for positive are used as hash values for positive
infinity, negative infinity, or nans (respectively). (All hashable infinity or negative infinity (respectively).
nans have the same hash value.)
- For a :class:`complex` number ``z``, the hash values of the real - For a :class:`complex` number ``z``, the hash values of the real
and imaginary parts are combined by computing ``hash(z.real) + and imaginary parts are combined by computing ``hash(z.real) +
@ -740,7 +739,7 @@ number, :class:`float`, or :class:`complex`::
"""Compute the hash of a float x.""" """Compute the hash of a float x."""
if math.isnan(x): if math.isnan(x):
return sys.hash_info.nan return super().__hash__()
elif math.isinf(x): elif math.isinf(x):
return sys.hash_info.inf if x > 0 else -sys.hash_info.inf return sys.hash_info.inf if x > 0 else -sys.hash_info.inf
else: else:

View file

@ -855,7 +855,7 @@ always available.
+---------------------+--------------------------------------------------+ +---------------------+--------------------------------------------------+
| :const:`inf` | hash value returned for a positive infinity | | :const:`inf` | hash value returned for a positive infinity |
+---------------------+--------------------------------------------------+ +---------------------+--------------------------------------------------+
| :const:`nan` | hash value returned for a nan | | :const:`nan` | (this attribute is no longer used) |
+---------------------+--------------------------------------------------+ +---------------------+--------------------------------------------------+
| :const:`imag` | multiplier used for the imaginary part of a | | :const:`imag` | multiplier used for the imaginary part of a |
| | complex number | | | complex number |

View file

@ -7,7 +7,7 @@ extern "C" {
/* Helpers for hash functions */ /* Helpers for hash functions */
#ifndef Py_LIMITED_API #ifndef Py_LIMITED_API
PyAPI_FUNC(Py_hash_t) _Py_HashDouble(double); PyAPI_FUNC(Py_hash_t) _Py_HashDouble(PyObject *, double);
PyAPI_FUNC(Py_hash_t) _Py_HashPointer(const void*); PyAPI_FUNC(Py_hash_t) _Py_HashPointer(const void*);
// Similar to _Py_HashPointer(), but don't replace -1 with -2 // Similar to _Py_HashPointer(), but don't replace -1 with -2
PyAPI_FUNC(Py_hash_t) _Py_HashPointerRaw(const void*); PyAPI_FUNC(Py_hash_t) _Py_HashPointerRaw(const void*);
@ -29,7 +29,6 @@ PyAPI_FUNC(Py_hash_t) _Py_HashBytes(const void*, Py_ssize_t);
#define _PyHASH_MODULUS (((size_t)1 << _PyHASH_BITS) - 1) #define _PyHASH_MODULUS (((size_t)1 << _PyHASH_BITS) - 1)
#define _PyHASH_INF 314159 #define _PyHASH_INF 314159
#define _PyHASH_NAN 0
#define _PyHASH_IMAG _PyHASH_MULTIPLIER #define _PyHASH_IMAG _PyHASH_MULTIPLIER

View file

@ -951,7 +951,7 @@ class Decimal(object):
if self.is_snan(): if self.is_snan():
raise TypeError('Cannot hash a signaling NaN value.') raise TypeError('Cannot hash a signaling NaN value.')
elif self.is_nan(): elif self.is_nan():
return _PyHASH_NAN return super().__hash__()
else: else:
if self._sign: if self._sign:
return -_PyHASH_INF return -_PyHASH_INF

View file

@ -0,0 +1,3 @@
Hashes of NaN values now depend on object identity. Formerly, they always
hashed to 0 even though NaN values are not equal to one another. Having the
same hash for unequal values caused pile-ups in hash tables.

View file

@ -4536,7 +4536,6 @@ _dec_hash(PyDecObject *v)
#error "No valid combination of CONFIG_64, CONFIG_32 and _PyHASH_BITS" #error "No valid combination of CONFIG_64, CONFIG_32 and _PyHASH_BITS"
#endif #endif
const Py_hash_t py_hash_inf = 314159; const Py_hash_t py_hash_inf = 314159;
const Py_hash_t py_hash_nan = 0;
mpd_uint_t ten_data[1] = {10}; mpd_uint_t ten_data[1] = {10};
mpd_t ten = {MPD_POS|MPD_STATIC|MPD_CONST_DATA, mpd_t ten = {MPD_POS|MPD_STATIC|MPD_CONST_DATA,
0, 2, 1, 1, ten_data}; 0, 2, 1, 1, ten_data};
@ -4555,7 +4554,7 @@ _dec_hash(PyDecObject *v)
return -1; return -1;
} }
else if (mpd_isnan(MPD(v))) { else if (mpd_isnan(MPD(v))) {
return py_hash_nan; return _Py_HashPointer(v);
} }
else { else {
return py_hash_inf * mpd_arith_sign(MPD(v)); return py_hash_inf * mpd_arith_sign(MPD(v));
@ -5939,5 +5938,3 @@ error:
return NULL; /* GCOV_NOT_REACHED */ return NULL; /* GCOV_NOT_REACHED */
} }

View file

@ -412,10 +412,10 @@ static Py_hash_t
complex_hash(PyComplexObject *v) complex_hash(PyComplexObject *v)
{ {
Py_uhash_t hashreal, hashimag, combined; Py_uhash_t hashreal, hashimag, combined;
hashreal = (Py_uhash_t)_Py_HashDouble(v->cval.real); hashreal = (Py_uhash_t)_Py_HashDouble((PyObject *) v, v->cval.real);
if (hashreal == (Py_uhash_t)-1) if (hashreal == (Py_uhash_t)-1)
return -1; return -1;
hashimag = (Py_uhash_t)_Py_HashDouble(v->cval.imag); hashimag = (Py_uhash_t)_Py_HashDouble((PyObject *)v, v->cval.imag);
if (hashimag == (Py_uhash_t)-1) if (hashimag == (Py_uhash_t)-1)
return -1; return -1;
/* Note: if the imaginary part is 0, hashimag is 0 now, /* Note: if the imaginary part is 0, hashimag is 0 now,

View file

@ -556,7 +556,7 @@ float_richcompare(PyObject *v, PyObject *w, int op)
static Py_hash_t static Py_hash_t
float_hash(PyFloatObject *v) float_hash(PyFloatObject *v)
{ {
return _Py_HashDouble(v->ob_fval); return _Py_HashDouble((PyObject *)v, v->ob_fval);
} }
static PyObject * static PyObject *

View file

@ -56,8 +56,12 @@ static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0};
If the result of the reduction is infinity (this is impossible for If the result of the reduction is infinity (this is impossible for
integers, floats and Decimals) then use the predefined hash value integers, floats and Decimals) then use the predefined hash value
_PyHASH_INF for x >= 0, or -_PyHASH_INF for x < 0, instead. _PyHASH_INF for x >= 0, or -_PyHASH_INF for x < 0, instead.
_PyHASH_INF, -_PyHASH_INF and _PyHASH_NAN are also used for the _PyHASH_INF and -_PyHASH_INF are also used for the
hashes of float and Decimal infinities and nans. hashes of float and Decimal infinities.
NaNs hash with a pointer hash. Having distinct hash values prevents
catastrophic pileups from distinct NaN instances, which used to always
have the same hash value even though they compare unequal.
A selling point for the above strategy is that it makes it possible A selling point for the above strategy is that it makes it possible
to compute hashes of decimal and binary floating-point numbers to compute hashes of decimal and binary floating-point numbers
@ -82,8 +86,10 @@ static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0};
*/ */
Py_hash_t _Py_HashPointer(const void *);
Py_hash_t Py_hash_t
_Py_HashDouble(double v) _Py_HashDouble(PyObject *inst, double v)
{ {
int e, sign; int e, sign;
double m; double m;
@ -93,7 +99,7 @@ _Py_HashDouble(double v)
if (Py_IS_INFINITY(v)) if (Py_IS_INFINITY(v))
return v > 0 ? _PyHASH_INF : -_PyHASH_INF; return v > 0 ? _PyHASH_INF : -_PyHASH_INF;
else else
return _PyHASH_NAN; return _Py_HashPointer(inst);
} }
m = frexp(v, &e); m = frexp(v, &e);

View file

@ -1405,7 +1405,7 @@ get_hash_info(PyThreadState *tstate)
PyStructSequence_SET_ITEM(hash_info, field++, PyStructSequence_SET_ITEM(hash_info, field++,
PyLong_FromLong(_PyHASH_INF)); PyLong_FromLong(_PyHASH_INF));
PyStructSequence_SET_ITEM(hash_info, field++, PyStructSequence_SET_ITEM(hash_info, field++,
PyLong_FromLong(_PyHASH_NAN)); PyLong_FromLong(0)); // This is no longer used
PyStructSequence_SET_ITEM(hash_info, field++, PyStructSequence_SET_ITEM(hash_info, field++,
PyLong_FromLong(_PyHASH_IMAG)); PyLong_FromLong(_PyHASH_IMAG));
PyStructSequence_SET_ITEM(hash_info, field++, PyStructSequence_SET_ITEM(hash_info, field++,