mirror of
https://github.com/python/cpython.git
synced 2025-08-23 18:24:46 +00:00
bpo-43475: Fix worst case collision behavior for NaN instances (GH-25493)
This commit is contained in:
parent
accea7dc2b
commit
a07da09ad5
10 changed files with 25 additions and 21 deletions
|
@@ -692,10 +692,9 @@ Here are the rules in detail:
|
||||||
as ``-hash(-x)``. If the resulting hash is ``-1``, replace it with
|
as ``-hash(-x)``. If the resulting hash is ``-1``, replace it with
|
||||||
``-2``.
|
``-2``.
|
||||||
|
|
||||||
- The particular values ``sys.hash_info.inf``, ``-sys.hash_info.inf``
|
- The particular values ``sys.hash_info.inf`` and ``-sys.hash_info.inf``
|
||||||
and ``sys.hash_info.nan`` are used as hash values for positive
|
are used as hash values for positive
|
||||||
infinity, negative infinity, or nans (respectively). (All hashable
|
infinity or negative infinity (respectively).
|
||||||
nans have the same hash value.)
|
|
||||||
|
|
||||||
- For a :class:`complex` number ``z``, the hash values of the real
|
- For a :class:`complex` number ``z``, the hash values of the real
|
||||||
and imaginary parts are combined by computing ``hash(z.real) +
|
and imaginary parts are combined by computing ``hash(z.real) +
|
||||||
|
@@ -740,7 +739,7 @@ number, :class:`float`, or :class:`complex`::
|
||||||
"""Compute the hash of a float x."""
|
"""Compute the hash of a float x."""
|
||||||
|
|
||||||
if math.isnan(x):
|
if math.isnan(x):
|
||||||
return sys.hash_info.nan
|
return super().__hash__()
|
||||||
elif math.isinf(x):
|
elif math.isinf(x):
|
||||||
return sys.hash_info.inf if x > 0 else -sys.hash_info.inf
|
return sys.hash_info.inf if x > 0 else -sys.hash_info.inf
|
||||||
else:
|
else:
|
||||||
|
|
|
@@ -855,7 +855,7 @@ always available.
|
||||||
+---------------------+--------------------------------------------------+
|
+---------------------+--------------------------------------------------+
|
||||||
| :const:`inf` | hash value returned for a positive infinity |
|
| :const:`inf` | hash value returned for a positive infinity |
|
||||||
+---------------------+--------------------------------------------------+
|
+---------------------+--------------------------------------------------+
|
||||||
| :const:`nan` | hash value returned for a nan |
|
| :const:`nan` | (this attribute is no longer used) |
|
||||||
+---------------------+--------------------------------------------------+
|
+---------------------+--------------------------------------------------+
|
||||||
| :const:`imag` | multiplier used for the imaginary part of a |
|
| :const:`imag` | multiplier used for the imaginary part of a |
|
||||||
| | complex number |
|
| | complex number |
|
||||||
|
|
|
@@ -7,7 +7,7 @@ extern "C" {
|
||||||
|
|
||||||
/* Helpers for hash functions */
|
/* Helpers for hash functions */
|
||||||
#ifndef Py_LIMITED_API
|
#ifndef Py_LIMITED_API
|
||||||
PyAPI_FUNC(Py_hash_t) _Py_HashDouble(double);
|
PyAPI_FUNC(Py_hash_t) _Py_HashDouble(PyObject *, double);
|
||||||
PyAPI_FUNC(Py_hash_t) _Py_HashPointer(const void*);
|
PyAPI_FUNC(Py_hash_t) _Py_HashPointer(const void*);
|
||||||
// Similar to _Py_HashPointer(), but don't replace -1 with -2
|
// Similar to _Py_HashPointer(), but don't replace -1 with -2
|
||||||
PyAPI_FUNC(Py_hash_t) _Py_HashPointerRaw(const void*);
|
PyAPI_FUNC(Py_hash_t) _Py_HashPointerRaw(const void*);
|
||||||
|
@@ -29,7 +29,6 @@ PyAPI_FUNC(Py_hash_t) _Py_HashBytes(const void*, Py_ssize_t);
|
||||||
|
|
||||||
#define _PyHASH_MODULUS (((size_t)1 << _PyHASH_BITS) - 1)
|
#define _PyHASH_MODULUS (((size_t)1 << _PyHASH_BITS) - 1)
|
||||||
#define _PyHASH_INF 314159
|
#define _PyHASH_INF 314159
|
||||||
#define _PyHASH_NAN 0
|
|
||||||
#define _PyHASH_IMAG _PyHASH_MULTIPLIER
|
#define _PyHASH_IMAG _PyHASH_MULTIPLIER
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@@ -951,7 +951,7 @@ class Decimal(object):
|
||||||
if self.is_snan():
|
if self.is_snan():
|
||||||
raise TypeError('Cannot hash a signaling NaN value.')
|
raise TypeError('Cannot hash a signaling NaN value.')
|
||||||
elif self.is_nan():
|
elif self.is_nan():
|
||||||
return _PyHASH_NAN
|
return super().__hash__()
|
||||||
else:
|
else:
|
||||||
if self._sign:
|
if self._sign:
|
||||||
return -_PyHASH_INF
|
return -_PyHASH_INF
|
||||||
|
|
|
@@ -0,0 +1,3 @@
|
||||||
|
Hashes of NaN values now depend on object identity. Formerly, they always
|
||||||
|
hashed to 0 even though NaN values are not equal to one another. Having the
|
||||||
|
same hash for unequal values caused pile-ups in hash tables.
|
|
@@ -4536,7 +4536,6 @@ _dec_hash(PyDecObject *v)
|
||||||
#error "No valid combination of CONFIG_64, CONFIG_32 and _PyHASH_BITS"
|
#error "No valid combination of CONFIG_64, CONFIG_32 and _PyHASH_BITS"
|
||||||
#endif
|
#endif
|
||||||
const Py_hash_t py_hash_inf = 314159;
|
const Py_hash_t py_hash_inf = 314159;
|
||||||
const Py_hash_t py_hash_nan = 0;
|
|
||||||
mpd_uint_t ten_data[1] = {10};
|
mpd_uint_t ten_data[1] = {10};
|
||||||
mpd_t ten = {MPD_POS|MPD_STATIC|MPD_CONST_DATA,
|
mpd_t ten = {MPD_POS|MPD_STATIC|MPD_CONST_DATA,
|
||||||
0, 2, 1, 1, ten_data};
|
0, 2, 1, 1, ten_data};
|
||||||
|
@@ -4555,7 +4554,7 @@ _dec_hash(PyDecObject *v)
|
||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
else if (mpd_isnan(MPD(v))) {
|
else if (mpd_isnan(MPD(v))) {
|
||||||
return py_hash_nan;
|
return _Py_HashPointer(v);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
return py_hash_inf * mpd_arith_sign(MPD(v));
|
return py_hash_inf * mpd_arith_sign(MPD(v));
|
||||||
|
@@ -5939,5 +5938,3 @@ error:
|
||||||
|
|
||||||
return NULL; /* GCOV_NOT_REACHED */
|
return NULL; /* GCOV_NOT_REACHED */
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@@ -412,10 +412,10 @@ static Py_hash_t
|
||||||
complex_hash(PyComplexObject *v)
|
complex_hash(PyComplexObject *v)
|
||||||
{
|
{
|
||||||
Py_uhash_t hashreal, hashimag, combined;
|
Py_uhash_t hashreal, hashimag, combined;
|
||||||
hashreal = (Py_uhash_t)_Py_HashDouble(v->cval.real);
|
hashreal = (Py_uhash_t)_Py_HashDouble((PyObject *) v, v->cval.real);
|
||||||
if (hashreal == (Py_uhash_t)-1)
|
if (hashreal == (Py_uhash_t)-1)
|
||||||
return -1;
|
return -1;
|
||||||
hashimag = (Py_uhash_t)_Py_HashDouble(v->cval.imag);
|
hashimag = (Py_uhash_t)_Py_HashDouble((PyObject *)v, v->cval.imag);
|
||||||
if (hashimag == (Py_uhash_t)-1)
|
if (hashimag == (Py_uhash_t)-1)
|
||||||
return -1;
|
return -1;
|
||||||
/* Note: if the imaginary part is 0, hashimag is 0 now,
|
/* Note: if the imaginary part is 0, hashimag is 0 now,
|
||||||
|
|
|
@@ -556,7 +556,7 @@ float_richcompare(PyObject *v, PyObject *w, int op)
|
||||||
static Py_hash_t
|
static Py_hash_t
|
||||||
float_hash(PyFloatObject *v)
|
float_hash(PyFloatObject *v)
|
||||||
{
|
{
|
||||||
return _Py_HashDouble(v->ob_fval);
|
return _Py_HashDouble((PyObject *)v, v->ob_fval);
|
||||||
}
|
}
|
||||||
|
|
||||||
static PyObject *
|
static PyObject *
|
||||||
|
|
|
@@ -56,8 +56,12 @@ static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0};
|
||||||
If the result of the reduction is infinity (this is impossible for
|
If the result of the reduction is infinity (this is impossible for
|
||||||
integers, floats and Decimals) then use the predefined hash value
|
integers, floats and Decimals) then use the predefined hash value
|
||||||
_PyHASH_INF for x >= 0, or -_PyHASH_INF for x < 0, instead.
|
_PyHASH_INF for x >= 0, or -_PyHASH_INF for x < 0, instead.
|
||||||
_PyHASH_INF, -_PyHASH_INF and _PyHASH_NAN are also used for the
|
_PyHASH_INF and -_PyHASH_INF are also used for the
|
||||||
hashes of float and Decimal infinities and nans.
|
hashes of float and Decimal infinities.
|
||||||
|
|
||||||
|
NaNs hash with a pointer hash. Having distinct hash values prevents
|
||||||
|
catastrophic pileups from distinct NaN instances which used to always
|
||||||
|
have the same hash value but would compare unequal.
|
||||||
|
|
||||||
A selling point for the above strategy is that it makes it possible
|
A selling point for the above strategy is that it makes it possible
|
||||||
to compute hashes of decimal and binary floating-point numbers
|
to compute hashes of decimal and binary floating-point numbers
|
||||||
|
@@ -82,8 +86,10 @@ static Py_ssize_t hashstats[Py_HASH_STATS_MAX + 1] = {0};
|
||||||
|
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
Py_hash_t _Py_HashPointer(const void *);
|
||||||
|
|
||||||
Py_hash_t
|
Py_hash_t
|
||||||
_Py_HashDouble(double v)
|
_Py_HashDouble(PyObject *inst, double v)
|
||||||
{
|
{
|
||||||
int e, sign;
|
int e, sign;
|
||||||
double m;
|
double m;
|
||||||
|
@@ -93,7 +99,7 @@ _Py_HashDouble(double v)
|
||||||
if (Py_IS_INFINITY(v))
|
if (Py_IS_INFINITY(v))
|
||||||
return v > 0 ? _PyHASH_INF : -_PyHASH_INF;
|
return v > 0 ? _PyHASH_INF : -_PyHASH_INF;
|
||||||
else
|
else
|
||||||
return _PyHASH_NAN;
|
return _Py_HashPointer(inst);
|
||||||
}
|
}
|
||||||
|
|
||||||
m = frexp(v, &e);
|
m = frexp(v, &e);
|
||||||
|
|
|
@@ -1405,7 +1405,7 @@ get_hash_info(PyThreadState *tstate)
|
||||||
PyStructSequence_SET_ITEM(hash_info, field++,
|
PyStructSequence_SET_ITEM(hash_info, field++,
|
||||||
PyLong_FromLong(_PyHASH_INF));
|
PyLong_FromLong(_PyHASH_INF));
|
||||||
PyStructSequence_SET_ITEM(hash_info, field++,
|
PyStructSequence_SET_ITEM(hash_info, field++,
|
||||||
PyLong_FromLong(_PyHASH_NAN));
|
PyLong_FromLong(0)); // This is no longer used
|
||||||
PyStructSequence_SET_ITEM(hash_info, field++,
|
PyStructSequence_SET_ITEM(hash_info, field++,
|
||||||
PyLong_FromLong(_PyHASH_IMAG));
|
PyLong_FromLong(_PyHASH_IMAG));
|
||||||
PyStructSequence_SET_ITEM(hash_info, field++,
|
PyStructSequence_SET_ITEM(hash_info, field++,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue