This patch addresses two main issues: (1) There exist some non-fatal

errors in some of the hash algorithms. For example, in float_hash and
complex_hash a certain part of the value is not included in the hash
calculation. See Tim's, Guido's, and my discussion of this on
python-dev in May under the title "fix float_hash and complex_hash for
64-bit *nix".

(2) The hash algorithms that use pointers (e.g. func_hash, code_hash)
are universally not correct on Win64 (they assume that sizeof(long) ==
sizeof(void*))

As well, this patch significantly cleans up the hash code. It adds the
two functions _Py_HashDouble and _Py_HashPointer that the various
hashing routines are changed to use.

These help maintain the hash function invariant: (a==b) =>
(hash(a)==hash(b)). I have added Lib/test/test_hash.py and
Lib/test/output/test_hash to test this for some cases.
This commit is contained in:
Fred Drake 2000-06-29 19:17:04 +00:00
parent b46696c0ed
commit 13634cf7a4
10 changed files with 126 additions and 49 deletions

View file

@ -293,6 +293,10 @@ extern DL_IMPORT(void) Py_ReprLeave Py_PROTO((PyObject *));
/* tstate dict key for PyObject_Compare helper */ /* tstate dict key for PyObject_Compare helper */
extern PyObject *_PyCompareState_Key; extern PyObject *_PyCompareState_Key;
/* Helpers for hash functions */
extern DL_IMPORT(long) _Py_HashDouble Py_PROTO((double));
extern DL_IMPORT(long) _Py_HashPointer Py_PROTO((void*));
/* Flag bits for printing: */ /* Flag bits for printing: */
#define Py_PRINT_RAW 1 /* No string quotes etc. */ #define Py_PRINT_RAW 1 /* No string quotes etc. */

View file

@ -0,0 +1 @@
test_hash

26
Lib/test/test_hash.py Normal file
View file

@ -0,0 +1,26 @@
# test the invariant that
# if a==b then hash(a)==hash(b)
#
import test_support
def same_hash(*objlist):
# hash each object given an raise TestFailed if
# the hash values are not all the same
hashed = map(hash, objlist)
for h in hashed[1:]:
if h != hashed[0]:
raise TestFailed, "hashed values differ: %s" % `objlist`
# The value 1 written as int, long, float and complex literals.
same_hash(1, 1L, 1.0, 1.0+0.0j)
# The same value 1 obtained through the numeric type constructors.
same_hash(int(1), long(1), float(1), complex(1))
# A very large integral value, as a long and as a float.
same_hash(long(1.23e300), float(1.23e300))
# A non-integral value, as a float and as a complex with zero imaginary part.
same_hash(float(0.5), complex(0.5, 0.0))

View file

@ -864,10 +864,7 @@ instance_hash(inst)
func = instance_getattr(inst, cmpstr); func = instance_getattr(inst, cmpstr);
if (func == NULL) { if (func == NULL) {
PyErr_Clear(); PyErr_Clear();
outcome = (long)inst; return _Py_HashPointer(inst);
if (outcome == -1)
outcome = -2;
return outcome;
} }
PyErr_SetString(PyExc_TypeError, "unhashable instance"); PyErr_SetString(PyExc_TypeError, "unhashable instance");
return -1; return -1;

View file

@ -285,8 +285,7 @@ complex_hash(v)
PyComplexObject *v; PyComplexObject *v;
{ {
double intpart, fractpart; double intpart, fractpart;
int expo; long x;
long hipart, x;
/* This is designed so that Python numbers with the same /* This is designed so that Python numbers with the same
value hash to the same value, otherwise comparisons value hash to the same value, otherwise comparisons
of mapping keys will turn out weird */ of mapping keys will turn out weird */
@ -302,7 +301,7 @@ complex_hash(v)
#endif #endif
if (fractpart == 0.0 && v->cval.imag == 0.0) { if (fractpart == 0.0 && v->cval.imag == 0.0) {
if (intpart > 0x7fffffffL || -intpart > 0x7fffffffL) { if (intpart > LONG_MAX || -intpart > LONG_MAX) {
/* Convert to long int and use its hash... */ /* Convert to long int and use its hash... */
PyObject *w = PyLong_FromDouble(v->cval.real); PyObject *w = PyLong_FromDouble(v->cval.real);
if (w == NULL) if (w == NULL)
@ -314,36 +313,18 @@ complex_hash(v)
x = (long)intpart; x = (long)intpart;
} }
else { else {
fractpart = frexp(fractpart, &expo); x = _Py_HashDouble(v->cval.real);
fractpart = fractpart * 2147483648.0; /* 2**31 */ if (x == -1)
hipart = (long)fractpart; /* Take the top 32 bits */ return -1;
fractpart = (fractpart - (double)hipart) * 2147483648.0;
/* Get the next 32 bits */
x = hipart + (long)fractpart + (long)intpart + (expo << 15);
/* Combine everything */
if (v->cval.imag != 0.0) { /* Hash the imaginary part */ if (v->cval.imag != 0.0) { /* Hash the imaginary part */
/* XXX Note that this hashes complex(x, y) /* XXX Note that this hashes complex(x, y)
to the same value as complex(y, x). to the same value as complex(y, x).
Still better than it used to be :-) */ Still better than it used to be :-) */
#ifdef MPW long y = _Py_HashDouble(v->cval.imag);
{ if (y == -1)
extended e; return -1;
fractpart = modf(v->cval.imag, &e); x += y;
intpart = e;
}
#else
fractpart = modf(v->cval.imag, &intpart);
#endif
fractpart = frexp(fractpart, &expo);
fractpart = fractpart * 2147483648.0; /* 2**31 */
hipart = (long)fractpart; /* Take the top 32 bits */
fractpart =
(fractpart - (double)hipart) * 2147483648.0;
/* Get the next 32 bits */
x ^= hipart + (long)fractpart +
(long)intpart + (expo << 15);
/* Combine everything */
} }
} }
if (x == -1) if (x == -1)

View file

@ -59,7 +59,13 @@ PERFORMANCE OF THIS SOFTWARE.
#endif #endif
#ifndef LONG_MAX #ifndef LONG_MAX
#if SIZEOF_LONG == 4
#define LONG_MAX 0X7FFFFFFFL #define LONG_MAX 0X7FFFFFFFL
#elif SIZEOF_LONG == 8
#define LONG_MAX 0X7FFFFFFFFFFFFFFFL
#else
#error "could not set LONG_MAX"
#endif
#endif #endif
#ifndef LONG_MIN #ifndef LONG_MIN
@ -357,12 +363,12 @@ float_compare(v, w)
return (i < j) ? -1 : (i > j) ? 1 : 0; return (i < j) ? -1 : (i > j) ? 1 : 0;
} }
static long static long
float_hash(v) float_hash(v)
PyFloatObject *v; PyFloatObject *v;
{ {
double intpart, fractpart; double intpart, fractpart;
int expo;
long x; long x;
/* This is designed so that Python numbers with the same /* This is designed so that Python numbers with the same
value hash to the same value, otherwise comparisons value hash to the same value, otherwise comparisons
@ -379,7 +385,7 @@ float_hash(v)
#endif #endif
if (fractpart == 0.0) { if (fractpart == 0.0) {
if (intpart > 0x7fffffffL || -intpart > 0x7fffffffL) { if (intpart > LONG_MAX || -intpart > LONG_MAX) {
/* Convert to long int and use its hash... */ /* Convert to long int and use its hash... */
PyObject *w = PyLong_FromDouble(v->ob_fval); PyObject *w = PyLong_FromDouble(v->ob_fval);
if (w == NULL) if (w == NULL)
@ -393,14 +399,9 @@ float_hash(v)
else { else {
/* Note -- if you change this code, also change the copy /* Note -- if you change this code, also change the copy
in complexobject.c */ in complexobject.c */
long hipart; x = _Py_HashDouble(v->ob_fval);
fractpart = frexp(fractpart, &expo); if (x == -1)
fractpart = fractpart * 2147483648.0; /* 2**31 */ return -1;
hipart = (long)fractpart; /* Take the top 32 bits */
fractpart = (fractpart - (double)hipart) * 2147483648.0;
/* Get the next 32 bits */
x = hipart + (long)fractpart + (long)intpart + (expo << 15);
/* Combine everything */
} }
if (x == -1) if (x == -1)
x = -2; x = -2;

View file

@ -231,10 +231,12 @@ static long
func_hash(f) func_hash(f)
PyFunctionObject *f; PyFunctionObject *f;
{ {
long h; long h,x;
h = PyObject_Hash(f->func_code); h = PyObject_Hash(f->func_code);
if (h == -1) return h; if (h == -1) return h;
h = h ^ (long)f->func_globals; x = _Py_HashPointer(f->func_globals);
if (x == -1) return x;
h ^= x;
if (h == -1) h = -2; if (h == -1) h = -2;
return h; return h;
} }

View file

@ -172,7 +172,7 @@ static long
meth_hash(a) meth_hash(a)
PyCFunctionObject *a; PyCFunctionObject *a;
{ {
long x; long x,y;
if (a->m_self == NULL) if (a->m_self == NULL)
x = 0; x = 0;
else { else {
@ -180,7 +180,13 @@ meth_hash(a)
if (x == -1) if (x == -1)
return -1; return -1;
} }
return x ^ (long) a->m_ml->ml_meth; y = _Py_HashPointer(a->m_ml->ml_meth);
if (y == -1)
return -1;
x ^= y;
if (x == -1)
x = -2;
return x;
} }
PyTypeObject PyCFunction_Type = { PyTypeObject PyCFunction_Type = {

View file

@ -33,6 +33,8 @@ PERFORMANCE OF THIS SOFTWARE.
#include "Python.h" #include "Python.h"
#include "mymath.h"
/* just for trashcan: */ /* just for trashcan: */
#include "compile.h" #include "compile.h"
#include "frameobject.h" #include "frameobject.h"
@ -507,6 +509,62 @@ PyObject_Compare(v, w)
return result; return result;
} }
/* Set of hash utility functions to help maintain the invariant that
if a==b then hash(a)==hash(b)
All the utility functions (_Py_Hash*()) return "-1" to signify an error.
*/
long
_Py_HashDouble(v)
    double v;
{
    /* Hash a C double by mixing its mantissa and binary exponent.
     *
     * frexp() splits v into a mantissa m with 0.5 <= |m| < 1 (or m == 0)
     * and an exponent expo such that v == m * 2**expo.  The mantissa is
     * then peeled off in two pieces by scaling with 2**31 twice: each
     * piece has a magnitude below 2**31 and therefore fits in a long even
     * when sizeof(long) == 4.  No double format in common use has more
     * than 56 mantissa bits (VAX D), so two such pieces capture all of it.
     * The exponent is folded in scaled by 2**15.
     *
     * The result is never -1: callers treat -1 from the _Py_Hash*()
     * helpers as an error indication, and this function cannot fail, so
     * a computed -1 is replaced by -2.
     *
     * Infinities and NaNs have no mantissa to convert; they all hash to 0.
     */
    int expo;
    long hipart, lopart, result;
    unsigned long mixed;

    v = frexp(v, &expo);
    if (!(v > -1.0 && v < 1.0)) {
        /* Only an infinity or a NaN can leave frexp() outside (-1, 1).
           Converting such a value to long would be undefined. */
        return 0;
    }

    v = v * 2147483648.0;       /* 2**31 */
    hipart = (long)v;           /* Integer part: the leading mantissa bits */
    v = (v - (double)hipart) * 2147483648.0;
    lopart = (long)v;           /* The following mantissa bits */

    /* Combine everything.  The sum is formed in unsigned arithmetic so
       that it wraps instead of overflowing a 32-bit long, and the exponent
       is scaled by multiplication because expo is negative for |v| < 0.5
       and left-shifting a negative value is undefined. */
    mixed = (unsigned long)hipart
            + (unsigned long)lopart
            + (unsigned long)((long)expo * 32768L);  /* expo * 2**15 */
    result = (long)mixed;
    if (result == -1) {
        result = -2;
    }
    return result;
}
long
_Py_HashPointer(p)
    void *p;
{
    /* Hash a raw pointer value.
     *
     * Returns the hash, or -1 if an error occurred (possible only on
     * platforms where a pointer does not fit in a long, because a
     * temporary Python long has to be created there).
     */
#if SIZEOF_LONG >= SIZEOF_VOID_P
    /* The pointer fits in a long: use its numeric value directly.
       -1 is reserved for reporting errors, so it is never handed out
       as a hash value. */
    long x = (long)p;

    if (x == -1) {
        x = -2;
    }
    return x;
#else
    /* A long is too narrow to hold the pointer (e.g. Win64): convert it
       to a Python long and hash that instead. */
    PyObject *longobj;
    long x;

    longobj = PyLong_FromVoidPtr(p);
    if (longobj == NULL) {
        return -1;
    }
    x = PyObject_Hash(longobj);
    Py_DECREF(longobj);
    return x;
#endif
}
long long
PyObject_Hash(v) PyObject_Hash(v)
PyObject *v; PyObject *v;
@ -514,8 +572,9 @@ PyObject_Hash(v)
PyTypeObject *tp = v->ob_type; PyTypeObject *tp = v->ob_type;
if (tp->tp_hash != NULL) if (tp->tp_hash != NULL)
return (*tp->tp_hash)(v); return (*tp->tp_hash)(v);
if (tp->tp_compare == NULL) if (tp->tp_compare == NULL) {
return (long) v; /* Use address as hash value */ return _Py_HashPointer(v); /* Use address as hash value */
}
/* If there's a cmp but no hash defined, the object can't be hashed */ /* If there's a cmp but no hash defined, the object can't be hashed */
PyErr_SetString(PyExc_TypeError, "unhashable type"); PyErr_SetString(PyExc_TypeError, "unhashable type");
return -1; return -1;

View file

@ -423,7 +423,7 @@ PyHKEY_hashFunc(PyObject *ob)
/* Just use the address. /* Just use the address.
XXX - should we use the handle value? XXX - should we use the handle value?
*/ */
return (long)ob; return _Py_HashPointer(ob);
} }