mirror of
https://github.com/python/cpython.git
synced 2025-09-26 18:29:57 +00:00
Got test_mutants.py working. One set of changes was straightforward:
use __eq__ instead of __cmp__. The other change is unexplained: with a random hash code as before, it would run forever; with a constant hash code, it fails quickly. This found a refcount bug in dict_equal() -- I wonder if that bug is also present in 2.5...
This commit is contained in:
parent
801f0d78b5
commit
dc5f6b232b
2 changed files with 32 additions and 23 deletions
|
@ -27,7 +27,7 @@ import os
|
||||||
# ran it. Indeed, at the start, the driver never got beyond 6 iterations
|
# ran it. Indeed, at the start, the driver never got beyond 6 iterations
|
||||||
# before the test died.
|
# before the test died.
|
||||||
|
|
||||||
# The dicts are global to make it easy to mutate them from within functions.
|
# The dicts are global to make it easy to mutate tham from within functions.
|
||||||
dict1 = {}
|
dict1 = {}
|
||||||
dict2 = {}
|
dict2 = {}
|
||||||
|
|
||||||
|
@ -88,15 +88,22 @@ class Horrid:
|
||||||
# have any systematic relationship between comparison outcomes
|
# have any systematic relationship between comparison outcomes
|
||||||
# (based on self.i and other.i) and relative position within the
|
# (based on self.i and other.i) and relative position within the
|
||||||
# hash vector (based on hashcode).
|
# hash vector (based on hashcode).
|
||||||
self.hashcode = random.randrange(1000000000)
|
# XXX This is no longer effective.
|
||||||
|
##self.hashcode = random.randrange(1000000000)
|
||||||
|
|
||||||
def __hash__(self):
|
def __hash__(self):
|
||||||
return self.hashcode
|
##return self.hashcode
|
||||||
|
return 42
|
||||||
|
|
||||||
def __eq__(self, other):
|
def __eq__(self, other):
|
||||||
maybe_mutate() # The point of the test.
|
maybe_mutate() # The point of the test.
|
||||||
return self.i == other.i
|
return self.i == other.i
|
||||||
|
|
||||||
|
def __ne__(self, other):
|
||||||
|
raise RuntimeError("I didn't expect some kind of Spanish inquisition!")
|
||||||
|
|
||||||
|
__lt__ = __le__ = __gt__ = __ge__ = __ne__
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
return "Horrid(%d)" % self.i
|
return "Horrid(%d)" % self.i
|
||||||
|
|
||||||
|
@ -133,7 +140,6 @@ def test_one(n):
|
||||||
if verbose:
|
if verbose:
|
||||||
print ".",
|
print ".",
|
||||||
c = dict1 == dict2
|
c = dict1 == dict2
|
||||||
XXX # Can't figure out how to make this work
|
|
||||||
if verbose:
|
if verbose:
|
||||||
print
|
print
|
||||||
|
|
||||||
|
|
|
@ -24,11 +24,11 @@ function, in the sense of simulating randomness. Python doesn't: its most
|
||||||
important hash functions (for strings and ints) are very regular in common
|
important hash functions (for strings and ints) are very regular in common
|
||||||
cases:
|
cases:
|
||||||
|
|
||||||
>>> map(hash, (0, 1, 2, 3))
|
>>> map(hash, (0, 1, 2, 3))
|
||||||
[0, 1, 2, 3]
|
[0, 1, 2, 3]
|
||||||
>>> map(hash, ("namea", "nameb", "namec", "named"))
|
>>> map(hash, ("namea", "nameb", "namec", "named"))
|
||||||
[-1658398457, -1658398460, -1658398459, -1658398462]
|
[-1658398457, -1658398460, -1658398459, -1658398462]
|
||||||
>>>
|
>>>
|
||||||
|
|
||||||
This isn't necessarily bad! To the contrary, in a table of size 2**i, taking
|
This isn't necessarily bad! To the contrary, in a table of size 2**i, taking
|
||||||
the low-order i bits as the initial table index is extremely fast, and there
|
the low-order i bits as the initial table index is extremely fast, and there
|
||||||
|
@ -39,9 +39,9 @@ gives better-than-random behavior in common cases, and that's very desirable.
|
||||||
OTOH, when collisions occur, the tendency to fill contiguous slices of the
|
OTOH, when collisions occur, the tendency to fill contiguous slices of the
|
||||||
hash table makes a good collision resolution strategy crucial. Taking only
|
hash table makes a good collision resolution strategy crucial. Taking only
|
||||||
the last i bits of the hash code is also vulnerable: for example, consider
|
the last i bits of the hash code is also vulnerable: for example, consider
|
||||||
[i << 16 for i in range(20000)] as a set of keys. Since ints are their own
|
the list [i << 16 for i in range(20000)] as a set of keys. Since ints are
|
||||||
hash codes, and this fits in a dict of size 2**15, the last 15 bits of every
|
their own hash codes, and this fits in a dict of size 2**15, the last 15 bits
|
||||||
hash code are all 0: they *all* map to the same table index.
|
of every hash code are all 0: they *all* map to the same table index.
|
||||||
|
|
||||||
But catering to unusual cases should not slow the usual ones, so we just take
|
But catering to unusual cases should not slow the usual ones, so we just take
|
||||||
the last i bits anyway. It's up to collision resolution to do the rest. If
|
the last i bits anyway. It's up to collision resolution to do the rest. If
|
||||||
|
@ -102,14 +102,14 @@ approach, using repeated multiplication by x in GF(2**n) where an irreducible
|
||||||
polynomial for each table size was chosen such that x was a primitive root.
|
polynomial for each table size was chosen such that x was a primitive root.
|
||||||
Christian Tismer later extended that to use division by x instead, as an
|
Christian Tismer later extended that to use division by x instead, as an
|
||||||
efficient way to get the high bits of the hash code into play. This scheme
|
efficient way to get the high bits of the hash code into play. This scheme
|
||||||
also gave excellent collision statistics, but was more expensive: two
|
also gave excellent collision statistics, but was more expensive: two if-tests
|
||||||
if-tests were required inside the loop; computing "the next" index took about
|
were required inside the loop; computing "the next" index took about the same
|
||||||
the same number of operations but without as much potential parallelism
|
number of operations but without as much potential parallelism (e.g.,
|
||||||
(e.g., computing 5*j can go on at the same time as computing 1+perturb in the
|
computing 5*j can go on at the same time as computing 1+perturb in the above,
|
||||||
above, and then shifting perturb can be done while the table index is being
|
and then shifting perturb can be done while the table index is being masked);
|
||||||
masked); and the dictobject struct required a member to hold the table's
|
and the dictobject struct required a member to hold the table's polynomial.
|
||||||
polynomial. In Tim's experiments the current scheme ran faster, produced
|
In Tim's experiments the current scheme ran faster, produced equally good
|
||||||
equally good collision statistics, needed less code & used less memory.
|
collision statistics, needed less code & used less memory.
|
||||||
|
|
||||||
Theoretical Python 2.5 headache: hash codes are only C "long", but
|
Theoretical Python 2.5 headache: hash codes are only C "long", but
|
||||||
sizeof(Py_ssize_t) > sizeof(long) may be possible. In that case, and if a
|
sizeof(Py_ssize_t) > sizeof(long) may be possible. In that case, and if a
|
||||||
|
@ -223,9 +223,9 @@ probe indices are computed as explained earlier.
|
||||||
|
|
||||||
All arithmetic on hash should ignore overflow.
|
All arithmetic on hash should ignore overflow.
|
||||||
|
|
||||||
(The details in this version are due to Tim Peters, building on many past
|
The details in this version are due to Tim Peters, building on many past
|
||||||
contributions by Reimer Behrends, Jyrki Alakuijala, Vladimir Marangozov and
|
contributions by Reimer Behrends, Jyrki Alakuijala, Vladimir Marangozov and
|
||||||
Christian Tismer).
|
Christian Tismer.
|
||||||
|
|
||||||
lookdict() is general-purpose, and may return NULL if (and only if) a
|
lookdict() is general-purpose, and may return NULL if (and only if) a
|
||||||
comparison raises an exception (this was new in Python 2.5).
|
comparison raises an exception (this was new in Python 2.5).
|
||||||
|
@ -1485,7 +1485,10 @@ dict_equal(dictobject *a, dictobject *b)
|
||||||
/* temporarily bump aval's refcount to ensure it stays
|
/* temporarily bump aval's refcount to ensure it stays
|
||||||
alive until we're done with it */
|
alive until we're done with it */
|
||||||
Py_INCREF(aval);
|
Py_INCREF(aval);
|
||||||
|
/* ditto for key */
|
||||||
|
Py_INCREF(key);
|
||||||
bval = PyDict_GetItemWithError((PyObject *)b, key);
|
bval = PyDict_GetItemWithError((PyObject *)b, key);
|
||||||
|
Py_DECREF(key);
|
||||||
if (bval == NULL) {
|
if (bval == NULL) {
|
||||||
Py_DECREF(aval);
|
Py_DECREF(aval);
|
||||||
if (PyErr_Occurred())
|
if (PyErr_Occurred())
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue