mirror of
https://github.com/python/cpython.git
synced 2025-11-17 01:25:57 +00:00
Issue #8188: Introduce a new scheme for computing hashes of numbers
(instances of int, float, complex, decimal.Decimal and fractions.Fraction) that makes it easy to maintain the invariant that hash(x) == hash(y) whenever x and y have equal value.
This commit is contained in:
parent
03721133a6
commit
dc787d2055
14 changed files with 566 additions and 137 deletions
|
|
@ -862,7 +862,7 @@ class Decimal(object):
|
|||
# that specified by IEEE 754.
|
||||
|
||||
def __eq__(self, other, context=None):
|
||||
other = _convert_other(other, allow_float=True)
|
||||
other = _convert_other(other, allow_float = True)
|
||||
if other is NotImplemented:
|
||||
return other
|
||||
if self._check_nans(other, context):
|
||||
|
|
@ -870,7 +870,7 @@ class Decimal(object):
|
|||
return self._cmp(other) == 0
|
||||
|
||||
def __ne__(self, other, context=None):
|
||||
other = _convert_other(other, allow_float=True)
|
||||
other = _convert_other(other, allow_float = True)
|
||||
if other is NotImplemented:
|
||||
return other
|
||||
if self._check_nans(other, context):
|
||||
|
|
@ -879,7 +879,7 @@ class Decimal(object):
|
|||
|
||||
|
||||
def __lt__(self, other, context=None):
|
||||
other = _convert_other(other, allow_float=True)
|
||||
other = _convert_other(other, allow_float = True)
|
||||
if other is NotImplemented:
|
||||
return other
|
||||
ans = self._compare_check_nans(other, context)
|
||||
|
|
@ -888,7 +888,7 @@ class Decimal(object):
|
|||
return self._cmp(other) < 0
|
||||
|
||||
def __le__(self, other, context=None):
|
||||
other = _convert_other(other, allow_float=True)
|
||||
other = _convert_other(other, allow_float = True)
|
||||
if other is NotImplemented:
|
||||
return other
|
||||
ans = self._compare_check_nans(other, context)
|
||||
|
|
@ -897,7 +897,7 @@ class Decimal(object):
|
|||
return self._cmp(other) <= 0
|
||||
|
||||
def __gt__(self, other, context=None):
|
||||
other = _convert_other(other, allow_float=True)
|
||||
other = _convert_other(other, allow_float = True)
|
||||
if other is NotImplemented:
|
||||
return other
|
||||
ans = self._compare_check_nans(other, context)
|
||||
|
|
@ -906,7 +906,7 @@ class Decimal(object):
|
|||
return self._cmp(other) > 0
|
||||
|
||||
def __ge__(self, other, context=None):
|
||||
other = _convert_other(other, allow_float=True)
|
||||
other = _convert_other(other, allow_float = True)
|
||||
if other is NotImplemented:
|
||||
return other
|
||||
ans = self._compare_check_nans(other, context)
|
||||
|
|
@ -935,55 +935,28 @@ class Decimal(object):
|
|||
|
||||
def __hash__(self):
|
||||
"""x.__hash__() <==> hash(x)"""
|
||||
# Decimal integers must hash the same as the ints
|
||||
#
|
||||
# The hash of a nonspecial noninteger Decimal must depend only
|
||||
# on the value of that Decimal, and not on its representation.
|
||||
# For example: hash(Decimal('100E-1')) == hash(Decimal('10')).
|
||||
|
||||
# Equality comparisons involving signaling nans can raise an
|
||||
# exception; since equality checks are implicitly and
|
||||
# unpredictably used when checking set and dict membership, we
|
||||
# prevent signaling nans from being used as set elements or
|
||||
# dict keys by making __hash__ raise an exception.
|
||||
# In order to make sure that the hash of a Decimal instance
|
||||
# agrees with the hash of a numerically equal integer, float
|
||||
# or Fraction, we follow the rules for numeric hashes outlined
|
||||
# in the documentation. (See library docs, 'Built-in Types').
|
||||
if self._is_special:
|
||||
if self.is_snan():
|
||||
raise TypeError('Cannot hash a signaling NaN value.')
|
||||
elif self.is_nan():
|
||||
# 0 to match hash(float('nan'))
|
||||
return 0
|
||||
return _PyHASH_NAN
|
||||
else:
|
||||
# values chosen to match hash(float('inf')) and
|
||||
# hash(float('-inf')).
|
||||
if self._sign:
|
||||
return -271828
|
||||
return -_PyHASH_INF
|
||||
else:
|
||||
return 314159
|
||||
return _PyHASH_INF
|
||||
|
||||
# In Python 2.7, we're allowing comparisons (but not
|
||||
# arithmetic operations) between floats and Decimals; so if
|
||||
# a Decimal instance is exactly representable as a float then
|
||||
# its hash should match that of the float.
|
||||
self_as_float = float(self)
|
||||
if Decimal.from_float(self_as_float) == self:
|
||||
return hash(self_as_float)
|
||||
|
||||
if self._isinteger():
|
||||
op = _WorkRep(self.to_integral_value())
|
||||
# to make computation feasible for Decimals with large
|
||||
# exponent, we use the fact that hash(n) == hash(m) for
|
||||
# any two nonzero integers n and m such that (i) n and m
|
||||
# have the same sign, and (ii) n is congruent to m modulo
|
||||
# 2**64-1. So we can replace hash((-1)**s*c*10**e) with
|
||||
# hash((-1)**s*c*pow(10, e, 2**64-1)).
|
||||
return hash((-1)**op.sign*op.int*pow(10, op.exp, 2**64-1))
|
||||
# The value of a nonzero nonspecial Decimal instance is
|
||||
# faithfully represented by the triple consisting of its sign,
|
||||
# its adjusted exponent, and its coefficient with trailing
|
||||
# zeros removed.
|
||||
return hash((self._sign,
|
||||
self._exp+len(self._int),
|
||||
self._int.rstrip('0')))
|
||||
if self._exp >= 0:
|
||||
exp_hash = pow(10, self._exp, _PyHASH_MODULUS)
|
||||
else:
|
||||
exp_hash = pow(_PyHASH_10INV, -self._exp, _PyHASH_MODULUS)
|
||||
hash_ = int(self._int) * exp_hash % _PyHASH_MODULUS
|
||||
return hash_ if self >= 0 else -hash_
|
||||
|
||||
def as_tuple(self):
|
||||
"""Represents the number as a triple tuple.
|
||||
|
|
@ -6218,6 +6191,17 @@ _NegativeOne = Decimal(-1)
|
|||
# _SignedInfinity[sign] is infinity w/ that sign
|
||||
_SignedInfinity = (_Infinity, _NegativeInfinity)
|
||||
|
||||
# Constants related to the hash implementation; hash(x) is based
|
||||
# on the reduction of x modulo _PyHASH_MODULUS
|
||||
import sys
|
||||
_PyHASH_MODULUS = sys.hash_info.modulus
|
||||
# hash values to use for positive and negative infinities, and nans
|
||||
_PyHASH_INF = sys.hash_info.inf
|
||||
_PyHASH_NAN = sys.hash_info.nan
|
||||
del sys
|
||||
|
||||
# _PyHASH_10INV is the inverse of 10 modulo the prime _PyHASH_MODULUS
|
||||
_PyHASH_10INV = pow(10, _PyHASH_MODULUS - 2, _PyHASH_MODULUS)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
|
|||
|
|
@ -8,6 +8,7 @@ import math
|
|||
import numbers
|
||||
import operator
|
||||
import re
|
||||
import sys
|
||||
|
||||
__all__ = ['Fraction', 'gcd']
|
||||
|
||||
|
|
@ -23,6 +24,12 @@ def gcd(a, b):
|
|||
a, b = b, a%b
|
||||
return a
|
||||
|
||||
# Constants related to the hash implementation; hash(x) is based
|
||||
# on the reduction of x modulo the prime _PyHASH_MODULUS.
|
||||
_PyHASH_MODULUS = sys.hash_info.modulus
|
||||
# Value to be used for rationals that reduce to infinity modulo
|
||||
# _PyHASH_MODULUS.
|
||||
_PyHASH_INF = sys.hash_info.inf
|
||||
|
||||
_RATIONAL_FORMAT = re.compile(r"""
|
||||
\A\s* # optional whitespace at the start, then
|
||||
|
|
@ -528,16 +535,22 @@ class Fraction(numbers.Rational):
|
|||
|
||||
"""
|
||||
# XXX since this method is expensive, consider caching the result
|
||||
if self._denominator == 1:
|
||||
# Get integers right.
|
||||
return hash(self._numerator)
|
||||
# Expensive check, but definitely correct.
|
||||
if self == float(self):
|
||||
return hash(float(self))
|
||||
|
||||
# In order to make sure that the hash of a Fraction agrees
|
||||
# with the hash of a numerically equal integer, float or
|
||||
# Decimal instance, we follow the rules for numeric hashes
|
||||
# outlined in the documentation. (See library docs, 'Built-in
|
||||
# Types').
|
||||
|
||||
# dinv is the inverse of self._denominator modulo the prime
|
||||
# _PyHASH_MODULUS, or 0 if self._denominator is divisible by
|
||||
# _PyHASH_MODULUS.
|
||||
dinv = pow(self._denominator, _PyHASH_MODULUS - 2, _PyHASH_MODULUS)
|
||||
if not dinv:
|
||||
hash_ = _PyHASH_INF
|
||||
else:
|
||||
# Use tuple's hash to avoid a high collision rate on
|
||||
# simple fractions.
|
||||
return hash((self._numerator, self._denominator))
|
||||
hash_ = abs(self._numerator) * dinv % _PyHASH_MODULUS
|
||||
return hash_ if self >= 0 else -hash_
|
||||
|
||||
def __eq__(a, b):
|
||||
"""a == b"""
|
||||
|
|
|
|||
|
|
@ -914,15 +914,6 @@ class InfNanTest(unittest.TestCase):
|
|||
self.assertFalse(NAN.is_inf())
|
||||
self.assertFalse((0.).is_inf())
|
||||
|
||||
def test_hash_inf(self):
|
||||
# the actual values here should be regarded as an
|
||||
# implementation detail, but they need to be
|
||||
# identical to those used in the Decimal module.
|
||||
self.assertEqual(hash(float('inf')), 314159)
|
||||
self.assertEqual(hash(float('-inf')), -271828)
|
||||
self.assertEqual(hash(float('nan')), 0)
|
||||
|
||||
|
||||
fromHex = float.fromhex
|
||||
toHex = float.hex
|
||||
class HexFloatTestCase(unittest.TestCase):
|
||||
|
|
|
|||
151
Lib/test/test_numeric_tower.py
Normal file
151
Lib/test/test_numeric_tower.py
Normal file
|
|
@ -0,0 +1,151 @@
|
|||
# test interactions between int, float, Decimal and Fraction
|
||||
|
||||
import unittest
|
||||
import random
|
||||
import math
|
||||
import sys
|
||||
import operator
|
||||
from test.support import run_unittest
|
||||
|
||||
from decimal import Decimal as D
|
||||
from fractions import Fraction as F
|
||||
|
||||
# Constants related to the hash implementation; hash(x) is based
|
||||
# on the reduction of x modulo the prime _PyHASH_MODULUS.
|
||||
_PyHASH_MODULUS = sys.hash_info.modulus
|
||||
_PyHASH_INF = sys.hash_info.inf
|
||||
|
||||
class HashTest(unittest.TestCase):
|
||||
def check_equal_hash(self, x, y):
|
||||
# check both that x and y are equal and that their hashes are equal
|
||||
self.assertEqual(hash(x), hash(y),
|
||||
"got different hashes for {!r} and {!r}".format(x, y))
|
||||
self.assertEqual(x, y)
|
||||
|
||||
def test_bools(self):
|
||||
self.check_equal_hash(False, 0)
|
||||
self.check_equal_hash(True, 1)
|
||||
|
||||
def test_integers(self):
|
||||
# check that equal values hash equal
|
||||
|
||||
# exact integers
|
||||
for i in range(-1000, 1000):
|
||||
self.check_equal_hash(i, float(i))
|
||||
self.check_equal_hash(i, D(i))
|
||||
self.check_equal_hash(i, F(i))
|
||||
|
||||
# the current hash is based on reduction modulo 2**n-1 for some
|
||||
# n, so pay special attention to numbers of the form 2**n and 2**n-1.
|
||||
for i in range(100):
|
||||
n = 2**i - 1
|
||||
if n == int(float(n)):
|
||||
self.check_equal_hash(n, float(n))
|
||||
self.check_equal_hash(-n, -float(n))
|
||||
self.check_equal_hash(n, D(n))
|
||||
self.check_equal_hash(n, F(n))
|
||||
self.check_equal_hash(-n, D(-n))
|
||||
self.check_equal_hash(-n, F(-n))
|
||||
|
||||
n = 2**i
|
||||
self.check_equal_hash(n, float(n))
|
||||
self.check_equal_hash(-n, -float(n))
|
||||
self.check_equal_hash(n, D(n))
|
||||
self.check_equal_hash(n, F(n))
|
||||
self.check_equal_hash(-n, D(-n))
|
||||
self.check_equal_hash(-n, F(-n))
|
||||
|
||||
# random values of various sizes
|
||||
for _ in range(1000):
|
||||
e = random.randrange(300)
|
||||
n = random.randrange(-10**e, 10**e)
|
||||
self.check_equal_hash(n, D(n))
|
||||
self.check_equal_hash(n, F(n))
|
||||
if n == int(float(n)):
|
||||
self.check_equal_hash(n, float(n))
|
||||
|
||||
def test_binary_floats(self):
|
||||
# check that floats hash equal to corresponding Fractions and Decimals
|
||||
|
||||
# floats that are distinct but numerically equal should hash the same
|
||||
self.check_equal_hash(0.0, -0.0)
|
||||
|
||||
# zeros
|
||||
self.check_equal_hash(0.0, D(0))
|
||||
self.check_equal_hash(-0.0, D(0))
|
||||
self.check_equal_hash(-0.0, D('-0.0'))
|
||||
self.check_equal_hash(0.0, F(0))
|
||||
|
||||
# infinities (nans are not checked here: a nan never compares equal, so check_equal_hash cannot be used for them)
|
||||
self.check_equal_hash(float('inf'), D('inf'))
|
||||
self.check_equal_hash(float('-inf'), D('-inf'))
|
||||
|
||||
for _ in range(1000):
|
||||
x = random.random() * math.exp(random.random()*200.0 - 100.0)
|
||||
self.check_equal_hash(x, D.from_float(x))
|
||||
self.check_equal_hash(x, F.from_float(x))
|
||||
|
||||
def test_complex(self):
|
||||
# complex numbers with zero imaginary part should hash equal to
|
||||
# the corresponding float
|
||||
|
||||
test_values = [0.0, -0.0, 1.0, -1.0, 0.40625, -5136.5,
|
||||
float('inf'), float('-inf')]
|
||||
|
||||
for zero in -0.0, 0.0:
|
||||
for value in test_values:
|
||||
self.check_equal_hash(value, complex(value, zero))
|
||||
|
||||
def test_decimals(self):
|
||||
# check that Decimal instances that have different representations
|
||||
# but equal values give the same hash
|
||||
zeros = ['0', '-0', '0.0', '-0.0e10', '000e-10']
|
||||
for zero in zeros:
|
||||
self.check_equal_hash(D(zero), D(0))
|
||||
|
||||
self.check_equal_hash(D('1.00'), D(1))
|
||||
self.check_equal_hash(D('1.00000'), D(1))
|
||||
self.check_equal_hash(D('-1.00'), D(-1))
|
||||
self.check_equal_hash(D('-1.00000'), D(-1))
|
||||
self.check_equal_hash(D('123e2'), D(12300))
|
||||
self.check_equal_hash(D('1230e1'), D(12300))
|
||||
self.check_equal_hash(D('12300'), D(12300))
|
||||
self.check_equal_hash(D('12300.0'), D(12300))
|
||||
self.check_equal_hash(D('12300.00'), D(12300))
|
||||
self.check_equal_hash(D('12300.000'), D(12300))
|
||||
|
||||
def test_fractions(self):
|
||||
# check special case for fractions where either the numerator
|
||||
# or the denominator is a multiple of _PyHASH_MODULUS
|
||||
self.assertEqual(hash(F(1, _PyHASH_MODULUS)), _PyHASH_INF)
|
||||
self.assertEqual(hash(F(-1, 3*_PyHASH_MODULUS)), -_PyHASH_INF)
|
||||
self.assertEqual(hash(F(7*_PyHASH_MODULUS, 1)), 0)
|
||||
self.assertEqual(hash(F(-_PyHASH_MODULUS, 1)), 0)
|
||||
|
||||
def test_hash_normalization(self):
|
||||
# Test for a bug encountered while changing long_hash.
|
||||
#
|
||||
# Given objects x and y, it should be possible for y's
|
||||
# __hash__ method to return hash(x) in order to ensure that
|
||||
# hash(x) == hash(y). But hash(x) is not exactly equal to the
|
||||
# result of x.__hash__(): there's some internal normalization
|
||||
# to make sure that the result fits in a C long, and is not
|
||||
# equal to the invalid hash value -1. This internal
|
||||
# normalization must therefore not change the result of
|
||||
# hash(x) for any x.
|
||||
|
||||
class HalibutProxy:
|
||||
def __hash__(self):
|
||||
return hash('halibut')
|
||||
def __eq__(self, other):
|
||||
return other == 'halibut'
|
||||
|
||||
x = {'halibut', HalibutProxy()}
|
||||
self.assertEqual(len(x), 1)
|
||||
|
||||
|
||||
def test_main():
|
||||
run_unittest(HashTest)
|
||||
|
||||
if __name__ == '__main__':
|
||||
test_main()
|
||||
|
|
@ -426,6 +426,23 @@ class SysModuleTest(unittest.TestCase):
|
|||
self.assertEqual(type(sys.int_info.bits_per_digit), int)
|
||||
self.assertEqual(type(sys.int_info.sizeof_digit), int)
|
||||
self.assertIsInstance(sys.hexversion, int)
|
||||
|
||||
self.assertEqual(len(sys.hash_info), 5)
|
||||
self.assertLess(sys.hash_info.modulus, 2**sys.hash_info.width)
|
||||
# sys.hash_info.modulus should be a prime; we do a quick
|
||||
# probable primality test (doesn't exclude the possibility of
|
||||
# a Carmichael number)
|
||||
for x in range(1, 100):
|
||||
self.assertEqual(
|
||||
pow(x, sys.hash_info.modulus-1, sys.hash_info.modulus),
|
||||
1,
|
||||
"sys.hash_info.modulus {} is a non-prime".format(
|
||||
sys.hash_info.modulus)
|
||||
)
|
||||
self.assertIsInstance(sys.hash_info.inf, int)
|
||||
self.assertIsInstance(sys.hash_info.nan, int)
|
||||
self.assertIsInstance(sys.hash_info.imag, int)
|
||||
|
||||
self.assertIsInstance(sys.maxsize, int)
|
||||
self.assertIsInstance(sys.maxunicode, int)
|
||||
self.assertIsInstance(sys.platform, str)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue