GH-96465: Cache hashes for Fraction instances (GH-96483)

This commit is contained in:
Raymond Hettinger 2022-09-07 10:31:50 -05:00 committed by GitHub
parent 0cd992c000
commit 3eaf70d836
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 36 additions and 30 deletions

View file

@ -4,6 +4,7 @@
"""Fraction, infinite-precision, rational numbers."""
from decimal import Decimal
import functools
import math
import numbers
import operator
@ -20,6 +21,39 @@ _PyHASH_MODULUS = sys.hash_info.modulus
# _PyHASH_MODULUS.
_PyHASH_INF = sys.hash_info.inf
@functools.lru_cache(maxsize = 1 << 14)
def _hash_algorithm(numerator, denominator):
# To make sure that the hash of a Fraction agrees with the hash
# of a numerically equal integer, float or Decimal instance, we
# follow the rules for numeric hashes outlined in the
# documentation. (See library docs, 'Built-in Types').
try:
dinv = pow(denominator, -1, _PyHASH_MODULUS)
except ValueError:
# ValueError means there is no modular inverse.
hash_ = _PyHASH_INF
else:
# The general algorithm now specifies that the absolute value of
# the hash is
# (|N| * dinv) % P
# where N is self._numerator and P is _PyHASH_MODULUS. That's
# optimized here in two ways: first, for a non-negative int i,
# hash(i) == i % P, but the int hash implementation doesn't need
# to divide, and is faster than doing % P explicitly. So we do
# hash(|N| * dinv)
# instead. Second, N is unbounded, so its product with dinv may
# be arbitrarily expensive to compute. The final answer is the
# same if we use the bounded |N| % P instead, which can again
# be done with an int hash() call. If 0 <= i < P, hash(i) == i,
# so this nested hash() call wastes a bit of time making a
# redundant copy when |N| < P, but can save an arbitrarily large
# amount of computation for large |N|.
hash_ = hash(hash(abs(numerator)) * dinv)
result = hash_ if numerator >= 0 else -hash_
return -2 if result == -1 else result
_RATIONAL_FORMAT = re.compile(r"""
\A\s* # optional whitespace at the start,
(?P<sign>[-+]?) # an optional sign, then
@ -646,36 +680,7 @@ class Fraction(numbers.Rational):
def __hash__(self):
"""hash(self)"""
# To make sure that the hash of a Fraction agrees with the hash
# of a numerically equal integer, float or Decimal instance, we
# follow the rules for numeric hashes outlined in the
# documentation. (See library docs, 'Built-in Types').
try:
dinv = pow(self._denominator, -1, _PyHASH_MODULUS)
except ValueError:
# ValueError means there is no modular inverse.
hash_ = _PyHASH_INF
else:
# The general algorithm now specifies that the absolute value of
# the hash is
# (|N| * dinv) % P
# where N is self._numerator and P is _PyHASH_MODULUS. That's
# optimized here in two ways: first, for a non-negative int i,
# hash(i) == i % P, but the int hash implementation doesn't need
# to divide, and is faster than doing % P explicitly. So we do
# hash(|N| * dinv)
# instead. Second, N is unbounded, so its product with dinv may
# be arbitrarily expensive to compute. The final answer is the
# same if we use the bounded |N| % P instead, which can again
# be done with an int hash() call. If 0 <= i < P, hash(i) == i,
# so this nested hash() call wastes a bit of time making a
# redundant copy when |N| < P, but can save an arbitrarily large
# amount of computation for large |N|.
hash_ = hash(hash(abs(self._numerator)) * dinv)
result = hash_ if self._numerator >= 0 else -hash_
return -2 if result == -1 else result
return _hash_algorithm(self._numerator, self._denominator)
def __eq__(a, b):
"""a == b"""