bpo-36324: Add inv_cdf() to statistics.NormalDist() (GH-12377)

This commit is contained in:
Raymond Hettinger 2019-03-18 20:17:14 -07:00 committed by GitHub
parent faddaedd05
commit 714c60d7ac
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 182 additions and 0 deletions

View file

@@ -2174,6 +2174,69 @@ class TestNormalDist(unittest.TestCase):
self.assertEqual(X.cdf(float('Inf')), 1.0)
self.assertTrue(math.isnan(X.cdf(float('NaN'))))
def test_inv_cdf(self):
    """Check NormalDist.inv_cdf() on exact, tabulated, round-trip and error cases."""
    NormalDist = statistics.NormalDist
    StatisticsError = statistics.StatisticsError

    # The median of the distribution must come back exactly.
    iq_dist = NormalDist(100, 15)
    self.assertEqual(iq_dist.inv_cdf(0.50), iq_dist.mean)

    # Compare against a published table of known percentage points.
    # See the second table at the bottom of the page here:
    # http://people.bath.ac.uk/masss/tables/normaltable.pdf
    # Each key is a mantissa; the i-th entry of its row (counting from 1)
    # is the expected deviate for the tail probability mantissa * 10**-i.
    std_normal = NormalDist()
    percentage_points = {
        5.0: (0.000, 1.645, 2.576, 3.291, 3.891,
              4.417, 4.892, 5.327, 5.731, 6.109),
        2.5: (0.674, 1.960, 2.807, 3.481, 4.056,
              4.565, 5.026, 5.451, 5.847, 6.219),
        1.0: (1.282, 2.326, 3.090, 3.719, 4.265,
              4.753, 5.199, 5.612, 5.998, 6.361),
    }
    for mantissa, deviates in percentage_points.items():
        for exponent, expected in enumerate(deviates, start=1):
            tail = mantissa * 10.0 ** (-exponent)
            # Lower tail: the quantile is the negated table entry.
            self.assertAlmostEqual(-std_normal.inv_cdf(tail), expected,
                                   places=3)
            # Upper tail: the quantile is the table entry itself.
            upper = 1.0 - tail
            self.assertAlmostEqual(std_normal.inv_cdf(upper), expected,
                                   places=3)

    # Match published example for MS Excel
    # https://support.office.com/en-us/article/norm-inv-function-54b30935-fee7-493c-bedb-2278a9db7e13
    excel_dist = NormalDist(40, 1.5)
    self.assertAlmostEqual(excel_dist.inv_cdf(0.908789), 42.000002)

    # Round trip p -> inv_cdf -> cdf over 2**20 - 1 (about one million)
    # equally spaced probabilities strictly between 0 and 1.
    steps = 2**20
    for k in range(1, steps):
        prob = k / steps
        self.assertAlmostEqual(iq_dist.cdf(iq_dist.inv_cdf(prob)), prob)

    # One hundred probabilities reaching ever further into both tails,
    # out to 1 / 2**50 and (2**50-1) / 2**50.
    for power in range(1, 51):
        small = 2.0 ** (-power)
        for prob in (small, 1.0 - small):
            self.assertAlmostEqual(iq_dist.cdf(iq_dist.inv_cdf(prob)), prob)

    # Round trip in the other direction (cdf first).  At six sigmas the
    # round-trip loses a lot of precision, so only check to 6 places.
    for x in range(10, 190):
        recovered = iq_dist.inv_cdf(iq_dist.cdf(x))
        self.assertAlmostEqual(recovered, x, places=6)

    # Error cases: p is zero, under zero, one, and over one.
    for bad_p in (0.0, -0.1, 1.0, 1.1):
        with self.assertRaises(StatisticsError):
            iq_dist.inv_cdf(bad_p)

    # Error cases: sigma is zero, then under zero.  The assignment sits
    # inside the assertRaises block together with the inv_cdf() call.
    for bad_sigma in (0.0, -0.1):
        with self.assertRaises(StatisticsError):
            iq_dist.sigma = bad_sigma
            iq_dist.inv_cdf(0.5)
def test_overlap(self):
NormalDist = statistics.NormalDist