bpo-36546: Add more tests and expand docs (#13406)

This commit is contained in:
Raymond Hettinger 2019-05-18 10:18:29 -07:00 committed by GitHub
parent 73934b9da0
commit e917f2ed9a
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
2 changed files with 48 additions and 21 deletions

View file

@ -511,22 +511,33 @@ However, for reading convenience, most of the examples show sorted sequences.
is not at least 1. is not at least 1.
The *dist* can be any iterable containing sample data or it can be an The *dist* can be any iterable containing sample data or it can be an
instance of a class that defines an :meth:`~inv_cdf` method. instance of a class that defines an :meth:`~inv_cdf` method. For meaningful
results, the number of data points in *dist* should be larger than *n*.
Raises :exc:`StatisticsError` if there are not at least two data points. Raises :exc:`StatisticsError` if there are not at least two data points.
For sample data, the cut points are linearly interpolated from the For sample data, the cut points are linearly interpolated from the
two nearest data points. For example, if a cut point falls one-third two nearest data points. For example, if a cut point falls one-third
of the distance between two sample values, ``100`` and ``112``, the of the distance between two sample values, ``100`` and ``112``, the
cut point will evaluate to ``104``. Other selection methods may be cut point will evaluate to ``104``.
offered in the future (for example choose ``100`` as the nearest
value or compute ``106`` as the midpoint). This might matter if
there are too few samples for a given number of cut points.
If *method* is set to *inclusive*, *dist* is treated as population data. The *method* for computing quantiles can be varied depending on
The minimum value is treated as the 0th percentile and the maximum whether the data in *dist* includes or excludes the lowest and
value is treated as the 100th percentile. If *dist* is an instance of highest possible values from the population.
a class that defines an :meth:`~inv_cdf` method, setting *method*
has no effect. The default *method* is "exclusive" and is used for data sampled from
a population that can have more extreme values than found in the
samples. The portion of the population falling below the *i-th* of
*m* sorted data points is computed as ``i / (m + 1)``.
Set the *method* to "inclusive" when *dist* describes population
data or is a sample that includes the extreme points. The minimum
value in *dist* is treated as the 0th percentile and the maximum
value is treated as the 100th percentile. The portion of the
population falling below the *i-th* of *m* sorted data points is computed as
``(i - 1) / (m - 1)``.
If *dist* is an instance of a class that defines an
:meth:`~inv_cdf` method, setting *method* has no effect.
.. doctest:: .. doctest::

View file

@ -2161,17 +2161,18 @@ class TestQuantiles(unittest.TestCase):
# Quantiles should be idempotent # Quantiles should be idempotent
if len(expected) >= 2: if len(expected) >= 2:
self.assertEqual(quantiles(expected, n=n), expected) self.assertEqual(quantiles(expected, n=n), expected)
# Cross-check against other methods # Cross-check against method='inclusive' which should give
if len(data) >= n: # the same result after adding in minimum and maximum values
# After end caps are added, method='inclusive' should # extrapolated from the two lowest and two highest points.
# give the same result as method='exclusive' whenever sdata = sorted(data)
# there are more data points than desired cut points. lo = 2 * sdata[0] - sdata[1]
padded_data = [min(data) - 1000] + data + [max(data) + 1000] hi = 2 * sdata[-1] - sdata[-2]
self.assertEqual( padded_data = data + [lo, hi]
quantiles(data, n=n), self.assertEqual(
quantiles(padded_data, n=n, method='inclusive'), quantiles(data, n=n),
(n, data), quantiles(padded_data, n=n, method='inclusive'),
) (n, data),
)
# Invariant under translation and scaling # Invariant under translation and scaling
def f(x): def f(x):
return 3.5 * x - 1234.675 return 3.5 * x - 1234.675
@ -2188,6 +2189,11 @@ class TestQuantiles(unittest.TestCase):
actual = quantiles(statistics.NormalDist(), n=n) actual = quantiles(statistics.NormalDist(), n=n)
self.assertTrue(all(math.isclose(e, a, abs_tol=0.0001) self.assertTrue(all(math.isclose(e, a, abs_tol=0.0001)
for e, a in zip(expected, actual))) for e, a in zip(expected, actual)))
# Q2 agrees with median()
for k in range(2, 60):
data = random.choices(range(100), k=k)
q1, q2, q3 = quantiles(data)
self.assertEqual(q2, statistics.median(data))
def test_specific_cases_inclusive(self): def test_specific_cases_inclusive(self):
# Match results computed by hand and cross-checked # Match results computed by hand and cross-checked
@ -2233,6 +2239,11 @@ class TestQuantiles(unittest.TestCase):
actual = quantiles(statistics.NormalDist(), n=n, method="inclusive") actual = quantiles(statistics.NormalDist(), n=n, method="inclusive")
self.assertTrue(all(math.isclose(e, a, abs_tol=0.0001) self.assertTrue(all(math.isclose(e, a, abs_tol=0.0001)
for e, a in zip(expected, actual))) for e, a in zip(expected, actual)))
# Natural deciles
self.assertEqual(quantiles([0, 100], n=10, method='inclusive'),
[10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0])
self.assertEqual(quantiles(range(0, 101), n=10, method='inclusive'),
[10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0])
# Whenever n is smaller than the number of data points, running # Whenever n is smaller than the number of data points, running
# method='inclusive' should give the same result as method='exclusive' # method='inclusive' should give the same result as method='exclusive'
# after the two included extreme points are removed. # after the two included extreme points are removed.
@ -2242,6 +2253,11 @@ class TestQuantiles(unittest.TestCase):
data.remove(max(data)) data.remove(max(data))
expected = quantiles(data, n=32) expected = quantiles(data, n=32)
self.assertEqual(expected, actual) self.assertEqual(expected, actual)
# Q2 agrees with median()
for k in range(2, 60):
data = random.choices(range(100), k=k)
q1, q2, q3 = quantiles(data, method='inclusive')
self.assertEqual(q2, statistics.median(data))
def test_equal_inputs(self): def test_equal_inputs(self):
quantiles = statistics.quantiles quantiles = statistics.quantiles