mirror of
https://github.com/python/cpython.git
synced 2025-08-04 00:48:58 +00:00
bpo-36546: Add more tests and expand docs (#13406)
This commit is contained in:
parent
73934b9da0
commit
e917f2ed9a
2 changed files with 48 additions and 21 deletions
|
@ -511,22 +511,33 @@ However, for reading convenience, most of the examples show sorted sequences.
|
||||||
is not at least 1.
|
is not at least 1.
|
||||||
|
|
||||||
The *dist* can be any iterable containing sample data or it can be an
|
The *dist* can be any iterable containing sample data or it can be an
|
||||||
instance of a class that defines an :meth:`~inv_cdf` method.
|
instance of a class that defines an :meth:`~inv_cdf` method. For meaningful
|
||||||
|
results, the number of data points in *dist* should be larger than *n*.
|
||||||
Raises :exc:`StatisticsError` if there are not at least two data points.
|
Raises :exc:`StatisticsError` if there are not at least two data points.
|
||||||
|
|
||||||
For sample data, the cut points are linearly interpolated from the
|
For sample data, the cut points are linearly interpolated from the
|
||||||
two nearest data points. For example, if a cut point falls one-third
|
two nearest data points. For example, if a cut point falls one-third
|
||||||
of the distance between two sample values, ``100`` and ``112``, the
|
of the distance between two sample values, ``100`` and ``112``, the
|
||||||
cut point will evaluate to ``104``. Other selection methods may be
|
cut point will evaluate to ``104``.
|
||||||
offered in the future (for example choose ``100`` as the nearest
|
|
||||||
value or compute ``106`` as the midpoint). This might matter if
|
|
||||||
there are too few samples for a given number of cut points.
|
|
||||||
|
|
||||||
If *method* is set to *inclusive*, *dist* is treated as population data.
|
The *method* for computing quantiles can be varied depending on
|
||||||
The minimum value is treated as the 0th percentile and the maximum
|
whether the data in *dist* includes or excludes the lowest and
|
||||||
value is treated as the 100th percentile. If *dist* is an instance of
|
highest possible values from the population.
|
||||||
a class that defines an :meth:`~inv_cdf` method, setting *method*
|
|
||||||
has no effect.
|
The default *method* is "exclusive" and is used for data sampled from
|
||||||
|
a population that can have more extreme values than found in the
|
||||||
|
samples. The portion of the population falling below the *i-th* of
|
||||||
|
*m* data points is computed as ``i / (m + 1)``.
|
||||||
|
|
||||||
|
Setting the *method* to "inclusive" is used for describing population
|
||||||
|
data or for samples that include the extreme points. The minimum
|
||||||
|
value in *dist* is treated as the 0th percentile and the maximum
|
||||||
|
value is treated as the 100th percentile. The portion of the
|
||||||
|
population falling below the *i-th* of *m* data points is computed as
|
||||||
|
``(i - 1) / (m - 1)``.
|
||||||
|
|
||||||
|
If *dist* is an instance of a class that defines an
|
||||||
|
:meth:`~inv_cdf` method, setting *method* has no effect.
|
||||||
|
|
||||||
.. doctest::
|
.. doctest::
|
||||||
|
|
||||||
|
|
|
@ -2161,17 +2161,18 @@ class TestQuantiles(unittest.TestCase):
|
||||||
# Quantiles should be idempotent
|
# Quantiles should be idempotent
|
||||||
if len(expected) >= 2:
|
if len(expected) >= 2:
|
||||||
self.assertEqual(quantiles(expected, n=n), expected)
|
self.assertEqual(quantiles(expected, n=n), expected)
|
||||||
# Cross-check against other methods
|
# Cross-check against method='inclusive' which should give
|
||||||
if len(data) >= n:
|
# the same result after adding in minimum and maximum values
|
||||||
# After end caps are added, method='inclusive' should
|
# extrapolated from the two lowest and two highest points.
|
||||||
# give the same result as method='exclusive' whenever
|
sdata = sorted(data)
|
||||||
# there are more data points than desired cut points.
|
lo = 2 * sdata[0] - sdata[1]
|
||||||
padded_data = [min(data) - 1000] + data + [max(data) + 1000]
|
hi = 2 * sdata[-1] - sdata[-2]
|
||||||
self.assertEqual(
|
padded_data = data + [lo, hi]
|
||||||
quantiles(data, n=n),
|
self.assertEqual(
|
||||||
quantiles(padded_data, n=n, method='inclusive'),
|
quantiles(data, n=n),
|
||||||
(n, data),
|
quantiles(padded_data, n=n, method='inclusive'),
|
||||||
)
|
(n, data),
|
||||||
|
)
|
||||||
# Invariant under translation and scaling
|
# Invariant under translation and scaling
|
||||||
def f(x):
|
def f(x):
|
||||||
return 3.5 * x - 1234.675
|
return 3.5 * x - 1234.675
|
||||||
|
@ -2188,6 +2189,11 @@ class TestQuantiles(unittest.TestCase):
|
||||||
actual = quantiles(statistics.NormalDist(), n=n)
|
actual = quantiles(statistics.NormalDist(), n=n)
|
||||||
self.assertTrue(all(math.isclose(e, a, abs_tol=0.0001)
|
self.assertTrue(all(math.isclose(e, a, abs_tol=0.0001)
|
||||||
for e, a in zip(expected, actual)))
|
for e, a in zip(expected, actual)))
|
||||||
|
# Q2 agrees with median()
|
||||||
|
for k in range(2, 60):
|
||||||
|
data = random.choices(range(100), k=k)
|
||||||
|
q1, q2, q3 = quantiles(data)
|
||||||
|
self.assertEqual(q2, statistics.median(data))
|
||||||
|
|
||||||
def test_specific_cases_inclusive(self):
|
def test_specific_cases_inclusive(self):
|
||||||
# Match results computed by hand and cross-checked
|
# Match results computed by hand and cross-checked
|
||||||
|
@ -2233,6 +2239,11 @@ class TestQuantiles(unittest.TestCase):
|
||||||
actual = quantiles(statistics.NormalDist(), n=n, method="inclusive")
|
actual = quantiles(statistics.NormalDist(), n=n, method="inclusive")
|
||||||
self.assertTrue(all(math.isclose(e, a, abs_tol=0.0001)
|
self.assertTrue(all(math.isclose(e, a, abs_tol=0.0001)
|
||||||
for e, a in zip(expected, actual)))
|
for e, a in zip(expected, actual)))
|
||||||
|
# Natural deciles
|
||||||
|
self.assertEqual(quantiles([0, 100], n=10, method='inclusive'),
|
||||||
|
[10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0])
|
||||||
|
self.assertEqual(quantiles(range(0, 101), n=10, method='inclusive'),
|
||||||
|
[10.0, 20.0, 30.0, 40.0, 50.0, 60.0, 70.0, 80.0, 90.0])
|
||||||
# Whenever n is smaller than the number of data points, running
|
# Whenever n is smaller than the number of data points, running
|
||||||
# method='inclusive' should give the same result as method='exclusive'
|
# method='inclusive' should give the same result as method='exclusive'
|
||||||
# after the two included extreme points are removed.
|
# after the two included extreme points are removed.
|
||||||
|
@ -2242,6 +2253,11 @@ class TestQuantiles(unittest.TestCase):
|
||||||
data.remove(max(data))
|
data.remove(max(data))
|
||||||
expected = quantiles(data, n=32)
|
expected = quantiles(data, n=32)
|
||||||
self.assertEqual(expected, actual)
|
self.assertEqual(expected, actual)
|
||||||
|
# Q2 agrees with median()
|
||||||
|
for k in range(2, 60):
|
||||||
|
data = random.choices(range(100), k=k)
|
||||||
|
q1, q2, q3 = quantiles(data, method='inclusive')
|
||||||
|
self.assertEqual(q2, statistics.median(data))
|
||||||
|
|
||||||
def test_equal_inputs(self):
|
def test_equal_inputs(self):
|
||||||
quantiles = statistics.quantiles
|
quantiles = statistics.quantiles
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue