gh-110150: Fix base case handling in quantiles() (gh-110151)

This commit is contained in:
Raymond Hettinger 2023-09-30 23:35:54 -05:00 committed by GitHub
parent a46e960768
commit 62405c7867
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 17 additions and 3 deletions

View file

@ -585,7 +585,7 @@ However, for reading convenience, most of the examples show sorted sequences.
The *data* can be any iterable containing sample data. For meaningful The *data* can be any iterable containing sample data. For meaningful
results, the number of data points in *data* should be larger than *n*. results, the number of data points in *data* should be larger than *n*.
Raises :exc:`StatisticsError` if there are not at least two data points. Raises :exc:`StatisticsError` if there is not at least one data point.
The cut points are linearly interpolated from the The cut points are linearly interpolated from the
two nearest data points. For example, if a cut point falls one-third two nearest data points. For example, if a cut point falls one-third
@ -625,6 +625,11 @@ However, for reading convenience, most of the examples show sorted sequences.
.. versionadded:: 3.8 .. versionadded:: 3.8
.. versionchanged:: 3.13
No longer raises an exception for an input with only a single data point.
This allows quantile estimates to be built up one sample point
at a time, becoming gradually more refined with each new data point.
.. function:: covariance(x, y, /) .. function:: covariance(x, y, /)
Return the sample covariance of two inputs *x* and *y*. Covariance Return the sample covariance of two inputs *x* and *y*. Covariance

View file

@ -844,7 +844,9 @@ def quantiles(data, *, n=4, method='exclusive'):
data = sorted(data) data = sorted(data)
ld = len(data) ld = len(data)
if ld < 2: if ld < 2:
raise StatisticsError('must have at least two data points') if ld == 1:
return data * (n - 1)
raise StatisticsError('must have at least one data point')
if method == 'inclusive': if method == 'inclusive':
m = ld - 1 m = ld - 1
result = [] result = []

View file

@ -2454,6 +2454,11 @@ class TestQuantiles(unittest.TestCase):
data = random.choices(range(100), k=k) data = random.choices(range(100), k=k)
q1, q2, q3 = quantiles(data, method='inclusive') q1, q2, q3 = quantiles(data, method='inclusive')
self.assertEqual(q2, statistics.median(data)) self.assertEqual(q2, statistics.median(data))
# Base case with a single data point: When estimating quantiles from
# a sample, we want to be able to add one sample point at a time,
# getting increasingly better estimates.
self.assertEqual(quantiles([10], n=4), [10.0, 10.0, 10.0])
self.assertEqual(quantiles([10], n=4, method='exclusive'), [10.0, 10.0, 10.0])
def test_equal_inputs(self): def test_equal_inputs(self):
quantiles = statistics.quantiles quantiles = statistics.quantiles
@ -2504,7 +2509,7 @@ class TestQuantiles(unittest.TestCase):
with self.assertRaises(ValueError): with self.assertRaises(ValueError):
quantiles([10, 20, 30], method='X') # method is unknown quantiles([10, 20, 30], method='X') # method is unknown
with self.assertRaises(StatisticsError): with self.assertRaises(StatisticsError):
quantiles([10], n=4) # not enough data points quantiles([], n=4) # not enough data points
with self.assertRaises(TypeError): with self.assertRaises(TypeError):
quantiles([10, None, 30], n=4) # data is non-numeric quantiles([10, None, 30], n=4) # data is non-numeric

View file

@ -0,0 +1,2 @@
Fix base case handling in statistics.quantiles(). A single data point is
now accepted instead of raising StatisticsError.