bpo-44150: Support optional weights parameter for fmean() (GH-26175)

This commit is contained in:
Raymond Hettinger 2021-05-20 20:22:26 -07:00 committed by GitHub
parent 18f41c04ff
commit be4dd7fcd9
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 59 additions and 9 deletions

View file

@ -43,7 +43,7 @@ or sample.
======================= =============================================================== ======================= ===============================================================
:func:`mean` Arithmetic mean ("average") of data. :func:`mean` Arithmetic mean ("average") of data.
:func:`fmean` Fast, floating point arithmetic mean. :func:`fmean` Fast, floating point arithmetic mean, with optional weighting.
:func:`geometric_mean` Geometric mean of data. :func:`geometric_mean` Geometric mean of data.
:func:`harmonic_mean` Harmonic mean of data. :func:`harmonic_mean` Harmonic mean of data.
:func:`median` Median (middle value) of data. :func:`median` Median (middle value) of data.
@ -128,7 +128,7 @@ However, for reading convenience, most of the examples show sorted sequences.
``mean(data)`` is equivalent to calculating the true population mean μ. ``mean(data)`` is equivalent to calculating the true population mean μ.
.. function:: fmean(data) .. function:: fmean(data, weights=None)
Convert *data* to floats and compute the arithmetic mean. Convert *data* to floats and compute the arithmetic mean.
@ -141,8 +141,25 @@ However, for reading convenience, most of the examples show sorted sequences.
>>> fmean([3.5, 4.0, 5.25]) >>> fmean([3.5, 4.0, 5.25])
4.25 4.25
Optional weighting is supported. For example, a professor assigns a
grade for a course by weighting quizzes at 20%, homework at 20%, a
midterm exam at 30%, and a final exam at 30%:
.. doctest::
>>> grades = [85, 92, 83, 91]
>>> weights = [0.20, 0.20, 0.30, 0.30]
>>> fmean(grades, weights)
87.6
If *weights* is supplied, it must be the same length as *data* and its
sum must be non-zero; otherwise a :exc:`StatisticsError` (a subclass of
:exc:`ValueError`) is raised.
.. versionadded:: 3.8 .. versionadded:: 3.8
.. versionchanged:: 3.11
Added support for *weights*.
.. function:: geometric_mean(data) .. function:: geometric_mean(data)

View file

@ -136,7 +136,7 @@ from decimal import Decimal
from itertools import groupby, repeat from itertools import groupby, repeat
from bisect import bisect_left, bisect_right from bisect import bisect_left, bisect_right
from math import hypot, sqrt, fabs, exp, erf, tau, log, fsum from math import hypot, sqrt, fabs, exp, erf, tau, log, fsum
from operator import itemgetter from operator import itemgetter, mul
from collections import Counter, namedtuple from collections import Counter, namedtuple
# === Exceptions === # === Exceptions ===
@ -345,7 +345,7 @@ def mean(data):
return _convert(total / n, T) return _convert(total / n, T)
def fmean(data): def fmean(data, weights=None):
"""Convert data to floats and compute the arithmetic mean. """Convert data to floats and compute the arithmetic mean.
This runs faster than the mean() function and it always returns a float. This runs faster than the mean() function and it always returns a float.
@ -363,13 +363,24 @@ def fmean(data):
nonlocal n nonlocal n
for n, x in enumerate(iterable, start=1): for n, x in enumerate(iterable, start=1):
yield x yield x
total = fsum(count(data)) data = count(data)
else: if weights is None:
total = fsum(data) total = fsum(data)
try: if not n:
raise StatisticsError('fmean requires at least one data point')
return total / n return total / n
except ZeroDivisionError: try:
raise StatisticsError('fmean requires at least one data point') from None num_weights = len(weights)
except TypeError:
weights = list(weights)
num_weights = len(weights)
num = fsum(map(mul, data, weights))
if n != num_weights:
raise StatisticsError('data and weights must be the same length')
den = fsum(weights)
if not den:
raise StatisticsError('sum of weights must be non-zero')
return num / den
def geometric_mean(data): def geometric_mean(data):

View file

@ -1972,6 +1972,27 @@ class TestFMean(unittest.TestCase):
with self.assertRaises(ValueError): with self.assertRaises(ValueError):
fmean([Inf, -Inf]) fmean([Inf, -Inf])
def test_weights(self):
# Exercise the optional *weights* argument of fmean(): agreement with
# the unweighted mean, iterator inputs, and both error conditions.
fmean = statistics.fmean
StatisticsError = statistics.StatisticsError
# Uniform weights must give the same result as the unweighted mean.
self.assertEqual(
fmean([10, 10, 10, 50], [0.25] * 4),
fmean([10, 10, 10, 50]))
# Doubling one point's weight is equivalent to repeating that point.
self.assertEqual(
fmean([10, 10, 20], [0.25, 0.25, 0.50]),
fmean([10, 10, 20, 20]))
self.assertEqual( # inputs are iterators
fmean(iter([10, 10, 20]), iter([0.25, 0.25, 0.50])),
fmean([10, 10, 20, 20]))
# Each error case is checked with both sequences and iterators, since
# neither data nor weights is required to support len().
with self.assertRaises(StatisticsError):
fmean([10, 20, 30], [1, 2]) # unequal lengths
with self.assertRaises(StatisticsError):
fmean(iter([10, 20, 30]), iter([1, 2])) # unequal lengths
with self.assertRaises(StatisticsError):
fmean([10, 20], [-1, 1]) # sum of weights is zero
with self.assertRaises(StatisticsError):
fmean(iter([10, 20]), iter([-1, 1])) # sum of weights is zero
# === Tests for variances and standard deviations === # === Tests for variances and standard deviations ===

View file

@ -0,0 +1 @@
Add optional *weights* argument to statistics.fmean().