bpo-35892: Fix mode() and add multimode() (#12089)

2025-11-25 04:34:37 +00:00 · 2019-03-12 00:43:27 -07:00 · 2019-03-12 00:43:27 -07:00 · fc06a192fd
commit fc06a192fd
parent 3e936431e2
4 changed files with 97 additions and 48 deletions
--- a/Lib/statistics.py
+++ b/Lib/statistics.py
@ -17,6 +17,7 @@ median_low          Low median of data.
 median_high         High median of data.
 median_grouped      Median, or 50th percentile, of grouped data.
 mode                Mode (most common value) of data.
+multimode           List of modes (most common values of data)
 ==================  =============================================

 Calculate the arithmetic mean ("the average") of data:
@ -79,10 +80,9 @@ A single exception is defined: StatisticsError is a subclass of ValueError.
 __all__ = [ 'StatisticsError', 'NormalDist',
            'pstdev', 'pvariance', 'stdev', 'variance',
            'median',  'median_low', 'median_high', 'median_grouped',
-            'mean', 'mode', 'harmonic_mean', 'fmean',
+            'mean', 'mode', 'multimode', 'harmonic_mean', 'fmean',
          ]

-import collections
 import math
 import numbers
 import random
@ -92,8 +92,8 @@ from decimal import Decimal
 from itertools import groupby
 from bisect import bisect_left, bisect_right
 from math import hypot, sqrt, fabs, exp, erf, tau, log, fsum
-
-
+from operator import itemgetter
+from collections import Counter

 # === Exceptions ===

@ -249,20 +249,6 @@ def _convert(value, T):
            raise


-def _counts(data):
-    # Generate a table of sorted (value, frequency) pairs.
-    table = collections.Counter(iter(data)).most_common()
-    if not table:
-        return table
-    # Extract the values with the highest frequency.
-    maxfreq = table[0][1]
-    for i in range(1, len(table)):
-        if table[i][1] != maxfreq:
-            table = table[:i]
-            break
-    return table
-
-
 def _find_lteq(a, x):
    'Locate the leftmost value exactly equal to x'
    i = bisect_left(a, x)
@ -334,9 +320,9 @@ def fmean(data):
            nonlocal n
            n += 1
            return x
-        total = math.fsum(map(count, data))
+        total = fsum(map(count, data))
    else:
-        total = math.fsum(data)
+        total = fsum(data)
    try:
        return total / n
    except ZeroDivisionError:
@ -523,19 +509,38 @@ def mode(data):
    >>> mode(["red", "blue", "blue", "red", "green", "red", "red"])
    'red'

-    If there is not exactly one most common value, ``mode`` will raise
-    StatisticsError.
+    If there are multiple modes, return the first one encountered.
+
+        >>> mode(['red', 'red', 'green', 'blue', 'blue'])
+        'red'
+
+    If *data* is empty, ``mode``, raises StatisticsError.
+
    """
-    # Generate a table of sorted (value, frequency) pairs.
-    table = _counts(data)
-    if len(table) == 1:
-        return table[0][0]
-    elif table:
-        raise StatisticsError(
-                'no unique mode; found %d equally common values' % len(table)
-                )
-    else:
-        raise StatisticsError('no mode for empty data')
+    data = iter(data)
+    try:
+        return Counter(data).most_common(1)[0][0]
+    except IndexError:
+        raise StatisticsError('no mode for empty data') from None
+
+
+def multimode(data):
+    """ Return a list of the most frequently occurring values.
+
+        Will return more than one result if there are multiple modes
+        or an empty list if *data* is empty.
+
+        >>> multimode('aabbbbbbbbcc')
+        ['b']
+        >>> multimode('aabbbbccddddeeffffgg')
+        ['b', 'd', 'f']
+        >>> multimode('')
+        []
+
+    """
+    counts = Counter(iter(data)).most_common()
+    maxcount, mode_items = next(groupby(counts, key=itemgetter(1)), (0, []))
+    return list(map(itemgetter(0), mode_items))


 # === Measures of spread ===
@ -836,6 +841,7 @@ if __name__ == '__main__':
    from math import isclose
    from operator import add, sub, mul, truediv
    from itertools import repeat
+    import doctest

    g1 = NormalDist(10, 20)
    g2 = NormalDist(-5, 25)
@ -893,3 +899,5 @@ if __name__ == '__main__':
    S = NormalDist.from_samples([x - y for x, y in zip(X.samples(n),
                                                       Y.samples(n))])
    assert_close(X - Y, S)
+
+    print(doctest.testmod())