bpo-40755: Add missing multiset operations to Counter() (GH-20339)

This commit is contained in:
Raymond Hettinger 2020-05-28 08:35:46 -07:00 committed by GitHub
parent 0de437de62
commit 60398512c8
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 206 additions and 6 deletions

View file

@ -290,6 +290,47 @@ For example::
>>> sorted(c.elements()) >>> sorted(c.elements())
['a', 'a', 'a', 'a', 'b', 'b'] ['a', 'a', 'a', 'a', 'b', 'b']
.. method:: isdisjoint(other)
True if none of the elements in *self* overlap with those in *other*.
Negative or missing counts are ignored.
Logically equivalent to: ``not (+self) & (+other)``
.. versionadded:: 3.10
.. method:: isequal(other)
Test whether counts agree exactly.
Negative or missing counts are treated as zero.
This method works differently from the inherited :meth:`__eq__` method,
which treats negative or missing counts as distinct from zero::
>>> Counter(a=1, b=0).isequal(Counter(a=1))
True
>>> Counter(a=1, b=0) == Counter(a=1)
False
Logically equivalent to: ``+self == +other``
.. versionadded:: 3.10
.. method:: issubset(other)
True if the counts in *self* are less than or equal to those in *other*.
Negative or missing counts are treated as zero.
Logically equivalent to: ``not self - (+other)``
.. versionadded:: 3.10
.. method:: issuperset(other)
True if the counts in *self* are greater than or equal to those in *other*.
Negative or missing counts are treated as zero.
Logically equivalent to: ``not other - (+self)``
.. versionadded:: 3.10
.. method:: most_common([n]) .. method:: most_common([n])
Return a list of the *n* most common elements and their counts from the Return a list of the *n* most common elements and their counts from the

View file

@ -710,12 +710,24 @@ class Counter(dict):
# To strip negative and zero counts, add-in an empty counter: # To strip negative and zero counts, add-in an empty counter:
# c += Counter() # c += Counter()
# #
# Rich comparison operators for multiset subset and superset tests # When the multiplicities are all zero or one, multiset operations
# are deliberately omitted due to semantic conflicts with the # are guaranteed to be equivalent to the corresponding operations
# existing inherited dict equality method. Subset and superset # for regular sets.
# semantics ignore zero counts and require that p≤q ∧ p≥q → p=q; # Given counter multisets such as:
# however, that would not be the case for p=Counter(a=1, b=0) # cp = Counter(a=1, b=0, c=1)
# and q=Counter(a=1) where the dictionaries are not equal. # cq = Counter(c=1, d=0, e=1)
# The corresponding regular sets would be:
# sp = {'a', 'c'}
# sq = {'c', 'e'}
# All of the following relations would hold:
# set(cp + cq) == sp | sq
# set(cp - cq) == sp - sq
# set(cp | cq) == sp | sq
# set(cp & cq) == sp & sq
# cp.isequal(cq) == (sp == sq)
# cp.issubset(cq) == sp.issubset(sq)
# cp.issuperset(cq) == sp.issuperset(sq)
# cp.isdisjoint(cq) == sp.isdisjoint(sq)
def __add__(self, other): def __add__(self, other):
'''Add counts from two counters. '''Add counts from two counters.
@ -874,6 +886,92 @@ class Counter(dict):
self[elem] = other_count self[elem] = other_count
return self._keep_positive() return self._keep_positive()
def isequal(self, other):
    '''Report whether two multisets hold the same counts.

    Counts that are negative or missing are treated as zero, which
    makes this different from the inherited __eq__() method, where
    such entries are distinct from zero:

    >>> Counter(a=1, b=0).isequal(Counter(a=1))
    True
    >>> Counter(a=1, b=0) == Counter(a=1)
    False

    *other* may be a Counter or anything accepted by Counter().

    Logically equivalent to:  +self == +other

    '''
    rhs = other if isinstance(other, Counter) else Counter(other)
    for key in set(self).union(rhs):
        mine, theirs = self[key], rhs[key]
        if mine == theirs:
            # Identical raw counts agree no matter what their sign is.
            continue
        # Otherwise compare after clamping negative counts up to zero.
        mine = 0 if mine < 0 else mine
        theirs = 0 if theirs < 0 else theirs
        if mine != theirs:
            return False
    return True
def issubset(self, other):
    '''True if every count in self is no greater than its count in other.

    Negative or missing counts in *other* are treated as zero.
    *other* may be a Counter or anything accepted by Counter().

    Logically equivalent to:  not self - (+other)

    '''
    rhs = other if isinstance(other, Counter) else Counter(other)
    for key, mine in self.items():
        limit = rhs[key]
        # A negative limit is raised to zero before comparing.
        if mine > (0 if limit < 0 else limit):
            return False
    return True
def issuperset(self, other):
    '''True if every count in self is at least its count in other.

    Negative or missing counts are treated as zero.
    *other* may be a Counter or anything accepted by Counter().

    Logically equivalent to:  not other - (+self)

    '''
    # A superset test is the subset test with the operands swapped.
    rhs = other if isinstance(other, Counter) else Counter(other)
    return rhs.issubset(self)
def isdisjoint(self, other):
    '''True if no element has a positive count in both self and other.

    Negative or missing counts are ignored.
    *other* may be a Counter or anything accepted by Counter().

    Logically equivalent to:  not (+self) & (+other)

    '''
    rhs = other if isinstance(other, Counter) else Counter(other)
    # An overlap exists only where both sides hold a positive count.
    return not any(
        mine > 0 and rhs[key] > 0
        for key, mine in self.items()
    )
# Rich comparison operators for multiset subset and superset tests
# have been deliberately omitted due to semantic conflicts with the
# existing inherited dict equality method. Subset and superset
# semantics ignore zero counts and require that p⊆q ∧ p⊇q ⇔ p=q;
# however, that would not be the case for p=Counter(a=1, b=0)
# and q=Counter(a=1) where the dictionaries are not equal.
def _omitted(self, other):
    '''Reject an ordering comparison between counters.

    Always raises TypeError with a message that points callers to the
    isequal(), issubset(), and issuperset() methods.
    '''
    raise TypeError(
        'Rich comparison operators have been deliberately omitted. '
        'Use the isequal(), issubset(), and issuperset() methods instead.')

# Bind each ordering operator exactly once to the rejecting stub.
__lt__ = __le__ = __gt__ = __ge__ = _omitted
######################################################################## ########################################################################
### ChainMap ### ChainMap

View file

@ -7,6 +7,7 @@ import inspect
import operator import operator
import pickle import pickle
from random import choice, randrange from random import choice, randrange
from itertools import product, chain, combinations
import string import string
import sys import sys
from test import support from test import support
@ -2219,6 +2220,64 @@ class TestCounter(unittest.TestCase):
self.assertTrue(c.called) self.assertTrue(c.called)
self.assertEqual(dict(c), {'a': 5, 'b': 2, 'c': 1, 'd': 1, 'r':2 }) self.assertEqual(dict(c), {'a': 5, 'b': 2, 'c': 1, 'd': 1, 'r':2 })
def test_multiset_operations_equivalent_to_set_operations(self):
    # With every multiplicity restricted to zero or one, each multiset
    # operation must agree with its counterpart on regular sets.

    # Build a counter from every subset of the (letter, count) pairs.
    pairs = list(product(('a', 'b', 'c'), range(2)))
    subsets = chain.from_iterable(
        combinations(pairs, size) for size in range(len(pairs) + 1))
    counters = [Counter(dict(subset)) for subset in subsets]

    for left, right in product(counters, repeat=2):
        # elements() yields only keys with positive counts, so these
        # are the regular sets corresponding to each counter.
        left_set = set(left.elements())
        right_set = set(right.elements())

        self.assertEqual(set(left + right), left_set | right_set)
        self.assertEqual(set(left - right), left_set - right_set)
        self.assertEqual(set(left | right), left_set | right_set)
        self.assertEqual(set(left & right), left_set & right_set)

        self.assertEqual(left.isequal(right), left_set == right_set)
        self.assertEqual(left.issubset(right),
                         left_set.issubset(right_set))
        self.assertEqual(left.issuperset(right),
                         left_set.issuperset(right_set))
        self.assertEqual(left.isdisjoint(right),
                         left_set.isdisjoint(right_set))
def test_multiset_equal(self):
    # A zero count does not affect equality; swapped tallies do.
    matching = Counter(a=3, b=2, c=0).isequal('ababa')
    swapped = Counter(a=3, b=2).isequal('babab')
    self.assertTrue(matching)
    self.assertFalse(swapped)
def test_multiset_subset(self):
    # 'ababa' covers three a's and two b's; 'babab' has only two a's.
    covered = Counter(a=3, b=2, c=0).issubset('ababa')
    exceeded = Counter(a=3, b=2).issubset('babab')
    self.assertTrue(covered)
    self.assertFalse(exceeded)
def test_multiset_superset(self):
    # 'aab' fits inside the counter; 'aabd' adds a 'd' it lacks.
    contains = Counter(a=3, b=2, c=0).issuperset('aab')
    lacks_d = Counter(a=3, b=2, c=0).issuperset('aabd')
    self.assertTrue(contains)
    self.assertFalse(lacks_d)
def test_multiset_disjoint(self):
    # The zero count for 'c' is ignored, so 'cde' shares nothing;
    # 'bcd' overlaps on 'b'.
    separate = Counter(a=3, b=2, c=0).isdisjoint('cde')
    overlapping = Counter(a=3, b=2, c=0).isdisjoint('bcd')
    self.assertTrue(separate)
    self.assertFalse(overlapping)
def test_multiset_predicates_with_negative_counts(self):
    # The multiset predicates behave as if run on the output of the
    # elements() method, so zero and negative counts are ignored.
    # These checks confirm that we get the same results as the simpler
    # predicate tests even after a negative count has been included
    # in either *self* or *other*.
    #
    # The two left-hand counters are shared across assertions; the
    # predicates only read them.
    padded = Counter(a=3, b=2, c=0, d=-1)
    bare = Counter(a=3, b=2, d=-1)

    # Negative count in *self*, with *other* given as a plain iterable.
    self.assertTrue(padded.isequal('ababa'))
    self.assertFalse(bare.isequal('babab'))
    self.assertTrue(padded.issubset('ababa'))
    self.assertFalse(bare.issubset('babab'))
    self.assertTrue(padded.issuperset('aab'))
    self.assertFalse(padded.issuperset('aabd'))
    self.assertTrue(padded.isdisjoint('cde'))
    self.assertFalse(padded.isdisjoint('bcd'))

    # Negative counts in both *self* and *other*.
    self.assertTrue(padded.isequal(Counter(a=3, b=2, c=-1)))
    self.assertFalse(bare.isequal(Counter(a=2, b=3, c=-1)))
    self.assertTrue(padded.issubset(Counter(a=3, b=2, c=-1)))
    self.assertFalse(bare.issubset(Counter(a=2, b=3, c=-1)))
    self.assertTrue(padded.issuperset(Counter(a=2, b=1, c=-1)))
    self.assertFalse(padded.issuperset(Counter(a=2, b=1, c=-1, d=1)))
    self.assertTrue(padded.isdisjoint(Counter(c=1, d=2, e=3, f=-1)))
    self.assertFalse(padded.isdisjoint(Counter(b=1, c=1, d=1, e=-1)))
################################################################################ ################################################################################
### Run tests ### Run tests

View file

@ -0,0 +1,2 @@
Add multiset comparison methods to collections.Counter(): isequal(),
issubset(), issuperset(), and isdisjoint().