mirror of
https://github.com/python/cpython.git
synced 2025-09-26 10:19:53 +00:00
bpo-40755: Add missing multiset operations to Counter() (GH-20339)
This commit is contained in:
parent
0de437de62
commit
60398512c8
4 changed files with 206 additions and 6 deletions
|
@ -290,6 +290,47 @@ For example::
|
||||||
>>> sorted(c.elements())
|
>>> sorted(c.elements())
|
||||||
['a', 'a', 'a', 'a', 'b', 'b']
|
['a', 'a', 'a', 'a', 'b', 'b']
|
||||||
|
|
||||||
|
.. method:: isdisjoint(other)
|
||||||
|
|
||||||
|
True if none of the elements in *self* overlap with those in *other*.
|
||||||
|
Negative or missing counts are ignored.
|
||||||
|
Logically equivalent to: ``not (+self) & (+other)``
|
||||||
|
|
||||||
|
.. versionadded:: 3.10
|
||||||
|
|
||||||
|
.. method:: isequal(other)
|
||||||
|
|
||||||
|
Test whether counts agree exactly.
|
||||||
|
Negative or missing counts are treated as zero.
|
||||||
|
|
||||||
|
This method works differently than the inherited :meth:`__eq__` method
|
||||||
|
which treats negative or missing counts as distinct from zero::
|
||||||
|
|
||||||
|
>>> Counter(a=1, b=0).isequal(Counter(a=1))
|
||||||
|
True
|
||||||
|
>>> Counter(a=1, b=0) == Counter(a=1)
|
||||||
|
False
|
||||||
|
|
||||||
|
Logically equivalent to: ``+self == +other``
|
||||||
|
|
||||||
|
.. versionadded:: 3.10
|
||||||
|
|
||||||
|
.. method:: issubset(other)
|
||||||
|
|
||||||
|
True if the counts in *self* are less than or equal to those in *other*.
|
||||||
|
Negative or missing counts are treated as zero.
|
||||||
|
Logically equivalent to: ``not self - (+other)``
|
||||||
|
|
||||||
|
.. versionadded:: 3.10
|
||||||
|
|
||||||
|
.. method:: issuperset(other)
|
||||||
|
|
||||||
|
True if the counts in *self* are greater than or equal to those in *other*.
|
||||||
|
Negative or missing counts are treated as zero.
|
||||||
|
Logically equivalent to: ``not other - (+self)``
|
||||||
|
|
||||||
|
.. versionadded:: 3.10
|
||||||
|
|
||||||
.. method:: most_common([n])
|
.. method:: most_common([n])
|
||||||
|
|
||||||
Return a list of the *n* most common elements and their counts from the
|
Return a list of the *n* most common elements and their counts from the
|
||||||
|
|
|
@ -710,12 +710,24 @@ class Counter(dict):
|
||||||
# To strip negative and zero counts, add-in an empty counter:
|
# To strip negative and zero counts, add-in an empty counter:
|
||||||
# c += Counter()
|
# c += Counter()
|
||||||
#
|
#
|
||||||
# Rich comparison operators for multiset subset and superset tests
|
# When the multiplicities are all zero or one, multiset operations
|
||||||
# are deliberately omitted due to semantic conflicts with the
|
# are guaranteed to be equivalent to the corresponding operations
|
||||||
# existing inherited dict equality method. Subset and superset
|
# for regular sets.
|
||||||
# semantics ignore zero counts and require that p≤q ∧ p≥q → p=q;
|
# Given counter multisets such as:
|
||||||
# however, that would not be the case for p=Counter(a=1, b=0)
|
# cp = Counter(a=1, b=0, c=1)
|
||||||
# and q=Counter(a=1) where the dictionaries are not equal.
|
# cq = Counter(c=1, d=0, e=1)
|
||||||
|
# The corresponding regular sets would be:
|
||||||
|
# sp = {'a', 'c'}
|
||||||
|
# sq = {'c', 'e'}
|
||||||
|
# All of the following relations would hold:
|
||||||
|
# set(cp + cq) == sp | sq
|
||||||
|
# set(cp - cq) == sp - sq
|
||||||
|
# set(cp | cq) == sp | sq
|
||||||
|
# set(cp & cq) == sp & sq
|
||||||
|
# cp.isequal(cq) == (sp == sq)
|
||||||
|
# cp.issubset(cq) == sp.issubset(sq)
|
||||||
|
# cp.issuperset(cq) == sp.issuperset(sq)
|
||||||
|
# cp.isdisjoint(cq) == sp.isdisjoint(sq)
|
||||||
|
|
||||||
def __add__(self, other):
|
def __add__(self, other):
|
||||||
'''Add counts from two counters.
|
'''Add counts from two counters.
|
||||||
|
@ -874,6 +886,92 @@ class Counter(dict):
|
||||||
self[elem] = other_count
|
self[elem] = other_count
|
||||||
return self._keep_positive()
|
return self._keep_positive()
|
||||||
|
|
||||||
|
def isequal(self, other):
    '''Test whether counts agree exactly.

    Negative or missing counts are treated as zero.

    This is different than the inherited __eq__() method which
    treats negative or missing counts as distinct from zero:

        >>> Counter(a=1, b=0).isequal(Counter(a=1))
        True
        >>> Counter(a=1, b=0) == Counter(a=1)
        False

    If *other* is not a Counter it is first converted with Counter(other).

    Logically equivalent to: +self == +other
    '''
    if not isinstance(other, Counter):
        other = Counter(other)
    # Visit every element known to either side; indexing a Counter with a
    # missing element yields zero, so absent entries compare as zero.
    for elem in set(self) | set(other):
        # Clamp negative counts to zero before comparing.
        if max(self[elem], 0) != max(other[elem], 0):
            return False
    return True
|
||||||
|
|
||||||
|
def issubset(self, other):
    '''True if the counts in self are less than or equal to those in other.

    Negative or missing counts are treated as zero.

    If *other* is not a Counter it is first converted with Counter(other).

    Logically equivalent to: not self - (+other)
    '''
    if not isinstance(other, Counter):
        other = Counter(other)
    for elem, count in self.items():
        # A negative count in other is treated as zero; a non-positive
        # count in self can therefore never exceed it.
        if count > max(other[elem], 0):
            return False
    return True
|
||||||
|
|
||||||
|
def issuperset(self, other):
    '''True if the counts in self are greater than or equal to those in other.

    Negative or missing counts are treated as zero.

    If *other* is not a Counter it is first converted with Counter(other).

    Logically equivalent to: not other - (+self)
    '''
    if not isinstance(other, Counter):
        other = Counter(other)
    # "self is a superset of other" is the same test as
    # "other is a subset of self", so delegate to issubset().
    return other.issubset(self)
|
||||||
|
|
||||||
|
def isdisjoint(self, other):
    '''True if none of the elements in self overlap with those in other.

    Negative or missing counts are ignored.

    If *other* is not a Counter it is first converted with Counter(other).

    Logically equivalent to: not (+self) & (+other)
    '''
    if not isinstance(other, Counter):
        other = Counter(other)
    for elem, count in self.items():
        # Only an element with a positive count on both sides is an overlap.
        if count > 0 and other[elem] > 0:
            return False
    return True
|
||||||
|
|
||||||
|
# Rich comparison operators for multiset subset and superset tests
|
||||||
|
# have been deliberately omitted due to semantic conflicts with the
|
||||||
|
# existing inherited dict equality method. Subset and superset
|
||||||
|
# semantics ignore zero counts and require that p⊆q ∧ p⊇q ⇔ p=q;
|
||||||
|
# however, that would not be the case for p=Counter(a=1, b=0)
|
||||||
|
# and q=Counter(a=1) where the dictionaries are not equal.
|
||||||
|
|
||||||
|
def _omitted(self, other):
    '''Reject an ordering comparison by always raising TypeError.

    The message points callers at the isequal(), issubset(), and
    issuperset() methods.
    '''
    raise TypeError(
        'Rich comparison operators have been deliberately omitted. '
        'Use the isequal(), issubset(), and issuperset() methods instead.')

# Route every ordering operator to the rejecting stub.  (The original
# chain listed __lt__ twice; the duplicate target was a no-op.)
__lt__ = __le__ = __gt__ = __ge__ = _omitted
|
||||||
|
|
||||||
|
|
||||||
########################################################################
|
########################################################################
|
||||||
### ChainMap
|
### ChainMap
|
||||||
|
|
|
@ -7,6 +7,7 @@ import inspect
|
||||||
import operator
|
import operator
|
||||||
import pickle
|
import pickle
|
||||||
from random import choice, randrange
|
from random import choice, randrange
|
||||||
|
from itertools import product, chain, combinations
|
||||||
import string
|
import string
|
||||||
import sys
|
import sys
|
||||||
from test import support
|
from test import support
|
||||||
|
@ -2219,6 +2220,64 @@ class TestCounter(unittest.TestCase):
|
||||||
self.assertTrue(c.called)
|
self.assertTrue(c.called)
|
||||||
self.assertEqual(dict(c), {'a': 5, 'b': 2, 'c': 1, 'd': 1, 'r':2 })
|
self.assertEqual(dict(c), {'a': 5, 'b': 2, 'c': 1, 'd': 1, 'r':2 })
|
||||||
|
|
||||||
|
def test_multiset_operations_equivalent_to_set_operations(self):
    # When the multiplicities are all zero or one, multiset operations
    # are guaranteed to be equivalent to the corresponding operations
    # for regular sets.

    # Every (element, count) pair with count in {0, 1} ...
    s = list(product(('a', 'b', 'c'), range(2)))
    # ... every combination of those pairs, of every size ...
    powerset = chain.from_iterable(combinations(s, r) for r in range(len(s)+1))
    # ... each turned into a Counter via dict().
    counters = [Counter(dict(groups)) for groups in powerset]
    for cp, cq in product(counters, repeat=2):
        # elements() only yields items with positive counts, so these are
        # the regular sets corresponding to each counter.
        sp = set(cp.elements())
        sq = set(cq.elements())
        self.assertEqual(set(cp + cq), sp | sq)
        self.assertEqual(set(cp - cq), sp - sq)
        self.assertEqual(set(cp | cq), sp | sq)
        self.assertEqual(set(cp & cq), sp & sq)
        self.assertEqual(cp.isequal(cq), sp == sq)
        self.assertEqual(cp.issubset(cq), sp.issubset(sq))
        self.assertEqual(cp.issuperset(cq), sp.issuperset(sq))
        self.assertEqual(cp.isdisjoint(cq), sp.isdisjoint(sq))
|
||||||
|
|
||||||
|
def test_multiset_equal(self):
    # A zero count on the left is ignored; the string argument is
    # counted per character.
    self.assertTrue(Counter(a=3, b=2, c=0).isequal('ababa'))
    self.assertFalse(Counter(a=3, b=2).isequal('babab'))
|
||||||
|
|
||||||
|
def test_multiset_subset(self):
    # 'babab' holds only two 'a' characters, so a=3 is not contained in it.
    self.assertTrue(Counter(a=3, b=2, c=0).issubset('ababa'))
    self.assertFalse(Counter(a=3, b=2).issubset('babab'))
|
||||||
|
|
||||||
|
def test_multiset_superset(self):
    # 'aabd' contains a 'd', which the counter on the left lacks.
    self.assertTrue(Counter(a=3, b=2, c=0).issuperset('aab'))
    self.assertFalse(Counter(a=3, b=2, c=0).issuperset('aabd'))
|
||||||
|
|
||||||
|
def test_multiset_disjoint(self):
    # c=0 does not count as an overlap with 'cde'; b=2 does overlap 'bcd'.
    self.assertTrue(Counter(a=3, b=2, c=0).isdisjoint('cde'))
    self.assertFalse(Counter(a=3, b=2, c=0).isdisjoint('bcd'))
|
||||||
|
|
||||||
|
def test_multiset_predicates_with_negative_counts(self):
    # Multiset predicates run on the output of the elements() method,
    # meaning that zero counts and negative counts are ignored.
    # The tests below confirm that we get the same results as the
    # tests above, even after a negative count has been included
    # in either *self* or *other*.

    # Negative count in *self* only; *other* is a string.
    self.assertTrue(Counter(a=3, b=2, c=0, d=-1).isequal('ababa'))
    self.assertFalse(Counter(a=3, b=2, d=-1).isequal('babab'))
    self.assertTrue(Counter(a=3, b=2, c=0, d=-1).issubset('ababa'))
    self.assertFalse(Counter(a=3, b=2, d=-1).issubset('babab'))
    self.assertTrue(Counter(a=3, b=2, c=0, d=-1).issuperset('aab'))
    self.assertFalse(Counter(a=3, b=2, c=0, d=-1).issuperset('aabd'))
    self.assertTrue(Counter(a=3, b=2, c=0, d=-1).isdisjoint('cde'))
    self.assertFalse(Counter(a=3, b=2, c=0, d=-1).isdisjoint('bcd'))

    # Negative counts in both *self* and *other*.
    self.assertTrue(Counter(a=3, b=2, c=0, d=-1).isequal(Counter(a=3, b=2, c=-1)))
    self.assertFalse(Counter(a=3, b=2, d=-1).isequal(Counter(a=2, b=3, c=-1)))
    self.assertTrue(Counter(a=3, b=2, c=0, d=-1).issubset(Counter(a=3, b=2, c=-1)))
    self.assertFalse(Counter(a=3, b=2, d=-1).issubset(Counter(a=2, b=3, c=-1)))
    self.assertTrue(Counter(a=3, b=2, c=0, d=-1).issuperset(Counter(a=2, b=1, c=-1)))
    self.assertFalse(Counter(a=3, b=2, c=0, d=-1).issuperset(Counter(a=2, b=1, c=-1, d=1)))
    self.assertTrue(Counter(a=3, b=2, c=0, d=-1).isdisjoint(Counter(c=1, d=2, e=3, f=-1)))
    self.assertFalse(Counter(a=3, b=2, c=0, d=-1).isdisjoint(Counter(b=1, c=1, d=1, e=-1)))
|
||||||
|
|
||||||
|
|
||||||
################################################################################
|
################################################################################
|
||||||
### Run tests
|
### Run tests
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
Add multiset comparison methods to collections.Counter(): isequal(),
|
||||||
|
issubset(), issuperset(), and isdisjoint().
|
Loading…
Add table
Add a link
Reference in a new issue