mirror of
https://github.com/python/cpython.git
synced 2025-10-14 10:53:40 +00:00
Forward port r68792 and r68789 putting Counter in __all__ and adding Counter buildouts.
This commit is contained in:
parent
afccd63ac9
commit
4d2073a073
3 changed files with 168 additions and 13 deletions
|
@ -177,6 +177,7 @@ For example::
|
||||||
>>> c = Counter() # a new, empty counter
|
>>> c = Counter() # a new, empty counter
|
||||||
>>> c = Counter('gallahad') # a new counter from an iterable
|
>>> c = Counter('gallahad') # a new counter from an iterable
|
||||||
>>> c = Counter({'red': 4, 'blue': 2}) # a new counter from a mapping
|
>>> c = Counter({'red': 4, 'blue': 2}) # a new counter from a mapping
|
||||||
|
>>> c = Counter(spam=8, eggs=1) # a new counter from keyword args
|
||||||
|
|
||||||
The returned object has a dictionary style interface except that it returns
|
The returned object has a dictionary style interface except that it returns
|
||||||
a zero count for missing items (instead of raising a :exc:`KeyError` like a
|
a zero count for missing items (instead of raising a :exc:`KeyError` like a
|
||||||
|
@ -207,7 +208,7 @@ For example::
|
||||||
Elements are returned in arbitrary order. If an element's count has been
|
Elements are returned in arbitrary order. If an element's count has been
|
||||||
set to zero or a negative number, :meth:`elements` will ignore it.
|
set to zero or a negative number, :meth:`elements` will ignore it.
|
||||||
|
|
||||||
>>> c = Counter({'a': 4, 'b': 2, 'd': 0, 'e': -2})
|
>>> c = Counter(a=4, b=2, c=0, d=-2)
|
||||||
>>> list(c.elements())
|
>>> list(c.elements())
|
||||||
['a', 'a', 'a', 'a', 'b', 'b']
|
['a', 'a', 'a', 'a', 'b', 'b']
|
||||||
|
|
||||||
|
@ -232,10 +233,10 @@ For example::
|
||||||
|
|
||||||
.. method:: update([iterable-or-mapping])
|
.. method:: update([iterable-or-mapping])
|
||||||
|
|
||||||
Like :meth:`dict.update` but adds-in counts instead of replacing them.
|
|
||||||
|
|
||||||
Elements are counted from an *iterable* or added-in from another
|
Elements are counted from an *iterable* or added-in from another
|
||||||
*mapping* (or counter)::
|
*mapping* (or counter). Like :meth:`dict.update` but adds-in counts
|
||||||
|
instead of replacing them, and the *iterable* is expected to be a
|
||||||
|
sequence of elements, not a sequence of ``(key, value)`` pairs::
|
||||||
|
|
||||||
>>> c = Counter('which')
|
>>> c = Counter('which')
|
||||||
>>> c.update('witch') # add elements from another iterable
|
>>> c.update('witch') # add elements from another iterable
|
||||||
|
@ -255,6 +256,34 @@ Common patterns for working with :class:`Counter` objects::
|
||||||
Counter(dict(list_of_pairs)) # convert from a list of (elem, cnt) pairs
|
Counter(dict(list_of_pairs)) # convert from a list of (elem, cnt) pairs
|
||||||
c.most_common()[:-n:-1] # n least common elements
|
c.most_common()[:-n:-1] # n least common elements
|
||||||
|
|
||||||
|
Several multiset mathematical operations are provided for combining
|
||||||
|
:class:`Counter` objects. Multisets are like regular sets but are allowed to
|
||||||
|
contain repeated elements (with counts of one or more). Addition and
|
||||||
|
subtraction combine counters by adding or subtracting the counts of
|
||||||
|
corresponding elements. Intersection and union return the minimum and maximum
|
||||||
|
of corresponding counts::
|
||||||
|
|
||||||
|
>>> c = Counter({'a': 3, 'b': 1})
|
||||||
|
>>> d = Counter({'a': 1, 'b': 2})
|
||||||
|
>>> c + d # add two counters together: c[x] + d[x]
|
||||||
|
Counter({'a': 4, 'b': 3})
|
||||||
|
>>> c - d # subtract (keeping only positive counts)
|
||||||
|
Counter({'a': 2})
|
||||||
|
>>> c & d # intersection: min(c[x], d[x])
|
||||||
|
Counter({'a': 1, 'b': 1})
|
||||||
|
>>> c | d # union: max(c[x], d[x])
|
||||||
|
Counter({'a': 3, 'b': 2})
|
||||||
|
|
||||||
|
All four multiset operations produce only positive counts (negative and zero
|
||||||
|
results are skipped). If inputs include negative counts, addition will sum
|
||||||
|
both counts and then exclude non-positive results. The other three operations
|
||||||
|
are undefined for negative inputs::
|
||||||
|
|
||||||
|
>>> e = Counter(a=8, b=-2, c=0)
|
||||||
|
>>> e += Counter() # remove zero and negative counts
|
||||||
|
>>> e
|
||||||
|
Counter({'a': 8})
|
||||||
|
|
||||||
**References**:
|
**References**:
|
||||||
|
|
||||||
* Wikipedia entry for `Multisets <http://en.wikipedia.org/wiki/Multiset>`_
|
* Wikipedia entry for `Multisets <http://en.wikipedia.org/wiki/Multiset>`_
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
__all__ = ['deque', 'defaultdict', 'namedtuple', 'UserDict', 'UserList',
|
__all__ = ['deque', 'defaultdict', 'namedtuple', 'UserDict', 'UserList',
|
||||||
'UserString']
|
'UserString', 'Counter']
|
||||||
# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
|
# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
|
||||||
# They should however be considered an integral part of collections.py.
|
# They should however be considered an integral part of collections.py.
|
||||||
from _abcoll import *
|
from _abcoll import *
|
||||||
|
@ -171,7 +171,7 @@ class Counter(dict):
|
||||||
# http://code.activestate.com/recipes/259174/
|
# http://code.activestate.com/recipes/259174/
|
||||||
# Knuth, TAOCP Vol. II section 4.6.3
|
# Knuth, TAOCP Vol. II section 4.6.3
|
||||||
|
|
||||||
def __init__(self, iterable=None):
|
def __init__(self, iterable=None, **kwds):
|
||||||
'''Create a new, empty Counter object. And if given, count elements
|
'''Create a new, empty Counter object. And if given, count elements
|
||||||
from an input iterable. Or, initialize the count from another mapping
|
from an input iterable. Or, initialize the count from another mapping
|
||||||
of elements to their counts.
|
of elements to their counts.
|
||||||
|
@ -179,9 +179,10 @@ class Counter(dict):
|
||||||
>>> c = Counter() # a new, empty counter
|
>>> c = Counter() # a new, empty counter
|
||||||
>>> c = Counter('gallahad') # a new counter from an iterable
|
>>> c = Counter('gallahad') # a new counter from an iterable
|
||||||
>>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
|
>>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
|
||||||
|
>>> c = Counter(a=4, b=2) # a new counter from keyword args
|
||||||
|
|
||||||
'''
|
'''
|
||||||
self.update(iterable)
|
self.update(iterable, **kwds)
|
||||||
|
|
||||||
def __missing__(self, key):
|
def __missing__(self, key):
|
||||||
'The count of elements not in the Counter is zero.'
|
'The count of elements not in the Counter is zero.'
|
||||||
|
@ -232,7 +233,7 @@ class Counter(dict):
|
||||||
raise NotImplementedError(
|
raise NotImplementedError(
|
||||||
'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
|
'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
|
||||||
|
|
||||||
def update(self, iterable=None):
|
def update(self, iterable=None, **kwds):
|
||||||
'''Like dict.update() but add counts instead of replacing them.
|
'''Like dict.update() but add counts instead of replacing them.
|
||||||
|
|
||||||
Source can be an iterable, a dictionary, or another Counter instance.
|
Source can be an iterable, a dictionary, or another Counter instance.
|
||||||
|
@ -249,10 +250,8 @@ class Counter(dict):
|
||||||
# replace behavior results in some of the original untouched counts
|
# replace behavior results in some of the original untouched counts
|
||||||
# being mixed-in with all of the other counts for a mishmash that
|
# being mixed-in with all of the other counts for a mishmash that
|
||||||
# doesn't have a straight-forward interpretation in most counting
|
# doesn't have a straight-forward interpretation in most counting
|
||||||
# contexts. Instead, we look to Knuth for suggested operations on
|
# contexts. Instead, we implement straight-addition. Both the inputs
|
||||||
# multisets and implement the union-add operation discussed in
|
# and outputs are allowed to contain zero and negative counts.
|
||||||
# TAOCP Volume II section 4.6.3 exercise 19. The Wikipedia entry for
|
|
||||||
# multisets calls that operation a sum or join.
|
|
||||||
|
|
||||||
if iterable is not None:
|
if iterable is not None:
|
||||||
if isinstance(iterable, Mapping):
|
if isinstance(iterable, Mapping):
|
||||||
|
@ -261,17 +260,102 @@ class Counter(dict):
|
||||||
else:
|
else:
|
||||||
for elem in iterable:
|
for elem in iterable:
|
||||||
self[elem] += 1
|
self[elem] += 1
|
||||||
|
if kwds:
|
||||||
|
self.update(kwds)
|
||||||
|
|
||||||
def copy(self):
|
def copy(self):
|
||||||
'Like dict.copy() but returns a Counter instance instead of a dict.'
|
'Like dict.copy() but returns a Counter instance instead of a dict.'
|
||||||
return Counter(self)
|
return Counter(self)
|
||||||
|
|
||||||
|
def __delitem__(self, elem):
    """Remove *elem* from the counter, silently ignoring absent elements.

    Behaves like dict.__delitem__() except that deleting an element that
    is not present is a no-op instead of raising KeyError.
    """
    if elem not in self:
        # Nothing to delete; unlike a plain dict this is not an error.
        return
    dict.__delitem__(self, elem)
|
||||||
|
|
||||||
def __repr__(self):
|
def __repr__(self):
|
||||||
if not self:
|
if not self:
|
||||||
return '%s()' % self.__class__.__name__
|
return '%s()' % self.__class__.__name__
|
||||||
items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
|
items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
|
||||||
return '%s({%s})' % (self.__class__.__name__, items)
|
return '%s({%s})' % (self.__class__.__name__, items)
|
||||||
|
|
||||||
|
# Multiset-style mathematical operations discussed in:
|
||||||
|
# Knuth TAOCP Volume II section 4.6.3 exercise 19
|
||||||
|
# and at http://en.wikipedia.org/wiki/Multiset
|
||||||
|
#
|
||||||
|
# Results are undefined when inputs contain negative counts.
|
||||||
|
# Outputs guaranteed to only include positive counts.
|
||||||
|
#
|
||||||
|
# To strip negative and zero counts, add-in an empty counter:
|
||||||
|
# c += Counter()
|
||||||
|
|
||||||
|
def __add__(self, other):
    """Return a new Counter with the counts of both operands summed.

    Every element present in either counter is considered; only sums
    that are strictly positive are stored in the result.  Returns
    NotImplemented when *other* is not a Counter.

    >>> Counter('abbb') + Counter('bcc')
    Counter({'b': 4, 'c': 2, 'a': 1})

    """
    if not isinstance(other, Counter):
        return NotImplemented
    total = Counter()
    every_key = set(self) | set(other)
    for key in every_key:
        # Missing keys read as zero on either side.
        summed = self[key] + other[key]
        if summed > 0:
            total[key] = summed
    return total
|
||||||
|
|
||||||
|
def __sub__(self, other):
    """Return a new Counter with the counts of *other* subtracted.

    Only elements of the left operand are examined, and only differences
    that are strictly positive are stored in the result.  Returns
    NotImplemented when *other* is not a Counter.

    >>> Counter('abbbc') - Counter('bccd')
    Counter({'b': 2, 'a': 1})

    """
    if not isinstance(other, Counter):
        return NotImplemented
    remaining = Counter()
    for key, have in self.items():
        left_over = have - other[key]
        if left_over > 0:
            remaining[key] = left_over
    return remaining
|
||||||
|
|
||||||
|
def __or__(self, other):
    """Return the multiset union: the larger count for each element.

    Every element present in either counter is considered; only maxima
    that are strictly positive are stored in the result.  Returns
    NotImplemented when *other* is not a Counter.

    >>> Counter('abbb') | Counter('bcc')
    Counter({'b': 3, 'c': 2, 'a': 1})

    """
    if not isinstance(other, Counter):
        return NotImplemented
    union = Counter()
    every_key = set(self) | set(other)
    for key in every_key:
        # Missing keys read as zero on either side.
        largest = max(self[key], other[key])
        if largest > 0:
            union[key] = largest
    return union
|
||||||
|
|
||||||
|
def __and__(self, other):
    """Return the multiset intersection: the smaller count for each element.

    Only elements present in both counters are considered, and only
    minima that are strictly positive are stored in the result.  Returns
    NotImplemented when *other* is not a Counter.

    >>> Counter('abbb') & Counter('bcc')
    Counter({'b': 1})

    """
    if not isinstance(other, Counter):
        return NotImplemented
    common = Counter()
    # Walk one operand and probe the other for membership; which operand
    # plays which role depends on their relative sizes.
    if len(self) < len(other):
        walked, probed = self, other
    else:
        walked, probed = other, self
    for key in walked:
        if key in probed:
            smallest = min(probed[key], walked[key])
            if smallest > 0:
                common[key] = smallest
    return common
|
||||||
|
|
||||||
|
|
||||||
################################################################################
|
################################################################################
|
||||||
### UserDict
|
### UserDict
|
||||||
|
|
|
@ -4,6 +4,8 @@ import unittest, doctest
|
||||||
from test import support
|
from test import support
|
||||||
from collections import namedtuple, Counter, Mapping
|
from collections import namedtuple, Counter, Mapping
|
||||||
import pickle, copy
|
import pickle, copy
|
||||||
|
from random import randrange
|
||||||
|
import operator
|
||||||
from collections import Hashable, Iterable, Iterator
|
from collections import Hashable, Iterable, Iterator
|
||||||
from collections import Sized, Container, Callable
|
from collections import Sized, Container, Callable
|
||||||
from collections import Set, MutableSet
|
from collections import Set, MutableSet
|
||||||
|
@ -361,6 +363,8 @@ class TestCounter(unittest.TestCase):
|
||||||
|
|
||||||
def test_basics(self):
|
def test_basics(self):
|
||||||
c = Counter('abcaba')
|
c = Counter('abcaba')
|
||||||
|
self.assertEqual(c, Counter({'a':3 , 'b': 2, 'c': 1}))
|
||||||
|
self.assertEqual(c, Counter(a=3, b=2, c=1))
|
||||||
self.assert_(isinstance(c, dict))
|
self.assert_(isinstance(c, dict))
|
||||||
self.assert_(isinstance(c, Mapping))
|
self.assert_(isinstance(c, Mapping))
|
||||||
self.assert_(issubclass(Counter, dict))
|
self.assert_(issubclass(Counter, dict))
|
||||||
|
@ -388,6 +392,7 @@ class TestCounter(unittest.TestCase):
|
||||||
c['a'] += 1 # increment an existing value
|
c['a'] += 1 # increment an existing value
|
||||||
c['b'] -= 2 # sub existing value to zero
|
c['b'] -= 2 # sub existing value to zero
|
||||||
del c['c'] # remove an entry
|
del c['c'] # remove an entry
|
||||||
|
del c['c'] # make sure that del doesn't raise KeyError
|
||||||
c['d'] -= 2 # sub from a missing value
|
c['d'] -= 2 # sub from a missing value
|
||||||
c['e'] = -5 # directly assign a missing value
|
c['e'] = -5 # directly assign a missing value
|
||||||
c['f'] += 4 # add to a missing value
|
c['f'] += 4 # add to a missing value
|
||||||
|
@ -403,7 +408,8 @@ class TestCounter(unittest.TestCase):
|
||||||
self.assertEqual(repr(c), 'Counter()')
|
self.assertEqual(repr(c), 'Counter()')
|
||||||
self.assertRaises(NotImplementedError, Counter.fromkeys, 'abc')
|
self.assertRaises(NotImplementedError, Counter.fromkeys, 'abc')
|
||||||
self.assertRaises(TypeError, hash, c)
|
self.assertRaises(TypeError, hash, c)
|
||||||
c.update(dict(a=5, b=3, c=1))
|
c.update(dict(a=5, b=3))
|
||||||
|
c.update(c=1)
|
||||||
c.update(Counter('a' * 50 + 'b' * 30))
|
c.update(Counter('a' * 50 + 'b' * 30))
|
||||||
c.update() # test case with no args
|
c.update() # test case with no args
|
||||||
c.__init__('a' * 500 + 'b' * 300)
|
c.__init__('a' * 500 + 'b' * 300)
|
||||||
|
@ -447,7 +453,43 @@ class TestCounter(unittest.TestCase):
|
||||||
self.assertEqual(dict(Counter(s)), dict(Counter(s).items()))
|
self.assertEqual(dict(Counter(s)), dict(Counter(s).items()))
|
||||||
self.assertEqual(set(Counter(s)), set(s))
|
self.assertEqual(set(Counter(s)), set(s))
|
||||||
|
|
||||||
|
def test_multiset_operations(self):
    """Exercise the Counter multiset operators (+, -, |, &).

    Three things are checked:

    * adding an empty Counter strips zero and negative counts;
    * for random pairs of counters, each operator agrees element-wise
      with the matching integer operation and never stores a
      non-positive count in its result;
    * for counters whose counts are all 0 or 1, the -, | and & operators
      agree with the corresponding set operations.
    """
    # Verify that adding a zero counter will strip zeros and negatives
    c = Counter(a=10, b=-2, c=0) + Counter()
    self.assertEqual(dict(c), dict(a=10))

    elements = 'abcd'
    for _ in range(1000):
        # test random pairs of multisets
        p = Counter(dict((elem, randrange(-2, 4)) for elem in elements))
        q = Counter(dict((elem, randrange(-2, 4)) for elem in elements))
        for counterop, numberop, defneg in [
            (Counter.__add__, lambda x, y: x + y if x + y > 0 else 0, True),
            (Counter.__sub__, lambda x, y: x - y if x - y > 0 else 0, False),
            (Counter.__or__, max, False),
            (Counter.__and__, min, False),
        ]:
            result = counterop(p, q)
            for x in elements:
                # all except __add__ are undefined for negative inputs
                if defneg or (p[x] >= 0 and q[x] >= 0):
                    self.assertEqual(numberop(p[x], q[x]), result[x])
            # verify that results exclude non-positive counts
            # (all() is required: a bare generator expression is always
            # truthy, so asserting on it directly could never fail)
            self.assertTrue(all(x > 0 for x in result.values()))

    elements = 'abcdef'
    for _ in range(100):
        # verify that random multisets with no repeats are exactly like sets
        p = Counter(dict((elem, randrange(0, 2)) for elem in elements))
        q = Counter(dict((elem, randrange(0, 2)) for elem in elements))
        for counterop, setop in [
            (Counter.__sub__, set.__sub__),
            (Counter.__or__, set.__or__),
            (Counter.__and__, set.__and__),
        ]:
            counter_result = counterop(p, q)
            set_result = setop(set(p.elements()), set(q.elements()))
            self.assertEqual(counter_result, dict.fromkeys(set_result, 1))
|
||||||
|
|
||||||
import doctest, collections
|
import doctest, collections
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue