mirror of
https://github.com/python/cpython.git
synced 2025-08-01 23:53:15 +00:00
Build-outs for Counter() class:
* Constructor and update() support keyword args (like their dict counterparts). * The 'del' statement no longer raises KeyError for missing values. * Add multiset operations: __add__, __sub__, __and__, __or__.
This commit is contained in:
parent
e8b619c152
commit
bad1eb2ff3
3 changed files with 168 additions and 13 deletions
|
@ -10,7 +10,7 @@ from operator import itemgetter as _itemgetter
|
|||
from keyword import iskeyword as _iskeyword
|
||||
import sys as _sys
|
||||
import heapq as _heapq
|
||||
from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
|
||||
from itertools import repeat as _repeat, chain as _chain, starmap as _starmap, ifilter as _ifilter
|
||||
|
||||
########################################################################
|
||||
### namedtuple #######################################################
|
||||
|
@ -167,7 +167,7 @@ class Counter(dict):
|
|||
# http://code.activestate.com/recipes/259174/
|
||||
# Knuth, TAOCP Vol. II section 4.6.3
|
||||
|
||||
def __init__(self, iterable=None):
|
||||
def __init__(self, iterable=None, **kwds):
|
||||
'''Create a new, empty Counter object. And if given, count elements
|
||||
from an input iterable. Or, initialize the count from another mapping
|
||||
of elements to their counts.
|
||||
|
@ -175,9 +175,10 @@ class Counter(dict):
|
|||
>>> c = Counter() # a new, empty counter
|
||||
>>> c = Counter('gallahad') # a new counter from an iterable
|
||||
>>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
|
||||
>>> c = Counter(a=4, b=2) # a new counter from keyword args
|
||||
|
||||
'''
|
||||
self.update(iterable)
|
||||
self.update(iterable, **kwds)
|
||||
|
||||
def __missing__(self, key):
|
||||
'The count of elements not in the Counter is zero.'
|
||||
|
@ -228,7 +229,7 @@ class Counter(dict):
|
|||
raise NotImplementedError(
|
||||
'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
|
||||
|
||||
def update(self, iterable=None):
|
||||
def update(self, iterable=None, **kwds):
|
||||
'''Like dict.update() but add counts instead of replacing them.
|
||||
|
||||
Source can be an iterable, a dictionary, or another Counter instance.
|
||||
|
@ -245,10 +246,8 @@ class Counter(dict):
|
|||
# replace behavior results in the some of original untouched counts
|
||||
# being mixed-in with all of the other counts for a mismash that
|
||||
# doesn't have a straight-forward interpretation in most counting
|
||||
# contexts. Instead, we look to Knuth for suggested operations on
|
||||
# multisets and implement the union-add operation discussed in
|
||||
# TAOCP Volume II section 4.6.3 exercise 19. The Wikipedia entry for
|
||||
# multisets calls that operation a sum or join.
|
||||
# contexts. Instead, we implement straight-addition. Both the inputs
|
||||
# and outputs are allowed to contain zero and negative counts.
|
||||
|
||||
if iterable is not None:
|
||||
if isinstance(iterable, Mapping):
|
||||
|
@ -257,17 +256,101 @@ class Counter(dict):
|
|||
else:
|
||||
for elem in iterable:
|
||||
self[elem] += 1
|
||||
if kwds:
|
||||
self.update(kwds)
|
||||
|
||||
def copy(self):
|
||||
'Like dict.copy() but returns a Counter instance instead of a dict.'
|
||||
return Counter(self)
|
||||
|
||||
def __delitem__(self, elem):
|
||||
'Like dict.__delitem__() but does not raise KeyError for missing values.'
|
||||
if elem in self:
|
||||
dict.__delitem__(self, elem)
|
||||
|
||||
def __repr__(self):
|
||||
if not self:
|
||||
return '%s()' % self.__class__.__name__
|
||||
items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
|
||||
return '%s({%s})' % (self.__class__.__name__, items)
|
||||
|
||||
# Multiset-style mathematical operations discussed in:
|
||||
# Knuth TAOCP Volume II section 4.6.3 exercise 19
|
||||
# and at http://en.wikipedia.org/wiki/Multiset
|
||||
#
|
||||
# Results are undefined when inputs contain negative counts.
|
||||
# Outputs guaranteed to only include positive counts.
|
||||
#
|
||||
# To strip negative and zero counts, add-in an empty counter:
|
||||
# c += Counter()
|
||||
|
||||
def __add__(self, other):
|
||||
'''Add counts from two counters.
|
||||
|
||||
>>> Counter('abbb') + Counter('bcc')
|
||||
Counter({'b': 4, 'c': 2, 'a': 1})
|
||||
|
||||
'''
|
||||
if not isinstance(other, Counter):
|
||||
return NotImplemented
|
||||
result = Counter()
|
||||
for elem in set(self) | set(other):
|
||||
newcount = self[elem] + other[elem]
|
||||
if newcount > 0:
|
||||
result[elem] = newcount
|
||||
return result
|
||||
|
||||
def __sub__(self, other):
|
||||
''' Subtract count, but keep only results with positive counts.
|
||||
|
||||
>>> Counter('abbbc') - Counter('bccd')
|
||||
Counter({'b': 2, 'a': 1})
|
||||
|
||||
'''
|
||||
if not isinstance(other, Counter):
|
||||
return NotImplemented
|
||||
result = Counter()
|
||||
for elem, count in self.iteritems():
|
||||
newcount = count - other[elem]
|
||||
if newcount > 0:
|
||||
result[elem] = newcount
|
||||
return result
|
||||
|
||||
def __or__(self, other):
|
||||
'''Union is the maximum of value in either of the input counters.
|
||||
|
||||
>>> Counter('abbb') | Counter('bcc')
|
||||
Counter({'b': 3, 'c': 2, 'a': 1})
|
||||
|
||||
'''
|
||||
if not isinstance(other, Counter):
|
||||
return NotImplemented
|
||||
_max = max
|
||||
result = Counter()
|
||||
for elem in set(self) | set(other):
|
||||
newcount = _max(self[elem], other[elem])
|
||||
if newcount > 0:
|
||||
result[elem] = newcount
|
||||
return result
|
||||
|
||||
def __and__(self, other):
|
||||
''' Intersection is the minimum of corresponding counts.
|
||||
|
||||
>>> Counter('abbb') & Counter('bcc')
|
||||
Counter({'b': 1})
|
||||
|
||||
'''
|
||||
if not isinstance(other, Counter):
|
||||
return NotImplemented
|
||||
_min = min
|
||||
result = Counter()
|
||||
if len(self) < len(other):
|
||||
self, other = other, self
|
||||
for elem in _ifilter(self.__contains__, other):
|
||||
newcount = _min(self[elem], other[elem])
|
||||
if newcount > 0:
|
||||
result[elem] = newcount
|
||||
return result
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue