mirror of
https://github.com/python/cpython.git
synced 2025-08-04 00:48:58 +00:00
parent
3bc0f7ebee
commit
b8baf6360f
4 changed files with 391 additions and 2 deletions
|
@ -140,6 +140,140 @@ Notes on using :class:`Set` and :class:`MutableSet` as a mixin:
|
|||
(For more about ABCs, see the :mod:`abc` module and :pep:`3119`.)
|
||||
|
||||
|
||||
.. _counter-objects:
|
||||
|
||||
:class:`Counter` objects
|
||||
------------------------
|
||||
|
||||
A counter tool is provided to support convenient and rapid tallies.
|
||||
For example::
|
||||
|
||||
# Tally repeated words in a list
|
||||
>>> words = ['red', 'blue', 'red', 'green', 'blue', 'blue']
|
||||
>>> cnt = Counter()
|
||||
>>> for word in words:
|
||||
... cnt[word] += 1
|
||||
>>> cnt
|
||||
Counter({'blue': 3, 'red': 2, 'green': 1})
|
||||
|
||||
# Find the ten most common words in Hamlet
|
||||
>>> import re
|
||||
>>> words = re.findall('\w+', open('hamlet.txt').read().lower())
|
||||
>>> Counter(hamlet_words).most_common(10)
|
||||
[('the', 1143), ('and', 966), ('to', 762), ('of', 669), ('i', 631),
|
||||
('you', 554), ('a', 546), ('my', 514), ('hamlet', 471), ('in', 451)]
|
||||
|
||||
.. class:: Counter([iterable-or-mapping])
|
||||
|
||||
A :class:`Counter` is a :class:`dict` subclass for counting hashable items.
|
||||
It is an unordered collection where elements are stored as dictionary keys
|
||||
and their counts are stored as dictionary values. Counts are allowed to be
|
||||
any integer value including zero or negative counts. The :class:`Counter`
|
||||
class is similar to bags or multisets in other languages.
|
||||
|
||||
Elements are counted from an *iterable* or initialized from another
|
||||
*mapping* (or counter)::
|
||||
|
||||
>>> c = Counter() # a new, empty counter
|
||||
>>> c = Counter('gallahad') # a new counter from an iterable
|
||||
>>> c = Counter({'red': 4, 'blue': 2}) # a new counter from a mapping
|
||||
|
||||
The returned object has a dictionary style interface except that it returns
|
||||
a zero count for missing items (instead of raising a :exc:`KeyError` like a
|
||||
dictionary would)::
|
||||
|
||||
>>> c = Counter(['egg', 'ham'])
|
||||
>>> c['bacon'] # count of a missing element is zero
|
||||
0
|
||||
|
||||
Assigning a count of zero or reducing the count to zero leaves the
|
||||
element in the dictionary. Use ``del`` to remove the entry entirely:
|
||||
|
||||
>>> c = Counter(['arthur', 'gwain'])
|
||||
>>> c['arthur'] = 0 # set the count of 'arthur' to zero
|
||||
>>> 'arthur' in c # but 'arthur' is still in the counter
|
||||
True
|
||||
>>> del c['arthur'] # del will completely remove the entry
|
||||
|
||||
.. versionadded:: 2.7
|
||||
|
||||
|
||||
Counter objects support two methods beyond those available for all
|
||||
dictionaries:
|
||||
|
||||
.. method:: elements()
|
||||
|
||||
Return an iterator over elements repeating each as many times as its count.
|
||||
Elements are returned in arbitrary order. If an element's count has been
|
||||
set to zero or a negative number, :meth:`elements` will ignore it.
|
||||
|
||||
>>> c = Counter({'a': 4, 'b': 2, 'd': 0, 'e': -2})
|
||||
>>> list(c.elements())
|
||||
['a', 'a', 'a', 'a', 'b', 'b']
|
||||
|
||||
.. method:: most_common([n])
|
||||
|
||||
Return a list of the *n* most common elements and their counts from
|
||||
the most common to the least. If *n* is not specified or is ``None``,
|
||||
return a list of all element counts in decreasing order of frequency.
|
||||
Elements with equal counts are ordered arbitrarily::
|
||||
|
||||
>>> Counter('abracadabra').most_common(3)
|
||||
[('a', 5), ('r', 2), ('b', 2)]
|
||||
|
||||
The usual dictionary methods are available for :class:`Counter` objects.
|
||||
All of those work the same as they do for dictionaries except for two
|
||||
which work differently for counters.
|
||||
|
||||
.. method:: fromkeys(iterable)
|
||||
|
||||
There is no equivalent class method for :class:`Counter` objects.
|
||||
Raises a :exc:`NotImplementedError` when called.
|
||||
|
||||
.. method:: update([iterable-or-mapping])
|
||||
|
||||
Like :meth:`dict.update` but adds-in counts instead of replacing them.
|
||||
|
||||
Elements are counted from an *iterable* or added-in from another
|
||||
*mapping* (or counter)::
|
||||
|
||||
>>> c = Counter('which')
|
||||
>>> c.update('witch') # add elements from another iterable
|
||||
>>> d = Counter('watch')
|
||||
>>> c.update(d) # add elements from another counter
|
||||
>>> c['h'] # four 'h' in which, witch, and watch
|
||||
4
|
||||
|
||||
Common patterns for working with :class:`Counter` objects::
|
||||
|
||||
sum(c.values()) # total of all counts
|
||||
c.clear() # reset all counts
|
||||
list(c) # list unique elements
|
||||
set(c) # convert to a set
|
||||
dict(c) # convert to a regular dictionary
|
||||
c.items() # convert to a list of (elem, cnt) pairs
|
||||
Counter(dict(list_of_pairs)) # convert from a list of (elem, cnt) pairs
|
||||
c.most_common()[:-n:-1] # n least common elements
|
||||
|
||||
**References**:
|
||||
|
||||
* Wikipedia entry for `Multisets <http://en.wikipedia.org/wiki/Multiset>`_
|
||||
|
||||
* `Bag class <http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html>`_
|
||||
in Smalltalk
|
||||
* `C++ multisets <http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm>`_
|
||||
tutorial with standalone examples
|
||||
|
||||
* An early Python `Bag <http://code.activestate.com/recipes/259174/>`_ recipe
|
||||
for Python 2.4 and a `Counter <http://code.activestate.com/recipes/576611/>`_
|
||||
comformant recipe for Python 2.5 and later
|
||||
|
||||
* Use cases for multisets and mathematical operations on multisets.
|
||||
Knuth, Donald. The Art of Computer Programming Volume II,
|
||||
Section 4.6.3, Exercise 19
|
||||
|
||||
|
||||
|
||||
.. _deque-objects:
|
||||
|
||||
:class:`deque` objects
|
||||
|
|
|
@ -10,6 +10,9 @@ from _collections import deque, defaultdict
|
|||
from operator import itemgetter as _itemgetter
|
||||
from keyword import iskeyword as _iskeyword
|
||||
import sys as _sys
|
||||
import heapq as _heapq
|
||||
from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
|
||||
|
||||
|
||||
################################################################################
|
||||
### namedtuple
|
||||
|
@ -113,6 +116,162 @@ def namedtuple(typename, field_names, verbose=False):
|
|||
return result
|
||||
|
||||
|
||||
########################################################################
|
||||
### Counter
|
||||
########################################################################
|
||||
|
||||
class Counter(dict):
|
||||
'''Dict subclass for counting hashable items. Sometimes called a bag
|
||||
or multiset. Elements are stored as dictionary keys and their counts
|
||||
are stored as dictionary values.
|
||||
|
||||
>>> c = Counter('abracadabra') # count elements from a string
|
||||
|
||||
>>> c.most_common(3) # three most common elements
|
||||
[('a', 5), ('r', 2), ('b', 2)]
|
||||
>>> sorted(c) # list all unique elements
|
||||
['a', 'b', 'c', 'd', 'r']
|
||||
>>> ''.join(sorted(c.elements())) # list elements with repetitions
|
||||
'aaaaabbcdrr'
|
||||
>>> sum(c.values()) # total of all counts
|
||||
11
|
||||
|
||||
>>> c['a'] # count of letter 'a'
|
||||
5
|
||||
>>> for elem in 'shazam': # update counts from an iterable
|
||||
... c[elem] += 1 # by adding 1 to each element's count
|
||||
>>> c['a'] # now there are seven 'a'
|
||||
7
|
||||
>>> del c['r'] # remove all 'r'
|
||||
>>> c['r'] # now there are zero 'r'
|
||||
0
|
||||
|
||||
>>> d = Counter('simsalabim') # make another counter
|
||||
>>> c.update(d) # add in the second counter
|
||||
>>> c['a'] # now there are nine 'a'
|
||||
9
|
||||
|
||||
>>> c.clear() # empty the counter
|
||||
>>> c
|
||||
Counter()
|
||||
|
||||
Note: If a count is set to zero or reduced to zero, it will remain
|
||||
in the counter until the entry is deleted or the counter is cleared:
|
||||
|
||||
>>> c = Counter('aaabbc')
|
||||
>>> c['b'] -= 2 # reduce the count of 'b' by two
|
||||
>>> c.most_common() # 'b' is still in, but its count is zero
|
||||
[('a', 3), ('c', 1), ('b', 0)]
|
||||
|
||||
'''
|
||||
# References:
|
||||
# http://en.wikipedia.org/wiki/Multiset
|
||||
# http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
|
||||
# http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
|
||||
# http://code.activestate.com/recipes/259174/
|
||||
# Knuth, TAOCP Vol. II section 4.6.3
|
||||
|
||||
def __init__(self, iterable=None):
|
||||
'''Create a new, empty Counter object. And if given, count elements
|
||||
from an input iterable. Or, initialize the count from another mapping
|
||||
of elements to their counts.
|
||||
|
||||
>>> c = Counter() # a new, empty counter
|
||||
>>> c = Counter('gallahad') # a new counter from an iterable
|
||||
>>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
|
||||
|
||||
'''
|
||||
self.update(iterable)
|
||||
|
||||
def __missing__(self, key):
|
||||
'The count of elements not in the Counter is zero.'
|
||||
# Needed so that self[missing_item] does not raise KeyError
|
||||
return 0
|
||||
|
||||
def most_common(self, n=None):
|
||||
'''List the n most common elements and their counts from the most
|
||||
common to the least. If n is None, then list all element counts.
|
||||
|
||||
>>> Counter('abracadabra').most_common(3)
|
||||
[('a', 5), ('r', 2), ('b', 2)]
|
||||
|
||||
'''
|
||||
# Emulate Bag.sortedByCount from Smalltalk
|
||||
if n is None:
|
||||
return sorted(self.items(), key=_itemgetter(1), reverse=True)
|
||||
return _heapq.nlargest(n, self.items(), key=_itemgetter(1))
|
||||
|
||||
def elements(self):
|
||||
'''Iterator over elements repeating each as many times as its count.
|
||||
|
||||
>>> c = Counter('ABCABC')
|
||||
>>> sorted(c.elements())
|
||||
['A', 'A', 'B', 'B', 'C', 'C']
|
||||
|
||||
# Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
|
||||
>>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
|
||||
>>> product = 1
|
||||
>>> for factor in prime_factors.elements(): # loop over factors
|
||||
... product *= factor # and multiply them
|
||||
>>> product
|
||||
1836
|
||||
|
||||
Note, if an element's count has been set to zero or is a negative
|
||||
number, elements() will ignore it.
|
||||
|
||||
'''
|
||||
# Emulate Bag.do from Smalltalk and Multiset.begin from C++.
|
||||
return _chain.from_iterable(_starmap(_repeat, self.items()))
|
||||
|
||||
# Override dict methods where necessary
|
||||
|
||||
@classmethod
|
||||
def fromkeys(cls, iterable, v=None):
|
||||
# There is no equivalent method for counters because setting v=1
|
||||
# means that no element can have a count greater than one.
|
||||
raise NotImplementedError(
|
||||
'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
|
||||
|
||||
def update(self, iterable=None):
|
||||
'''Like dict.update() but add counts instead of replacing them.
|
||||
|
||||
Source can be an iterable, a dictionary, or another Counter instance.
|
||||
|
||||
>>> c = Counter('which')
|
||||
>>> c.update('witch') # add elements from another iterable
|
||||
>>> d = Counter('watch')
|
||||
>>> c.update(d) # add elements from another counter
|
||||
>>> c['h'] # four 'h' in which, witch, and watch
|
||||
4
|
||||
|
||||
'''
|
||||
# The regular dict.update() operation makes no sense here because the
|
||||
# replace behavior results in the some of original untouched counts
|
||||
# being mixed-in with all of the other counts for a mismash that
|
||||
# doesn't have a straight-forward interpretation in most counting
|
||||
# contexts. Instead, we look to Knuth for suggested operations on
|
||||
# multisets and implement the union-add operation discussed in
|
||||
# TAOCP Volume II section 4.6.3 exercise 19. The Wikipedia entry for
|
||||
# multisets calls that operation a sum or join.
|
||||
|
||||
if iterable is not None:
|
||||
if isinstance(iterable, Mapping):
|
||||
for elem, count in iterable.items():
|
||||
self[elem] += count
|
||||
else:
|
||||
for elem in iterable:
|
||||
self[elem] += 1
|
||||
|
||||
def copy(self):
|
||||
'Like dict.copy() but returns a Counter instance instead of a dict.'
|
||||
return Counter(self)
|
||||
|
||||
def __repr__(self):
|
||||
if not self:
|
||||
return '%s()' % self.__class__.__name__
|
||||
items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
|
||||
return '%s({%s})' % (self.__class__.__name__, items)
|
||||
|
||||
|
||||
################################################################################
|
||||
### UserDict
|
||||
|
|
|
@ -2,7 +2,7 @@
|
|||
|
||||
import unittest, doctest
|
||||
from test import support
|
||||
from collections import namedtuple
|
||||
from collections import namedtuple, Counter, Mapping
|
||||
import pickle, copy
|
||||
from collections import Hashable, Iterable, Iterator
|
||||
from collections import Sized, Container, Callable
|
||||
|
@ -357,11 +357,104 @@ class TestCollectionABCs(unittest.TestCase):
|
|||
self.failUnless(issubclass(sample, MutableSequence))
|
||||
self.failIf(issubclass(str, MutableSequence))
|
||||
|
||||
class TestCounter(unittest.TestCase):
|
||||
|
||||
def test_basics(self):
|
||||
c = Counter('abcaba')
|
||||
self.assert_(isinstance(c, dict))
|
||||
self.assert_(isinstance(c, Mapping))
|
||||
self.assert_(issubclass(Counter, dict))
|
||||
self.assert_(issubclass(Counter, Mapping))
|
||||
self.assertEqual(len(c), 3)
|
||||
self.assertEqual(sum(c.values()), 6)
|
||||
self.assertEqual(sorted(c.values()), [1, 2, 3])
|
||||
self.assertEqual(sorted(c.keys()), ['a', 'b', 'c'])
|
||||
self.assertEqual(sorted(c), ['a', 'b', 'c'])
|
||||
self.assertEqual(sorted(c.items()),
|
||||
[('a', 3), ('b', 2), ('c', 1)])
|
||||
self.assertEqual(c['b'], 2)
|
||||
self.assertEqual(c['z'], 0)
|
||||
self.assertEqual(c.__contains__('c'), True)
|
||||
self.assertEqual(c.__contains__('z'), False)
|
||||
self.assertEqual(c.get('b', 10), 2)
|
||||
self.assertEqual(c.get('z', 10), 10)
|
||||
self.assertEqual(c, dict(a=3, b=2, c=1))
|
||||
self.assertEqual(repr(c), "Counter({'a': 3, 'b': 2, 'c': 1})")
|
||||
self.assertEqual(c.most_common(), [('a', 3), ('b', 2), ('c', 1)])
|
||||
for i in range(5):
|
||||
self.assertEqual(c.most_common(i),
|
||||
[('a', 3), ('b', 2), ('c', 1)][:i])
|
||||
self.assertEqual(''.join(sorted(c.elements())), 'aaabbc')
|
||||
c['a'] += 1 # increment an existing value
|
||||
c['b'] -= 2 # sub existing value to zero
|
||||
del c['c'] # remove an entry
|
||||
c['d'] -= 2 # sub from a missing value
|
||||
c['e'] = -5 # directly assign a missing value
|
||||
c['f'] += 4 # add to a missing value
|
||||
self.assertEqual(c, dict(a=4, b=0, d=-2, e=-5, f=4))
|
||||
self.assertEqual(''.join(sorted(c.elements())), 'aaaaffff')
|
||||
self.assertEqual(c.pop('f'), 4)
|
||||
self.assertEqual('f' in c, False)
|
||||
for i in range(3):
|
||||
elem, cnt = c.popitem()
|
||||
self.assertEqual(elem in c, False)
|
||||
c.clear()
|
||||
self.assertEqual(c, {})
|
||||
self.assertEqual(repr(c), 'Counter()')
|
||||
self.assertRaises(NotImplementedError, Counter.fromkeys, 'abc')
|
||||
self.assertRaises(TypeError, hash, c)
|
||||
c.update(dict(a=5, b=3, c=1))
|
||||
c.update(Counter('a' * 50 + 'b' * 30))
|
||||
c.update() # test case with no args
|
||||
c.__init__('a' * 500 + 'b' * 300)
|
||||
c.__init__('cdc')
|
||||
c.__init__()
|
||||
self.assertEqual(c, dict(a=555, b=333, c=3, d=1))
|
||||
self.assertEqual(c.setdefault('d', 5), 1)
|
||||
self.assertEqual(c['d'], 1)
|
||||
self.assertEqual(c.setdefault('e', 5), 5)
|
||||
self.assertEqual(c['e'], 5)
|
||||
|
||||
def test_copying(self):
|
||||
# Check that counters are copyable, deepcopyable, picklable, and
|
||||
#have a repr/eval round-trip
|
||||
words = Counter('which witch had which witches wrist watch'.split())
|
||||
update_test = Counter()
|
||||
update_test.update(words)
|
||||
for i, dup in enumerate([
|
||||
words.copy(),
|
||||
copy.copy(words),
|
||||
copy.deepcopy(words),
|
||||
pickle.loads(pickle.dumps(words, 0)),
|
||||
pickle.loads(pickle.dumps(words, 1)),
|
||||
pickle.loads(pickle.dumps(words, 2)),
|
||||
pickle.loads(pickle.dumps(words, -1)),
|
||||
eval(repr(words)),
|
||||
update_test,
|
||||
Counter(words),
|
||||
]):
|
||||
msg = (i, dup, words)
|
||||
self.assert_(dup is not words)
|
||||
self.assertEquals(dup, words)
|
||||
self.assertEquals(len(dup), len(words))
|
||||
self.assertEquals(type(dup), type(words))
|
||||
|
||||
def test_conversions(self):
|
||||
# Convert to: set, list, dict
|
||||
s = 'she sells sea shells by the sea shore'
|
||||
self.assertEqual(sorted(Counter(s).elements()), sorted(s))
|
||||
self.assertEqual(sorted(Counter(s)), sorted(set(s)))
|
||||
self.assertEqual(dict(Counter(s)), dict(Counter(s).items()))
|
||||
self.assertEqual(set(Counter(s)), set(s))
|
||||
|
||||
|
||||
|
||||
import doctest, collections
|
||||
|
||||
def test_main(verbose=None):
|
||||
NamedTupleDocs = doctest.DocTestSuite(module=collections)
|
||||
test_classes = [TestNamedTuple, NamedTupleDocs, TestOneTrickPonyABCs, TestCollectionABCs]
|
||||
test_classes = [TestNamedTuple, NamedTupleDocs, TestOneTrickPonyABCs,
|
||||
TestCollectionABCs, TestCounter]
|
||||
support.run_unittest(*test_classes)
|
||||
support.run_doctest(collections, verbose)
|
||||
|
||||
|
|
|
@ -130,6 +130,9 @@ Library
|
|||
appropriately when it is being used via socket.makefile() objects
|
||||
rather than delaying the close by waiting for garbage collection to do it.
|
||||
|
||||
- Issue #1696199: Add collections.Counter() for rapid and convenient
|
||||
counting.
|
||||
|
||||
- Issue #3860: GzipFile and BZ2File now support the context manager protocol.
|
||||
|
||||
- Issue #4867: Fixed a crash in ctypes when passing a string to a
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue