mirror of
				https://github.com/python/cpython.git
				synced 2025-11-04 03:44:55 +00:00 
			
		
		
		
	
		
			
				
	
	
		
			495 lines
		
	
	
	
		
			17 KiB
		
	
	
	
		
			TeX
		
	
	
	
	
	
			
		
		
	
	
			495 lines
		
	
	
	
		
			17 KiB
		
	
	
	
		
			TeX
		
	
	
	
	
	
\section{\module{itertools} ---
 | 
						|
         Functions creating iterators for efficient looping}
 | 
						|
 | 
						|
\declaremodule{standard}{itertools}
 | 
						|
\modulesynopsis{Functions creating iterators for efficient looping.}
 | 
						|
\moduleauthor{Raymond Hettinger}{python@rcn.com}
 | 
						|
\sectionauthor{Raymond Hettinger}{python@rcn.com}
 | 
						|
\versionadded{2.3}
 | 
						|
 | 
						|
 | 
						|
This module implements a number of iterator building blocks inspired
 | 
						|
by constructs from the Haskell and SML programming languages.  Each
 | 
						|
has been recast in a form suitable for Python.
 | 
						|
 | 
						|
The module standardizes a core set of fast, memory efficient tools
 | 
						|
that are useful by themselves or in combination.  Standardization helps
 | 
						|
avoid the readability and reliability problems which arise when many
 | 
						|
different individuals create their own slightly varying implementations,
 | 
						|
each with their own quirks and naming conventions.
 | 
						|
 | 
						|
The tools are designed to combine readily with one another.  This makes
 | 
						|
it easy to construct more specialized tools succinctly and efficiently
 | 
						|
in pure Python.
 | 
						|
 | 
						|
For instance, SML provides a tabulation tool: \code{tabulate(f)}
 | 
						|
which produces a sequence \code{f(0), f(1), ...}.  This toolbox
 | 
						|
provides \function{imap()} and \function{count()} which can be combined
 | 
						|
to form \code{imap(f, count())} and produce an equivalent result.
 | 
						|
 | 
						|
Likewise, the functional tools are designed to work well with the
 | 
						|
high-speed functions provided by the \refmodule{operator} module.
 | 
						|
 | 
						|
The module author welcomes suggestions for other basic building blocks
 | 
						|
to be added to future versions of the module.
 | 
						|
 | 
						|
Whether cast in pure python form or C code, tools that use iterators
 | 
						|
are more memory efficient (and faster) than their list based counterparts.
 | 
						|
Adopting the principles of just-in-time manufacturing, they create
 | 
						|
data when and where needed instead of consuming memory with the
 | 
						|
computer equivalent of ``inventory''.
 | 
						|
 | 
						|
The performance advantage of iterators becomes more acute as the number
 | 
						|
of elements increases -- at some point, lists grow large enough to
 | 
						|
severely impact memory cache performance and start running slowly.
 | 
						|
 | 
						|
\begin{seealso}
 | 
						|
  \seetext{The Standard ML Basis Library,
 | 
						|
           \citetitle[http://www.standardml.org/Basis/]
 | 
						|
           {The Standard ML Basis Library}.}
 | 
						|
 | 
						|
  \seetext{Haskell, A Purely Functional Language,
 | 
						|
           \citetitle[http://www.haskell.org/definition/]
 | 
						|
           {Definition of Haskell and the Standard Libraries}.}
 | 
						|
\end{seealso}
 | 
						|
 | 
						|
 | 
						|
\subsection{Itertool functions \label{itertools-functions}}
 | 
						|
 | 
						|
The following module functions all construct and return iterators.
 | 
						|
Some provide streams of infinite length, so they should only be accessed
 | 
						|
by functions or loops that truncate the stream.
 | 
						|
 | 
						|
\begin{funcdesc}{chain}{*iterables}
 | 
						|
  Make an iterator that returns elements from the first iterable until
 | 
						|
  it is exhausted, then proceeds to the next iterable, until all of the
 | 
						|
  iterables are exhausted.  Used for treating consecutive sequences as
 | 
						|
  a single sequence.  Equivalent to:
 | 
						|
 | 
						|
  \begin{verbatim}
 | 
						|
     def chain(*iterables):
 | 
						|
         for it in iterables:
 | 
						|
             for element in it:
 | 
						|
                 yield element
 | 
						|
  \end{verbatim}
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{count}{\optional{n}}
 | 
						|
  Make an iterator that returns consecutive integers starting with \var{n}.
 | 
						|
  If not specified \var{n} defaults to zero.  
 | 
						|
  Does not currently support python long integers.  Often used as an
 | 
						|
  argument to \function{imap()} to generate consecutive data points.
 | 
						|
  Also, used with \function{izip()} to add sequence numbers.  Equivalent to:
 | 
						|
 | 
						|
  \begin{verbatim}
 | 
						|
     def count(n=0):
 | 
						|
         while True:
 | 
						|
             yield n
 | 
						|
             n += 1
 | 
						|
  \end{verbatim}
 | 
						|
 | 
						|
  Note, \function{count()} does not check for overflow and will return
 | 
						|
  negative numbers after exceeding \code{sys.maxint}.  This behavior
 | 
						|
  may change in the future.
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{cycle}{iterable}
 | 
						|
  Make an iterator returning elements from the iterable and saving a
 | 
						|
  copy of each.  When the iterable is exhausted, return elements from
 | 
						|
  the saved copy.  Repeats indefinitely.  Equivalent to:
 | 
						|
 | 
						|
  \begin{verbatim}
 | 
						|
     def cycle(iterable):
 | 
						|
         saved = []
 | 
						|
         for element in iterable:
 | 
						|
             yield element
 | 
						|
             saved.append(element)
 | 
						|
         while saved:
 | 
						|
             for element in saved:
 | 
						|
                   yield element
 | 
						|
  \end{verbatim}
 | 
						|
 | 
						|
  Note, this member of the toolkit may require significant
 | 
						|
  auxiliary storage (depending on the length of the iterable).
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{dropwhile}{predicate, iterable}
 | 
						|
  Make an iterator that drops elements from the iterable as long as
 | 
						|
  the predicate is true; afterwards, returns every element.  Note,
 | 
						|
  the iterator does not produce \emph{any} output until the predicate
 | 
						|
  is true, so it may have a lengthy start-up time.  Equivalent to:
 | 
						|
 | 
						|
  \begin{verbatim}
 | 
						|
     def dropwhile(predicate, iterable):
 | 
						|
         iterable = iter(iterable)
 | 
						|
         for x in iterable:
 | 
						|
             if not predicate(x):
 | 
						|
                 yield x
 | 
						|
                 break
 | 
						|
         for x in iterable:
 | 
						|
             yield x
 | 
						|
  \end{verbatim}
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{groupby}{iterable\optional{, key}}
 | 
						|
  Make an iterator that returns consecutive keys and groups from the
 | 
						|
  \var{iterable}.  \var{key} is a function computing a key value for each
 | 
						|
  element.  If not specified or is \code{None}, \var{key} defaults to an
 | 
						|
  identity function and returns  the element unchanged.  Generally, the
 | 
						|
  iterable needs to already be sorted on the same key function.
 | 
						|
 | 
						|
  The returned group is itself an iterator that shares the underlying
 | 
						|
  iterable with \function{groupby()}.  Because the source is shared, when
 | 
						|
  the \function{groupby} object is advanced, the previous group is no
 | 
						|
  longer visible.  So, if that data is needed later, it should be stored
 | 
						|
  as a list:
 | 
						|
 | 
						|
  \begin{verbatim}
 | 
						|
    groups = []
 | 
						|
    uniquekeys = []
 | 
						|
    for k, g in groupby(data, keyfunc):
 | 
						|
        groups.append(list(g))      # Store group iterator as a list
 | 
						|
        uniquekeys.append(k)
 | 
						|
  \end{verbatim}
 | 
						|
 | 
						|
  \function{groupby()} is equivalent to:
 | 
						|
 | 
						|
  \begin{verbatim}
 | 
						|
    class groupby(object):
 | 
						|
        def __init__(self, iterable, key=None):
 | 
						|
            if key is None:
 | 
						|
                key = lambda x: x
 | 
						|
            self.keyfunc = key
 | 
						|
            self.it = iter(iterable)
 | 
						|
            self.tgtkey = self.currkey = self.currvalue = xrange(0)
 | 
						|
        def __iter__(self):
 | 
						|
            return self
 | 
						|
        def next(self):
 | 
						|
            while self.currkey == self.tgtkey:
 | 
						|
                self.currvalue = self.it.next() # Exit on StopIteration
 | 
						|
                self.currkey = self.keyfunc(self.currvalue)
 | 
						|
            self.tgtkey = self.currkey
 | 
						|
            return (self.currkey, self._grouper(self.tgtkey))
 | 
						|
        def _grouper(self, tgtkey):
 | 
						|
            while self.currkey == tgtkey:
 | 
						|
                yield self.currvalue
 | 
						|
                self.currvalue = self.it.next() # Exit on StopIteration
 | 
						|
                self.currkey = self.keyfunc(self.currvalue)
 | 
						|
  \end{verbatim}
 | 
						|
  \versionadded{2.4}
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{ifilter}{predicate, iterable}
 | 
						|
  Make an iterator that filters elements from iterable returning only
 | 
						|
  those for which the predicate is \code{True}.
 | 
						|
  If \var{predicate} is \code{None}, return the items that are true.
 | 
						|
  Equivalent to:
 | 
						|
 | 
						|
  \begin{verbatim}
 | 
						|
     def ifilter(predicate, iterable):
 | 
						|
         if predicate is None:
 | 
						|
             predicate = bool
 | 
						|
         for x in iterable:
 | 
						|
             if predicate(x):
 | 
						|
                 yield x
 | 
						|
  \end{verbatim}
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{ifilterfalse}{predicate, iterable}
 | 
						|
  Make an iterator that filters elements from iterable returning only
 | 
						|
  those for which the predicate is \code{False}.
 | 
						|
  If \var{predicate} is \code{None}, return the items that are false.
 | 
						|
  Equivalent to:
 | 
						|
 | 
						|
  \begin{verbatim}
 | 
						|
     def ifilterfalse(predicate, iterable):
 | 
						|
         if predicate is None:
 | 
						|
             predicate = bool
 | 
						|
         for x in iterable:
 | 
						|
             if not predicate(x):
 | 
						|
                 yield x
 | 
						|
  \end{verbatim}
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{imap}{function, *iterables}
 | 
						|
  Make an iterator that computes the function using arguments from
 | 
						|
  each of the iterables.  If \var{function} is set to \code{None}, then
 | 
						|
  \function{imap()} returns the arguments as a tuple.  Like
 | 
						|
  \function{map()} but stops when the shortest iterable is exhausted
 | 
						|
  instead of filling in \code{None} for shorter iterables.  The reason
 | 
						|
  for the difference is that infinite iterator arguments are typically
 | 
						|
  an error for \function{map()} (because the output is fully evaluated)
 | 
						|
  but represent a common and useful way of supplying arguments to
 | 
						|
  \function{imap()}.
 | 
						|
  Equivalent to:
 | 
						|
 | 
						|
  \begin{verbatim}
 | 
						|
     def imap(function, *iterables):
 | 
						|
         iterables = map(iter, iterables)
 | 
						|
         while True:
 | 
						|
             args = [i.next() for i in iterables]
 | 
						|
             if function is None:
 | 
						|
                 yield tuple(args)
 | 
						|
             else:
 | 
						|
                 yield function(*args)
 | 
						|
  \end{verbatim}
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{islice}{iterable, \optional{start,} stop \optional{, step}}
 | 
						|
  Make an iterator that returns selected elements from the iterable.
 | 
						|
  If \var{start} is non-zero, then elements from the iterable are skipped
 | 
						|
  until start is reached.  Afterward, elements are returned consecutively
 | 
						|
  unless \var{step} is set higher than one which results in items being
 | 
						|
  skipped.  If \var{stop} is \code{None}, then iteration continues until
 | 
						|
  the iterator is exhausted, if at all; otherwise, it stops at the specified
 | 
						|
  position.  Unlike regular slicing,
 | 
						|
  \function{islice()} does not support negative values for \var{start},
 | 
						|
  \var{stop}, or \var{step}.  Can be used to extract related fields
 | 
						|
  from data where the internal structure has been flattened (for
 | 
						|
  example, a multi-line report may list a name field on every
 | 
						|
  third line).  Equivalent to:
 | 
						|
 | 
						|
  \begin{verbatim}
 | 
						|
     def islice(iterable, *args):
 | 
						|
         s = slice(*args)
 | 
						|
         next, stop, step = s.start or 0, s.stop, s.step or 1
 | 
						|
         for cnt, element in enumerate(iterable):
 | 
						|
             if cnt < next:
 | 
						|
                 continue
 | 
						|
             if stop is not None and cnt >= stop:
 | 
						|
                 break
 | 
						|
             yield element
 | 
						|
             next += step             
 | 
						|
  \end{verbatim}
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{izip}{*iterables}
 | 
						|
  Make an iterator that aggregates elements from each of the iterables.
 | 
						|
  Like \function{zip()} except that it returns an iterator instead of
 | 
						|
  a list.  Used for lock-step iteration over several iterables at a
 | 
						|
  time.  Equivalent to:
 | 
						|
 | 
						|
  \begin{verbatim}
 | 
						|
     def izip(*iterables):
 | 
						|
         iterables = map(iter, iterables)
 | 
						|
         while iterables:
 | 
						|
             result = [i.next() for i in iterables]
 | 
						|
             yield tuple(result)
 | 
						|
  \end{verbatim}
 | 
						|
 | 
						|
  \versionchanged[When no iterables are specified, returns a zero length
 | 
						|
                  iterator instead of raising a TypeError exception]{2.4}  
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{repeat}{object\optional{, times}}
 | 
						|
  Make an iterator that returns \var{object} over and over again.
 | 
						|
  Runs indefinitely unless the \var{times} argument is specified.
 | 
						|
  Used as argument to \function{imap()} for invariant parameters
 | 
						|
  to the called function.  Also used with \function{izip()} to create
 | 
						|
  an invariant part of a tuple record.  Equivalent to:
 | 
						|
 | 
						|
  \begin{verbatim}
 | 
						|
     def repeat(object, times=None):
 | 
						|
         if times is None:
 | 
						|
             while True:
 | 
						|
                 yield object
 | 
						|
         else:
 | 
						|
             for i in xrange(times):
 | 
						|
                 yield object
 | 
						|
  \end{verbatim}
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{starmap}{function, iterable}
 | 
						|
  Make an iterator that computes the function using arguments tuples
 | 
						|
  obtained from the iterable.  Used instead of \function{imap()} when
 | 
						|
  argument parameters are already grouped in tuples from a single iterable
 | 
						|
  (the data has been ``pre-zipped'').  The difference between
 | 
						|
  \function{imap()} and \function{starmap()} parallels the distinction
 | 
						|
  between \code{function(a,b)} and \code{function(*c)}.
 | 
						|
  Equivalent to:
 | 
						|
 | 
						|
  \begin{verbatim}
 | 
						|
     def starmap(function, iterable):
 | 
						|
         iterable = iter(iterable)
 | 
						|
         while True:
 | 
						|
             yield function(*iterable.next())
 | 
						|
  \end{verbatim}
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{takewhile}{predicate, iterable}
 | 
						|
  Make an iterator that returns elements from the iterable as long as
 | 
						|
  the predicate is true.  Equivalent to:
 | 
						|
 | 
						|
  \begin{verbatim}
 | 
						|
     def takewhile(predicate, iterable):
 | 
						|
         for x in iterable:
 | 
						|
             if predicate(x):
 | 
						|
                 yield x
 | 
						|
             else:
 | 
						|
                 break
 | 
						|
  \end{verbatim}
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
\begin{funcdesc}{tee}{iterable\optional{, n=2}}
 | 
						|
  Return \var{n} independent iterators from a single iterable.
 | 
						|
  The case where \var{n} is two is equivalent to:
 | 
						|
 | 
						|
  \begin{verbatim}
 | 
						|
     def tee(iterable):
 | 
						|
         def gen(next, data={}, cnt=[0]):
 | 
						|
             for i in count():
 | 
						|
                 if i == cnt[0]:
 | 
						|
                     item = data[i] = next()
 | 
						|
                     cnt[0] += 1
 | 
						|
                 else:
 | 
						|
                     item = data.pop(i)
 | 
						|
                 yield item
 | 
						|
         it = iter(iterable)
 | 
						|
         return (gen(it.next), gen(it.next))
 | 
						|
  \end{verbatim}
 | 
						|
 | 
						|
  Note, once \function{tee()} has made a split, the original \var{iterable}
 | 
						|
  should not be used anywhere else; otherwise, the \var{iterable} could get
 | 
						|
  advanced without the tee objects being informed.
 | 
						|
 | 
						|
  Note, this member of the toolkit may require significant auxiliary
 | 
						|
  storage (depending on how much temporary data needs to be stored).
 | 
						|
  In general, if one iterator is going to use most or all of the data before
 | 
						|
  the other iterator, it is faster to use \function{list()} instead of
 | 
						|
  \function{tee()}.
 | 
						|
  \versionadded{2.4}
 | 
						|
\end{funcdesc}
 | 
						|
 | 
						|
 | 
						|
\subsection{Examples \label{itertools-example}}
 | 
						|
 | 
						|
The following examples show common uses for each tool and
 | 
						|
demonstrate ways they can be combined.
 | 
						|
 | 
						|
\begin{verbatim}
 | 
						|
 | 
						|
>>> amounts = [120.15, 764.05, 823.14]
 | 
						|
>>> for checknum, amount in izip(count(1200), amounts):
 | 
						|
...     print 'Check %d is for $%.2f' % (checknum, amount)
 | 
						|
...
 | 
						|
Check 1200 is for $120.15
 | 
						|
Check 1201 is for $764.05
 | 
						|
Check 1202 is for $823.14
 | 
						|
 | 
						|
>>> import operator
 | 
						|
>>> for cube in imap(operator.pow, xrange(1,4), repeat(3)):
 | 
						|
...    print cube
 | 
						|
...
 | 
						|
1
 | 
						|
8
 | 
						|
27
 | 
						|
 | 
						|
>>> reportlines = ['EuroPython', 'Roster', '', 'alex', '', 'laura',
 | 
						|
                  '', 'martin', '', 'walter', '', 'samuele']
 | 
						|
>>> for name in islice(reportlines, 3, None, 2):
 | 
						|
...    print name.title()
 | 
						|
...
 | 
						|
Alex
 | 
						|
Laura
 | 
						|
Martin
 | 
						|
Walter
 | 
						|
Samuele
 | 
						|
 | 
						|
# Show a dictionary sorted and grouped by value
 | 
						|
>>> from operator import itemgetter
 | 
						|
>>> d = dict(a=1, b=2, c=1, d=2, e=1, f=2, g=3)
 | 
						|
>>> di = sorted(d.iteritems(), key=itemgetter(1))
 | 
						|
>>> for k, g in groupby(di, key=itemgetter(1)):
 | 
						|
...     print k, map(itemgetter(0), g)
 | 
						|
...
 | 
						|
1 ['a', 'c', 'e']
 | 
						|
2 ['b', 'd', 'f']
 | 
						|
3 ['g']
 | 
						|
 | 
						|
# Find runs of consecutive numbers using groupby.  The key to the solution
 | 
						|
# is differencing with a range so that consecutive numbers all appear in
 | 
						|
# same group.
 | 
						|
>>> data = [ 1,  4,5,6, 10, 15,16,17,18, 22, 25,26,27,28]
 | 
						|
>>> for k, g in groupby(enumerate(data), lambda (i,x):i-x):
 | 
						|
...     print map(operator.itemgetter(1), g)
 | 
						|
... 
 | 
						|
[1]
 | 
						|
[4, 5, 6]
 | 
						|
[10]
 | 
						|
[15, 16, 17, 18]
 | 
						|
[22]
 | 
						|
[25, 26, 27, 28]
 | 
						|
 | 
						|
\end{verbatim}
 | 
						|
 | 
						|
This section shows how itertools can be combined to create other more
 | 
						|
powerful itertools.  Note that \function{enumerate()} and \method{iteritems()}
 | 
						|
already have efficient implementations.  They are included here
 | 
						|
to illustrate how higher level tools can be created from building blocks.
 | 
						|
 | 
						|
\begin{verbatim}
 | 
						|
def take(n, seq):
 | 
						|
    return list(islice(seq, n))
 | 
						|
 | 
						|
def enumerate(iterable):
 | 
						|
    return izip(count(), iterable)
 | 
						|
 | 
						|
def tabulate(function):
 | 
						|
    "Return function(0), function(1), ..."
 | 
						|
    return imap(function, count())
 | 
						|
 | 
						|
def iteritems(mapping):
 | 
						|
    return izip(mapping.iterkeys(), mapping.itervalues())
 | 
						|
 | 
						|
def nth(iterable, n):
 | 
						|
    "Returns the nth item"
 | 
						|
    return list(islice(iterable, n, n+1))
 | 
						|
 | 
						|
def all(seq, pred=bool):
 | 
						|
    "Returns True if pred(x) is True for every element in the iterable"
 | 
						|
    return False not in imap(pred, seq)
 | 
						|
 | 
						|
def any(seq, pred=bool):
 | 
						|
    "Returns True if pred(x) is True at least one element in the iterable"
 | 
						|
    return True in imap(pred, seq)
 | 
						|
 | 
						|
def no(seq, pred=bool):
 | 
						|
    "Returns True if pred(x) is False for every element in the iterable"
 | 
						|
    return True not in imap(pred, seq)
 | 
						|
 | 
						|
def quantify(seq, pred=bool):
 | 
						|
    "Count how many times the predicate is True in the sequence"
 | 
						|
    return sum(imap(pred, seq))
 | 
						|
 | 
						|
def padnone(seq):
 | 
						|
    "Returns the sequence elements and then returns None indefinitely"
 | 
						|
    return chain(seq, repeat(None))
 | 
						|
 | 
						|
def ncycles(seq, n):
 | 
						|
    "Returns the sequence elements n times"
 | 
						|
    return chain(*repeat(seq, n))
 | 
						|
 | 
						|
def dotproduct(vec1, vec2):
 | 
						|
    return sum(imap(operator.mul, vec1, vec2))
 | 
						|
 | 
						|
def flatten(listOfLists):
 | 
						|
    return list(chain(*listOfLists))
 | 
						|
 | 
						|
def repeatfunc(func, times=None, *args):
 | 
						|
    "Repeat calls to func with specified arguments."
 | 
						|
    "Example:  repeatfunc(random.random)"
 | 
						|
    if times is None:
 | 
						|
        return starmap(func, repeat(args))
 | 
						|
    else:
 | 
						|
        return starmap(func, repeat(args, times))
 | 
						|
 | 
						|
def pairwise(iterable):
 | 
						|
    "s -> (s0,s1), (s1,s2), (s2, s3), ..."
 | 
						|
    a, b = tee(iterable)
 | 
						|
    try:
 | 
						|
        b.next()
 | 
						|
    except StopIteration:
 | 
						|
        pass
 | 
						|
    return izip(a, b)
 | 
						|
 | 
						|
\end{verbatim}
 |