mirror of
https://github.com/python/cpython.git
synced 2025-09-27 18:59:43 +00:00
SF patch #969791: Add nlargest() and nsmallest() to heapq.
This commit is contained in:
parent
7d019664d7
commit
33ecffb65a
5 changed files with 75 additions and 5 deletions
|
@ -83,6 +83,30 @@ True
|
||||||
>>>
|
>>>
|
||||||
\end{verbatim}
|
\end{verbatim}
|
||||||
|
|
||||||
|
The module also offers two general purpose functions based on heaps.
|
||||||
|
|
||||||
|
\begin{funcdesc}{nlargest}{iterable, n}
|
||||||
|
Return a list with the \var{n} largest elements from the dataset defined
|
||||||
|
by \var{iterable}. Equivalent to: \code{sorted(iterable, reverse=True)[:n]}.
|
||||||
|
\versionadded{2.4}
|
||||||
|
\end{funcdesc}
|
||||||
|
|
||||||
|
\begin{funcdesc}{nsmallest}{iterable, n}
|
||||||
|
Return a list with the \var{n} smallest elements from the dataset defined
|
||||||
|
by \var{iterable}. Equivalent to: \code{sorted(iterable)[:n]}.
|
||||||
|
\versionadded{2.4}
|
||||||
|
\end{funcdesc}
|
||||||
|
|
||||||
|
Though the above functions appear symmetrical, they each have different
|
||||||
|
speed and space requirements. In particular, \function{nsmallest()}
|
||||||
|
operates on a full copy of the dataset. In contrast, \function{nlargest()}
|
||||||
|
only requires storage space for \var{n} elements.
|
||||||
|
|
||||||
|
Both functions perform best for smaller values of \var{n}. For larger
|
||||||
|
values, it is more efficient to use the \function{sorted()} function. Also,
|
||||||
|
when \code{n==1}, it is more efficient to use the builtin \function{min()}
|
||||||
|
and \function{max()} functions.
|
||||||
|
|
||||||
|
|
||||||
\subsection{Theory}
|
\subsection{Theory}
|
||||||
|
|
||||||
|
|
|
@ -449,7 +449,10 @@ improved performance: \module{Queue}, \module{mutex}, \module{shlex}
|
||||||
|
|
||||||
\item The \module{heapq} module has been converted to C. The resulting
|
\item The \module{heapq} module has been converted to C. The resulting
|
||||||
tenfold improvement in speed makes the module suitable for handling
|
tenfold improvement in speed makes the module suitable for handling
|
||||||
high volumes of data.
|
high volumes of data. In addition, the module has two new functions
|
||||||
|
\function{nlargest()} and \function{nsmallest()} that use heaps to
|
||||||
|
find the \var{n} largest or smallest values in a dataset without the
|
||||||
|
expense of a full sort.
|
||||||
|
|
||||||
\item The \module{imaplib} module now supports IMAP's THREAD command.
|
\item The \module{imaplib} module now supports IMAP's THREAD command.
|
||||||
(Contributed by Yves Dionne.)
|
(Contributed by Yves Dionne.)
|
||||||
|
|
36
Lib/heapq.py
36
Lib/heapq.py
|
@ -30,7 +30,7 @@ without surprises: heap[0] is the smallest item, and heap.sort()
|
||||||
maintains the heap invariant!
|
maintains the heap invariant!
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# Original code by Kevin O'Connor, augmented by Tim Peters
|
# Original code by Kevin O'Connor, augmented by Tim Peters and Raymond Hettinger
|
||||||
|
|
||||||
__about__ = """Heap queues
|
__about__ = """Heap queues
|
||||||
|
|
||||||
|
@ -126,7 +126,10 @@ Believe me, real good tape sorts were quite spectacular to watch!
|
||||||
From all times, sorting has always been a Great Art! :-)
|
From all times, sorting has always been a Great Art! :-)
|
||||||
"""
|
"""
|
||||||
|
|
||||||
__all__ = ['heappush', 'heappop', 'heapify', 'heapreplace']
|
__all__ = ['heappush', 'heappop', 'heapify', 'heapreplace', 'nlargest',
|
||||||
|
'nsmallest']
|
||||||
|
|
||||||
|
from itertools import islice, repeat
|
||||||
|
|
||||||
def heappush(heap, item):
|
def heappush(heap, item):
|
||||||
"""Push item onto heap, maintaining the heap invariant."""
|
"""Push item onto heap, maintaining the heap invariant."""
|
||||||
|
@ -168,6 +171,35 @@ def heapify(x):
|
||||||
for i in reversed(xrange(n//2)):
|
for i in reversed(xrange(n//2)):
|
||||||
_siftup(x, i)
|
_siftup(x, i)
|
||||||
|
|
||||||
|
def nlargest(iterable, n):
    """Find the n largest elements in a dataset.

    Equivalent to:  sorted(iterable, reverse=True)[:n]

    Returns a new list, largest element first.  Only the first n items of
    the iterable are held at any one time (as a min-heap); every later item
    either replaces the current smallest candidate or is discarded.  If the
    iterable has fewer than n items, all of them are returned.  A negative
    n yields an empty list, matching nsmallest().
    """
    # islice() rejects a negative stop with ValueError, whereas nsmallest()
    # quietly returns [] for the same input; make the two functions agree.
    # (None is passed through untouched so islice() treats it as before.)
    if n is not None and n < 0:
        return []
    it = iter(iterable)
    result = list(islice(it, n))
    if not result:
        return result
    heapify(result)
    _heapreplace = heapreplace      # local alias: cheaper lookup in the loop
    sol = result[0]         # sol --> smallest of the nlargest
    for elem in it:
        if elem <= sol:
            # Cannot displace any current candidate.
            continue
        _heapreplace(result, elem)
        sol = result[0]
    result.sort(reverse=True)
    return result
|
||||||
|
|
||||||
|
def nsmallest(iterable, n):
    """Find the n smallest elements in a dataset.

    Equivalent to:  sorted(iterable)[:n]

    Returns a new list, smallest element first.  The whole iterable is
    copied into a private list, heapified, and then popped
    min(n, len(data)) times, so the caller's data is left untouched.
    A count of zero or less yields an empty list.
    """
    h = list(iterable)
    heapify(h)
    # Never pop more items than the heap holds.
    count = min(n, len(h))
    # Build the result explicitly rather than relying on map() being eager;
    # each heappop() mutates h, so the pops must happen right here.
    return [heappop(h) for _ in repeat(None, count)]
|
||||||
|
|
||||||
# 'heap' is a heap at all indices >= startpos, except possibly for pos. pos
|
# 'heap' is a heap at all indices >= startpos, except possibly for pos. pos
|
||||||
# is the index of a leaf with a possibly out-of-order value. Restore the
|
# is the index of a leaf with a possibly out-of-order value. Restore the
|
||||||
# heap invariant.
|
# heap invariant.
|
||||||
|
|
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
from test.test_support import verify, vereq, verbose, TestFailed
|
from test.test_support import verify, vereq, verbose, TestFailed
|
||||||
|
|
||||||
from heapq import heappush, heappop, heapify, heapreplace
|
from heapq import heappush, heappop, heapify, heapreplace, nlargest, nsmallest
|
||||||
import random
|
import random
|
||||||
|
|
||||||
def check_invariant(heap):
|
def check_invariant(heap):
|
||||||
|
@ -84,6 +84,15 @@ def test_main():
|
||||||
data.sort()
|
data.sort()
|
||||||
sorted = [heappop(heap) for i in range(size)]
|
sorted = [heappop(heap) for i in range(size)]
|
||||||
vereq(data, sorted)
|
vereq(data, sorted)
|
||||||
|
|
||||||
|
# 7) Check nlargest() and nsmallest()
|
||||||
|
data = [random.randrange(2000) for i in range(1000)]
|
||||||
|
copy = data[:]
|
||||||
|
copy.sort(reverse=True)
|
||||||
|
vereq(nlargest(data, 400), copy[:400])
|
||||||
|
copy.sort()
|
||||||
|
vereq(nsmallest(data, 400), copy[:400])
|
||||||
|
|
||||||
# Make user happy
|
# Make user happy
|
||||||
if verbose:
|
if verbose:
|
||||||
print "All OK"
|
print "All OK"
|
||||||
|
|
|
@ -416,7 +416,9 @@ Library
|
||||||
os.path.exists(), switched to using os.lstat() directly if possible.
|
os.path.exists(), switched to using os.lstat() directly if possible.
|
||||||
|
|
||||||
- bisect.py and heapq.py now have underlying C implementations
|
- bisect.py and heapq.py now have underlying C implementations
|
||||||
for better performance
|
for better performance.
|
||||||
|
|
||||||
|
- heapq.py has two new functions, nsmallest() and nlargest().
|
||||||
|
|
||||||
- traceback.format_exc has been added (similar to print_exc but it returns
|
- traceback.format_exc has been added (similar to print_exc but it returns
|
||||||
a string).
|
a string).
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue