mirror of
https://github.com/python/cpython.git
synced 2025-12-11 03:20:01 +00:00
Hmm! I thought I checked this in before! Oh well.
Added new heapify() function, which transforms an arbitrary list into a heap in linear time; that's a fundamental tool for using heaps in real life <wink>. Added heapify() test. Added a "less naive" N-best algorithm to the test suite, and noted that this could actually go much faster (building on heapify()) if we had max-heaps instead of min-heaps (the iterative method is appropriate when all the data isn't known in advance, but when it is known in advance the tradeoffs get murkier).
This commit is contained in:
parent
940dc922c0
commit
28c25527c2
2 changed files with 48 additions and 10 deletions
38
Lib/heapq.py
38
Lib/heapq.py
|
|
@ -13,6 +13,7 @@ heap = [] # creates an empty heap
|
||||||
heappush(heap, item) # pushes a new item on the heap
|
heappush(heap, item) # pushes a new item on the heap
|
||||||
item = heappop(heap) # pops the smallest item from the heap
|
item = heappop(heap) # pops the smallest item from the heap
|
||||||
item = heap[0] # smallest item on the heap without popping it
|
item = heap[0] # smallest item on the heap without popping it
|
||||||
|
heapify(heap) # transform list into a heap, in-place, in linear time
|
||||||
|
|
||||||
Our API differs from textbook heap algorithms as follows:
|
Our API differs from textbook heap algorithms as follows:
|
||||||
|
|
||||||
|
|
@ -136,15 +137,13 @@ def heappush(heap, item):
|
||||||
pos = parentpos
|
pos = parentpos
|
||||||
heap[pos] = item
|
heap[pos] = item
|
||||||
|
|
||||||
def heappop(heap):
|
# The child indices of heap index pos are already heaps, and we want to make
|
||||||
"""Pop the smallest item off the heap, maintaining the heap invariant."""
|
# a heap at index pos too.
|
||||||
endpos = len(heap) - 1
|
def _siftdown(heap, pos):
|
||||||
if endpos <= 0:
|
endpos = len(heap)
|
||||||
return heap.pop()
|
assert pos < endpos
|
||||||
returnitem = heap[0]
|
item = heap[pos]
|
||||||
item = heap.pop()
|
# Sift item into position, down from pos, moving the smaller
|
||||||
pos = 0
|
|
||||||
# Sift item into position, down from the root, moving the smaller
|
|
||||||
# child up, until finding pos such that item <= pos's children.
|
# child up, until finding pos such that item <= pos's children.
|
||||||
childpos = 2*pos + 1 # leftmost child position
|
childpos = 2*pos + 1 # leftmost child position
|
||||||
while childpos < endpos:
|
while childpos < endpos:
|
||||||
|
|
@ -164,8 +163,29 @@ def heappop(heap):
|
||||||
pos = childpos
|
pos = childpos
|
||||||
childpos = 2*pos + 1
|
childpos = 2*pos + 1
|
||||||
heap[pos] = item
|
heap[pos] = item
|
||||||
|
|
||||||
|
def heappop(heap):
    """Remove and return the smallest item of heap, keeping the heap invariant.

    The call to heap.pop() raises IndexError when heap is empty.
    """
    # Detach the last leaf first: this shrinks the list by one, and on an
    # empty heap it is the call that raises.
    tail = heap.pop()
    if not heap:
        # The heap held exactly one item, so that item was the minimum.
        return tail
    # The minimum lives at the root.  Put the detached leaf in its place
    # and sift it down from index 0 until the invariant holds again.
    smallest = heap[0]
    heap[0] = tail
    _siftdown(heap, 0)
    return smallest
|
||||||
|
|
||||||
|
def heapify(heap):
    """Rearrange list heap into a heap, in-place, in O(len(heap)) time."""
    size = len(heap)
    # Build the heap bottom-up.  Index i has a child only when
    # 2*i + 1 < size, so the last index worth visiting is size//2 - 1 (for
    # both even and odd size); every later index is a leaf and is already
    # a one-element heap.  Walking from that index down to the root means
    # the children of each visited index are heaps by the time it is
    # sifted, which is what _siftdown requires.
    index = size//2 - 1
    while index >= 0:
        _siftdown(heap, index)
        index -= 1
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
# Simple sanity test
|
# Simple sanity test
|
||||||
heap = []
|
heap = []
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@
|
||||||
|
|
||||||
from test.test_support import verify, vereq, verbose, TestFailed
|
from test.test_support import verify, vereq, verbose, TestFailed
|
||||||
|
|
||||||
from heapq import heappush, heappop
|
from heapq import heappush, heappop, heapify
|
||||||
import random
|
import random
|
||||||
|
|
||||||
def check_invariant(heap):
|
def check_invariant(heap):
|
||||||
|
|
@ -40,6 +40,24 @@ def test_main():
|
||||||
heappop(heap)
|
heappop(heap)
|
||||||
heap.sort()
|
heap.sort()
|
||||||
vereq(heap, data_sorted[-10:])
|
vereq(heap, data_sorted[-10:])
|
||||||
|
# 4) Test heapify.
|
||||||
|
for size in range(30):
|
||||||
|
heap = [random.random() for dummy in range(size)]
|
||||||
|
heapify(heap)
|
||||||
|
check_invariant(heap)
|
||||||
|
# 5) Less-naive "N-best" algorithm, much faster (if len(data) is big
|
||||||
|
# enough <wink>) than sorting all of data. However, if we had a max
|
||||||
|
# heap instead of a min heap, it would go much faster still via
|
||||||
|
# heapify'ing all of data (linear time), then doing 10 heappops
|
||||||
|
# (10 log-time steps).
|
||||||
|
heap = data[:10]
|
||||||
|
heapify(heap)
|
||||||
|
for item in data[10:]:
|
||||||
|
if item > heap[0]: # this gets rarer and rarer the longer we run
|
||||||
|
heappush(heap, item)
|
||||||
|
heappop(heap)
|
||||||
|
heap.sort()
|
||||||
|
vereq(heap, data_sorted[-10:])
|
||||||
# Make user happy
|
# Make user happy
|
||||||
if verbose:
|
if verbose:
|
||||||
print "All OK"
|
print "All OK"
|
||||||
|
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue