mirror of
https://github.com/python/cpython.git
synced 2025-09-27 02:39:58 +00:00
Install C version of heapq.nsmallest().
This commit is contained in:
parent
c929766361
commit
2e3dfaf707
4 changed files with 182 additions and 17 deletions
|
@ -97,11 +97,6 @@ by \var{iterable}. Equivalent to: \code{sorted(iterable)[:n]}
|
||||||
\versionadded{2.4}
|
\versionadded{2.4}
|
||||||
\end{funcdesc}
|
\end{funcdesc}
|
||||||
|
|
||||||
Though the above functions appear symmetrical, they each have different
|
|
||||||
speed and space requirements. In particular, \function{nsmallest()}
|
|
||||||
operates on a full copy of the dataset. In contrast, \function{nlargest()}
|
|
||||||
only requires storage space for \var{n} elements.
|
|
||||||
|
|
||||||
Both functions perform best for smaller values of \var{n}. For larger
|
Both functions perform best for smaller values of \var{n}. For larger
|
||||||
values, it is more efficient to use the \function{sorted()} function. Also,
|
values, it is more efficient to use the \function{sorted()} function. Also,
|
||||||
when \code{n==1}, it is more efficient to use the builtin \function{min()}
|
when \code{n==1}, it is more efficient to use the builtin \function{min()}
|
||||||
|
|
|
@ -300,7 +300,7 @@ def _siftup(heap, pos):
|
||||||
|
|
||||||
# If available, use C implementation
|
# If available, use C implementation
|
||||||
try:
|
try:
|
||||||
from _heapq import heappush, heappop, heapify, heapreplace
|
from _heapq import heappush, heappop, heapify, heapreplace, nlargest, nsmallest
|
||||||
except ImportError:
|
except ImportError:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
|
@ -4,6 +4,7 @@ from heapq import heappush, heappop, heapify, heapreplace, nlargest, nsmallest
|
||||||
import random
|
import random
|
||||||
import unittest
|
import unittest
|
||||||
from test import test_support
|
from test import test_support
|
||||||
|
import sys
|
||||||
|
|
||||||
|
|
||||||
def heapiter(heap):
|
def heapiter(heap):
|
||||||
|
@ -91,16 +92,28 @@ class TestHeap(unittest.TestCase):
|
||||||
|
|
||||||
def test_nsmallest(self):
|
def test_nsmallest(self):
|
||||||
data = [random.randrange(2000) for i in range(1000)]
|
data = [random.randrange(2000) for i in range(1000)]
|
||||||
self.assertEqual(nsmallest(data, 400), sorted(data)[:400])
|
for i in (0, 1, 2, 10, 100, 400, 999, 1000, 1100):
|
||||||
self.assertEqual(nsmallest(data, 50), sorted(data)[:50])
|
self.assertEqual(nsmallest(data, i), sorted(data)[:i])
|
||||||
|
|
||||||
def test_largest(self):
|
def test_largest(self):
|
||||||
data = [random.randrange(2000) for i in range(1000)]
|
data = [random.randrange(2000) for i in range(1000)]
|
||||||
self.assertEqual(nlargest(data, 400), sorted(data, reverse=True)[:400])
|
for i in (0, 1, 2, 10, 100, 400, 999, 1000, 1100):
|
||||||
|
self.assertEqual(nlargest(data, i), sorted(data, reverse=True)[:i])
|
||||||
|
|
||||||
def test_main():
|
def test_main(verbose=None):
|
||||||
test_support.run_unittest(TestHeap)
|
test_classes = [TestHeap]
|
||||||
|
test_support.run_unittest(*test_classes)
|
||||||
|
|
||||||
|
# verify reference counting
|
||||||
|
if verbose and hasattr(sys, "gettotalrefcount"):
|
||||||
|
import gc
|
||||||
|
counts = [None] * 5
|
||||||
|
for i in xrange(len(counts)):
|
||||||
|
test_support.run_unittest(*test_classes)
|
||||||
|
gc.collect()
|
||||||
|
counts[i] = sys.gettotalrefcount()
|
||||||
|
print counts
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
test_main()
|
test_main(verbose=True)
|
||||||
|
|
||||||
|
|
|
@ -219,7 +219,7 @@ PyDoc_STRVAR(heapify_doc,
|
||||||
static PyObject *
|
static PyObject *
|
||||||
nlargest(PyObject *self, PyObject *args)
|
nlargest(PyObject *self, PyObject *args)
|
||||||
{
|
{
|
||||||
PyObject *heap=NULL, *elem, *rv, *iterable, *sol, *it, *oldelem;
|
PyObject *heap=NULL, *elem, *iterable, *sol, *it, *oldelem;
|
||||||
int i, n;
|
int i, n;
|
||||||
|
|
||||||
if (!PyArg_ParseTuple(args, "Oi:nlargest", &iterable, &n))
|
if (!PyArg_ParseTuple(args, "Oi:nlargest", &iterable, &n))
|
||||||
|
@ -246,10 +246,9 @@ nlargest(PyObject *self, PyObject *args)
|
||||||
if (PyList_GET_SIZE(heap) == 0)
|
if (PyList_GET_SIZE(heap) == 0)
|
||||||
goto sortit;
|
goto sortit;
|
||||||
|
|
||||||
rv = heapify(self, heap);
|
for (i=n/2-1 ; i>=0 ; i--)
|
||||||
if (rv == NULL)
|
if(_siftup((PyListObject *)heap, i) == -1)
|
||||||
goto fail;
|
goto fail;
|
||||||
Py_DECREF(rv);
|
|
||||||
|
|
||||||
sol = PyList_GET_ITEM(heap, 0);
|
sol = PyList_GET_ITEM(heap, 0);
|
||||||
while (1) {
|
while (1) {
|
||||||
|
@ -290,6 +289,162 @@ PyDoc_STRVAR(nlargest_doc,
|
||||||
\n\
|
\n\
|
||||||
Equivalent to: sorted(iterable, reverse=True)[:n]\n");
|
Equivalent to: sorted(iterable, reverse=True)[:n]\n");
|
||||||
|
|
||||||
|
static int
|
||||||
|
_siftdownmax(PyListObject *heap, int startpos, int pos)
|
||||||
|
{
|
||||||
|
PyObject *newitem, *parent;
|
||||||
|
int cmp, parentpos;
|
||||||
|
|
||||||
|
assert(PyList_Check(heap));
|
||||||
|
if (pos >= PyList_GET_SIZE(heap)) {
|
||||||
|
PyErr_SetString(PyExc_IndexError, "index out of range");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
|
||||||
|
newitem = PyList_GET_ITEM(heap, pos);
|
||||||
|
Py_INCREF(newitem);
|
||||||
|
/* Follow the path to the root, moving parents down until finding
|
||||||
|
a place newitem fits. */
|
||||||
|
while (pos > startpos){
|
||||||
|
parentpos = (pos - 1) >> 1;
|
||||||
|
parent = PyList_GET_ITEM(heap, parentpos);
|
||||||
|
cmp = PyObject_RichCompareBool(newitem, parent, Py_LE);
|
||||||
|
if (cmp == -1)
|
||||||
|
return -1;
|
||||||
|
if (cmp == 1)
|
||||||
|
break;
|
||||||
|
Py_INCREF(parent);
|
||||||
|
Py_DECREF(PyList_GET_ITEM(heap, pos));
|
||||||
|
PyList_SET_ITEM(heap, pos, parent);
|
||||||
|
pos = parentpos;
|
||||||
|
}
|
||||||
|
Py_DECREF(PyList_GET_ITEM(heap, pos));
|
||||||
|
PyList_SET_ITEM(heap, pos, newitem);
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
static int
|
||||||
|
_siftupmax(PyListObject *heap, int pos)
|
||||||
|
{
|
||||||
|
int startpos, endpos, childpos, rightpos;
|
||||||
|
int cmp;
|
||||||
|
PyObject *newitem, *tmp;
|
||||||
|
|
||||||
|
assert(PyList_Check(heap));
|
||||||
|
endpos = PyList_GET_SIZE(heap);
|
||||||
|
startpos = pos;
|
||||||
|
if (pos >= endpos) {
|
||||||
|
PyErr_SetString(PyExc_IndexError, "index out of range");
|
||||||
|
return -1;
|
||||||
|
}
|
||||||
|
newitem = PyList_GET_ITEM(heap, pos);
|
||||||
|
Py_INCREF(newitem);
|
||||||
|
|
||||||
|
/* Bubble up the smaller child until hitting a leaf. */
|
||||||
|
childpos = 2*pos + 1; /* leftmost child position */
|
||||||
|
while (childpos < endpos) {
|
||||||
|
/* Set childpos to index of smaller child. */
|
||||||
|
rightpos = childpos + 1;
|
||||||
|
if (rightpos < endpos) {
|
||||||
|
cmp = PyObject_RichCompareBool(
|
||||||
|
PyList_GET_ITEM(heap, childpos),
|
||||||
|
PyList_GET_ITEM(heap, rightpos),
|
||||||
|
Py_LE);
|
||||||
|
if (cmp == -1)
|
||||||
|
return -1;
|
||||||
|
if (cmp == 1)
|
||||||
|
childpos = rightpos;
|
||||||
|
}
|
||||||
|
/* Move the smaller child up. */
|
||||||
|
tmp = PyList_GET_ITEM(heap, childpos);
|
||||||
|
Py_INCREF(tmp);
|
||||||
|
Py_DECREF(PyList_GET_ITEM(heap, pos));
|
||||||
|
PyList_SET_ITEM(heap, pos, tmp);
|
||||||
|
pos = childpos;
|
||||||
|
childpos = 2*pos + 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* The leaf at pos is empty now. Put newitem there, and and bubble
|
||||||
|
it up to its final resting place (by sifting its parents down). */
|
||||||
|
Py_DECREF(PyList_GET_ITEM(heap, pos));
|
||||||
|
PyList_SET_ITEM(heap, pos, newitem);
|
||||||
|
return _siftdownmax(heap, startpos, pos);
|
||||||
|
}
|
||||||
|
|
||||||
|
static PyObject *
|
||||||
|
nsmallest(PyObject *self, PyObject *args)
|
||||||
|
{
|
||||||
|
PyObject *heap=NULL, *elem, *iterable, *los, *it, *oldelem;
|
||||||
|
int i, n;
|
||||||
|
|
||||||
|
if (!PyArg_ParseTuple(args, "Oi:nsmallest", &iterable, &n))
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
it = PyObject_GetIter(iterable);
|
||||||
|
if (it == NULL)
|
||||||
|
return NULL;
|
||||||
|
|
||||||
|
heap = PyList_New(0);
|
||||||
|
if (it == NULL)
|
||||||
|
goto fail;
|
||||||
|
|
||||||
|
for (i=0 ; i<n ; i++ ){
|
||||||
|
elem = PyIter_Next(it);
|
||||||
|
if (elem == NULL)
|
||||||
|
goto sortit;
|
||||||
|
if (PyList_Append(heap, elem) == -1) {
|
||||||
|
Py_DECREF(elem);
|
||||||
|
goto fail;
|
||||||
|
}
|
||||||
|
Py_DECREF(elem);
|
||||||
|
}
|
||||||
|
n = PyList_GET_SIZE(heap);
|
||||||
|
if (n == 0)
|
||||||
|
goto sortit;
|
||||||
|
|
||||||
|
for (i=n/2-1 ; i>=0 ; i--)
|
||||||
|
if(_siftupmax((PyListObject *)heap, i) == -1)
|
||||||
|
goto fail;
|
||||||
|
|
||||||
|
los = PyList_GET_ITEM(heap, 0);
|
||||||
|
while (1) {
|
||||||
|
elem = PyIter_Next(it);
|
||||||
|
if (elem == NULL) {
|
||||||
|
if (PyErr_Occurred())
|
||||||
|
goto fail;
|
||||||
|
else
|
||||||
|
goto sortit;
|
||||||
|
}
|
||||||
|
if (PyObject_RichCompareBool(los, elem, Py_LE)) {
|
||||||
|
Py_DECREF(elem);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
oldelem = PyList_GET_ITEM(heap, 0);
|
||||||
|
PyList_SET_ITEM(heap, 0, elem);
|
||||||
|
Py_DECREF(oldelem);
|
||||||
|
if (_siftupmax((PyListObject *)heap, 0) == -1)
|
||||||
|
goto fail;
|
||||||
|
los = PyList_GET_ITEM(heap, 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
sortit:
|
||||||
|
Py_DECREF(it);
|
||||||
|
if (PyList_Sort(heap) == -1)
|
||||||
|
goto fail;
|
||||||
|
return heap;
|
||||||
|
|
||||||
|
fail:
|
||||||
|
Py_DECREF(it);
|
||||||
|
Py_XDECREF(heap);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
PyDoc_STRVAR(nsmallest_doc,
|
||||||
|
"Find the n smallest elements in a dataset.\n\
|
||||||
|
\n\
|
||||||
|
Equivalent to: sorted(iterable)[:n]\n");
|
||||||
|
|
||||||
static PyMethodDef heapq_methods[] = {
|
static PyMethodDef heapq_methods[] = {
|
||||||
{"heappush", (PyCFunction)heappush,
|
{"heappush", (PyCFunction)heappush,
|
||||||
METH_VARARGS, heappush_doc},
|
METH_VARARGS, heappush_doc},
|
||||||
|
@ -301,6 +456,8 @@ static PyMethodDef heapq_methods[] = {
|
||||||
METH_O, heapify_doc},
|
METH_O, heapify_doc},
|
||||||
{"nlargest", (PyCFunction)nlargest,
|
{"nlargest", (PyCFunction)nlargest,
|
||||||
METH_VARARGS, nlargest_doc},
|
METH_VARARGS, nlargest_doc},
|
||||||
|
{"nsmallest", (PyCFunction)nsmallest,
|
||||||
|
METH_VARARGS, nsmallest_doc},
|
||||||
{NULL, NULL} /* sentinel */
|
{NULL, NULL} /* sentinel */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue