cpython/Lib/test/sortperf.py

"""Sort performance test.

See main() for command line syntax.
See tabulate() for output format.

"""

import sys
import time
import random
import marshal
import tempfile
import os

td = tempfile.gettempdir()

def randfloats(n):
    """Return a list of n random floats in [0, 1)."""
    # Generating floats is expensive, so this writes them out to a file in
    # a temp directory.  If the file already exists, it just reads them
    # back in and shuffles them a bit.
    fn = os.path.join(td, "rr%06d" % n)
    try:
        fp = open(fn, "rb")
    except IOError:
        r = random.random
        result = [r() for i in xrange(n)]
        try:
            try:
                fp = open(fn, "wb")
                marshal.dump(result, fp)
                fp.close()
                fp = None
            finally:
                if fp:
                    try:
                        os.unlink(fn)
                    except os.error:
                        pass
        except IOError, msg:
            print "can't write", fn, ":", msg
    else:
        result = marshal.load(fp)
        fp.close()
        # Shuffle it a bit...
        for i in range(10):
            i = random.randrange(n)
            temp = result[:i]
            del result[:i]
            temp.reverse()
            result.extend(temp)
            del temp
    assert len(result) == n
    return result

def flush():
    sys.stdout.flush()

def doit(L):
    t0 = time.clock()
    L.sort()
    t1 = time.clock()
    print "%6.2f" % (t1-t0),
    flush()

def tabulate(r):
    """Tabulate sort speed for lists of various sizes.

    The sizes are 2**i for i in r (the argument, a list).

    The output displays i, 2**i, and the time to sort arrays of 2**i
    floating point numbers with the following properties:

    *sort: random data
    \sort: descending data
    /sort: ascending data
    ~sort: many duplicates
    =sort: all equal
    !sort: worst case scenario

    """
    cases = ("*sort", "\\sort", "/sort", "~sort", "=sort", "!sort")
    fmt = ("%2s %7s" + " %6s"*len(cases))
    print fmt % (("i", "2**i") + cases)
    for i in r:
        n = 1 << i
        L = randfloats(n)
        print "%2d %7d" % (i, n),
        flush()
        doit(L) # *sort
        L.reverse()
        doit(L) # \sort
        doit(L) # /sort

        # Arrange for lots of duplicates.
        if n > 4:
            del L[4:]
            L = L * (n // 4)
            # Force the elements to be distinct objects, else timings can be
            # artificially low.
            L = map(lambda x: --x, L)
        doit(L) # ~sort
        del L

        # All equal.  Again, force the elements to be distinct objects.
        L = map(abs, [-0.5] * n)
        doit(L) # =sort
        del L

        # This one looks like [3, 2, 1, 0, 0, 1, 2, 3].  It was a bad case
        # for an older implementation of quicksort, which used the median
        # of the first, last and middle elements as the pivot.  It's still
        # a worse-than-average case for samplesort, but on the order of a
        # measly 5% worse, not a quadratic-time disaster as it was with
        # quicksort.
        half = n // 2
        L = range(half - 1, -1, -1)
        L.extend(range(half))
        # Force to float, so that the timings are comparable.  This is
        # significantly faster if we leave tham as ints.
        L = map(float, L)
        doit(L) # !sort
        print

def main():
    """Main program when invoked as a script.

    One argument: tabulate a single row.
    Two arguments: tabulate a range (inclusive).
    Extra arguments are used to seed the random generator.

    """
    # default range (inclusive)
    k1 = 15
    k2 = 20
    if sys.argv[1:]:
        # one argument: single point
        k1 = k2 = int(sys.argv[1])
        if sys.argv[2:]:
            # two arguments: specify range
            k2 = int(sys.argv[2])
            if sys.argv[3:]:
                # derive random seed from remaining arguments
                x = 1
                for a in sys.argv[3:]:
                    x = 69069 * x + hash(a)
                random.seed(x)
    r = range(k1, k2+1)                 # include the end point
    tabulate(r)

if __name__ == '__main__':
    main()