SF patch 658251: Install a C implementation of the Mersenne Twister as the

core generator for random.py.
This commit is contained in:
Raymond Hettinger 2002-12-29 23:03:38 +00:00
parent 5e65ce671c
commit 40f6217092
6 changed files with 981 additions and 320 deletions

View file

@ -18,61 +18,26 @@
negative exponential
gamma
beta
pareto
Weibull
distributions on the circle (angles 0 to 2pi)
---------------------------------------------
circular uniform
von Mises
Translated from anonymously contributed C/C++ source.
General notes on the underlying Mersenne Twister core generator:
Multi-threading note: the random number generator used here is not thread-
safe; it is possible that two calls return the same random value. However,
you can instantiate a different instance of Random() in each thread to get
generators that don't share state, then use .setstate() and .jumpahead() to
move the generators to disjoint segments of the full period. For example,
* The period is 2**19937-1.
* It is one of the most extensively tested generators in existence
* Without a direct way to compute N steps forward, the
semantics of jumpahead(n) are weakened to simply jump
to another distant state and rely on the large period
to avoid overlapping sequences.
* The random() method is implemented in C, executes in
a single Python step, and is, therefore, threadsafe.
def create_generators(num, delta, firstseed=None):
""\"Return list of num distinct generators.
Each generator has its own unique segment of delta elements from
Random.random()'s full period.
Seed the first generator with optional arg firstseed (default is
None, to seed from current time).
""\"
from random import Random
g = Random(firstseed)
result = [g]
for i in range(num - 1):
laststate = g.getstate()
g = Random()
g.setstate(laststate)
g.jumpahead(delta)
result.append(g)
return result
gens = create_generators(10, 1000000)
That creates 10 distinct generators, which can be passed out to 10 distinct
threads. The generators don't share state so can be called safely in
parallel. So long as no thread calls its g.random() more than a million
times (the second argument to create_generators), the sequences seen by
each thread will not overlap.
The period of the underlying Wichmann-Hill generator is 6,953,607,871,644,
and that limits how far this technique can be pushed.
Just for fun, note that since we know the period, .jumpahead() can also be
used to "move backward in time":
>>> g = Random(42) # arbitrary
>>> g.random()
0.25420336316883324
>>> g.jumpahead(6953607871644L - 1) # move *back* one
>>> g.random()
0.25420336316883324
"""
# XXX The docstring sucks.
from math import log as _log, exp as _exp, pi as _pi, e as _e
from math import sqrt as _sqrt, acos as _acos, cos as _cos, sin as _sin
@ -82,32 +47,20 @@ __all__ = ["Random","seed","random","uniform","randint","choice","sample",
"randrange","shuffle","normalvariate","lognormvariate",
"cunifvariate","expovariate","vonmisesvariate","gammavariate",
"stdgamma","gauss","betavariate","paretovariate","weibullvariate",
"getstate","setstate","jumpahead","whseed"]
def _verify(name, computed, expected):
if abs(computed - expected) > 1e-7:
raise ValueError(
"computed value for %s deviates too much "
"(computed %g, expected %g)" % (name, computed, expected))
"getstate","setstate","jumpahead"]
NV_MAGICCONST = 4 * _exp(-0.5)/_sqrt(2.0)
_verify('NV_MAGICCONST', NV_MAGICCONST, 1.71552776992141)
TWOPI = 2.0*_pi
_verify('TWOPI', TWOPI, 6.28318530718)
LOG4 = _log(4.0)
_verify('LOG4', LOG4, 1.38629436111989)
SG_MAGICCONST = 1.0 + _log(4.5)
_verify('SG_MAGICCONST', SG_MAGICCONST, 2.50407739677627)
del _verify
# Translated by Guido van Rossum from C source provided by
# Adrian Baddeley.
# Adrian Baddeley. Adapted by Raymond Hettinger for use with
# the Mersenne Twister core generator.
class Random:
from _random import Random as CoreGenerator
class Random(CoreGenerator):
"""Random number generator base class used by bound module functions.
Used to instantiate instances of Random to get generators that don't
@ -122,7 +75,7 @@ class Random:
"""
VERSION = 1 # used by getstate/setstate
VERSION = 2 # used by getstate/setstate
def __init__(self, x=None):
"""Initialize an instance.
@ -131,12 +84,7 @@ class Random:
"""
self.seed(x)
## -------------------- core generator -------------------
# Specific to Wichmann-Hill generator. Subclasses wishing to use a
# different core generator should override the seed(), random(),
# getstate(), setstate() and jumpahead() methods.
self.gauss_next = None
def seed(self, a=None):
"""Initialize internal state from hashable object.
@ -144,141 +92,26 @@ class Random:
None or no argument seeds from current time.
If a is not None or an int or long, hash(a) is used instead.
If a is an int or long, a is used directly. Distinct values between
0 and 27814431486575L inclusive are guaranteed to yield distinct
internal states (this guarantee is specific to the default
Wichmann-Hill generator).
"""
if a is None:
# Initialize from current time
import time
a = long(time.time() * 256)
if type(a) not in (type(3), type(3L)):
a = hash(a)
a, x = divmod(a, 30268)
a, y = divmod(a, 30306)
a, z = divmod(a, 30322)
self._seed = int(x)+1, int(y)+1, int(z)+1
CoreGenerator.seed(self, a)
self.gauss_next = None
def random(self):
"""Get the next random number in the range [0.0, 1.0)."""
# Wichman-Hill random number generator.
#
# Wichmann, B. A. & Hill, I. D. (1982)
# Algorithm AS 183:
# An efficient and portable pseudo-random number generator
# Applied Statistics 31 (1982) 188-190
#
# see also:
# Correction to Algorithm AS 183
# Applied Statistics 33 (1984) 123
#
# McLeod, A. I. (1985)
# A remark on Algorithm AS 183
# Applied Statistics 34 (1985),198-200
# This part is thread-unsafe:
# BEGIN CRITICAL SECTION
x, y, z = self._seed
x = (171 * x) % 30269
y = (172 * y) % 30307
z = (170 * z) % 30323
self._seed = x, y, z
# END CRITICAL SECTION
# Note: on a platform using IEEE-754 double arithmetic, this can
# never return 0.0 (asserted by Tim; proof too long for a comment).
return (x/30269.0 + y/30307.0 + z/30323.0) % 1.0
def getstate(self):
"""Return internal state; can be passed to setstate() later."""
return self.VERSION, self._seed, self.gauss_next
return self.VERSION, CoreGenerator.getstate(self), self.gauss_next
def setstate(self, state):
"""Restore internal state from object returned by getstate()."""
version = state[0]
if version == 1:
version, self._seed, self.gauss_next = state
if version == 2:
version, internalstate, self.gauss_next = state
CoreGenerator.setstate(self, internalstate)
else:
raise ValueError("state with version %s passed to "
"Random.setstate() of version %s" %
(version, self.VERSION))
def jumpahead(self, n):
"""Act as if n calls to random() were made, but quickly.
n is an int, greater than or equal to 0.
Example use: If you have 2 threads and know that each will
consume no more than a million random numbers, create two Random
objects r1 and r2, then do
r2.setstate(r1.getstate())
r2.jumpahead(1000000)
Then r1 and r2 will use guaranteed-disjoint segments of the full
period.
"""
if not n >= 0:
raise ValueError("n must be >= 0")
x, y, z = self._seed
x = int(x * pow(171, n, 30269)) % 30269
y = int(y * pow(172, n, 30307)) % 30307
z = int(z * pow(170, n, 30323)) % 30323
self._seed = x, y, z
def __whseed(self, x=0, y=0, z=0):
"""Set the Wichmann-Hill seed from (x, y, z).
These must be integers in the range [0, 256).
"""
if not type(x) == type(y) == type(z) == int:
raise TypeError('seeds must be integers')
if not (0 <= x < 256 and 0 <= y < 256 and 0 <= z < 256):
raise ValueError('seeds must be in range(0, 256)')
if 0 == x == y == z:
# Initialize from current time
import time
t = long(time.time() * 256)
t = int((t&0xffffff) ^ (t>>24))
t, x = divmod(t, 256)
t, y = divmod(t, 256)
t, z = divmod(t, 256)
# Zero is a poor seed, so substitute 1
self._seed = (x or 1, y or 1, z or 1)
self.gauss_next = None
def whseed(self, a=None):
"""Seed from hashable object's hash code.
None or no argument seeds from current time. It is not guaranteed
that objects with distinct hash codes lead to distinct internal
states.
This is obsolete, provided for compatibility with the seed routine
used prior to Python 2.1. Use the .seed() method instead.
"""
if a is None:
self.__whseed()
return
a = hash(a)
a, x = divmod(a, 256)
a, y = divmod(a, 256)
a, z = divmod(a, 256)
x = (x + a) % 256 or 1
y = (y + a) % 256 or 1
z = (z + a) % 256 or 1
self.__whseed(x, y, z)
## ---- Methods below this point do not need to be overridden when
## ---- subclassing for the purpose of using a different core generator.
@ -744,6 +577,153 @@ class Random:
u = self.random()
return alpha * pow(-_log(u), 1.0/beta)
## -------------------- Wichmann-Hill -------------------
class WichmannHill(Random):
VERSION = 1 # used by getstate/setstate
def seed(self, a=None):
"""Initialize internal state from hashable object.
None or no argument seeds from current time.
If a is not None or an int or long, hash(a) is used instead.
If a is an int or long, a is used directly. Distinct values between
0 and 27814431486575L inclusive are guaranteed to yield distinct
internal states (this guarantee is specific to the default
Wichmann-Hill generator).
"""
if a is None:
# Initialize from current time
import time
a = long(time.time() * 256)
if not isinstance(a, (int, long)):
a = hash(a)
a, x = divmod(a, 30268)
a, y = divmod(a, 30306)
a, z = divmod(a, 30322)
self._seed = int(x)+1, int(y)+1, int(z)+1
self.gauss_next = None
def random(self):
"""Get the next random number in the range [0.0, 1.0)."""
# Wichman-Hill random number generator.
#
# Wichmann, B. A. & Hill, I. D. (1982)
# Algorithm AS 183:
# An efficient and portable pseudo-random number generator
# Applied Statistics 31 (1982) 188-190
#
# see also:
# Correction to Algorithm AS 183
# Applied Statistics 33 (1984) 123
#
# McLeod, A. I. (1985)
# A remark on Algorithm AS 183
# Applied Statistics 34 (1985),198-200
# This part is thread-unsafe:
# BEGIN CRITICAL SECTION
x, y, z = self._seed
x = (171 * x) % 30269
y = (172 * y) % 30307
z = (170 * z) % 30323
self._seed = x, y, z
# END CRITICAL SECTION
# Note: on a platform using IEEE-754 double arithmetic, this can
# never return 0.0 (asserted by Tim; proof too long for a comment).
return (x/30269.0 + y/30307.0 + z/30323.0) % 1.0
def getstate(self):
"""Return internal state; can be passed to setstate() later."""
return self.VERSION, self._seed, self.gauss_next
def setstate(self, state):
"""Restore internal state from object returned by getstate()."""
version = state[0]
if version == 1:
version, self._seed, self.gauss_next = state
else:
raise ValueError("state with version %s passed to "
"Random.setstate() of version %s" %
(version, self.VERSION))
def jumpahead(self, n):
"""Act as if n calls to random() were made, but quickly.
n is an int, greater than or equal to 0.
Example use: If you have 2 threads and know that each will
consume no more than a million random numbers, create two Random
objects r1 and r2, then do
r2.setstate(r1.getstate())
r2.jumpahead(1000000)
Then r1 and r2 will use guaranteed-disjoint segments of the full
period.
"""
if not n >= 0:
raise ValueError("n must be >= 0")
x, y, z = self._seed
x = int(x * pow(171, n, 30269)) % 30269
y = int(y * pow(172, n, 30307)) % 30307
z = int(z * pow(170, n, 30323)) % 30323
self._seed = x, y, z
def __whseed(self, x=0, y=0, z=0):
"""Set the Wichmann-Hill seed from (x, y, z).
These must be integers in the range [0, 256).
"""
if not type(x) == type(y) == type(z) == int:
raise TypeError('seeds must be integers')
if not (0 <= x < 256 and 0 <= y < 256 and 0 <= z < 256):
raise ValueError('seeds must be in range(0, 256)')
if 0 == x == y == z:
# Initialize from current time
import time
t = long(time.time() * 256)
t = int((t&0xffffff) ^ (t>>24))
t, x = divmod(t, 256)
t, y = divmod(t, 256)
t, z = divmod(t, 256)
# Zero is a poor seed, so substitute 1
self._seed = (x or 1, y or 1, z or 1)
self.gauss_next = None
def whseed(self, a=None):
"""Seed from hashable object's hash code.
None or no argument seeds from current time. It is not guaranteed
that objects with distinct hash codes lead to distinct internal
states.
This is obsolete, provided for compatibility with the seed routine
used prior to Python 2.1. Use the .seed() method instead.
"""
if a is None:
self.__whseed()
return
a = hash(a)
a, x = divmod(a, 256)
a, y = divmod(a, 256)
a, z = divmod(a, 256)
x = (x + a) % 256 or 1
y = (y + a) % 256 or 1
z = (z + a) % 256 or 1
self.__whseed(x, y, z)
## -------------------- test program --------------------
def _test_generator(n, funccall):
@ -768,25 +748,11 @@ def _test_generator(n, funccall):
print 'avg %g, stddev %g, min %g, max %g' % \
(avg, stddev, smallest, largest)
def _test_sample(n):
# For the entire allowable range of 0 <= k <= n, validate that
# the sample is of the correct length and contains only unique items
population = xrange(n)
for k in xrange(n+1):
s = sample(population, k)
uniq = dict.fromkeys(s)
assert len(uniq) == len(s) == k
assert None not in uniq
def _sample_generator(n, k):
# Return a fixed element from the sample. Validates random ordering.
return sample(xrange(n), k)[k//2]
def _test(N=2000):
print 'TWOPI =', TWOPI
print 'LOG4 =', LOG4
print 'NV_MAGICCONST =', NV_MAGICCONST
print 'SG_MAGICCONST =', SG_MAGICCONST
_test_generator(N, 'random()')
_test_generator(N, 'normalvariate(0.0, 1.0)')
_test_generator(N, 'lognormvariate(0.0, 1.0)')
@ -808,25 +774,13 @@ def _test(N=2000):
_test_generator(N, 'weibullvariate(1.0, 1.0)')
_test_generator(N, '_sample_generator(50, 5)') # expected s.d.: 14.4
_test_generator(N, '_sample_generator(50, 45)') # expected s.d.: 14.4
_test_sample(500)
# Test jumpahead.
s = getstate()
jumpahead(N)
r1 = random()
# now do it the slow way
setstate(s)
for i in range(N):
random()
r2 = random()
if r1 != r2:
raise ValueError("jumpahead test failed " + `(N, r1, r2)`)
# Create one instance, seeded from current time, and export its methods
# as module-level functions. The functions are not threadsafe, and state
# is shared across all uses (both in the user's code and in the Python
# libraries), but that's fine for most programs and is easier for the
# casual user than making them instantiate their own Random() instance.
# as module-level functions. The functions share state across all uses
#(both in the user's code and in the Python libraries), but that's fine
# for most programs and is easier for the casual user than making them
# instantiate their own Random() instance.
_inst = Random()
seed = _inst.seed
random = _inst.random
@ -850,7 +804,6 @@ weibullvariate = _inst.weibullvariate
getstate = _inst.getstate
setstate = _inst.setstate
jumpahead = _inst.jumpahead
whseed = _inst.whseed
if __name__ == '__main__':
_test()

View file

@ -1,19 +1,206 @@
from test import test_support
#!/usr/bin/env python
import unittest
import random
import time
from test import test_support
# Ensure that the seed() method initializes all the hidden state. In
# particular, through 2.2.1 it failed to reset a piece of state used by
# (and only by) the .gauss() method.
class TestBasicOps(unittest.TestCase):
# Superclass with tests common to all generators.
# Subclasses must arrange for self.gen to retrieve the Random instance
# to be tested.
for seed in 1, 12, 123, 1234, 12345, 123456, 654321:
for seeder in random.seed, random.whseed:
seeder(seed)
x1 = random.random()
y1 = random.gauss(0, 1)
def randomlist(self, n):
"""Helper function to make a list of random numbers"""
return [self.gen.random() for i in xrange(n)]
seeder(seed)
x2 = random.random()
y2 = random.gauss(0, 1)
def test_autoseed(self):
self.gen.seed()
state1 = self.gen.getstate()
time.sleep(1)
self.gen.seed() # diffent seeds at different times
state2 = self.gen.getstate()
self.assertNotEqual(state1, state2)
test_support.vereq(x1, x2)
test_support.vereq(y1, y2)
def test_saverestore(self):
N = 1000
self.gen.seed()
state = self.gen.getstate()
randseq = self.randomlist(N)
self.gen.setstate(state) # should regenerate the same sequence
self.assertEqual(randseq, self.randomlist(N))
def test_seedargs(self):
for arg in [None, 0, 0L, 1, 1L, -1, -1L, 10**20, -(10**20),
3.14, 1+2j, 'a', tuple('abc')]:
self.gen.seed(arg)
for arg in [range(3), dict(one=1)]:
self.assertRaises(TypeError, self.gen.seed, arg)
def test_jumpahead(self):
self.gen.seed()
state1 = self.gen.getstate()
self.gen.jumpahead(100)
state2 = self.gen.getstate() # s/b distinct from state1
self.assertNotEqual(state1, state2)
self.gen.jumpahead(100)
state3 = self.gen.getstate() # s/b distinct from state2
self.assertNotEqual(state2, state3)
self.assertRaises(TypeError, self.gen.jumpahead) # needs an arg
self.assertRaises(TypeError, self.gen.jumpahead, "ick") # wrong type
self.assertRaises(TypeError, self.gen.jumpahead, 2.3) # wrong type
self.assertRaises(TypeError, self.gen.jumpahead, 2, 3) # too many
def test_sample(self):
# For the entire allowable range of 0 <= k <= N, validate that
# the sample is of the correct length and contains only unique items
N = 100
population = xrange(N)
for k in xrange(N+1):
s = self.gen.sample(population, k)
self.assertEqual(len(s), k)
uniq = dict.fromkeys(s)
self.assertEqual(len(uniq), k)
self.failIf(None in uniq)
def test_gauss(self):
# Ensure that the seed() method initializes all the hidden state. In
# particular, through 2.2.1 it failed to reset a piece of state used
# by (and only by) the .gauss() method.
for seed in 1, 12, 123, 1234, 12345, 123456, 654321:
self.gen.seed(seed)
x1 = self.gen.random()
y1 = self.gen.gauss(0, 1)
self.gen.seed(seed)
x2 = self.gen.random()
y2 = self.gen.gauss(0, 1)
self.assertEqual(x1, x2)
self.assertEqual(y1, y2)
class WichmannHill_TestBasicOps(TestBasicOps):
gen = random.WichmannHill()
def test_strong_jumpahead(self):
# tests that jumpahead(n) semantics correspond to n calls to random()
N = 1000
s = self.gen.getstate()
self.gen.jumpahead(N)
r1 = self.gen.random()
# now do it the slow way
self.gen.setstate(s)
for i in xrange(N):
self.gen.random()
r2 = self.gen.random()
self.assertEqual(r1, r2)
def test_gauss_with_whseed(self):
# Ensure that the seed() method initializes all the hidden state. In
# particular, through 2.2.1 it failed to reset a piece of state used
# by (and only by) the .gauss() method.
for seed in 1, 12, 123, 1234, 12345, 123456, 654321:
self.gen.whseed(seed)
x1 = self.gen.random()
y1 = self.gen.gauss(0, 1)
self.gen.whseed(seed)
x2 = self.gen.random()
y2 = self.gen.gauss(0, 1)
self.assertEqual(x1, x2)
self.assertEqual(y1, y2)
class MersenneTwister_TestBasicOps(TestBasicOps):
gen = random.Random()
def test_referenceImplementation(self):
# Compare the python implementation with results from the original
# code. Create 2000 53-bit precision random floats. Compare only
# the last ten entries to show that the independent implementations
# are tracking. Here is the main() function needed to create the
# list of expected random numbers:
# void main(void){
# int i;
# unsigned long init[4]={61731, 24903, 614, 42143}, length=4;
# init_by_array(init, length);
# for (i=0; i<2000; i++) {
# printf("%.15f ", genrand_res53());
# if (i%5==4) printf("\n");
# }
# }
expected = [0.45839803073713259,
0.86057815201978782,
0.92848331726782152,
0.35932681119782461,
0.081823493762449573,
0.14332226470169329,
0.084297823823520024,
0.53814864671831453,
0.089215024911993401,
0.78486196105372907]
self.gen.seed(61731L + (24903L<<32) + (614L<<64) + (42143L<<96))
actual = self.randomlist(2000)[-10:]
for a, e in zip(actual, expected):
self.assertAlmostEqual(a,e,places=14)
def test_strong_reference_implementation(self):
# Like test_referenceImplementation, but checks for exact bit-level
# equality. This should pass on any box where C double contains
# at least 53 bits of precision (the underlying algorithm suffers
# no rounding errors -- all results are exact).
from math import ldexp
expected = [0x0eab3258d2231fL,
0x1b89db315277a5L,
0x1db622a5518016L,
0x0b7f9af0d575bfL,
0x029e4c4db82240L,
0x04961892f5d673L,
0x02b291598e4589L,
0x11388382c15694L,
0x02dad977c9e1feL,
0x191d96d4d334c6L]
self.gen.seed(61731L + (24903L<<32) + (614L<<64) + (42143L<<96))
actual = self.randomlist(2000)[-10:]
for a, e in zip(actual, expected):
self.assertEqual(long(ldexp(a, 53)), e)
def test_long_seed(self):
# This is most interesting to run in debug mode, just to make sure
# nothing blows up. Under the covers, a dynamically resized array
# is allocated, consuming space proportional to the number of bits
# in the seed. Unfortunately, that's a quadratic-time algorithm,
# so don't make this horribly big.
seed = (1L << (10000 * 8)) - 1 # about 10K bytes
self.gen.seed(seed)
class TestModule(unittest.TestCase):
def testMagicConstants(self):
self.assertAlmostEqual(random.NV_MAGICCONST, 1.71552776992141)
self.assertAlmostEqual(random.TWOPI, 6.28318530718)
self.assertAlmostEqual(random.LOG4, 1.38629436111989)
self.assertAlmostEqual(random.SG_MAGICCONST, 2.50407739677627)
def test__all__(self):
# tests validity but not completeness of the __all__ list
defined = dict.fromkeys(dir(random))
for entry in random.__all__:
self.failUnless(entry in defined)
def test_main():
suite = unittest.TestSuite()
for testclass in (WichmannHill_TestBasicOps,
MersenneTwister_TestBasicOps,
TestModule):
suite.addTest(unittest.makeSuite(testclass))
test_support.run_suite(suite)
if __name__ == "__main__":
test_main()