mirror of
https://github.com/python/cpython.git
synced 2025-09-26 18:29:57 +00:00
Small improvements to the recipes and examples. (GH-19635)
* Add underscores to long numbers to improve readability
* Use bigger dataset in the bootstrapping example
* Convert single-server queue example to more useful multi-server queue
This commit is contained in:
parent
9c82ea7868
commit
d3a8d616fa
1 changed file with 24 additions and 28 deletions
|
@ -425,29 +425,28 @@ Simulations::
|
||||||
>>> def trial():
|
>>> def trial():
|
||||||
... return choices('HT', cum_weights=(0.60, 1.00), k=7).count('H') >= 5
|
... return choices('HT', cum_weights=(0.60, 1.00), k=7).count('H') >= 5
|
||||||
...
|
...
|
||||||
>>> sum(trial() for i in range(10000)) / 10000
|
>>> sum(trial() for i in range(10_000)) / 10_000
|
||||||
0.4169
|
0.4169
|
||||||
|
|
||||||
>>> # Probability of the median of 5 samples being in middle two quartiles
|
>>> # Probability of the median of 5 samples being in middle two quartiles
|
||||||
>>> def trial():
|
>>> def trial():
|
||||||
... return 2500 <= sorted(choices(range(10000), k=5))[2] < 7500
|
... return 2_500 <= sorted(choices(range(10_000), k=5))[2] < 7_500
|
||||||
...
|
...
|
||||||
>>> sum(trial() for i in range(10000)) / 10000
|
>>> sum(trial() for i in range(10_000)) / 10_000
|
||||||
0.7958
|
0.7958
|
||||||
|
|
||||||
Example of `statistical bootstrapping
|
Example of `statistical bootstrapping
|
||||||
<https://en.wikipedia.org/wiki/Bootstrapping_(statistics)>`_ using resampling
|
<https://en.wikipedia.org/wiki/Bootstrapping_(statistics)>`_ using resampling
|
||||||
with replacement to estimate a confidence interval for the mean of a sample of
|
with replacement to estimate a confidence interval for the mean of a sample::
|
||||||
size five::
|
|
||||||
|
|
||||||
# http://statistics.about.com/od/Applications/a/Example-Of-Bootstrapping.htm
|
# http://statistics.about.com/od/Applications/a/Example-Of-Bootstrapping.htm
|
||||||
from statistics import fmean as mean
|
from statistics import fmean as mean
|
||||||
from random import choices
|
from random import choices
|
||||||
|
|
||||||
data = 1, 2, 4, 4, 10
|
data = [41, 50, 29, 37, 81, 30, 73, 63, 20, 35, 68, 22, 60, 31, 95]
|
||||||
means = sorted(mean(choices(data, k=5)) for i in range(20))
|
means = sorted(mean(choices(data, k=len(data))) for i in range(100))
|
||||||
print(f'The sample mean of {mean(data):.1f} has a 90% confidence '
|
print(f'The sample mean of {mean(data):.1f} has a 90% confidence '
|
||||||
f'interval from {means[1]:.1f} to {means[-2]:.1f}')
|
f'interval from {means[5]:.1f} to {means[94]:.1f}')
|
||||||
|
|
||||||
Example of a `resampling permutation test
|
Example of a `resampling permutation test
|
||||||
<https://en.wikipedia.org/wiki/Resampling_(statistics)#Permutation_tests>`_
|
<https://en.wikipedia.org/wiki/Resampling_(statistics)#Permutation_tests>`_
|
||||||
|
@ -463,7 +462,7 @@ between the effects of a drug versus a placebo::
|
||||||
placebo = [54, 51, 58, 44, 55, 52, 42, 47, 58, 46]
|
placebo = [54, 51, 58, 44, 55, 52, 42, 47, 58, 46]
|
||||||
observed_diff = mean(drug) - mean(placebo)
|
observed_diff = mean(drug) - mean(placebo)
|
||||||
|
|
||||||
n = 10000
|
n = 10_000
|
||||||
count = 0
|
count = 0
|
||||||
combined = drug + placebo
|
combined = drug + placebo
|
||||||
for i in range(n):
|
for i in range(n):
|
||||||
|
@ -476,32 +475,29 @@ between the effects of a drug versus a placebo::
|
||||||
print(f'The one-sided p-value of {count / n:.4f} leads us to reject the null')
|
print(f'The one-sided p-value of {count / n:.4f} leads us to reject the null')
|
||||||
print(f'hypothesis that there is no difference between the drug and the placebo.')
|
print(f'hypothesis that there is no difference between the drug and the placebo.')
|
||||||
|
|
||||||
Simulation of arrival times and service deliveries in a single server queue::
|
Simulation of arrival times and service deliveries for a multiserver queue::
|
||||||
|
|
||||||
|
from heapq import heappush, heappop
|
||||||
from random import expovariate, gauss
|
from random import expovariate, gauss
|
||||||
from statistics import mean, median, stdev
|
from statistics import mean, median, stdev
|
||||||
|
|
||||||
average_arrival_interval = 5.6
|
average_arrival_interval = 5.6
|
||||||
average_service_time = 5.0
|
average_service_time = 15.0
|
||||||
stdev_service_time = 0.5
|
stdev_service_time = 3.5
|
||||||
|
num_servers = 3
|
||||||
|
|
||||||
num_waiting = 0
|
waits = []
|
||||||
arrivals = []
|
arrival_time = 0.0
|
||||||
starts = []
|
servers = [0.0] * num_servers # time when each server becomes available
|
||||||
arrival = service_end = 0.0
|
for i in range(100_000):
|
||||||
for i in range(20000):
|
arrival_time += expovariate(1.0 / average_arrival_interval)
|
||||||
if arrival <= service_end:
|
next_server_available = heappop(servers)
|
||||||
num_waiting += 1
|
wait = max(0.0, next_server_available - arrival_time)
|
||||||
arrival += expovariate(1.0 / average_arrival_interval)
|
waits.append(wait)
|
||||||
arrivals.append(arrival)
|
service_duration = gauss(average_service_time, stdev_service_time)
|
||||||
else:
|
service_completed = arrival_time + wait + service_duration
|
||||||
num_waiting -= 1
|
heappush(servers, service_completed)
|
||||||
service_start = service_end if num_waiting else arrival
|
|
||||||
service_time = gauss(average_service_time, stdev_service_time)
|
|
||||||
service_end = service_start + service_time
|
|
||||||
starts.append(service_start)
|
|
||||||
|
|
||||||
waits = [start - arrival for arrival, start in zip(arrivals, starts)]
|
|
||||||
print(f'Mean wait: {mean(waits):.1f}. Stdev wait: {stdev(waits):.1f}.')
|
print(f'Mean wait: {mean(waits):.1f}. Stdev wait: {stdev(waits):.1f}.')
|
||||||
print(f'Median wait: {median(waits):.1f}. Max wait: {max(waits):.1f}.')
|
print(f'Median wait: {median(waits):.1f}. Max wait: {max(waits):.1f}.')
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue