mirror of
https://github.com/python/cpython.git
synced 2025-08-31 14:07:50 +00:00
gh-115532: Add kernel density estimation to the statistics module (gh-115863)
This commit is contained in:
parent
6a3236fe2e
commit
6d34eb0e36
5 changed files with 285 additions and 41 deletions
|
@ -2353,6 +2353,66 @@ class TestGeometricMean(unittest.TestCase):
|
|||
self.assertAlmostEqual(actual_mean, expected_mean, places=5)
|
||||
|
||||
|
||||
class TestKDE(unittest.TestCase):
|
||||
|
||||
def test_kde(self):
|
||||
kde = statistics.kde
|
||||
StatisticsError = statistics.StatisticsError
|
||||
|
||||
kernels = ['normal', 'gauss', 'logistic', 'sigmoid', 'rectangular',
|
||||
'uniform', 'triangular', 'parabolic', 'epanechnikov',
|
||||
'quartic', 'biweight', 'triweight', 'cosine']
|
||||
|
||||
sample = [-2.1, -1.3, -0.4, 1.9, 5.1, 6.2]
|
||||
|
||||
# The approximate integral of a PDF should be close to 1.0
|
||||
|
||||
def integrate(func, low, high, steps=10_000):
|
||||
"Numeric approximation of a definite function integral."
|
||||
dx = (high - low) / steps
|
||||
midpoints = (low + (i + 1/2) * dx for i in range(steps))
|
||||
return sum(map(func, midpoints)) * dx
|
||||
|
||||
for kernel in kernels:
|
||||
with self.subTest(kernel=kernel):
|
||||
f_hat = kde(sample, h=1.5, kernel=kernel)
|
||||
area = integrate(f_hat, -20, 20)
|
||||
self.assertAlmostEqual(area, 1.0, places=4)
|
||||
|
||||
# Check error cases
|
||||
|
||||
with self.assertRaises(StatisticsError):
|
||||
kde([], h=1.0) # Empty dataset
|
||||
with self.assertRaises(TypeError):
|
||||
kde(['abc', 'def'], 1.5) # Non-numeric data
|
||||
with self.assertRaises(TypeError):
|
||||
kde(iter(sample), 1.5) # Data is not a sequence
|
||||
with self.assertRaises(StatisticsError):
|
||||
kde(sample, h=0.0) # Zero bandwidth
|
||||
with self.assertRaises(StatisticsError):
|
||||
kde(sample, h=0.0) # Negative bandwidth
|
||||
with self.assertRaises(TypeError):
|
||||
kde(sample, h='str') # Wrong bandwidth type
|
||||
with self.assertRaises(StatisticsError):
|
||||
kde(sample, h=1.0, kernel='bogus') # Invalid kernel
|
||||
|
||||
# Test name and docstring of the generated function
|
||||
|
||||
h = 1.5
|
||||
kernel = 'cosine'
|
||||
f_hat = kde(sample, h, kernel)
|
||||
self.assertEqual(f_hat.__name__, 'pdf')
|
||||
self.assertIn(kernel, f_hat.__doc__)
|
||||
self.assertIn(str(h), f_hat.__doc__)
|
||||
|
||||
# Test closed interval for the support boundaries.
|
||||
# In particular, 'uniform' should non-zero at the boundaries.
|
||||
|
||||
f_hat = kde([0], 1.0, 'uniform')
|
||||
self.assertEqual(f_hat(-1.0), 1/2)
|
||||
self.assertEqual(f_hat(1.0), 1/2)
|
||||
|
||||
|
||||
class TestQuantiles(unittest.TestCase):
|
||||
|
||||
def test_specific_cases(self):
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue