mirror of
https://github.com/python/cpython.git
synced 2025-08-02 16:13:13 +00:00
bpo-44151: linear_regression() minor API improvements (GH-26199) (GH-26338)
This commit is contained in:
parent
1c454eb2e4
commit
86779878df
3 changed files with 26 additions and 26 deletions
|
@ -76,7 +76,7 @@ These functions calculate statistics regarding relations between two inputs.
|
||||||
========================= =====================================================
|
========================= =====================================================
|
||||||
:func:`covariance` Sample covariance for two variables.
|
:func:`covariance` Sample covariance for two variables.
|
||||||
:func:`correlation` Pearson's correlation coefficient for two variables.
|
:func:`correlation` Pearson's correlation coefficient for two variables.
|
||||||
:func:`linear_regression` Intercept and slope for simple linear regression.
|
:func:`linear_regression` Slope and intercept for simple linear regression.
|
||||||
========================= =====================================================
|
========================= =====================================================
|
||||||
|
|
||||||
|
|
||||||
|
@ -626,24 +626,25 @@ However, for reading convenience, most of the examples show sorted sequences.
|
||||||
|
|
||||||
.. versionadded:: 3.10
|
.. versionadded:: 3.10
|
||||||
|
|
||||||
.. function:: linear_regression(regressor, dependent_variable)
|
.. function:: linear_regression(independent_variable, dependent_variable)
|
||||||
|
|
||||||
Return the intercept and slope of `simple linear regression
|
Return the slope and intercept of `simple linear regression
|
||||||
<https://en.wikipedia.org/wiki/Simple_linear_regression>`_
|
<https://en.wikipedia.org/wiki/Simple_linear_regression>`_
|
||||||
parameters estimated using ordinary least squares. Simple linear
|
parameters estimated using ordinary least squares. Simple linear
|
||||||
regression describes the relationship between *regressor* and
|
regression describes the relationship between an independent variable *x* and
|
||||||
*dependent variable* in terms of this linear function:
|
a dependent variable *y* in terms of this linear function:
|
||||||
|
|
||||||
*dependent_variable = intercept + slope \* regressor + noise*
|
*y = intercept + slope \* x + noise*
|
||||||
|
|
||||||
where ``intercept`` and ``slope`` are the regression parameters that are
|
where ``slope`` and ``intercept`` are the regression parameters that are
|
||||||
estimated, and noise represents the
|
estimated, and noise represents the
|
||||||
variability of the data that was not explained by the linear regression
|
variability of the data that was not explained by the linear regression
|
||||||
(it is equal to the difference between predicted and actual values
|
(it is equal to the difference between predicted and actual values
|
||||||
of dependent variable).
|
of dependent variable).
|
||||||
|
|
||||||
Both inputs must be of the same length (no less than two), and regressor
|
Both inputs must be of the same length (no less than two), and
|
||||||
needs not to be constant; otherwise :exc:`StatisticsError` is raised.
|
the independent variable *x* need not be constant;
|
||||||
|
otherwise :exc:`StatisticsError` is raised.
|
||||||
|
|
||||||
For example, we can use the `release dates of the Monty
|
For example, we can use the `release dates of the Monty
|
||||||
Python films <https://en.wikipedia.org/wiki/Monty_Python#Films>`_, and used
|
Python films <https://en.wikipedia.org/wiki/Monty_Python#Films>`_, and used
|
||||||
|
@ -655,7 +656,7 @@ However, for reading convenience, most of the examples show sorted sequences.
|
||||||
|
|
||||||
>>> year = [1971, 1975, 1979, 1982, 1983]
|
>>> year = [1971, 1975, 1979, 1982, 1983]
|
||||||
>>> films_total = [1, 2, 3, 4, 5]
|
>>> films_total = [1, 2, 3, 4, 5]
|
||||||
>>> intercept, slope = linear_regression(year, films_total)
|
>>> slope, intercept = linear_regression(year, films_total)
|
||||||
>>> round(intercept + slope * 2019)
|
>>> round(intercept + slope * 2019)
|
||||||
16
|
16
|
||||||
|
|
||||||
|
|
|
@ -94,7 +94,7 @@ for two inputs:
|
||||||
>>> correlation(x, y) #doctest: +ELLIPSIS
|
>>> correlation(x, y) #doctest: +ELLIPSIS
|
||||||
0.31622776601...
|
0.31622776601...
|
||||||
>>> linear_regression(x, y) #doctest:
|
>>> linear_regression(x, y) #doctest:
|
||||||
LinearRegression(intercept=1.5, slope=0.1)
|
LinearRegression(slope=0.1, intercept=1.5)
|
||||||
|
|
||||||
|
|
||||||
Exceptions
|
Exceptions
|
||||||
|
@ -919,18 +919,18 @@ def correlation(x, y, /):
|
||||||
raise StatisticsError('at least one of the inputs is constant')
|
raise StatisticsError('at least one of the inputs is constant')
|
||||||
|
|
||||||
|
|
||||||
LinearRegression = namedtuple('LinearRegression', ['intercept', 'slope'])
|
LinearRegression = namedtuple('LinearRegression', ('slope', 'intercept'))
|
||||||
|
|
||||||
|
|
||||||
def linear_regression(regressor, dependent_variable, /):
|
def linear_regression(x, y, /):
|
||||||
"""Intercept and slope for simple linear regression
|
"""Intercept and slope for simple linear regression
|
||||||
|
|
||||||
Return the intercept and slope of simple linear regression
|
Return the slope and intercept of simple linear regression
|
||||||
parameters estimated using ordinary least squares. Simple linear
|
parameters estimated using ordinary least squares. Simple linear
|
||||||
regression describes relationship between *regressor* and
|
regression describes the relationship between *x* and
|
||||||
*dependent variable* in terms of linear function:
|
*y* in terms of a linear function:
|
||||||
|
|
||||||
dependent_variable = intercept + slope * regressor + noise
|
y = intercept + slope * x + noise
|
||||||
|
|
||||||
where *intercept* and *slope* are the regression parameters that are
|
where *intercept* and *slope* are the regression parameters that are
|
||||||
estimated, and noise represents the variability of the data that was
|
estimated, and noise represents the variability of the data that was
|
||||||
|
@ -940,19 +940,18 @@ def linear_regression(regressor, dependent_variable, /):
|
||||||
|
|
||||||
The parameters are returned as a named tuple.
|
The parameters are returned as a named tuple.
|
||||||
|
|
||||||
>>> regressor = [1, 2, 3, 4, 5]
|
>>> x = [1, 2, 3, 4, 5]
|
||||||
>>> noise = NormalDist().samples(5, seed=42)
|
>>> noise = NormalDist().samples(5, seed=42)
|
||||||
>>> dependent_variable = [2 + 3 * regressor[i] + noise[i] for i in range(5)]
|
>>> y = [2 + 3 * x[i] + noise[i] for i in range(5)]
|
||||||
>>> linear_regression(regressor, dependent_variable) #doctest: +ELLIPSIS
|
>>> linear_regression(x, y) #doctest: +ELLIPSIS
|
||||||
LinearRegression(intercept=1.75684970486..., slope=3.09078914170...)
|
LinearRegression(slope=3.09078914170..., intercept=1.75684970486...)
|
||||||
|
|
||||||
"""
|
"""
|
||||||
n = len(regressor)
|
n = len(x)
|
||||||
if len(dependent_variable) != n:
|
if len(y) != n:
|
||||||
raise StatisticsError('linear regression requires that both inputs have same number of data points')
|
raise StatisticsError('linear regression requires that both inputs have same number of data points')
|
||||||
if n < 2:
|
if n < 2:
|
||||||
raise StatisticsError('linear regression requires at least two data points')
|
raise StatisticsError('linear regression requires at least two data points')
|
||||||
x, y = regressor, dependent_variable
|
|
||||||
xbar = fsum(x) / n
|
xbar = fsum(x) / n
|
||||||
ybar = fsum(y) / n
|
ybar = fsum(y) / n
|
||||||
sxy = fsum((xi - xbar) * (yi - ybar) for xi, yi in zip(x, y))
|
sxy = fsum((xi - xbar) * (yi - ybar) for xi, yi in zip(x, y))
|
||||||
|
@ -960,9 +959,9 @@ def linear_regression(regressor, dependent_variable, /):
|
||||||
try:
|
try:
|
||||||
slope = sxy / s2x
|
slope = sxy / s2x
|
||||||
except ZeroDivisionError:
|
except ZeroDivisionError:
|
||||||
raise StatisticsError('regressor is constant')
|
raise StatisticsError('x is constant')
|
||||||
intercept = ybar - slope * xbar
|
intercept = ybar - slope * xbar
|
||||||
return LinearRegression(intercept=intercept, slope=slope)
|
return LinearRegression(slope=slope, intercept=intercept)
|
||||||
|
|
||||||
|
|
||||||
## Normal Distribution #####################################################
|
## Normal Distribution #####################################################
|
||||||
|
|
|
@ -2480,7 +2480,7 @@ class TestLinearRegression(unittest.TestCase):
|
||||||
([1, 2, 3], [21, 22, 23], 20, 1),
|
([1, 2, 3], [21, 22, 23], 20, 1),
|
||||||
([1, 2, 3], [5.1, 5.2, 5.3], 5, 0.1),
|
([1, 2, 3], [5.1, 5.2, 5.3], 5, 0.1),
|
||||||
]:
|
]:
|
||||||
intercept, slope = statistics.linear_regression(x, y)
|
slope, intercept = statistics.linear_regression(x, y)
|
||||||
self.assertAlmostEqual(intercept, true_intercept)
|
self.assertAlmostEqual(intercept, true_intercept)
|
||||||
self.assertAlmostEqual(slope, true_slope)
|
self.assertAlmostEqual(slope, true_slope)
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue