Eliminate duplicated calculations and unnecessary work for linear regression (GH-25922)

This commit is contained in:
Raymond Hettinger 2021-05-06 07:43:13 -07:00 committed by GitHub
parent e8525567dd
commit 55b78ce3c4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23

View file

@@ -952,11 +952,16 @@ def linear_regression(regressor, dependent_variable, /):
raise StatisticsError('linear regression requires that both inputs have same number of data points')
if n < 2:
raise StatisticsError('linear regression requires at least two data points')
x, y = regressor, dependent_variable
xbar = fsum(x) / n
ybar = fsum(y) / n
sxy = fsum((xi - xbar) * (yi - ybar) for xi, yi in zip(x, y))
s2x = fsum((xi - xbar) ** 2.0 for xi in x)
try:
slope = covariance(regressor, dependent_variable) / variance(regressor)
slope = sxy / s2x
except ZeroDivisionError:
raise StatisticsError('regressor is constant')
intercept = fmean(dependent_variable) - slope * fmean(regressor)
intercept = ybar - slope * xbar
return LinearRegression(intercept=intercept, slope=slope)