Skip to content

Commit 2f3a878

Browse files
authored
bpo-44151: linear_regression() minor API improvements (GH-26199)
1 parent 8450e8a commit 2f3a878

File tree

3 files changed

+26
-26
lines changed

3 files changed

+26
-26
lines changed

Doc/library/statistics.rst

Lines changed: 11 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -76,7 +76,7 @@ These functions calculate statistics regarding relations between two inputs.
7676
========================= =====================================================
7777
:func:`covariance` Sample covariance for two variables.
7878
:func:`correlation` Pearson's correlation coefficient for two variables.
79-
:func:`linear_regression` Intercept and slope for simple linear regression.
79+
:func:`linear_regression` Slope and intercept for simple linear regression.
8080
========================= =====================================================
8181

8282

@@ -643,24 +643,25 @@ However, for reading convenience, most of the examples show sorted sequences.
643643

644644
.. versionadded:: 3.10
645645

646-
.. function:: linear_regression(regressor, dependent_variable)
646+
.. function:: linear_regression(independent_variable, dependent_variable)
647647

648-
Return the intercept and slope of `simple linear regression
648+
Return the slope and intercept of `simple linear regression
649649
<https://en.wikipedia.org/wiki/Simple_linear_regression>`_
650650
parameters estimated using ordinary least squares. Simple linear
651-
regression describes the relationship between *regressor* and
652-
*dependent variable* in terms of this linear function:
651+
regression describes the relationship between an independent variable *x* and
652+
a dependent variable *y* in terms of this linear function:
653653

654-
*dependent_variable = intercept + slope \* regressor + noise*
654+
*y = intercept + slope \* x + noise*
655655

656-
where ``intercept`` and ``slope`` are the regression parameters that are
656+
where ``slope`` and ``intercept`` are the regression parameters that are
657657
estimated, and noise represents the
658658
variability of the data that was not explained by the linear regression
659659
(it is equal to the difference between predicted and actual values
660660
of the dependent variable).
661661

662-
Both inputs must be of the same length (no less than two), and regressor
663-
needs not to be constant; otherwise :exc:`StatisticsError` is raised.
662+
Both inputs must be of the same length (no less than two), and
663+
the independent variable *x* cannot be constant;
664+
otherwise :exc:`StatisticsError` is raised.
664665

665666
For example, we can use the `release dates of the Monty
666667
Python films <https://en.wikipedia.org/wiki/Monty_Python#Films>`_, and used
@@ -672,7 +673,7 @@ However, for reading convenience, most of the examples show sorted sequences.
672673

673674
>>> year = [1971, 1975, 1979, 1982, 1983]
674675
>>> films_total = [1, 2, 3, 4, 5]
675-
>>> intercept, slope = linear_regression(year, films_total)
676+
>>> slope, intercept = linear_regression(year, films_total)
676677
>>> round(intercept + slope * 2019)
677678
16
678679

Lib/statistics.py

Lines changed: 14 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@
9494
>>> correlation(x, y) #doctest: +ELLIPSIS
9595
0.31622776601...
9696
>>> linear_regression(x, y) #doctest:
97-
LinearRegression(intercept=1.5, slope=0.1)
97+
LinearRegression(slope=0.1, intercept=1.5)
9898
9999
100100
Exceptions
@@ -932,18 +932,18 @@ def correlation(x, y, /):
932932
raise StatisticsError('at least one of the inputs is constant')
933933

934934

935-
LinearRegression = namedtuple('LinearRegression', ['intercept', 'slope'])
935+
LinearRegression = namedtuple('LinearRegression', ('slope', 'intercept'))
936936

937937

938-
def linear_regression(regressor, dependent_variable, /):
938+
def linear_regression(x, y, /):
939939
"""Slope and intercept for simple linear regression
940940
941941
Return the slope and intercept of simple linear regression
942942
parameters estimated using ordinary least squares. Simple linear
943-
regression describes relationship between *regressor* and
944-
*dependent variable* in terms of linear function:
943+
regression describes the relationship between *x* and
944+
*y* in terms of this linear function:
945945
946-
dependent_variable = intercept + slope * regressor + noise
946+
y = intercept + slope * x + noise
947947
948948
where *intercept* and *slope* are the regression parameters that are
949949
estimated, and noise represents the variability of the data that was
@@ -953,29 +953,28 @@ def linear_regression(regressor, dependent_variable, /):
953953
954954
The parameters are returned as a named tuple.
955955
956-
>>> regressor = [1, 2, 3, 4, 5]
956+
>>> x = [1, 2, 3, 4, 5]
957957
>>> noise = NormalDist().samples(5, seed=42)
958-
>>> dependent_variable = [2 + 3 * regressor[i] + noise[i] for i in range(5)]
959-
>>> linear_regression(regressor, dependent_variable) #doctest: +ELLIPSIS
960-
LinearRegression(intercept=1.75684970486..., slope=3.09078914170...)
958+
>>> y = [2 + 3 * x[i] + noise[i] for i in range(5)]
959+
>>> linear_regression(x, y) #doctest: +ELLIPSIS
960+
LinearRegression(slope=3.09078914170..., intercept=1.75684970486...)
961961
962962
"""
963-
n = len(regressor)
964-
if len(dependent_variable) != n:
963+
n = len(x)
964+
if len(y) != n:
965965
raise StatisticsError('linear regression requires that both inputs have same number of data points')
966966
if n < 2:
967967
raise StatisticsError('linear regression requires at least two data points')
968-
x, y = regressor, dependent_variable
969968
xbar = fsum(x) / n
970969
ybar = fsum(y) / n
971970
sxy = fsum((xi - xbar) * (yi - ybar) for xi, yi in zip(x, y))
972971
s2x = fsum((xi - xbar) ** 2.0 for xi in x)
973972
try:
974973
slope = sxy / s2x # equivalent to: covariance(x, y) / variance(x)
975974
except ZeroDivisionError:
976-
raise StatisticsError('regressor is constant')
975+
raise StatisticsError('x is constant')
977976
intercept = ybar - slope * xbar
978-
return LinearRegression(intercept=intercept, slope=slope)
977+
return LinearRegression(slope=slope, intercept=intercept)
979978

980979

981980
## Normal Distribution #####################################################

Lib/test/test_statistics.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2501,7 +2501,7 @@ def test_results(self):
25012501
([1, 2, 3], [21, 22, 23], 20, 1),
25022502
([1, 2, 3], [5.1, 5.2, 5.3], 5, 0.1),
25032503
]:
2504-
intercept, slope = statistics.linear_regression(x, y)
2504+
slope, intercept = statistics.linear_regression(x, y)
25052505
self.assertAlmostEqual(intercept, true_intercept)
25062506
self.assertAlmostEqual(slope, true_slope)
25072507

0 commit comments

Comments
 (0)