@@ -11,23 +11,88 @@ def __init__(self, fit_intercept=True):

        Notes
        -----
-        Given data matrix *X* and target vector *y*, the maximum-likelihood estimate
-        for the regression coefficients, :math:`\ \beta`, is:
+        Given data matrix **X** and target vector **y**, the maximum-likelihood
+        estimate for the regression coefficients, :math:`\beta`, is:

        .. math::

-            \hat{\beta} =
-                \left(\mathbf{X}^\top \mathbf{X}\right)^{-1} \mathbf{X}^\top \mathbf{y}
+            \hat{\beta} = \Sigma^{-1} \mathbf{X}^\top \mathbf{y}
+
+        where :math:`\Sigma^{-1} = (\mathbf{X}^\top \mathbf{X})^{-1}`.

        Parameters
        ----------
        fit_intercept : bool
-            Whether to fit an additional intercept term in addition to the
-            model coefficients. Default is True.
+            Whether to fit an intercept term in addition to the model
+            coefficients. Default is True.
        """
        self.beta = None
+        self.sigma_inv = None
        self.fit_intercept = fit_intercept

+        self._is_fit = False
+
+    def update(self, x, y):
+        r"""
+        Incrementally update the least-squares coefficients for a new example
+        via recursive least-squares (RLS) [1]_.
+
+        Notes
+        -----
+        The RLS algorithm [2]_ is used to efficiently update the regression
+        parameters as new examples become available. For a new example
+        :math:`(\mathbf{x}_{t+1}, \mathbf{y}_{t+1})`, the parameter updates are
+
+        .. math::
+
+            \beta_{t+1} = \left(
+                \mathbf{X}_{1:t}^\top \mathbf{X}_{1:t} +
+                \mathbf{x}_{t+1}^\top \mathbf{x}_{t+1} \right)^{-1}
+                \left( \mathbf{X}_{1:t}^\top \mathbf{Y}_{1:t} +
+                \mathbf{x}_{t+1}^\top \mathbf{y}_{t+1} \right)
+
+        where :math:`\beta_{t+1}` are the updated regression coefficients,
+        :math:`\mathbf{X}_{1:t}` and :math:`\mathbf{Y}_{1:t}` are the matrices
+        of examples and targets observed from timestep 1 to *t*.
+
+        To perform the above update efficiently, the RLS algorithm makes use of
+        the Sherman-Morrison formula [3]_ to avoid re-inverting the covariance
+        matrix on each new update.
+
+        References
+        ----------
+        .. [1] Gauss, C. F. (1821). *Theoria combinationis observationum
+           erroribus minimis obnoxiae*, Werke, 4. Göttingen.
+        .. [2] https://en.wikipedia.org/wiki/Recursive_least_squares_filter
+        .. [3] https://en.wikipedia.org/wiki/Sherman%E2%80%93Morrison_formula
+
+        Parameters
+        ----------
+        x : :py:class:`ndarray <numpy.ndarray>` of shape `(1, M)`
+            A single example with `M` features
+        y : :py:class:`ndarray <numpy.ndarray>` of shape `(1, K)`
+            A `K`-dimensional target vector for the current example
+        """
+        if not self._is_fit:
+            raise RuntimeError("You must call the `fit` method before calling `update`")
+
+        x, y = np.atleast_2d(x), np.atleast_2d(y)
+        beta, S_inv = self.beta, self.sigma_inv
+
+        X1, Y1 = x.shape[0], y.shape[0]
+        err_str = f"First dimension of x and y must be 1, but got {X1} and {Y1}"
+        assert X1 == Y1 == 1, err_str
+
+        # convert x to a design vector if we're fitting an intercept
+        if self.fit_intercept:
+            x = np.c_[1, x]
+
+        # update the inverse of the covariance matrix via Sherman-Morrison
+        S_inv -= (S_inv @ x.T @ x @ S_inv) / (1 + x @ S_inv @ x.T)
+
+        # update the model coefficients
+        beta += S_inv @ x.T @ (y - x @ beta)
+
    def fit(self, X, y):
        """
        Fit the regression coefficients via maximum likelihood.
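
The `S_inv -= ...` line above is the Sherman-Morrison rank-1 identity applied to the inverse covariance. A quick numpy check of that identity, separate from this commit (the data, seed, and shapes here are illustrative assumptions):

    import numpy as np

    rng = np.random.default_rng(0)
    X = rng.normal(size=(50, 3))      # design matrix for the first 50 examples
    S_inv = np.linalg.pinv(X.T @ X)   # inverse covariance, as `fit` computes it

    x_new = rng.normal(size=(1, 3))   # one new example, shape (1, M)

    # rank-1 Sherman-Morrison update, mirroring the `S_inv -= ...` line in `update`
    S_inv_sm = S_inv - (S_inv @ x_new.T @ x_new @ S_inv) / (1 + x_new @ S_inv @ x_new.T)

    # direct re-inversion after appending the new row agrees with the update
    X_aug = np.vstack([X, x_new])
    assert np.allclose(S_inv_sm, np.linalg.inv(X_aug.T @ X_aug))
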
@@ -44,8 +109,10 @@ def fit(self, X, y):
        if self.fit_intercept:
            X = np.c_[np.ones(X.shape[0]), X]

-        pseudo_inverse = np.linalg.inv(X.T @ X) @ X.T
-        self.beta = np.dot(pseudo_inverse, y)
+        self.sigma_inv = np.linalg.pinv(X.T @ X)
+        self.beta = np.atleast_2d(self.sigma_inv @ X.T @ y)
+
+        self._is_fit = True

    def predict(self, X):
        """
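
With `sigma_inv` cached by `fit` and `_is_fit` set, streaming examples through `update` should reproduce a batch refit. A hedged usage sketch (it assumes the `LinearRegression` class defined in this module is in scope and that targets are passed as 2-D column vectors):

    import numpy as np

    rng = np.random.default_rng(1)
    X, X_new = rng.normal(size=(100, 5)), rng.normal(size=(10, 5))
    beta_true = rng.normal(size=(5, 1))
    y, y_new = X @ beta_true, X_new @ beta_true   # targets kept 2-D, shape (N, 1)

    lm = LinearRegression(fit_intercept=True)     # class defined in this module
    lm.fit(X, y)
    for x_i, y_i in zip(X_new, y_new):
        lm.update(x_i, y_i)                       # one RLS step per new example

    # refitting from scratch on all 110 examples gives (numerically) the same beta
    lm_full = LinearRegression(fit_intercept=True)
    lm_full.fit(np.vstack([X, X_new]), np.vstack([y, y_new]))
    assert np.allclose(lm.beta, lm_full.beta)
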
@@ -166,22 +233,22 @@ def __init__(self, penalty="l2", gamma=0, fit_intercept=True):
            \left(
                \sum_{i=1}^N y_i \log(\hat{y}_i) +
                    (1-y_i) \log(1-\hat{y}_i)
-            \right) - R(\mathbf{b}, \gamma)
+            \right) - R(\mathbf{b}, \gamma)
            \right]
-
+
        where
-
+
        .. math::
-
+
            R(\mathbf{b}, \gamma) = \left\{
                \begin{array}{lr}
                    \frac{\gamma}{2} ||\mathbf{b}||_2^2 & :\texttt{ penalty = 'l2'}\\
                    \gamma ||\mathbf{b}||_1 & :\texttt{ penalty = 'l1'}
                \end{array}
                \right.
-
-        is a regularization penalty, :math:`\gamma` is a regularization weight,
-        `N` is the number of examples in **y**, and **b** is the vector of model
+
+        is a regularization penalty, :math:`\gamma` is a regularization weight,
+        `N` is the number of examples in **y**, and **b** is the vector of model
        coefficients.

        Parameters
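
The two branches of R(b, gamma) above differ only in the norm and its scaling. A small standalone sketch of that penalty (the helper name `_reg_penalty` is made up here and is not part of the class):

    import numpy as np

    def _reg_penalty(b, gamma, penalty="l2"):
        # R(b, gamma): (gamma / 2) * ||b||_2^2 for 'l2', gamma * ||b||_1 for 'l1'
        if penalty == "l2":
            return (gamma / 2) * np.linalg.norm(b, ord=2) ** 2
        return gamma * np.linalg.norm(b, ord=1)

    b = np.array([0.5, -2.0, 1.0])
    print(_reg_penalty(b, gamma=0.1, penalty="l2"))  # 0.05 * 5.25 = 0.2625
    print(_reg_penalty(b, gamma=0.1, penalty="l1"))  # 0.1 * 3.5 = 0.35
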
@@ -251,10 +318,10 @@ def _NLL(self, X, y, y_pred):
            \right]
        """
        N, M = X.shape
-        beta, gamma = self.beta, self.gamma
+        beta, gamma = self.beta, self.gamma
        order = 2 if self.penalty == "l2" else 1
        norm_beta = np.linalg.norm(beta, ord=order)
-
+
        nll = -np.log(y_pred[y == 1]).sum() - np.log(1 - y_pred[y == 0]).sum()
        penalty = (gamma / 2) * norm_beta ** 2 if order == 2 else gamma * norm_beta
        return (penalty + nll) / N
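
The masked indexing in `_NLL` is the per-example cross-entropy sum from the docstring, split over the positive and negative examples. A toy check of that equivalence (numbers are illustrative only; the penalty term is omitted, i.e. gamma = 0):

    import numpy as np

    y = np.array([1, 0, 1, 1, 0])                 # toy binary targets
    y_pred = np.array([0.9, 0.2, 0.7, 0.6, 0.1])  # toy predicted probabilities

    # masked form used in `_NLL` above
    nll_masked = -np.log(y_pred[y == 1]).sum() - np.log(1 - y_pred[y == 0]).sum()

    # equivalent per-example cross-entropy sum from the docstring
    nll_sum = -(y * np.log(y_pred) + (1 - y) * np.log(1 - y_pred)).sum()

    assert np.isclose(nll_masked, nll_sum)
    print(nll_masked / len(y))                    # mean NLL with no penalty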