From 8cbe95f3f858d71c13d2d8b679629098b2d90647 Mon Sep 17 00:00:00 2001 From: marekwadinger Date: Thu, 14 Mar 2024 12:10:52 +0900 Subject: [PATCH] UPDATE: initialization logic in oSVD and oPCA; exact SVD computation --- river/decomposition/odmd.py | 22 ++---- river/decomposition/opca.py | 48 ++++++++---- river/decomposition/osvd.py | 130 +++++++++++++++++++++++-------- river/decomposition/test_odmd.py | 4 +- 4 files changed, 143 insertions(+), 61 deletions(-) diff --git a/river/decomposition/odmd.py b/river/decomposition/odmd.py index aa9473c009..54c7a434b0 100644 --- a/river/decomposition/odmd.py +++ b/river/decomposition/odmd.py @@ -308,33 +308,27 @@ def update( if isinstance(x, dict): self.feature_names_in_ = list(x.keys()) x = np.array(list(x.values())) - if len(x.shape) == 1: - x_ = x.reshape(1, -1) - else: - x_ = x + x = x.reshape(1, -1) if isinstance(y, dict): assert self.feature_names_in_ == list(y.keys()) y = np.array(list(y.values())) - if len(y.shape) == 1: - y_ = y.reshape(1, -1) - else: - y_ = y + y = y.reshape(1, -1) # Initialize properties which depend on the shape of x if self.n_seen == 0: - self.m = len(x) + self.m = x.shape[1] self._init_update() # Collect buffer of past snapshots to compute xi if self._Y.shape[0] <= self.n_seen: - self._Y = np.vstack([self._Y, y_]) + self._Y = np.vstack([self._Y, y]) elif self._Y.shape[0] > self.n_seen: self._Y = self._Y[self.n_seen :, :] # Initialize A and P with first self.initialize snapshot pairs if bool(self.initialize) and self.n_seen <= self.initialize - 1: - self._X_init[self.n_seen, :] = x_ - self._Y_init[self.n_seen, :] = y_ + self._X_init[self.n_seen, :] = x + self._Y_init[self.n_seen, :] = y if self.n_seen == self.initialize - 1: self.learn_many(self._X_init, self._Y_init) # revert the number of seen samples to avoid doubling @@ -346,9 +340,9 @@ def update( alpha = 1.0 / epsilon self._P = alpha * np.identity(self.r) if self.r < self.m: - x_, y_ = self._truncate_w_svd(x_, y_, svd_modify="update") + x, y = self._truncate_w_svd(x, y, svd_modify="update") - self._update_A_P(x_, y_, 1.0) + self._update_A_P(x, y, 1.0) self.n_seen += 1 diff --git a/river/decomposition/opca.py b/river/decomposition/opca.py index 4251ac9e0d..9acdfd6904 100644 --- a/river/decomposition/opca.py +++ b/river/decomposition/opca.py @@ -66,7 +66,7 @@ class OnlinePCA(Transformer): def __init__( self, - n_components: int, + n_components: int = 2, b: int | None = None, lambda_: float = 0.0, sigma: float = 0.0, @@ -74,11 +74,11 @@ def __init__( seed: int | None = None, ): self.n_components = int(n_components) - if b is None: - # Default to O(r) to maximize the efficiency [Eftekhari, et al. (2019)] - b = n_components + # Default maximizes the efficiency [Eftekhari, et al. (2019)] + if not b: + b = self.n_components else: - assert b >= n_components + b = int(b) self.b = b assert lambda_ >= 0 self.lambda_ = lambda_ @@ -90,10 +90,11 @@ def __init__( self.feature_names_in_: list[str] self.n_features_in_: int # n [Eftekhari, et al. (2019)] self.n_seen: int = 0 # k [Eftekhari, et al. (2019)] - self.Y_k: deque = deque(maxlen=b) - self.P_omega_k: deque = deque(maxlen=b) + self.Y_k: deque + self.P_omega_k: deque self.S_hat: np.ndarray - np.random.seed(seed) + self.seed = seed + np.random.seed(self.seed) def learn_one(self, x: dict | np.ndarray): """_summary_ @@ -109,10 +110,21 @@ def learn_one(self, x: dict | np.ndarray): set(x.keys()) ) x = np.array(list(x.values())) + # TODO: align with OnlineSVD + # x = x.reshape(1, -1) + if self.n_seen == 0: - self.n_features_in_ = len(x) - # r_mat = np.random.randn(self.n_features_in_, self.n_components) - # self.S_hat, _ = np.linalg.qr(r_mat) + self.n_features_in_ = x.shape[0] + if self.n_components == 0: + self.n_components = self.n_features_in_ + # Make b feasible if not set and learn_one is called first + if not self.b: + self.b = self.n_components + self.Y_k = deque(maxlen=self.b) + self.P_omega_k = deque(maxlen=self.b) + # Initialize S_hat with random orthonormal matrix for transform_one + r_mat = np.random.randn(self.n_features_in_, self.n_components) + self.S_hat, _ = np.linalg.qr(r_mat) # Random index set over which s_t is observed omega_t = ~np.isnan(x) # (n_features_in_,) @@ -124,7 +136,8 @@ def learn_one(self, x: dict | np.ndarray): self.P_omega_k.append(P_omega_t) if len(self.Y_k) == self.b: - if not hasattr(self, "S_hat"): + # Reinitialize S_hat now when deque is full + if self.n_seen == self.b - 1: # Let S_hat \in \mathbb{R}^{n \times b} be the _, _, V = np.linalg.svd( np.array(self.Y_k), full_matrices=False @@ -174,5 +187,14 @@ def learn_one(self, x: dict | np.ndarray): def transform_one(self, x: dict | np.ndarray) -> dict: if isinstance(x, dict): x = np.array(list(x.values())) + # If transform one is called before any learning has been done + # TODO: consider raising an runtime error + if not hasattr(self, "S_hat"): + return dict( + zip( + range(self.n_components), + np.zeros(self.n_components), + ) + ) x = x @ self.S_hat - return dict(zip(range(self.n_components), x)) # + return dict(zip(range(self.n_components), x)) diff --git a/river/decomposition/osvd.py b/river/decomposition/osvd.py index e64354bee3..e8a817d614 100644 --- a/river/decomposition/osvd.py +++ b/river/decomposition/osvd.py @@ -125,13 +125,17 @@ def __init__( self._S: np.ndarray self._V: np.ndarray + self.n_seen: int = 0 + def _orthogonalize(self, U_, Sigma_, V_): UQ, UR = np.linalg.qr(U_, mode="complete") VQ, VR = np.linalg.qr(V_, mode="complete") - tU_, tSigma_, tV_ = sp.sparse.linalg.svds( - (UR @ np.diag(Sigma_) @ VR), k=self.n_components - ) - tU_, tSigma_, tV_ = self._sort_svd(tU_, tSigma_, tV_) + A = UR @ np.diag(Sigma_) @ VR + if 0 < self.n_components and self.n_components < min(A.shape): + tU_, tSigma_, tV_ = sp.sparse.linalg.svds(A, k=self.n_components) + tU_, tSigma_, tV_ = self._sort_svd(tU_, tSigma_, tV_) + else: + tU_, tSigma_, tV_ = np.linalg.svd(A, full_matrices=False) return UQ @ tU_, tSigma_, VQ @ tV_ def _sort_svd(self, U, S, V): @@ -163,28 +167,55 @@ def update(self, x: dict | np.ndarray): self.feature_names_in_ = list(x.keys()) x = np.array(list(x.values())) x = x.reshape(1, -1) - m = (x @ self._U).T - p = x.T - self._U @ m - P, _ = np.linalg.qr(p) - Ra = P.T @ p - b = np.concatenate([np.zeros(self._V.shape[1] - 1), [1]]).reshape( - -1, 1 - ) - n = self._V @ b - q = b - self._V.T @ n - Q, _ = np.linalg.qr(q) - z = np.zeros_like(m.T) - K = np.block([[np.diag(self._S), m], [z, Ra]]) + if self.n_seen == 0: + self.n_features_in_ = x.shape[1] + if self.n_components == 0: + self.n_components = self.n_features_in_ + # Make initialize feasible if not set and learn_one is called first + if not self.initialize: + self.initialize = self.n_components + self._X_init = np.empty((self.initialize, self.n_features_in_)) + # Initialize _U with random orthonormal matrix for transform_one + r_mat = np.random.randn(self.n_features_in_, self.n_components) + self._U, _ = np.linalg.qr(r_mat) + + # Initialize if called without learn_many + if bool(self.initialize) and self.n_seen <= self.initialize - 1: + self._X_init[self.n_seen, :] = x + if self.n_seen == self.initialize - 1: + self.learn_many(self._X_init) + # revert the number of seen samples to avoid doubling + self.n_seen -= self._X_init.shape[0] + else: + m = (x @ self._U).T + p = x.T - self._U @ m + P, _ = np.linalg.qr(p) + Ra = P.T @ p + b = np.concatenate([np.zeros(self._V.shape[1] - 1), [1]]).reshape( + -1, 1 + ) + n = self._V @ b + q = b - self._V.T @ n + Q, _ = np.linalg.qr(q) + + z = np.zeros_like(m.T) + K = np.block([[np.diag(self._S), m], [z, Ra]]) + + if 0 < self.n_components and self.n_components < min(K.shape): + U_, Sigma_, V_ = sp.sparse.linalg.svds(K, k=self.n_components) + U_, Sigma_, V_ = self._sort_svd(U_, Sigma_, V_) + else: + U_, Sigma_, V_ = np.linalg.svd(K, full_matrices=False) - U_, Sigma_, V_ = sp.sparse.linalg.svds(K, k=self.n_components) - U_, Sigma_, V_ = self._sort_svd(U_, Sigma_, V_) - U_ = np.column_stack((self._U, P)) @ U_ - V_ = V_ @ np.row_stack((self._V, Q.T)) - # V_ = V_[:, : self.n_components] @ self._V - if self.force_orth: - U_, Sigma_, V_ = self._orthogonalize(U_, Sigma_, V_) - self._U, self._S, self._V = U_, Sigma_, V_ + U_ = np.column_stack((self._U, P)) @ U_ + V_ = V_ @ np.row_stack((self._V, Q.T)) + # V_ = V_[:, : self.n_components] @ self._V + if self.force_orth: + U_, Sigma_, V_ = self._orthogonalize(U_, Sigma_, V_) + self._U, self._S, self._V = U_, Sigma_, V_ + + self.n_seen += 1 def revert(self, x: dict | np.ndarray): if isinstance(x, dict): @@ -204,8 +235,13 @@ def revert(self, x: dict | np.ndarray): - np.row_stack((n, 0.0)) @ np.row_stack((n, np.sqrt(1 - n.T @ n))).T ) - U_, Sigma_, V_ = sp.sparse.linalg.svds(K, k=self.n_components) - U_, Sigma_, V_ = self._sort_svd(U_, Sigma_, V_) + + if 0 < self.n_components and self.n_components < min(K.shape): + U_, Sigma_, V_ = sp.sparse.linalg.svds(K, k=self.n_components) + U_, Sigma_, V_ = self._sort_svd(U_, Sigma_, V_) + else: + U_, Sigma_, V_ = np.linalg.svd(K, full_matrices=False) + # Since the update is not rank-increasing, we can skip computation of P # otherwise we do U_ = np.column_stack((self._U, P)) @ U_ U_ = self._U @ U_[: self.n_components, :] @@ -227,21 +263,33 @@ def learn_many(self, X: np.ndarray | pd.DataFrame): self.feature_names_in_ = [str(i) for i in range(X.shape[0])] self.n_features_in_ = X.shape[1] + if self.n_seen == 0: + self.n_features_in_ = len(X) + if self.n_components == 0: + self.n_components = self.n_features_in_ + if hasattr(self, "_U") and hasattr(self, "_S") and hasattr(self, "_V"): for x in X: self.learn_one(x.reshape(1, -1)) else: - if self.n_components < self.n_features_in_: + if ( + 0 < self.n_components + and self.n_components < self.n_features_in_ + ): self._U, self._S, self._V = sp.sparse.linalg.svds( X.T, k=self.n_components ) - self._U, self._S, self._V = self._sort_svd(self._U, self._S, self._V) + self._U, self._S, self._V = self._sort_svd( + self._U, self._S, self._V + ) else: self._U, self._S, self._V = np.linalg.svd( X.T, full_matrices=False ) + self.n_seen = X.shape[0] + def transform_one(self, x: dict | np.ndarray) -> dict: if isinstance(x, dict): self.feature_names_in_ = list(x.keys()) @@ -249,14 +297,32 @@ def transform_one(self, x: dict | np.ndarray) -> dict: else: self.feature_names_in_ = [str(i) for i in range(x.shape[0])] - x_ = self._U.T @ x.T - return dict(zip(self.feature_names_in_, x_)) + # If transform one is called before any learning has been done + # TODO: consider raising an runtime error + if not hasattr(self, "_U"): + return dict( + zip( + range(self.n_components), + np.zeros(self.n_components), + ) + ) + + x_ = x @ self._U + return dict(zip(range(self.n_components), x_)) def transform_many(self, X: np.ndarray | pd.DataFrame) -> pd.DataFrame: if isinstance(X, pd.DataFrame): self.feature_names_in_ = list(X.columns) X = X.values + + # If transform one is called before any learning has been done + # TODO: consider raising an runtime error + if not hasattr(self, "_U"): + return pd.DataFrame( + np.zeros((X.shape[0], self.n_components)), + index=range(self.n_components), + ) assert X.shape[1] == self.n_features_in_ - X_ = self._U.T @ X.T - return pd.DataFrame(X_.T) + X_ = X @ self._U + return pd.DataFrame(X_) diff --git a/river/decomposition/test_odmd.py b/river/decomposition/test_odmd.py index 1e7b42b223..ee62df62ec 100644 --- a/river/decomposition/test_odmd.py +++ b/river/decomposition/test_odmd.py @@ -141,7 +141,7 @@ def test_one_svd_is_enough(): U = pd.DataFrame({"u": u_[:-2]}) X_ = X.copy() X_["u"] = U - + u_orig, s_orig, _ = sp.sparse.linalg.svds( X.values.T, k=2, return_singular_vectors="u" ) @@ -151,7 +151,7 @@ def test_one_svd_is_enough(): u_out, s_out, _ = sp.sparse.linalg.svds( Y.values.T, k=2, return_singular_vectors="u" ) - + assert (np.abs(u_orig - u_aug[:3, :2]) <= np.abs(u_orig - u_out)).all() assert (np.abs(s_orig - s_aug[:2]) <= np.abs(s_orig - s_out)).all()