Skip to content

Commit

Permalink
pca
Browse files Browse the repository at this point in the history
  • Loading branch information
SmirkCao committed Jun 3, 2019
1 parent 7fdbacc commit 6bb13d2
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 16 deletions.
25 changes: 21 additions & 4 deletions CH16/pca.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,15 +28,32 @@ def fit(self, x):
assert n > 1
assert (np.mean(x, axis=1) == np.zeros(n)).all()
x_ = x.T/np.sqrt(n-1)
u, s, vh = np.linalg.svd(x_)
# mxk kxk kxn: m features , k components, n samples
u, s, vh = np.linalg.svd(x_, full_matrices=False)
self.vh = vh
self.u = u
self.singular_values_ = s
self.explained_variance_ratio_ = s**2/np.sum(s**2)
print(self.u)
print(self.vh)
# print("u:\n", self.u)
# print("s:\n", self.singular_values_)
# print("vh:\n", self.vh)

# sign flip
# sign of keep largest value is positive
max_abs_cols = np.argmax(np.abs(u), axis=0)
signs = np.sign(u[max_abs_cols, range(vh.shape[0])])
u *= signs
vh *= signs[:, np.newaxis]
# print(s)
# print(u)
# print(vh)

# print("max abs cols:\n", max_abs_cols)
# print("max abs cols sign:\n", signs[:, np.newaxis])
self.u = u
self.vh = vh

def fit_transform(self, x):
    """Fit the PCA model on `x` and return the projected components.

    Parameters
    ----------
    x : ndarray
        Data matrix; based on the visible `fit` usage it is laid out as
        (n_features, n_samples) with zero-mean rows — TODO confirm against
        the full class definition.

    Returns
    -------
    ndarray
        The principal components `vh @ x`, also stored on
        `self.components_` as a side effect.
    """
    self.fit(x)
    # Project the (already centered) data onto the right singular
    # vectors computed by fit().
    self.components_ = np.dot(self.vh, x)
    # Bug fix: the original block repeated this return statement; the
    # second occurrence was unreachable dead code.
    return self.components_
38 changes: 26 additions & 12 deletions CH16/unit_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def test_e_16_1(self):
[0.29, 0.35, 1, 0.60],
[0.33, 0.32, 0.60, 1]])
ev, sigma = np.linalg.eig(r)

print("\n")
print(40*"*"+"Engine Values"+40*"*")
print(ev)
Expand All @@ -45,14 +45,15 @@ def test_e_16_1(self):
print(s)
print(vh)
# s 特征值, vh 特征向量

def test_ex1601(self):
# raw data
x = np.array([[2, 3, 3, 4, 5, 7],
[2, 4, 5, 5, 6, 8]])

# normalization
x_star = (x-np.mean(x, axis=1).reshape(-1, 1))/np.sqrt(np.var(x, axis=1)).reshape(-1, 1)
x_star = (x-np.mean(x, axis=1).reshape(-1, 1)) / \
np.sqrt(np.var(x, axis=1)).reshape(-1, 1)
print(np.mean(x, axis=1))
print(np.var(x, axis=1))

Expand All @@ -72,6 +73,8 @@ def test_ex1601(self):
rst = np.dot(u[:, :2], y)
print(rst)

# print("np.dot(u*s, vh) \n")
# print(np.dot(u*s, vh))
# s engine value
# vh engine vector

Expand Down Expand Up @@ -116,29 +119,40 @@ def test_pca(self):
PCA分析
"""
print("\n")
# raw data
# raw data from ex1601
x = np.array([[2, 3, 3, 4, 5, 7],
[2, 4, 5, 5, 6, 8]])
# 去掉均值
x = x-np.mean(x, axis=1).reshape(-1, 1)
print(x)
assert (np.mean(x, axis=1) == np.zeros(2)).all()

# for sklearn x.shape == (n_samples, n_features)
pca_sklearn = skpca(n_components=2)
pca_sklearn.fit(x.T)
pca_sklearn_rst = pca_sklearn.fit_transform(x.T).T

print("\n")
print(40*"*"+"sklearn_pca"+40*"*")
print(pca_sklearn.singular_values_)
print(pca_sklearn.explained_variance_ratio_)
print(pca_sklearn.fit_transform(x.T).T)
print("singular values:\n", pca_sklearn.singular_values_)
print("explained variance ratio:\n",
pca_sklearn.explained_variance_ratio_)
print("transform:\n", )

print(40*"*"+"smirk_pca"+40*"*")
pca_test = smirkpca(n_components=2)
rst = pca_test.fit_transform(x)
print(pca_test.singular_values_)
print(pca_test.explained_variance_ratio_)
print(rst)
pca_test_rst = pca_test.fit_transform(x)
print("singular values:\n",
pca_test.singular_values_)
print("explained variance ratio:\n",
pca_test.explained_variance_ratio_)
print("transform:\n", pca_test_rst)

self.assertTrue(np.allclose(pca_sklearn.singular_values_,
pca_test.singular_values_))
self.assertTrue(np.allclose(pca_sklearn_rst, pca_test_rst))
self.assertTrue(np.allclose(pca_sklearn.explained_variance_ratio_,
pca_test.explained_variance_ratio_))

def test_pca_get_fig(self):
    # Placeholder test: intentionally empty (no plotting check implemented yet).
    pass

0 comments on commit 6bb13d2

Please sign in to comment.