ex16.1

Tiezhu-Li · May 30, 2019 · 7f13c72 · 7f13c72
1 parent a622dc4
commit 7f13c72
Show file tree

Hide file tree

Showing 2 changed files with 79 additions and 0 deletions.
diff --git a/CH16/README.md b/CH16/README.md
@@ -134,7 +134,21 @@ $y_2$的贡献可能更多的体现在文理科的差异上，他们的作用相
 
 这部分数值参考书上内容，如果用numpy做，会有一定出入，回头再复核下。
 
+### 习题16.1
+样本数据主成分分析
+$$
+X=
+\left[
+\begin{matrix}
+2& 3& 3& 4& 5& 7\\
+2& 4& 5& 5& 6& 8
+\end{matrix}
+\right]
+$$
 
+本题原始数据只有两个特征，主成分也只有两个，因此可以直接在二维平面上可视化。
+1. 首先对数据做规范化（参考式16.33），使每个变量均值为0、方差为1。注意，规范化并不代表数据取值范围在$[0, 1]$之间。
+1. 
 
 ## 参考
 
diff --git a/CH16/unit_test.py b/CH16/unit_test.py
@@ -43,3 +43,68 @@ def test_e_16_1(self):
         print(s)
         print(vh)
         # s 特征值， vh 特征向量
+
+    def test_ex1601(self):
+        # raw data
+        x = np.array([[2, 3, 3, 4, 5, 7],
+                      [2, 4, 5, 5, 6, 8]])
+
+        # normalization
+        x_star = (x-np.mean(x, axis=1).reshape(-1, 1))/np.sqrt(np.var(x, axis=1)).reshape(-1, 1)
+        print(np.mean(x, axis=1))
+        print(np.var(x, axis=1))
+
+        print(x_star)
+
+        print(np.var(x_star, axis=1))
+        x_ = x_star.T/np.sqrt(x_star.shape[0]-1)
+        u, s, vh = np.linalg.svd(x_)
+        print(x_)
+        print("\n")
+        print(u)
+        print(s)
+        print(vh)
+        s = s*np.eye(2)
+        # print(vh[:2, :].shape)
+        y = np.dot(s, vh[:2, :])
+        rst = np.dot(u[:, :2], y)
+        print(rst)
+
+        # s engine value
+        # vh engine vector
+
+        # 两个特征可以做可视化
+        plt.figure(figsize=(5, 5))
+        plt.xlim((-5, 5))
+        plt.ylim((-5, 5))
+        plt.grid()
+        plt.scatter(x_star[0, :], x_star[1, :])
+        plt.scatter(vh[:, 0], vh[:, 1])
+
+        # print(rst.shape)
+        # plt.scatter(vh[:, 0]*x_star[0, :], vh[:, 1]*x_star[1, :])
+        plt.plot([0, 0], [1, 0.5], c="black", marker="*")
+        plt.plot([0.5, 1], [0, 0], c="black", marker="*")
+        a = np.array([[0.0, 0.0],
+                      [0.5, 1.0]])
+        b = np.array([[0.7, 1.5],
+                      [0.0, 0.0]])
+        rst = np.dot(vh, a)
+        print(rst)
+        plt.plot(rst[:, 0],
+                 rst[:, 1], c="red", marker="*")
+
+        rst = np.dot(vh, b)
+        print(rst)
+        plt.plot(rst[:, 0],
+                 rst[:, 1], c="red", marker="*")
+
+        # from sklearn.decomposition import PCA
+        # pca = PCA(n_components=2)
+        # # pca in sklearn, (n_samples, n_features)
+        # rst = pca.fit_transform(x.T)
+        # print(rst)
+
+        # plt.scatter(rst[:, 0], rst[:, 1])
+        # plt.show()
+