-
Notifications
You must be signed in to change notification settings - Fork 1
/
preprocess.py
68 lines (51 loc) · 1.89 KB
/
preprocess.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
import numpy as np
import numpy.linalg as npl
class Preprocess:
    """PCA-based whitening of column-wise data.

    Data matrices are laid out with one variable per row and one
    observation per column (the mean is taken along axis 1 and
    ``np.cov`` is called with its default row-variable layout).

    Attributes are populated by ``computePCA``; ``whiten``, ``unwhiten``
    and ``plot_explained_var`` read them and therefore require that
    ``computePCA`` has been called first.
    """

    def __init__(self):
        self.U = None  # eigenvectors (columns), sorted by decreasing eigenvalue
        self.V = None  # eigenvalues, sorted in decreasing order
        self.meanPatch = None  # per-row mean of the fitted data, shape (d, 1)
        self.epsilon = 1e-4  # regulariser added to eigenvalues before sqrt

    def computePCA(self, data):
        """
        Computes the mean of the data for centering. As well
        as the eigenvalue decomposition of the sample
        covariance matrix of the centered data.

        data: 2-D array with variables in rows and observations
              in columns.
        Stores the results in ``self.meanPatch``, ``self.V`` and
        ``self.U``; returns nothing.
        """
        self.meanPatch = np.mean(data, axis=1).reshape(-1, 1)

        # np.cov centers the rows itself, so no explicit centered copy of
        # the data is needed. For a single-row input np.cov returns a 0-d
        # array, which eigh rejects -- promote it to a 1x1 matrix.
        sigma = np.atleast_2d(np.cov(data))

        eigvals, eigvecs = npl.eigh(sigma)

        # eigh returns eigenvalues in ascending order; reverse both so the
        # leading components carry the most variance.
        eigvals = eigvals[::-1]
        eigvecs = eigvecs[:, ::-1]

        # A covariance matrix is positive semi-definite, so negative
        # eigenvalues can only be round-off. Clip them so the square roots
        # taken in whiten/unwhiten can never become NaN.
        self.V = np.clip(eigvals, 0.0, None)
        self.U = eigvecs

    def whiten(self, data, numComponents=-1):
        """
        Whitens the data by left multiplication with V^(-1/2)*U^T
        where V is the diagonal matrix of eigenvalues sorted in
        decreasing order and U the corresponding eigenvectors.
        The stored mean is subtracted from the data first.
        Reduces dimensionality if numComponents is less than
        data dimension.

        data: 2-D array with the same number of rows as the data
              passed to computePCA.
        numComponents: number of leading components to keep;
              -1 keeps all of them.
        Returns an array with numComponents rows.
        """
        if numComponents == -1:
            numComponents = self.U.shape[0]

        # Scale the rows of U^T directly instead of multiplying by a dense
        # diagonal matrix, and only for the components that are kept.
        scale = np.sqrt(1.0 / (self.V[:numComponents] + self.epsilon))
        W = scale[:, np.newaxis] * self.U[:, :numComponents].T
        return W.dot(data - self.meanPatch)

    def unwhiten(self, data):
        """
        Reconstructs original data from whitened data.

        The number of components is taken from the number of rows
        of ``data``. If fewer components than the original dimension
        were kept, the result is the projection onto the leading
        components rather than an exact reconstruction.
        """
        numComponents = data.shape[0]

        # Scale the columns of U by sqrt(V + epsilon): the inverse of the
        # row scaling applied in whiten.
        scale = np.sqrt(self.V[:numComponents] + self.epsilon)
        W = self.U[:, :numComponents] * scale
        return W.dot(data) + self.meanPatch

    def plot_explained_var(self):
        """
        Plots variance of data explained as a function of the
        number of components.
        """
        # Imported lazily so matplotlib is only needed when plotting.
        import matplotlib.pyplot as plt

        numDims = self.U.shape[0]
        explained = np.cumsum(self.V / np.sum(self.V))
        plt.plot(range(1, numDims + 1), explained)
        plt.show()