Commit 34d5293 ("update")
1 parent 0601277

3 files changed: +40 −17 lines

unsupervised_class/gmm.py (+3 −2)
@@ -25,7 +25,7 @@ def gmm(X, K, max_iter=20, smoothing=1e-2):
         M[k] = X[np.random.choice(N)]
         C[k] = np.eye(D)
 
-    costs = np.zeros(max_iter)
+    costs = []
     weighted_pdfs = np.zeros((N, K)) # we'll use these to store the PDF value of sample n and Gaussian k
     for i in range(max_iter):
         # step 1: determine assignments / resposibilities
@@ -57,7 +57,8 @@ def gmm(X, K, max_iter=20, smoothing=1e-2):
            # C[k] = np.sum(R[n,k]*np.outer(X[n] - M[k], X[n] - M[k]) for n in range(N)) / Nk + np.eye(D)*smoothing
 
 
-        costs[i] = np.log(weighted_pdfs.sum(axis=1)).sum()
+        c = np.log(weighted_pdfs.sum(axis=1)).sum()
+        costs.append(c)
         if i > 0:
             if np.abs(costs[i] - costs[i-1]) < 0.1:
                 break
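
The appended cost is the total data log-likelihood. A minimal sketch of how weighted_pdfs is typically filled before that line (illustrative only, not part of this commit; the helper name and the use of scipy.stats.multivariate_normal are assumptions):

import numpy as np
from scipy.stats import multivariate_normal

def log_likelihood(X, pi, M, C):
    # weighted_pdfs[n,k] = pi[k] * N(X[n] | M[k], C[k]);
    # summing over k and taking logs gives the value appended to costs.
    N, K = len(X), len(pi)
    weighted_pdfs = np.zeros((N, K))
    for k in range(K):
        weighted_pdfs[:, k] = pi[k] * multivariate_normal.pdf(X, mean=M[k], cov=C[k])
    return np.log(weighted_pdfs.sum(axis=1)).sum()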

unsupervised_class/kmeans.py (+34 −12)
@@ -9,6 +9,7 @@
 
 import numpy as np
 import matplotlib.pyplot as plt
+from sklearn.metrics.pairwise import pairwise_distances
 
 
 def d(u, v):
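
The new import is not used in this hunk; presumably it is meant to vectorize the per-sample distance loop further down. A hedged sketch of that possible usage (an assumption, not what the commit does; it also assumes d(u, v) is squared Euclidean distance):

import numpy as np
from sklearn.metrics.pairwise import pairwise_distances

def soft_responsibilities(X, M, beta=3.0):
    # all sample-to-center squared distances at once: shape (N, K)
    dists = pairwise_distances(X, M, metric='sqeuclidean')
    exponents = np.exp(-beta * dists)
    # normalize each row so responsibilities sum to 1 per sample
    return exponents / exponents.sum(axis=1, keepdims=True)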
@@ -30,7 +31,7 @@ def cost(X, R, M):
     return cost
 
 
-def plot_k_means(X, K, max_iter=20, beta=1.0, show_plots=True):
+def plot_k_means(X, K, max_iter=20, beta=3.0, show_plots=False):
     N, D = X.shape
     M = np.zeros((K, D))
     # R = np.zeros((N, K))
@@ -40,27 +41,41 @@ def plot_k_means(X, K, max_iter=20, beta=1.0, show_plots=True):
     for k in range(K):
         M[k] = X[np.random.choice(N)]
 
-    costs = np.zeros(max_iter)
+    costs = []
+    k = 0
     for i in range(max_iter):
+        k += 1
         # step 1: determine assignments / resposibilities
         # is this inefficient?
         for k in range(K):
             for n in range(N):
-                # R[n,k] = np.exp(-beta*d(M[k], X[n])) / np.sum( np.exp(-beta*d(M[j], X[n])) for j in range(K) )
                 exponents[n,k] = np.exp(-beta*d(M[k], X[n]))
-
         R = exponents / exponents.sum(axis=1, keepdims=True)
-        # assert(np.abs(R - R2).sum() < 1e-10)
+
 
         # step 2: recalculate means
-        for k in range(K):
-            M[k] = R[:,k].dot(X) / R[:,k].sum()
+        # decent vectorization
+        # for k in range(K):
+        #     M[k] = R[:,k].dot(X) / R[:,k].sum()
+        # oldM = M
 
-        costs[i] = cost(X, R, M)
+        # full vectorization
+        M = R.T.dot(X) / R.sum(axis=0, keepdims=True).T
+        # print("diff M:", np.abs(M - oldM).sum())
+
+        c = cost(X, R, M)
+        costs.append(c)
         if i > 0:
-            if np.abs(costs[i] - costs[i-1]) < 1e-5:
+            if np.abs(costs[-1] - costs[-2]) < 1e-5:
                 break
 
+        if len(costs) > 1:
+            if costs[-1] > costs[-2]:
+                pass
+                # print("cost increased!")
+                # print("M:", M)
+                # print("R.min:", R.min(), "R.max:", R.max())
+
     if show_plots:
         plt.plot(costs)
         plt.title("Costs")
@@ -71,6 +86,7 @@ def plot_k_means(X, K, max_iter=20, beta=1.0, show_plots=True):
         plt.scatter(X[:,0], X[:,1], c=colors)
         plt.show()
 
+    print("Final cost", costs[-1])
     return M, R
 
 
@@ -98,13 +114,19 @@ def main():
     plt.show()
 
     K = 3 # luckily, we already know this
-    plot_k_means(X, K)
+    plot_k_means(X, K, beta=1.0, show_plots=True)
+
+    K = 3 # luckily, we already know this
+    plot_k_means(X, K, beta=3.0, show_plots=True)
+
+    K = 3 # luckily, we already know this
+    plot_k_means(X, K, beta=10.0, show_plots=True)
 
     K = 5 # what happens if we choose a "bad" K?
-    plot_k_means(X, K, max_iter=30)
+    plot_k_means(X, K, max_iter=30, show_plots=True)
 
     K = 5 # what happens if we change beta?
-    plot_k_means(X, K, max_iter=30, beta=0.3)
+    plot_k_means(X, K, max_iter=30, beta=0.3, show_plots=True)
 
 
 if __name__ == '__main__':
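
The three K = 3 runs differ only in beta, which acts as an inverse "temperature" on the soft assignments: larger beta pushes each row of R toward a one-hot vector. A small illustration of that effect (not part of the commit):

import numpy as np

dists = np.array([0.5, 1.0, 4.0])   # squared distances from one point to K=3 means
for beta in (0.3, 1.0, 3.0, 10.0):
    r = np.exp(-beta * dists)
    r /= r.sum()
    print(beta, np.round(r, 3))      # responsibilities sharpen as beta grows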

unsupervised_class/kmeans_fail.py (+3 −3)
@@ -35,19 +35,19 @@ def donut():
 def main():
     # donut
     X = donut()
-    plot_k_means(X, 2)
+    plot_k_means(X, 2, beta=0.1, show_plots=True)
 
     # elongated clusters
     X = np.zeros((1000, 2))
     X[:500,:] = np.random.multivariate_normal([0, 0], [[1, 0], [0, 20]], 500)
     X[500:,:] = np.random.multivariate_normal([5, 0], [[1, 0], [0, 20]], 500)
-    plot_k_means(X, 2)
+    plot_k_means(X, 2, beta=0.1, show_plots=True)
 
     # different density
     X = np.zeros((1000, 2))
     X[:950,:] = np.array([0,0]) + np.random.randn(950, 2)
     X[950:,:] = np.array([3,0]) + np.random.randn(50, 2)
-    plot_k_means(X, 2)
+    plot_k_means(X, 2, show_plots=True)
 
 
 