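Minor changes: re-indent moore.py loop with spaces, add dataset URL to nb.py, make gmm() covariance smoothing a parameter (default 10e-3, previously hard-coded 0.001) and return R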

lazyprogrammer · lazyprogrammer · commit f6b6768eb5cf · 2016-10-11T14:00:45.000-04:00
diff --git a/linear_regression_class/moore.py b/linear_regression_class/moore.py
@@ -15,12 +15,12 @@
 non_decimal = re.compile(r'[^\d]+')
 
 for line in open('moore.csv'):
-	r = line.split('\t')
+    r = line.split('\t')
 
-	x = int(non_decimal.sub('', r[2].split('[')[0]))
-	y = int(non_decimal.sub('', r[1].split('[')[0]))
-	X.append(x)
-	Y.append(y)
+    x = int(non_decimal.sub('', r[2].split('[')[0]))
+    y = int(non_decimal.sub('', r[1].split('[')[0]))
+    X.append(x)
+    Y.append(y)
 
 
 X = np.array(X)
diff --git a/nlp_class/nb.py b/nlp_class/nb.py
@@ -1,5 +1,6 @@
 # Naive Bayes spam detection for NLP class, which can be found at:
 # https://www.udemy.com/data-science-natural-language-processing-in-python
+# dataset: https://archive.ics.uci.edu/ml/datasets/Spambase
 
 # Author: http://lazyprogrammer.me
 
diff --git a/unsupervised_class/gmm.py b/unsupervised_class/gmm.py
@@ -5,7 +5,7 @@
 from scipy.stats import multivariate_normal
 
 
-def gmm(X, K, max_iter=20):
+def gmm(X, K, max_iter=20, smoothing=10e-3):
     N, D = X.shape
     M = np.zeros((K, D))
     R = np.zeros((N, K))
@@ -34,7 +34,7 @@ def gmm(X, K, max_iter=20):
             Nk = R[:,k].sum()
             pi[k] = Nk / N
             M[k] = R[:,k].dot(X) / Nk
-            C[k] = np.sum(R[n,k]*np.outer(X[n] - M[k], X[n] - M[k]) for n in xrange(N)) / Nk + np.eye(D)*0.001
+            C[k] = np.sum(R[n,k]*np.outer(X[n] - M[k], X[n] - M[k]) for n in xrange(N)) / Nk + np.eye(D)*smoothing
 
 
         costs[i] = np.log(weighted_pdfs.sum(axis=1)).sum()
@@ -54,7 +54,7 @@ def gmm(X, K, max_iter=20):
     print "pi:", pi
     print "means:", M
     print "covariances:", C
-
+    return R
 
 
 def main():