Add 1-D linear regression class code; also add the GMM code, which for some reason had not been added yet

lazyprogrammer · lazyprogrammer · commit 5cc29a162c54 · 2015-08-16T05:37:31.000-04:00
diff --git a/linear_regression_class/data_1d.csv b/linear_regression_class/data_1d.csv
@@ -0,0 +1,100 @@
+95.724162408,197.179636092
+35.7576189281,67.5906695414
+28.8168474238,60.8541328206
+99.9584813087,196.907396981
+66.8097483121,125.311128524
+58.2156926413,115.785784589
+53.8210763379,110.762772705
+81.2960821704,157.98528569
+80.6486970595,159.61941373
+78.2528136925,149.003865539
+16.8634836868,31.4675908827
+90.799147557,184.185219661
+53.312737012,103.220610158
+71.6187890238,143.271818357
+18.4105917769,46.7364180117
+57.124346762,107.123794204
+81.1346839164,168.309374007
+82.0152554217,166.82994267
+36.924908251,70.5014555898
+44.5871260438,96.8693702549
+26.662354456,50.3732358383
+74.065057115,145.519170708
+17.7405767273,46.5518746077
+6.8197438189,13.2437863094
+26.3173600923,62.5006665037
+82.8041190512,159.916649576
+39.446536962,77.9842041485
+76.7281205911,147.269183138
+42.2158561432,83.6452954011
+94.3585758425,191.956072692
+42.141785126,91.7327341584
+3.21124860748,8.2514167727
+6.8856734002,16.0963894188
+13.8757721557,33.3544421438
+63.2974036434,131.505745533
+60.4925193638,128.698843062
+79.6072852308,160.310380503
+83.4443630603,160.603245737
+54.8920701084,104.400293158
+62.0459441682,126.969456046
+66.6537579409,124.519167614
+61.3198131584,125.217414427
+47.3195529929,98.7194843794
+81.1509876011,166.432455477
+91.0215136709,179.236872614
+16.8434620066,33.7772337402
+76.5064365659,161.372965132
+71.8657046789,138.575401004
+9.84808948357,17.9033282786
+35.7897124122,74.1085917146
+35.8886919974,74.8213229656
+90.8294487116,179.241547046
+7.26207283678,6.43437237688
+35.359288168,78.086356791
+79.0792537554,154.591789763
+21.5534583299,50.0307538993
+79.4764277802,165.441966192
+48.7679666405,110.897287971
+54.7938620084,114.691885107
+87.5035255175,186.744206306
+38.9491304682,70.6972229828
+9.39561282267,19.1790300473
+11.1211868171,27.022959917
+46.1355582629,87.2924492169
+85.6975735819,167.924614121
+50.1102964028,96.0205484441
+11.0736667277,24.9463246457
+22.2281308507,54.0041458755
+67.5539548043,133.165254434
+12.7871856602,28.0062232213
+46.9654908412,100.605589468
+4.84854064839,13.8572726025
+40.5373319344,82.5906629412
+26.3304612748,50.2318039097
+38.8847263526,79.4817724245
+93.8382665331,191.324317836
+60.0816599704,120.495115436
+50.1139652204,111.352322885
+17.7186852835,44.9376064869
+85.0730311421,172.133203076
+41.3215097262,84.889236
+35.45154709,69.9189912445
+74.0210255434,149.778646331
+90.5197211027,187.593909745
+34.5405970412,66.326858951
+94.5616621258,189.144964109
+52.9562678505,103.44683588
+80.1064364142,158.529430067
+49.822757583,98.6988396336
+61.7817253103,122.647395062
+60.2861083596,124.763346322
+77.3206901868,155.67749312
+15.6463587542,35.4106082545
+71.5406183401,151.323458712
+62.4523981129,129.43395879
+79.0598323714,163.096902614
+72.1976934443,142.898522638
+71.4177534183,148.09420854
+31.0088200801,63.7968762483
+40.3252822158,86.7236853299
diff --git a/linear_regression_class/generate_1d.py b/linear_regression_class/generate_1d.py
@@ -0,0 +1,19 @@
+import numpy as np
+
+# Generate a noisy 1-D linear dataset and save it as data_1d.csv
+# (one "x,y" pair per line, no header row).
+
+# number of samples to generate
+N = 100
+
+# x is drawn uniformly between 0 and 100; y follows the line 2x + 1 plus
+# Gaussian noise with standard deviation 5
+X = np.random.uniform(low=0, high=100, size=N)
+Y = 2*X + 1 + np.random.normal(scale=5, size=N)
+
+with open('data_1d.csv', 'w') as f:
+    # iterate over the pairs directly: xrange() exists only in Python 2,
+    # while zip() works in both Python 2 and Python 3
+    for x, y in zip(X, Y):
+        f.write("%s,%s\n" % (x, y))
+
diff --git a/linear_regression_class/lr_1d.py b/linear_regression_class/lr_1d.py
@@ -0,0 +1,48 @@
+import numpy as np
+import matplotlib.pyplot as plt
+
+# Fit a line y = a*x + b to the points in data_1d.csv using the closed-form
+# least-squares solution, and plot the data before and after fitting.
+
+# load the data: each line of the file is one "x,y" pair
+X = []
+Y = []
+# the with-block closes the file as soon as reading is done, instead of
+# leaving the handle open for the rest of the script
+with open('data_1d.csv') as f:
+    for line in f:
+        x, y = line.split(',')
+        X.append(float(x))
+        Y.append(float(y))
+
+# let's turn X and Y into numpy arrays since that will be useful later
+X = np.array(X)
+Y = np.array(Y)
+
+
+# let's plot the data to see what it looks like
+plt.scatter(X, Y)
+plt.show()
+
+
+# apply the equations we learned to calculate a and b
+
+# compute each sum, mean and dot product once and reuse it below
+x_sum = X.sum()
+x_mean = X.mean()
+y_mean = Y.mean()
+xx = X.dot(X)
+xy = X.dot(Y)
+
+# denominator is common to both the slope and the intercept
+denominator = xx - x_mean * x_sum
+a = (xy - y_mean * x_sum) / denominator    # slope
+b = (y_mean * xx - x_mean * xy) / denominator    # intercept
+
+# let's calculate the predicted Y
+Yhat = a*X + b
+
+# let's plot everything together to make sure it worked
+plt.scatter(X, Y)
+plt.plot(X, Yhat)
+plt.show()