Skip to content

Commit 5cc29a1

Browse files
Add linear regression 1-D class code; also add GMM, which for some reason had not been added yet
1 parent 6052a63 commit 5cc29a1

File tree

3 files changed

+146
-0
lines changed

3 files changed

+146
-0
lines changed

linear_regression_class/data_1d.csv

Lines changed: 100 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,100 @@
1+
95.724162408,197.179636092
2+
35.7576189281,67.5906695414
3+
28.8168474238,60.8541328206
4+
99.9584813087,196.907396981
5+
66.8097483121,125.311128524
6+
58.2156926413,115.785784589
7+
53.8210763379,110.762772705
8+
81.2960821704,157.98528569
9+
80.6486970595,159.61941373
10+
78.2528136925,149.003865539
11+
16.8634836868,31.4675908827
12+
90.799147557,184.185219661
13+
53.312737012,103.220610158
14+
71.6187890238,143.271818357
15+
18.4105917769,46.7364180117
16+
57.124346762,107.123794204
17+
81.1346839164,168.309374007
18+
82.0152554217,166.82994267
19+
36.924908251,70.5014555898
20+
44.5871260438,96.8693702549
21+
26.662354456,50.3732358383
22+
74.065057115,145.519170708
23+
17.7405767273,46.5518746077
24+
6.8197438189,13.2437863094
25+
26.3173600923,62.5006665037
26+
82.8041190512,159.916649576
27+
39.446536962,77.9842041485
28+
76.7281205911,147.269183138
29+
42.2158561432,83.6452954011
30+
94.3585758425,191.956072692
31+
42.141785126,91.7327341584
32+
3.21124860748,8.2514167727
33+
6.8856734002,16.0963894188
34+
13.8757721557,33.3544421438
35+
63.2974036434,131.505745533
36+
60.4925193638,128.698843062
37+
79.6072852308,160.310380503
38+
83.4443630603,160.603245737
39+
54.8920701084,104.400293158
40+
62.0459441682,126.969456046
41+
66.6537579409,124.519167614
42+
61.3198131584,125.217414427
43+
47.3195529929,98.7194843794
44+
81.1509876011,166.432455477
45+
91.0215136709,179.236872614
46+
16.8434620066,33.7772337402
47+
76.5064365659,161.372965132
48+
71.8657046789,138.575401004
49+
9.84808948357,17.9033282786
50+
35.7897124122,74.1085917146
51+
35.8886919974,74.8213229656
52+
90.8294487116,179.241547046
53+
7.26207283678,6.43437237688
54+
35.359288168,78.086356791
55+
79.0792537554,154.591789763
56+
21.5534583299,50.0307538993
57+
79.4764277802,165.441966192
58+
48.7679666405,110.897287971
59+
54.7938620084,114.691885107
60+
87.5035255175,186.744206306
61+
38.9491304682,70.6972229828
62+
9.39561282267,19.1790300473
63+
11.1211868171,27.022959917
64+
46.1355582629,87.2924492169
65+
85.6975735819,167.924614121
66+
50.1102964028,96.0205484441
67+
11.0736667277,24.9463246457
68+
22.2281308507,54.0041458755
69+
67.5539548043,133.165254434
70+
12.7871856602,28.0062232213
71+
46.9654908412,100.605589468
72+
4.84854064839,13.8572726025
73+
40.5373319344,82.5906629412
74+
26.3304612748,50.2318039097
75+
38.8847263526,79.4817724245
76+
93.8382665331,191.324317836
77+
60.0816599704,120.495115436
78+
50.1139652204,111.352322885
79+
17.7186852835,44.9376064869
80+
85.0730311421,172.133203076
81+
41.3215097262,84.889236
82+
35.45154709,69.9189912445
83+
74.0210255434,149.778646331
84+
90.5197211027,187.593909745
85+
34.5405970412,66.326858951
86+
94.5616621258,189.144964109
87+
52.9562678505,103.44683588
88+
80.1064364142,158.529430067
89+
49.822757583,98.6988396336
90+
61.7817253103,122.647395062
91+
60.2861083596,124.763346322
92+
77.3206901868,155.67749312
93+
15.6463587542,35.4106082545
94+
71.5406183401,151.323458712
95+
62.4523981129,129.43395879
96+
79.0598323714,163.096902614
97+
72.1976934443,142.898522638
98+
71.4177534183,148.09420854
99+
31.0088200801,63.7968762483
100+
40.3252822158,86.7236853299
Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
import numpy as np

# Generate a synthetic 1-D linear regression dataset and write it to
# data_1d.csv, one "x,y" pair per line.

# Number of samples to generate.
N = 100

# X is drawn uniformly from [0, 100); Y follows the line 2*X + 1 with
# additive Gaussian noise (standard deviation 5).
X = np.random.uniform(low=0, high=100, size=N)
Y = 2*X + 1 + np.random.normal(scale=5, size=N)

with open('data_1d.csv', 'w') as f:
    # zip() replaces the Python-2-only xrange() index loop, so the script
    # runs unchanged on both Python 2 and Python 3.
    for x, y in zip(X, Y):
        f.write("%s,%s\n" % (x, y))

linear_regression_class/lr_1d.py

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
import numpy as np
import matplotlib.pyplot as plt

# Fit a 1-D linear regression (Y ~ a*X + b) to the points in data_1d.csv
# using the closed-form least-squares solution, then plot the result.

# Load the data: each line of data_1d.csv holds one "x,y" pair.
X = []
Y = []
# The context manager guarantees the file handle is closed after reading.
with open('data_1d.csv') as f:
    for line in f:
        x, y = line.split(',')
        X.append(float(x))
        Y.append(float(y))

# Turn X and Y into numpy arrays so the vectorized math below works.
X = np.array(X)
Y = np.array(Y)

# Plot the raw data to see what it looks like.
plt.scatter(X, Y)
plt.show()

# Apply the closed-form equations for a (slope) and b (intercept).
# Each sum, mean, and dot product is computed once and reused.
x_mean = X.mean()
y_mean = Y.mean()
x_sum = X.sum()
xx = X.dot(X)
xy = X.dot(Y)

# The denominator is common to both a and b.
denominator = xx - x_mean * x_sum
a = (xy - y_mean * x_sum) / denominator
b = (y_mean * xx - x_mean * xy) / denominator

# Calculate the predicted Y along the fitted line.
Yhat = a*X + b

# Plot the data and the fitted line together to check the fit.
plt.scatter(X, Y)
plt.plot(X, Yhat)
plt.show()

0 commit comments

Comments
 (0)