Skip to content

Commit acdc7f8

Browse files
committed
finish lec5 and lec4
1 parent f5a8ba5 commit acdc7f8

17 files changed

+113
-3
lines changed

10601ML-CMU/hw3/ClassificationError.m

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
function [err] = ClassificationError(yHat, yTruth)
% CLASSIFICATIONERROR  Fraction of predictions that disagree with the truth.
%   err = ClassificationError(yHat, yTruth) compares the predicted labels
%   yHat with the true labels yTruth element-wise and returns the number of
%   mismatches divided by length(yTruth).
%
%   The inequality is spelled `~=`, which both MATLAB and Octave accept
%   (`!=` is Octave-only), and the output is named `err` so that it does
%   not shadow the built-in error() function inside this file.
    err = sum(yHat ~= yTruth) / length(yTruth);
end
4+

10601ML-CMU/hw3/HW3Data.mat

794 KB
Binary file not shown.

10601ML-CMU/hw3/NB_Classify.m

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
function [yHat] = NB_Classify(D, p, XTest)
% NB_CLASSIFY  Naive Bayes prediction for every row of XTest.
%   yHat = NB_Classify(D, p, XTest) returns a column vector with one label
%   per row of XTest: 1 when the class-1 score is strictly greater than the
%   class-2 score, 2 otherwise (ties included).
%
%   D      2-row matrix; D(1,:) and D(2,:) are the per-feature probabilities
%          used for class 1 and class 2 respectively.
%   p      prior weight of class 1; class 2 uses 1 - p.
%   XTest  one example per row.
    numDocs = size(XTest, 1);
    % Start from the fallback label so only class-1 wins need an assignment.
    yHat = 2 * ones(numDocs, 1);
    logPrior = log([p, 1 - p]);

    for row = 1:numDocs
        x = XTest(row, :);
        % Per-feature likelihood: D where the feature is 1, (1 - D) where it is 0.
        likeClass1 = D(1, :) .* x + (1 - D(1, :)) .* (1 - x);
        likeClass2 = D(2, :) .* x + (1 - D(2, :)) .* (1 - x);

        scoreClass1 = logProd([log(likeClass1), logPrior(1)]);
        scoreClass2 = logProd([log(likeClass2), logPrior(2)]);

        if scoreClass1 > scoreClass2
            yHat(row) = 1;
        end
    end
end
19+

10601ML-CMU/hw3/NB_XGivenY.m

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
function [D] = NB_XGivenY(XTrain, yTrain)
% NB_XGIVENY  Smoothed per-class feature estimates for naive Bayes.
%   D = NB_XGivenY(XTrain, yTrain) returns a 2-row matrix. Row 1 is computed
%   from the rows of XTrain whose label in yTrain is 1, row 2 from the rows
%   whose label is 2. Each entry is
%       (column sum over the class rows + 1) / (number of class rows + 1).
%
%   Plain `+` is used for the scalar addition: the `.+` spelling is not
%   valid MATLAB and is deprecated in GNU Octave.
    EconoRows = (yTrain == 1);
    OnionRows = (yTrain == 2);

    D = [(sum(XTrain(EconoRows, :), 1) + 1) / (sum(EconoRows) + 1);
         (sum(XTrain(OnionRows, :), 1) + 1) / (sum(OnionRows) + 1)];
end
8+

10601ML-CMU/hw3/NB_YPrior.m

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
function [p] = NB_YPrior(yTrain)
% NB_YPRIOR  Fraction of training labels equal to 1.
%   p = NB_YPrior(yTrain) counts the entries of yTrain that equal 1 and
%   divides by length(yTrain).
    isClass1 = (yTrain == 1);
    numClass1 = sum(isClass1);
    p = numClass1 / length(yTrain);
end
4+

10601ML-CMU/hw3/dataprocess.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
from scipy.io import loadmat
2+
import numpy as np
3+

10601ML-CMU/hw3/homework3_sol.pdf

180 KB
Binary file not shown.

10601ML-CMU/hw3/hw3-overview.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
2+
hw3 总结:这是一个基于 bag of words 的 naive Bayes 分类模型(生成式模型)的简单实践。
3+
4+
- [python code](./hw3.py)
5+
- [hw3 solution](./homework3_sol.pdf)

10601ML-CMU/hw3/hw3.py

Lines changed: 63 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,63 @@
1+
from scipy.io import loadmat
2+
import numpy as np
3+
4+
# MATLAB data file that data_preprocess is asked to load.
mat_file_name = "HW3Data.mat"

# Keys pulled out of the loaded .mat file, in the order they are returned.
data_name = [
    'Vocabulary',
    'XTrain',
    'yTrain',
    'XTest',
    'yTest',
    'XTrainSmall',
    'yTrainSmall',
]

# Small offset added to probabilities before np.log so log(0) is never taken.
eps = 1e-9
7+
def data_preprocess(file_name):
    """Load a MATLAB ``.mat`` file and return the arrays listed in ``data_name``.

    Args:
        file_name: Path passed straight to ``scipy.io.loadmat``.

    Returns:
        A list holding one entry of the loaded file per key in the
        module-level ``data_name`` list, in that order.
    """
    contents = loadmat(file_name)
    return [contents[key] for key in data_name]
10+
11+
# Load every array once at import time. XTest and yTest are later read as
# module-level globals by model_err.
Vocabulary, XTrain, yTrain, XTest, yTest, XTrainSmall,yTrainSmall = data_preprocess(mat_file_name)
12+
13+
def logProd(x):
    """Return the log of a product from the logs of its factors.

    Adding logarithms equals taking the logarithm of the product, so each row
    of log-factors collapses to a single value.

    Args:
        x: 2-D array; each row holds the log-factors of one product.

    Returns:
        The sum of ``x`` along axis 1 (one value per row).
    """
    row_totals = x.sum(axis=1)
    return row_totals
15+
16+
def XgivenY(XTrain,YTrain):
    """Estimate P(X_w = 1 | Y = y) for every feature column and both classes.

    For each class the estimate is ``(column_sum + 1) / (n + 1)``, where
    ``column_sum`` is the sum of the feature column over that class's rows and
    ``n`` is the number of rows in the class. For 0/1 features this equals the
    MAP estimate under a Beta(2, 1) prior.

    Args:
        XTrain: Feature matrix with one example per row (dense or sparse, as
            long as it supports boolean row indexing and ``.sum(0)``).
        YTrain: Labels, 1 or 2; squeezed before use.

    Returns:
        A 2 x V matrix: row 0 is estimated from the rows labelled 1, row 1
        from the rows labelled 2.
    """
    labels = YTrain.squeeze()
    class1_rows = labels == 1
    class2_rows = labels == 2
    class1_est = (XTrain[class1_rows].sum(0) + 1) / (class1_rows.sum() + 1)
    class2_est = (XTrain[class2_rows].sum(0) + 1) / (class2_rows.sum() + 1)
    # np.vstack replaces np.row_stack, which is only a deprecated alias of it.
    return np.vstack((class1_est, class2_est))
25+
26+
def YPrior_Eco(YTrain):
    """Return the maximum-likelihood estimate of P(Y == 1).

    Args:
        YTrain: Array of labels; squeezed before use.

    Returns:
        The number of labels equal to 1 divided by the number of labels.
    """
    labels = YTrain.squeeze()
    class1_count = (labels == 1).sum()
    return class1_count / labels.size
32+
33+
def classify(D,p,XTest):
    """Label each row of ``XTest`` with the higher-scoring naive Bayes class.

    Args:
        D: 2 x V array-like; row 0 holds P(X_w = 1 | Y = 1) and row 1 holds
            P(X_w = 1 | Y = 2).
        p: Prior probability P(Y = 1); class 2 uses ``1 - p``.
        XTest: Feature matrix with one example per row. Either a sparse
            matrix exposing ``toarray()`` or a dense 2-D array-like. The
            likelihood formula assumes 0/1 features.

    Returns:
        1-D integer array with one label per row: 1 or 2. A tie yields 1,
        because ``np.argmax`` returns the first maximum.
    """
    D = np.asarray(D)
    # Only sparse inputs have toarray(); dense inputs used to raise
    # AttributeError here, so convert them with np.asarray instead.
    if hasattr(XTest, "toarray"):
        XTest = XTest.toarray()
    else:
        XTest = np.asarray(XTest)

    # Per-feature likelihood: D where the feature is 1, (1 - D) where it is 0.
    class1_like = D[0, :] * XTest + (1 - D[0, :]) * (1 - XTest)
    class2_like = D[1, :] * XTest + (1 - D[1, :]) * (1 - XTest)

    # eps keeps np.log away from zero.
    class1_score = logProd(np.log(class1_like + eps)) + np.log(p + eps)
    class2_score = logProd(np.log(class2_like + eps)) + np.log(1 - p + eps)

    # Column 0 -> label 1, column 1 -> label 2.
    return np.argmax(np.column_stack((class1_score, class2_score)), axis=1) + 1
41+
42+
def classificationErr(y_true,y_hat):
    """Return the fraction of predictions that differ from the true labels.

    Args:
        y_true: Array of true labels; squeezed before comparison.
        y_hat: Array of predicted labels; squeezed before comparison.

    Returns:
        ``1 - accuracy``, where accuracy is the number of matching entries
        divided by the number of predictions.
    """
    truth = y_true.squeeze()
    predicted = y_hat.squeeze()
    matches = np.sum(truth == predicted)
    accuracy = matches / predicted.size
    return 1 - accuracy
46+
47+
def model_err(XTrain,YTrain):
    """Fit naive Bayes on the given training data and print its error rates.

    The training error is measured on ``XTrain``/``YTrain``. The test error
    is measured on the module-level ``XTest``/``yTest`` arrays, which are not
    parameters of this function.

    Args:
        XTrain: Training feature matrix (shadows the module-level name).
        YTrain: Training labels.
    """
    D = XgivenY(XTrain, YTrain)
    p = YPrior_Eco(YTrain)
    yhat_train = classify(D, p, XTrain)
    yhat_test = classify(D, p, XTest)
    train_err = classificationErr(YTrain, yhat_train)
    # Pass (y_true, y_hat) in the order the helper declares; the original
    # call had the two arguments swapped.
    test_err = classificationErr(yTest, yhat_test)
    print("train err = ",train_err,"test err = ",test_err)
55+
56+
57+
def problem_g():
    """Print train/test error for the model fitted on ``XTrain``/``yTrain``."""
    model_err(XTrain=XTrain, YTrain=yTrain)
59+
60+
def problem_h():
    """Print train/test error for the model fitted on the small training set."""
    model_err(XTrain=XTrainSmall, YTrain=yTrainSmall)
62+
63+

10601ML-CMU/hw3/img/problem-i.png

740 KB
Loading

10601ML-CMU/hw3/logProd.m

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
function [log_product] = logProd(x)
% LOGPROD  Log of a product, given the logs of its factors.
%   log_product = logProd(x) adds up the entries of x. Because
%   log(a * b) = log(a) + log(b), summing log-factors gives the log of
%   their product.
    total = sum(x);
    log_product = total;
end
4+

10601ML-CMU/lec3/hw2solution.md

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@ $$
1313

1414
[code](./hw2.py)
1515

16-
![Ltheta](img/pb2.png)
16+
![Ltheta](./img/pb2.png)
1717

1818
3. <br>
1919

@@ -27,12 +27,12 @@ $$
2727

2828
4. <br>
2929

30-
![pb2-3.png](img/pb2-3.png)
30+
![pb2-3.png](./img/pb2-3.png)
3131

3232
# problem 3
3333

3434

35-
![pb3-1](img/pb3-1.png)
35+
![pb3-1](./img/pb3-1.png)
3636

3737
**note**:
3838

1.12 MB
Binary file not shown.

10601ML-CMU/lec4/NBayesLogReg.pdf

2.62 KB
Binary file not shown.

10601ML-CMU/lec4/homework3.pdf

166 KB
Binary file not shown.
2.32 MB
Binary file not shown.
Binary file not shown.

0 commit comments

Comments
 (0)