Skip to content

Commit ffa340e

Browse files
committed
ex6 "Support Vector Machines" is done and submitted.
1 parent b752ab8 commit ffa340e

File tree

4 files changed

+54
-44
lines changed

4 files changed

+54
-44
lines changed

ex6/mlclass-ex6/dataset3Params.m

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2,8 +2,8 @@
22
%EX6PARAMS returns your choice of C and sigma for Part 3 of the exercise
33
%where you select the optimal (C, sigma) learning parameters to use for SVM
44
%with RBF kernel
5-
% [C, sigma] = EX6PARAMS(X, y, Xval, yval) returns your choice of C and
6-
% sigma. You should complete this function to return the optimal C and
5+
% [C, sigma] = EX6PARAMS(X, y, Xval, yval) returns your choice of C and
6+
% sigma. You should complete this function to return the optimal C and
77
% sigma based on a cross-validation set.
88
%
99

@@ -15,19 +15,38 @@
1515
% Instructions: Fill in this function to return the optimal C and sigma
1616
% learning parameters found using the cross validation set.
1717
% You can use svmPredict to predict the labels on the cross
18-
% validation set. For example,
18+
% validation set. For example,
1919
% predictions = svmPredict(model, Xval);
2020
% will return the predictions on the cross validation set.
2121
%
22-
% Note: You can compute the prediction error using
22+
% Note: You can compute the prediction error using
2323
% mean(double(predictions ~= yval))
2424
%
2525

26+
fprintf('--------------------------------------------------------------------------------\n');
27+
fprintf('start searching best [C, sigma] values\n');
28+
error_min = inf;
29+
values = [0.01 0.03 0.1 0.3 1 3 10 30];
30+
31+
% Exhaustive grid search over every (C, sigma) pair drawn from `values`.
% One SVM with a Gaussian kernel is trained per pair on (X, y), and the
% pair with the lowest misclassification rate on the cross-validation set
% (Xval, yval) is kept in C / sigma.
%
% The loop variables intentionally do not start with an underscore:
% MATLAB identifiers must begin with a letter, so names such as _C only
% parse in Octave.
for C_try = values
    for sigma_try = values
        fprintf('Train and evaluate (on cross validation set) for\n[_C, _sigma] = [%f %f]\n', C_try, sigma_try);
        % Bind the candidate sigma into the two-argument kernel svmTrain is given.
        model = svmTrain(X, y, C_try, @(x1, x2) gaussianKernel(x1, x2, sigma_try));
        % Fraction of cross-validation examples that are misclassified.
        e = mean(double(svmPredict(model, Xval) ~= yval));
        fprintf('prediction error: %f\n', e);
        % "<=" (not "<"): on a tie the most recently tried pair wins.
        if e <= error_min
            fprintf('error_min updated!\n');
            C = C_try;
            sigma = sigma_try;
            error_min = e;
            fprintf('[C, sigma] = [%f %f]\n', C, sigma);
        end
        fprintf('--------\n');
    end
end
2647

27-
28-
29-
30-
48+
fprintf('\nfinish searching.\nBest value [C, sigma] = [%f %f] with prediction error = %f\n\n', C, sigma, error_min);
49+
fprintf('--------------------------------------------------------------------------------\n');
3150

3251
% =========================================================================
3352

ex6/mlclass-ex6/emailFeatures.m

Lines changed: 10 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
function x = emailFeatures(word_indices)
22
%EMAILFEATURES takes in a word_indices vector and produces a feature vector
33
%from the word indices
4-
% x = EMAILFEATURES(word_indices) takes in a word_indices vector and
5-
% produces a feature vector from the word indices.
4+
% x = EMAILFEATURES(word_indices) takes in a word_indices vector and
5+
% produces a feature vector from the word indices.
66

77
% Total number of words in the dictionary
88
n = 1899;
@@ -12,20 +12,20 @@
1212

1313
% ====================== YOUR CODE HERE ======================
1414
% Instructions: Fill in this function to return a feature vector for the
15-
% given email (word_indices). To help make it easier to
15+
% given email (word_indices). To help make it easier to
1616
% process the emails, we have already pre-processed each
1717
% email and converted each word in the email into an index in
1818
% a fixed dictionary (of 1899 words). The variable
1919
% word_indices contains the list of indices of the words
2020
% which occur in one email.
21-
%
21+
%
2222
% Concretely, if an email has the text:
2323
%
2424
% The quick brown fox jumped over the lazy dog.
2525
%
26-
% Then, the word_indices vector for this text might look
26+
% Then, the word_indices vector for this text might look
2727
% like:
28-
%
28+
%
2929
% 60 100 33 44 10 53 60 58 5
3030
%
3131
% where, we have mapped each word onto a number, for example:
@@ -48,14 +48,11 @@
4848
%
4949
%
5050

51-
52-
53-
54-
55-
56-
51+
% Mark every dictionary word that occurs in the email. A single indexed
% assignment sets all listed positions of x to 1 at once; repeated indices
% are harmless and an empty word_indices leaves x unchanged.
x(word_indices) = 1;
5754

5855
% =========================================================================
59-
56+
6057

6158
end

ex6/mlclass-ex6/gaussianKernel.m

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,8 @@
1616
%
1717
%
1818

19-
20-
21-
22-
19+
% Gaussian (RBF) similarity: exp(-||x1 - x2||^2 / (2 * sigma^2)).
% The difference is flattened to a column first so that delta' * delta is
% always the scalar squared distance, even if the inputs are row vectors
% (where it would otherwise be an N-by-N outer product).
delta = x1(:) - x2(:);
sim = exp(-(delta' * delta) / (2 * sigma * sigma));
2320

2421
% =============================================================
25-
22+
2623
end

ex6/mlclass-ex6/processEmail.m

Lines changed: 15 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,9 @@
11
function word_indices = processEmail(email_contents)
22
%PROCESSEMAIL preprocesses the body of an email and
3-
%returns a list of word_indices
4-
% word_indices = PROCESSEMAIL(email_contents) preprocesses
5-
% the body of an email and returns a list of indices of the
6-
% words contained in the email.
3+
%returns a list of word_indices
4+
% word_indices = PROCESSEMAIL(email_contents) preprocesses
5+
% the body of an email and returns a list of indices of the
6+
% words contained in the email.
77
%
88

99
% Load Vocabulary
@@ -60,13 +60,13 @@
6060
[str, email_contents] = ...
6161
strtok(email_contents, ...
6262
[' @$/#.-:&*+=[]?!(){},''">_<;%' char(10) char(13)]);
63-
63+
6464
% Remove any non alphanumeric characters
6565
str = regexprep(str, '[^a-zA-Z0-9]', '');
6666

67-
% Stem the word
67+
% Stem the word
6868
% (the porterStemmer sometimes has issues, so we use a try catch block)
69-
try str = porterStemmer(strtrim(str));
69+
try str = porterStemmer(strtrim(str));
7070
catch str = ''; continue;
7171
end;
7272

@@ -87,24 +87,21 @@
8787
% vector. Concretely, if str = 'action', then you should
8888
% look up the vocabulary list to find where in vocabList
8989
% 'action' appears. For example, if vocabList{18} =
90-
% 'action', then, you should add 18 to the word_indices
90+
% 'action', then, you should add 18 to the word_indices
9191
% vector (e.g., word_indices = [word_indices ; 18]; ).
92-
%
92+
%
9393
% Note: vocabList{idx} returns the word with index idx in the
9494
% vocabulary list.
95-
%
95+
%
9696
% Note: You can use strcmp(str1, str2) to compare two strings (str1 and
9797
% str2). It will return 1 only if the two strings are equivalent.
9898
%
9999

100-
101-
102-
103-
104-
105-
106-
107-
100+
% Look the stemmed token up in the vocabulary. strcmp against the whole
% cell array returns a logical mask with one entry per vocabulary word, and
% find turns that mask into the matching index (or indices, in ascending
% order). Nothing is appended when the token is not in the vocabulary.
match_idx = find(strcmp(str, vocabList));
if ~isempty(match_idx)
    % (:) forces a column so the vertical concatenation is always valid.
    word_indices = [ word_indices ; match_idx(:) ];
end
108105

109106
% =============================================================
110107

0 commit comments

Comments
 (0)