ex6 "Support Vector Machines" is done and submitted.

everpeace · everpeace · commit ffa340e19fc2 · 2011-11-21T22:42:30.000+09:00
diff --git a/ex6/mlclass-ex6/dataset3Params.m b/ex6/mlclass-ex6/dataset3Params.m
@@ -2,8 +2,8 @@
 %EX6PARAMS returns your choice of C and sigma for Part 3 of the exercise
 %where you select the optimal (C, sigma) learning parameters to use for SVM
 %with RBF kernel
-%   [C, sigma] = EX6PARAMS(X, y, Xval, yval) returns your choice of C and 
-%   sigma. You should complete this function to return the optimal C and 
+%   [C, sigma] = EX6PARAMS(X, y, Xval, yval) returns your choice of C and
+%   sigma. You should complete this function to return the optimal C and
 %   sigma based on a cross-validation set.
 %
 
@@ -15,19 +15,38 @@
 % Instructions: Fill in this function to return the optimal C and sigma
 %               learning parameters found using the cross validation set.
 %               You can use svmPredict to predict the labels on the cross
-%               validation set. For example, 
+%               validation set. For example,
 %                   predictions = svmPredict(model, Xval);
 %               will return the predictions on the cross validation set.
 %
-%  Note: You can compute the prediction error using 
+%  Note: You can compute the prediction error using
 %        mean(double(predictions ~= yval))
 %
 
+fprintf('--------------------------------------------------------------------------------\n');
+fprintf('start searching best [C, sigma] values\n');
+error_min = inf;
+values = [0.01 0.03 0.1 0.3 1 3 10 30];
+
+for _C = values
+  for _sigma = values
+    fprintf('Train and evaluate (on cross validation set) for\n[_C, _sigma] = [%f %f]\n', _C, _sigma);
+    model = svmTrain(X, y, _C, @(x1, x2) gaussianKernel(x1, x2, _sigma));
+    e = mean(double(svmPredict(model, Xval) ~= yval));
+    fprintf('prediction error: %f\n', e);
+    if( e <= error_min )
+      fprintf('error_min updated!\n');
+      C = _C;
+      sigma = _sigma;
+      error_min = e;
+      fprintf('[C, sigma] = [%f %f]\n', C, sigma);
+    end
+    fprintf('--------\n');
+  end
+end
 
-
-
-
-
+fprintf('\nfinish searching.\nBest value [C, sigma] = [%f %f] with prediction error = %f\n\n', C, sigma, error_min);
+fprintf('--------------------------------------------------------------------------------\n');
 
 % =========================================================================
 
diff --git a/ex6/mlclass-ex6/emailFeatures.m b/ex6/mlclass-ex6/emailFeatures.m
@@ -1,8 +1,8 @@
 function x = emailFeatures(word_indices)
 %EMAILFEATURES takes in a word_indices vector and produces a feature vector
 %from the word indices
-%   x = EMAILFEATURES(word_indices) takes in a word_indices vector and 
-%   produces a feature vector from the word indices. 
+%   x = EMAILFEATURES(word_indices) takes in a word_indices vector and
+%   produces a feature vector from the word indices.
 
 % Total number of words in the dictionary
 n = 1899;
@@ -12,20 +12,20 @@
 
 % ====================== YOUR CODE HERE ======================
 % Instructions: Fill in this function to return a feature vector for the
-%               given email (word_indices). To help make it easier to 
+%               given email (word_indices). To help make it easier to
 %               process the emails, we have have already pre-processed each
 %               email and converted each word in the email into an index in
 %               a fixed dictionary (of 1899 words). The variable
 %               word_indices contains the list of indices of the words
 %               which occur in one email.
-% 
+%
 %               Concretely, if an email has the text:
 %
 %                  The quick brown fox jumped over the lazy dog.
 %
-%               Then, the word_indices vector for this text might look 
+%               Then, the word_indices vector for this text might look
 %               like:
-%               
+%
 %                   60  100   33   44   10     53  60  58   5
 %
 %               where, we have mapped each word onto a number, for example:
@@ -48,14 +48,11 @@
 %
 %
 
-
-
-
-
-
-
+for i = word_indices
+  x(i) = 1;
+end
 
 % =========================================================================
-    
+
 
 end
diff --git a/ex6/mlclass-ex6/gaussianKernel.m b/ex6/mlclass-ex6/gaussianKernel.m
@@ -16,11 +16,8 @@
 %
 %
 
-
-
-
-
+sim = exp(-1*(x1-x2)'*(x1-x2)/(2*sigma*sigma));
 
 % =============================================================
-    
+
 end
diff --git a/ex6/mlclass-ex6/processEmail.m b/ex6/mlclass-ex6/processEmail.m
@@ -1,9 +1,9 @@
 function word_indices = processEmail(email_contents)
 %PROCESSEMAIL preprocesses a the body of an email and
-%returns a list of word_indices 
-%   word_indices = PROCESSEMAIL(email_contents) preprocesses 
-%   the body of an email and returns a list of indices of the 
-%   words contained in the email. 
+%returns a list of word_indices
+%   word_indices = PROCESSEMAIL(email_contents) preprocesses
+%   the body of an email and returns a list of indices of the
+%   words contained in the email.
 %
 
 % Load Vocabulary
@@ -60,13 +60,13 @@
     [str, email_contents] = ...
        strtok(email_contents, ...
               [' @$/#.-:&*+=[]?!(){},''">_<;%' char(10) char(13)]);
-   
+
     % Remove any non alphanumeric characters
     str = regexprep(str, '[^a-zA-Z0-9]', '');
 
-    % Stem the word 
+    % Stem the word
     % (the porterStemmer sometimes has issues, so we use a try catch block)
-    try str = porterStemmer(strtrim(str)); 
+    try str = porterStemmer(strtrim(str));
     catch str = ''; continue;
     end;
 
@@ -87,24 +87,21 @@
     %               vector. Concretely, if str = 'action', then you should
     %               look up the vocabulary list to find where in vocabList
     %               'action' appears. For example, if vocabList{18} =
-    %               'action', then, you should add 18 to the word_indices 
+    %               'action', then, you should add 18 to the word_indices
     %               vector (e.g., word_indices = [word_indices ; 18]; ).
-    % 
+    %
     % Note: vocabList{idx} returns a the word with index idx in the
     %       vocabulary list.
-    % 
+    %
     % Note: You can use strcmp(str1, str2) to compare two strings (str1 and
     %       str2). It will return 1 only if the two strings are equivalent.
     %
 
-
-
-
-
-
-
-
-
+    for i = 1:length(vocabList)
+      if(strcmp(str, vocabList{i}))
+        word_indices = [ word_indices ; i];
+      end
+    end
 
     % =============================================================
 

Original file line number	Diff line number	Diff line change
`@@ -16,11 +16,8 @@`
`16`	`16`	`%`
`17`	`17`	`%`
`18`	`18`
`19`		`-`
`20`		`-`
`21`		`-`
`22`		`-`
	`19`	`+sim = exp(-1*(x1-x2)'*(x1-x2)/(2*sigma*sigma));`
`23`	`20`
`24`	`21`	`% =============================================================`
`25`		`-`
	`22`	`+`
`26`	`23`	`end`