From 775d32c302813232c80e648258138f706c1a7c57 Mon Sep 17 00:00:00 2001 From: Riccardo Taormina Date: Sun, 21 Jun 2015 22:08:45 +0800 Subject: [PATCH] First upload Added main files, dataset and NSGA2 example script --- ELMclassifier.m | 76 ++++++++++ Heart.csv | 272 +++++++++++++++++++++++++++++++++++ computeAccuracy.m | 57 ++++++++ computeRelevanceRedundancy.m | 71 +++++++++ deltaElimination.m | 83 +++++++++++ getAlgorithmOptions.m | 81 +++++++++++ normalizeData.m | 40 ++++++ objFunFQEISS.m | 107 ++++++++++++++ objFunWMOSS.m | 107 ++++++++++++++ objFunWQEISS.m | 107 ++++++++++++++ plotFrequencyMatrix.m | 109 ++++++++++++++ quantizeVariable.m | 71 +++++++++ readData.m | 49 +++++++ readme.txt | 68 +++++++++ script_example_NSGAII.m | 193 +++++++++++++++++++++++++ tanhActFun.m | 31 ++++ trainAndValidateELM.m | 78 ++++++++++ transformData.m | 68 +++++++++ 18 files changed, 1668 insertions(+) create mode 100644 ELMclassifier.m create mode 100644 Heart.csv create mode 100644 computeAccuracy.m create mode 100644 computeRelevanceRedundancy.m create mode 100644 deltaElimination.m create mode 100644 getAlgorithmOptions.m create mode 100644 normalizeData.m create mode 100644 objFunFQEISS.m create mode 100644 objFunWMOSS.m create mode 100644 objFunWQEISS.m create mode 100644 plotFrequencyMatrix.m create mode 100644 quantizeVariable.m create mode 100644 readData.m create mode 100644 readme.txt create mode 100644 script_example_NSGAII.m create mode 100644 tanhActFun.m create mode 100644 trainAndValidateELM.m create mode 100644 transformData.m diff --git a/ELMclassifier.m b/ELMclassifier.m new file mode 100644 index 0000000..642e8c3 --- /dev/null +++ b/ELMclassifier.m @@ -0,0 +1,76 @@ +function [trYhat, valYhat,W1,W2,bias] =... + ELMclassifier(trX, trY, valX, nUnits) + +% This function implements an ELM classifier with tanh activation function. +% +% Inputs: trX <- array of training inputs with size = num. features x num. 
training patterns +% trY <- array of training targets with size = num. categories x num. training patterns +% (for each i-th column of trY only the entry relative to the correct category is 1) +% valX <- array of validation inputs with size = num. features x num. validation patterns +% nUnits <- num. hidden units of ELM +% +% Output: +% trYhat <- array of training target predictions with size = num. training patterns x 1 +% (each i-th entry is an integer = predicted category) +% valYhat <- array of validation target predictions with size = num. validation patterns x 1 +% (each i-th entry is an integer = predicted category) +% W1,W2,bias <- the trained parameters of the ELM +% +% Reference: Huang, G.-B., Zhu, Q.-Y., Siew, C.-K., 2006. Extreme learning machine: Theory and applications. +% Neurocomputing 70, 489–501. doi:10.1016/j.neucom.2005.12.126 +% +% +% +% +% Copyright 2015 Riccardo Taormina (riccardo_taormina@sutd.edu.sg), +% Gulsah Karakaya (gulsahkilickarakaya@gmail.com), +% Stefano Galelli (stefano_galelli@sutd.edu.sg), +% and Selin Damla Ahipasaoglu (ahipasaoglu@sutd.edu.sg). +% +% Please refer to readme.txt for further information. +% +% +% This file is part of Matlab-Multi-objective-Feature-Selection. +% +% Matlab-Multi-objective-Feature-Selection is free software: you can redistribute +% it and/or modify it under the terms of the GNU General Public License +% as published by the Free Software Foundation, either version 3 of the +% License, or (at your option) any later version. +% +% This code is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with Matlab-Multi-objective-Feature-Selection. +% If not, see <http://www.gnu.org/licenses/>. 
+% + +% get number of features and number of patterns for training and validation +[nFeatures,nPatternsTr] = size(trX); +nPatternsVal = size(valX,2); + +% generate random input->hidden weights W1 (between -1 and 1) +W1 = rand(nUnits,nFeatures)*2-1; + +% generate random biases (between 0 and 1) +bias = rand(nUnits,1); + +% compute hidden neuron output matrix H +H = tanhActFun(W1*trX + repmat(bias,[1,nPatternsTr])); + +% compute hidden->output weights W2 +Hinv = pinv(H'); +W2 = Hinv * trY'; + +% get ELM response on training +temp = (H' * W2)'; +[~,temp] = max(temp,[],1); +trYhat = temp'; + +% ... and validation dataset +Hval = tanhActFun(W1*valX + repmat(bias,[1,nPatternsVal])); +temp = (Hval' * W2)'; +[~,temp] = max(temp,[],1); +valYhat = temp'; \ No newline at end of file diff --git a/Heart.csv b/Heart.csv new file mode 100644 index 0000000..99e1837 --- /dev/null +++ b/Heart.csv @@ -0,0 +1,272 @@ +age,sex,chest pain type,rest blood press,serum chol.,fast. bl. sugar > 120,rest. electrcard. res.,max heart rate,ex. induced angina,oldpeak,slope of peak,num. maj. 
vessels,thal,heart disease +0,1,1,0,0,1,1,0,1,0,1,1,1,1 +70,1,4,130,322,0,2,109,0,2.4,2,3,3,2 +67,0,3,115,564,0,2,160,0,1.6,2,0,7,1 +57,1,2,124,261,0,0,141,0,0.3,1,0,7,2 +64,1,4,128,263,0,0,105,1,0.2,2,1,7,1 +74,0,2,120,269,0,2,121,1,0.2,1,1,3,1 +65,1,4,120,177,0,0,140,0,0.4,1,0,7,1 +56,1,3,130,256,1,2,142,1,0.6,2,1,6,2 +59,1,4,110,239,0,2,142,1,1.2,2,1,7,2 +60,1,4,140,293,0,2,170,0,1.2,2,2,7,2 +63,0,4,150,407,0,2,154,0,4,2,3,7,2 +59,1,4,135,234,0,0,161,0,0.5,2,0,7,1 +53,1,4,142,226,0,2,111,1,0,1,0,7,1 +44,1,3,140,235,0,2,180,0,0,1,0,3,1 +61,1,1,134,234,0,0,145,0,2.6,2,2,3,2 +57,0,4,128,303,0,2,159,0,0,1,1,3,1 +71,0,4,112,149,0,0,125,0,1.6,2,0,3,1 +46,1,4,140,311,0,0,120,1,1.8,2,2,7,2 +53,1,4,140,203,1,2,155,1,3.1,3,0,7,2 +64,1,1,110,211,0,2,144,1,1.8,2,0,3,1 +40,1,1,140,199,0,0,178,1,1.4,1,0,7,1 +67,1,4,120,229,0,2,129,1,2.6,2,2,7,2 +48,1,2,130,245,0,2,180,0,0.2,2,0,3,1 +43,1,4,115,303,0,0,181,0,1.2,2,0,3,1 +47,1,4,112,204,0,0,143,0,0.1,1,0,3,1 +54,0,2,132,288,1,2,159,1,0,1,1,3,1 +48,0,3,130,275,0,0,139,0,0.2,1,0,3,1 +46,0,4,138,243,0,2,152,1,0,2,0,3,1 +51,0,3,120,295,0,2,157,0,0.6,1,0,3,1 +58,1,3,112,230,0,2,165,0,2.5,2,1,7,2 +71,0,3,110,265,1,2,130,0,0,1,1,3,1 +57,1,3,128,229,0,2,150,0,0.4,2,1,7,2 +66,1,4,160,228,0,2,138,0,2.3,1,0,6,1 +37,0,3,120,215,0,0,170,0,0,1,0,3,1 +59,1,4,170,326,0,2,140,1,3.4,3,0,7,2 +50,1,4,144,200,0,2,126,1,0.9,2,0,7,2 +48,1,4,130,256,1,2,150,1,0,1,2,7,2 +61,1,4,140,207,0,2,138,1,1.9,1,1,7,2 +59,1,1,160,273,0,2,125,0,0,1,0,3,2 +42,1,3,130,180,0,0,150,0,0,1,0,3,1 +48,1,4,122,222,0,2,186,0,0,1,0,3,1 +40,1,4,152,223,0,0,181,0,0,1,0,7,2 +62,0,4,124,209,0,0,163,0,0,1,0,3,1 +44,1,3,130,233,0,0,179,1,0.4,1,0,3,1 +46,1,2,101,197,1,0,156,0,0,1,0,7,1 +59,1,3,126,218,1,0,134,0,2.2,2,1,6,2 +58,1,3,140,211,1,2,165,0,0,1,0,3,1 +49,1,3,118,149,0,2,126,0,0.8,1,3,3,2 +44,1,4,110,197,0,2,177,0,0,1,1,3,2 +66,1,2,160,246,0,0,120,1,0,2,3,6,2 +65,0,4,150,225,0,2,114,0,1,2,3,7,2 +42,1,4,136,315,0,0,125,1,1.8,2,0,6,2 +52,1,2,128,205,1,0,184,0,0,1,0,3,1 
+65,0,3,140,417,1,2,157,0,0.8,1,1,3,1 +63,0,2,140,195,0,0,179,0,0,1,2,3,1 +45,0,2,130,234,0,2,175,0,0.6,2,0,3,1 +41,0,2,105,198,0,0,168,0,0,1,1,3,1 +61,1,4,138,166,0,2,125,1,3.6,2,1,3,2 +60,0,3,120,178,1,0,96,0,0,1,0,3,1 +59,0,4,174,249,0,0,143,1,0,2,0,3,2 +62,1,2,120,281,0,2,103,0,1.4,2,1,7,2 +57,1,3,150,126,1,0,173,0,0.2,1,1,7,1 +51,0,4,130,305,0,0,142,1,1.2,2,0,7,2 +44,1,3,120,226,0,0,169,0,0,1,0,3,1 +60,0,1,150,240,0,0,171,0,0.9,1,0,3,1 +63,1,1,145,233,1,2,150,0,2.3,3,0,6,1 +57,1,4,150,276,0,2,112,1,0.6,2,1,6,2 +51,1,4,140,261,0,2,186,1,0,1,0,3,1 +58,0,2,136,319,1,2,152,0,0,1,2,3,2 +44,0,3,118,242,0,0,149,0,0.3,2,1,3,1 +47,1,3,108,243,0,0,152,0,0,1,0,3,2 +61,1,4,120,260,0,0,140,1,3.6,2,1,7,2 +57,0,4,120,354,0,0,163,1,0.6,1,0,3,1 +70,1,2,156,245,0,2,143,0,0,1,0,3,1 +76,0,3,140,197,0,1,116,0,1.1,2,0,3,1 +67,0,4,106,223,0,0,142,0,0.3,1,2,3,1 +45,1,4,142,309,0,2,147,1,0,2,3,7,2 +45,1,4,104,208,0,2,148,1,3,2,0,3,1 +39,0,3,94,199,0,0,179,0,0,1,0,3,1 +42,0,3,120,209,0,0,173,0,0,2,0,3,1 +56,1,2,120,236,0,0,178,0,0.8,1,0,3,1 +58,1,4,146,218,0,0,105,0,2,2,1,7,2 +35,1,4,120,198,0,0,130,1,1.6,2,0,7,2 +58,1,4,150,270,0,2,111,1,0.8,1,0,7,2 +41,1,3,130,214,0,2,168,0,2,2,0,3,1 +57,1,4,110,201,0,0,126,1,1.5,2,0,6,1 +42,1,1,148,244,0,2,178,0,0.8,1,2,3,1 +62,1,2,128,208,1,2,140,0,0,1,0,3,1 +59,1,1,178,270,0,2,145,0,4.2,3,0,7,1 +41,0,2,126,306,0,0,163,0,0,1,0,3,1 +50,1,4,150,243,0,2,128,0,2.6,2,0,7,2 +59,1,2,140,221,0,0,164,1,0,1,0,3,1 +61,0,4,130,330,0,2,169,0,0,1,0,3,2 +54,1,4,124,266,0,2,109,1,2.2,2,1,7,2 +54,1,4,110,206,0,2,108,1,0,2,1,3,2 +52,1,4,125,212,0,0,168,0,1,1,2,7,2 +47,1,4,110,275,0,2,118,1,1,2,1,3,2 +66,1,4,120,302,0,2,151,0,0.4,2,0,3,1 +58,1,4,100,234,0,0,156,0,0.1,1,1,7,2 +64,0,3,140,313,0,0,133,0,0.2,1,0,7,1 +50,0,2,120,244,0,0,162,0,1.1,1,0,3,1 +44,0,3,108,141,0,0,175,0,0.6,2,0,3,1 +67,1,4,120,237,0,0,71,0,1,2,0,3,2 +49,0,4,130,269,0,0,163,0,0,1,0,3,1 +57,1,4,165,289,1,2,124,0,1,2,3,7,2 +63,1,4,130,254,0,2,147,0,1.4,2,1,7,2 +48,1,4,124,274,0,2,166,0,0.5,2,0,7,2 
+51,1,3,100,222,0,0,143,1,1.2,2,0,3,1 +60,0,4,150,258,0,2,157,0,2.6,2,2,7,2 +59,1,4,140,177,0,0,162,1,0,1,1,7,2 +45,0,2,112,160,0,0,138,0,0,2,0,3,1 +55,0,4,180,327,0,1,117,1,3.4,2,0,3,2 +41,1,2,110,235,0,0,153,0,0,1,0,3,1 +60,0,4,158,305,0,2,161,0,0,1,0,3,2 +54,0,3,135,304,1,0,170,0,0,1,0,3,1 +42,1,2,120,295,0,0,162,0,0,1,0,3,1 +49,0,2,134,271,0,0,162,0,0,2,0,3,1 +46,1,4,120,249,0,2,144,0,0.8,1,0,7,2 +56,0,4,200,288,1,2,133,1,4,3,2,7,2 +66,0,1,150,226,0,0,114,0,2.6,3,0,3,1 +56,1,4,130,283,1,2,103,1,1.6,3,0,7,2 +49,1,3,120,188,0,0,139,0,2,2,3,7,2 +54,1,4,122,286,0,2,116,1,3.2,2,2,3,2 +57,1,4,152,274,0,0,88,1,1.2,2,1,7,2 +65,0,3,160,360,0,2,151,0,0.8,1,0,3,1 +54,1,3,125,273,0,2,152,0,0.5,3,1,3,1 +54,0,3,160,201,0,0,163,0,0,1,1,3,1 +62,1,4,120,267,0,0,99,1,1.8,2,2,7,2 +52,0,3,136,196,0,2,169,0,0.1,2,0,3,1 +52,1,2,134,201,0,0,158,0,0.8,1,1,3,1 +60,1,4,117,230,1,0,160,1,1.4,1,2,7,2 +63,0,4,108,269,0,0,169,1,1.8,2,2,3,2 +66,1,4,112,212,0,2,132,1,0.1,1,1,3,2 +42,1,4,140,226,0,0,178,0,0,1,0,3,1 +64,1,4,120,246,0,2,96,1,2.2,3,1,3,2 +54,1,3,150,232,0,2,165,0,1.6,1,0,7,1 +46,0,3,142,177,0,2,160,1,1.4,3,0,3,1 +67,0,3,152,277,0,0,172,0,0,1,1,3,1 +56,1,4,125,249,1,2,144,1,1.2,2,1,3,2 +34,0,2,118,210,0,0,192,0,0.7,1,0,3,1 +57,1,4,132,207,0,0,168,1,0,1,0,7,1 +64,1,4,145,212,0,2,132,0,2,2,2,6,2 +59,1,4,138,271,0,2,182,0,0,1,0,3,1 +50,1,3,140,233,0,0,163,0,0.6,2,1,7,2 +51,1,1,125,213,0,2,125,1,1.4,1,1,3,1 +54,1,2,192,283,0,2,195,0,0,1,1,7,2 +53,1,4,123,282,0,0,95,1,2,2,2,7,2 +52,1,4,112,230,0,0,160,0,0,1,1,3,2 +40,1,4,110,167,0,2,114,1,2,2,0,7,2 +58,1,3,132,224,0,2,173,0,3.2,1,2,7,2 +41,0,3,112,268,0,2,172,1,0,1,0,3,1 +41,1,3,112,250,0,0,179,0,0,1,0,3,1 +50,0,3,120,219,0,0,158,0,1.6,2,0,3,1 +54,0,3,108,267,0,2,167,0,0,1,0,3,1 +64,0,4,130,303,0,0,122,0,2,2,2,3,1 +51,0,3,130,256,0,2,149,0,0.5,1,0,3,1 +46,0,2,105,204,0,0,172,0,0,1,0,3,1 +55,1,4,140,217,0,0,111,1,5.6,3,0,7,2 +45,1,2,128,308,0,2,170,0,0,1,0,3,1 +56,1,1,120,193,0,2,162,0,1.9,2,0,7,1 +66,0,4,178,228,1,0,165,1,1,2,2,7,2 
+38,1,1,120,231,0,0,182,1,3.8,2,0,7,2 +62,0,4,150,244,0,0,154,1,1.4,2,0,3,2 +55,1,2,130,262,0,0,155,0,0,1,0,3,1 +58,1,4,128,259,0,2,130,1,3,2,2,7,2 +43,1,4,110,211,0,0,161,0,0,1,0,7,1 +64,0,4,180,325,0,0,154,1,0,1,0,3,1 +50,0,4,110,254,0,2,159,0,0,1,0,3,1 +53,1,3,130,197,1,2,152,0,1.2,3,0,3,1 +45,0,4,138,236,0,2,152,1,0.2,2,0,3,1 +65,1,1,138,282,1,2,174,0,1.4,2,1,3,2 +69,1,1,160,234,1,2,131,0,0.1,2,1,3,1 +69,1,3,140,254,0,2,146,0,2,2,3,7,2 +67,1,4,100,299,0,2,125,1,0.9,2,2,3,2 +68,0,3,120,211,0,2,115,0,1.5,2,0,3,1 +34,1,1,118,182,0,2,174,0,0,1,0,3,1 +62,0,4,138,294,1,0,106,0,1.9,2,3,3,2 +51,1,4,140,298,0,0,122,1,4.2,2,3,7,2 +46,1,3,150,231,0,0,147,0,3.6,2,0,3,2 +67,1,4,125,254,1,0,163,0,0.2,2,2,7,2 +50,1,3,129,196,0,0,163,0,0,1,0,3,1 +42,1,3,120,240,1,0,194,0,0.8,3,0,7,1 +56,0,4,134,409,0,2,150,1,1.9,2,2,7,2 +41,1,4,110,172,0,2,158,0,0,1,0,7,2 +42,0,4,102,265,0,2,122,0,0.6,2,0,3,1 +53,1,3,130,246,1,2,173,0,0,1,3,3,1 +43,1,3,130,315,0,0,162,0,1.9,1,1,3,1 +56,1,4,132,184,0,2,105,1,2.1,2,1,6,2 +52,1,4,108,233,1,0,147,0,0.1,1,3,7,1 +62,0,4,140,394,0,2,157,0,1.2,2,0,3,1 +70,1,3,160,269,0,0,112,1,2.9,2,1,7,2 +54,1,4,140,239,0,0,160,0,1.2,1,0,3,1 +70,1,4,145,174,0,0,125,1,2.6,3,0,7,2 +54,1,2,108,309,0,0,156,0,0,1,0,7,1 +35,1,4,126,282,0,2,156,1,0,1,0,7,2 +48,1,3,124,255,1,0,175,0,0,1,2,3,1 +55,0,2,135,250,0,2,161,0,1.4,2,0,3,1 +58,0,4,100,248,0,2,122,0,1,2,0,3,1 +54,0,3,110,214,0,0,158,0,1.6,2,0,3,1 +69,0,1,140,239,0,0,151,0,1.8,1,2,3,1 +77,1,4,125,304,0,2,162,1,0,1,3,3,2 +68,1,3,118,277,0,0,151,0,1,1,1,7,1 +58,1,4,125,300,0,2,171,0,0,1,2,7,2 +60,1,4,125,258,0,2,141,1,2.8,2,1,7,2 +51,1,4,140,299,0,0,173,1,1.6,1,0,7,2 +55,1,4,160,289,0,2,145,1,0.8,2,1,7,2 +52,1,1,152,298,1,0,178,0,1.2,2,0,7,1 +60,0,3,102,318,0,0,160,0,0,1,1,3,1 +58,1,3,105,240,0,2,154,1,0.6,2,0,7,1 +64,1,3,125,309,0,0,131,1,1.8,2,0,7,2 +37,1,3,130,250,0,0,187,0,3.5,3,0,3,1 +59,1,1,170,288,0,2,159,0,0.2,2,0,7,2 +51,1,3,125,245,1,2,166,0,2.4,2,0,3,1 +43,0,3,122,213,0,0,165,0,0.2,2,0,3,1 
+58,1,4,128,216,0,2,131,1,2.2,2,3,7,2 +29,1,2,130,204,0,2,202,0,0,1,0,3,1 +41,0,2,130,204,0,2,172,0,1.4,1,0,3,1 +63,0,3,135,252,0,2,172,0,0,1,0,3,1 +51,1,3,94,227,0,0,154,1,0,1,1,7,1 +54,1,3,120,258,0,2,147,0,0.4,2,0,7,1 +44,1,2,120,220,0,0,170,0,0,1,0,3,1 +54,1,4,110,239,0,0,126,1,2.8,2,1,7,2 +65,1,4,135,254,0,2,127,0,2.8,2,1,7,2 +57,1,3,150,168,0,0,174,0,1.6,1,0,3,1 +63,1,4,130,330,1,2,132,1,1.8,1,3,7,2 +35,0,4,138,183,0,0,182,0,1.4,1,0,3,1 +41,1,2,135,203,0,0,132,0,0,2,0,6,1 +62,0,3,130,263,0,0,97,0,1.2,2,1,7,2 +43,0,4,132,341,1,2,136,1,3,2,0,7,2 +58,0,1,150,283,1,2,162,0,1,1,0,3,1 +52,1,1,118,186,0,2,190,0,0,2,0,6,1 +61,0,4,145,307,0,2,146,1,1,2,0,7,2 +39,1,4,118,219,0,0,140,0,1.2,2,0,7,2 +45,1,4,115,260,0,2,185,0,0,1,0,3,1 +52,1,4,128,255,0,0,161,1,0,1,1,7,2 +62,1,3,130,231,0,0,146,0,1.8,2,3,7,1 +62,0,4,160,164,0,2,145,0,6.2,3,3,7,2 +53,0,4,138,234,0,2,160,0,0,1,0,3,1 +43,1,4,120,177,0,2,120,1,2.5,2,0,7,2 +47,1,3,138,257,0,2,156,0,0,1,0,3,1 +52,1,2,120,325,0,0,172,0,0.2,1,0,3,1 +68,1,3,180,274,1,2,150,1,1.6,2,0,7,2 +39,1,3,140,321,0,2,182,0,0,1,0,3,1 +53,0,4,130,264,0,2,143,0,0.4,2,0,3,1 +62,0,4,140,268,0,2,160,0,3.6,3,2,3,2 +51,0,3,140,308,0,2,142,0,1.5,1,1,3,1 +60,1,4,130,253,0,0,144,1,1.4,1,1,7,2 +65,1,4,110,248,0,2,158,0,0.6,1,2,6,2 +65,0,3,155,269,0,0,148,0,0.8,1,0,3,1 +60,1,3,140,185,0,2,155,0,3,2,0,3,2 +60,1,4,145,282,0,2,142,1,2.8,2,2,7,2 +54,1,4,120,188,0,0,113,0,1.4,2,1,7,2 +44,1,2,130,219,0,2,188,0,0,1,0,3,1 +44,1,4,112,290,0,2,153,0,0,1,1,3,2 +51,1,3,110,175,0,0,123,0,0.6,1,0,3,1 +59,1,3,150,212,1,0,157,0,1.6,1,0,3,1 +71,0,2,160,302,0,0,162,0,0.4,1,2,3,1 +61,1,3,150,243,1,0,137,1,1,2,0,3,1 +55,1,4,132,353,0,0,132,1,1.2,2,1,7,2 +64,1,3,140,335,0,0,158,0,0,1,0,3,2 +43,1,4,150,247,0,0,171,0,1.5,1,0,3,1 +58,0,3,120,340,0,0,172,0,0,1,0,3,1 +60,1,4,130,206,0,2,132,1,2.4,2,2,7,2 +58,1,2,120,284,0,2,160,0,1.8,2,0,3,2 +49,1,2,130,266,0,0,171,0,0.6,1,0,3,1 +48,1,2,110,229,0,0,168,0,1,3,0,7,2 +52,1,3,172,199,1,0,162,0,0.5,1,0,7,1 
+44,1,2,120,263,0,0,173,0,0,1,0,7,1 +56,0,2,140,294,0,2,153,0,1.3,2,0,3,1 +57,1,4,140,192,0,0,148,0,0.4,2,0,6,1 +67,1,4,160,286,0,2,108,1,1.5,2,3,3,2 diff --git a/computeAccuracy.m b/computeAccuracy.m new file mode 100644 index 0000000..0a00844 --- /dev/null +++ b/computeAccuracy.m @@ -0,0 +1,57 @@ +function accuracy = computeAccuracy(Y,Yhat) +% Computes the accuracy of ELM predictions +% +% +% +% Copyright 2015 Riccardo Taormina (riccardo_taormina@sutd.edu.sg), +% Gulsah Karakaya (gulsahkilickarakaya@gmail.com;), +% Stefano Galelli (stefano_galelli@sutd.edu.sg), +% and Selin Damla Ahipasaoglu (ahipasaoglu@sutd.edu.sg;. +% +% Please refer to README.txt for further information. +% +% +% This file is part of Matlab-Multi-objective-Feature-Selection. +% +% Matlab-Multi-objective-Feature-Selection is free software: you can redistribute +% it and/or modify it under the terms of the GNU General Public License +% as published by the Free Software Foundation, either version 3 of the +% License, or (at your option) any later version. +% +% This code is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with MATLAB_IterativeInputSelection. +% If not, see . 
+ + +classes = unique(Y); +nClasses = numel(unique(Y)); +if nClasses == 2 + nClasses = 1; % handle binary classification +end + + +Acc = zeros(1,nClasses); +for j = 1 : nClasses + if nClasses == 1 + thisClass = 2; % the H1 + else + thisClass = classes(j); + end + % ixes + ixes1 = (Y == thisClass); + ixes2 = (Yhat == thisClass); + % compute confusion matrix + tp = sum((ixes1==ixes2)&(ixes1==1)); + tn = sum((ixes1==ixes2)&(ixes1==0)); + fn = sum((ixes1-ixes2)==1); + fp = sum((ixes1-ixes2)==-1); + % compute accuracy + Acc(j) = (tp+tn)/(tp+fn+fp+tn); +end +% get average accuracy +accuracy = mean(Acc); \ No newline at end of file diff --git a/computeRelevanceRedundancy.m b/computeRelevanceRedundancy.m new file mode 100644 index 0000000..acb884d --- /dev/null +++ b/computeRelevanceRedundancy.m @@ -0,0 +1,71 @@ +function [RED,REL] = computeRelevanceRedundancy(data) +% Computes scaled relevance (REL) and redundancy (RED) arrays for a given dataset +% Last column is the predicted variable. +% +% +% Reference: Karakaya, G., Galelli, S., Ahipasaoglu, S.D., Taormina, R., 2015. +% Identifying (Quasi) Equally Informative Subsets in Feature Selection Problems +% for Classification: A Max-Relevance Min-Redundancy Approach. +% IEEE Trans. Cybern. doi:10.1109/TCYB.2015.2444435 +% +% +% +% Copyright 2015 Riccardo Taormina (riccardo_taormina@sutd.edu.sg), +% Gulsah Karakaya (gulsahkilickarakaya@gmail.com;), +% Stefano Galelli (stefano_galelli@sutd.edu.sg), +% and Selin Damla Ahipasaoglu (ahipasaoglu@sutd.edu.sg;. +% +% Please refer to README.txt for further information. +% +% +% This file is part of Matlab-Multi-objective-Feature-Selection. +% +% Matlab-Multi-objective-Feature-Selection is free software: you can redistribute +% it and/or modify it under the terms of the GNU General Public License +% as published by the Free Software Foundation, either version 3 of the +% License, or (at your option) any later version. 
+% +% This code is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with MATLAB_IterativeInputSelection. +% If not, see . + +% get predictors and predictand +PHI = data(:,1:end-1); +Y = data(:,end); + +% initialize outputs +nInputs = size(PHI,2); +RED = zeros(nInputs); +REL = zeros(nInputs,1); + + +hOut = entropy(Y); +for i = 1 : nInputs + % compute RED + hX = entropy(PHI(:,i)); + for j = i+1 : nInputs + hY = entropy(PHI(:,j)); + hXY = jointentropy(PHI(:,i), PHI(:,j)); + MI = hX+hY-hXY; + SU = 2*MI/(hX+hY); + RED(i,j) = SU; + end + % compute REL + hY = hOut; + hXY = jointentropy(PHI(:,i), Y); + MI = hX+hY-hXY; + SU = 2*MI/(hX+hY); + REL(i) = SU; +end + +% compute max values and scale everything +maxRED = sum(sum(RED(:))); +maxREL = sum(REL(:)); +RED = RED/maxRED; +REL = REL/maxREL; + diff --git a/deltaElimination.m b/deltaElimination.m new file mode 100644 index 0000000..52ab0c6 --- /dev/null +++ b/deltaElimination.m @@ -0,0 +1,83 @@ +function PF = deltaElimination(PF0,delta) +% Performs delta elimination to select QEISS from initial pareto front. +% The QEISS are those with accuracy at most delta% smaller than the highest one. +% +% Reference: Karakaya, G., Galelli, S., Ahipasaoglu, S.D., Taormina, R., 2015. +% Identifying (Quasi) Equally Informative Subsets in Feature Selection Problems +% for Classification: A Max-Relevance Min-Redundancy Approach. +% IEEE Trans. Cybern. doi:10.1109/TCYB.2015.2444435 +% +% +% +% +% Copyright 2015 Riccardo Taormina (riccardo_taormina@sutd.edu.sg), +% Gulsah Karakaya (gulsahkilickarakaya@gmail.com;), +% Stefano Galelli (stefano_galelli@sutd.edu.sg), +% and Selin Damla Ahipasaoglu (ahipasaoglu@sutd.edu.sg;. +% +% Please refer to README.txt for further information. 
+% +% +% This file is part of Matlab-Multi-objective-Feature-Selection. +% +% Matlab-Multi-objective-Feature-Selection is free software: you can redistribute +% it and/or modify it under the terms of the GNU General Public License +% as published by the Free Software Foundation, either version 3 of the +% License, or (at your option) any later version. +% +% This code is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with MATLAB_IterativeInputSelection. +% If not, see . +% + + +% initialize output as input (eliminate entries later) +fvals = PF0.fvals; +fvals_ext = PF0.fvals_ext; +archive = PF0.archive; +tempArchive = archive; + +% extract accuracies for delta elimination +accuracies = fvals_ext(:,3); + +% find best value of metric +bestValue = min(accuracies); + +delta = (delta/100)*abs(bestValue); + + +% in this array 1 will identify solution to be eliminated +ixesToRemove = zeros(size(tempArchive,1),1); + +% proceed with delta elimination +for i = 1 : numel(tempArchive) + if accuracies(i) > bestValue + delta; + % remove, it's inferior + ixesToRemove(i) = 1; + else + % eliminate inferior subsets + Si = tempArchive{i}; + for j = 1 : numel(tempArchive) + if (j == i) || (ixesToRemove(j) == 1) + % continue loop if solution already removed of + % if comparing same solutions + continue + end + Sj = tempArchive{j}; + if isequal(intersect(Si,Sj),Sj) &&... + (accuracies(j). 
+% + +% extract attributes (PHI) and predictand (Y) +PHI = data(:,1:end-1); +[nPatterns,nAttrs] = size(PHI); +tempY = data(:,end); + +% restructure predictand (array with same number of columns of number of classes) +classes = unique(tempY); +nClasses = numel(classes); +Y = zeros(nPatterns,nClasses); +for i = 1 : nClasses + thisClass = classes(i); + ixes = (tempY == thisClass); + Y(ixes,i) = 1; +end + + +% Objective Function options +objFunOptions.Y = Y; % predictand +objFunOptions.PHI = PHI; % attributes +objFunOptions.nFolds = 10; % folds for k-fold cross-validation +objFunOptions.nELM = 5; % size of ELM ensemble +objFunOptions.nUnits = 10; % number of units in ELM +objFunOptions.maxCardinality = 20; % maximum cardinality (important for large datasets) + +% Algorithm options +if strcmp(algorithm,'NSGA2') + % NSGA2 + + options = nsgaopt(); % get default options + options.popsize = 100; % populations size + options.maxGen = 100; % max generation + options.numVar = nAttrs; % number of design variables + options.numCons = 0; % number of contraints + options.lb = zeros(1,nAttrs); % lower bound of design variables (0) + options.ub = ones(1,nAttrs); % upper bound of design variables (1) + options.vartype = ones(1,nAttrs); % specify all binary variables + options.outputInterval = 1; % interval between echo on screen + options.plotInterval = 1; % interval between plot updates + options.useParallel = 'no'; % use parallel ('yes'/'no') + options.poolsize = 1; % matlab poolisize (num. 
parallel threads) + +elseif strcmp(algorithm,'BORG') + options.nvars = nAttrs; % number of design variables + options.nconstrs = 0; % number of contraints + options.NFE = 5000; % number of functions evaluations + options.lowerBounds = zeros(1,nAttrs); % lower bound of design variables (0) + options.upperBounds = ones(1,nAttrs); % upper bound of design variables (1) +else + error('Algorithm not supported!') +end + diff --git a/normalizeData.m b/normalizeData.m new file mode 100644 index 0000000..77737ee --- /dev/null +++ b/normalizeData.m @@ -0,0 +1,40 @@ +function norm_data = normalizeData(data) +% This function normalize data between -1 and +1 +% +% +% +% Copyright 2015 Riccardo Taormina (riccardo_taormina@sutd.edu.sg), +% Gulsah Karakaya (gulsahkilickarakaya@gmail.com;), +% Stefano Galelli (stefano_galelli@sutd.edu.sg), +% and Selin Damla Ahipasaoglu (ahipasaoglu@sutd.edu.sg;. +% +% Please refer to README.txt for further information. +% +% +% This file is part of Matlab-Multi-objective-Feature-Selection. +% +% Matlab-Multi-objective-Feature-Selection is free software: you can redistribute +% it and/or modify it under the terms of the GNU General Public License +% as published by the Free Software Foundation, either version 3 of the +% License, or (at your option) any later version. +% +% This code is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with MATLAB_IterativeInputSelection. +% If not, see . 
+% + +% get number of observations +nObs = size(data,1); + +% get max and mins +maxs = repmat(max(data),[nObs,1]); +mins = repmat(min(data),[nObs,1]); + +% normalize +norm_data = 2*((data-mins)./(maxs-mins))-1; + diff --git a/objFunFQEISS.m b/objFunFQEISS.m new file mode 100644 index 0000000..e56750a --- /dev/null +++ b/objFunFQEISS.m @@ -0,0 +1,107 @@ +function [fval,dummy] = objFunFQEISS(X,varargin) +global archive fvals objFunOptions suREL suRED ix_solutions +% objective function for developing FQEISS filters +% +% +% +% Copyright 2015 Riccardo Taormina (riccardo_taormina@sutd.edu.sg), +% Gulsah Karakaya (gulsahkilickarakaya@gmail.com;), +% Stefano Galelli (stefano_galelli@sutd.edu.sg), +% and Selin Damla Ahipasaoglu (ahipasaoglu@sutd.edu.sg;. +% +% Please refer to README.txt for further information. +% +% +% This file is part of Matlab-Multi-objective-Feature-Selection. +% +% Matlab-Multi-objective-Feature-Selection is free software: you can redistribute +% it and/or modify it under the terms of the GNU General Public License +% as published by the Free Software Foundation, either version 3 of the +% License, or (at your option) any later version. +% +% This code is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with MATLAB_IterativeInputSelection. +% If not, see . 
+% + +% initialize fitness values +fval = zeros(1,3); + +% unpack data and parameters +Y = objFunOptions.Y; % targets +PHI = objFunOptions.PHI; % inputs +nFolds = objFunOptions.nFolds; % nFolds for k-fold cross validation +nELM = objFunOptions.nELM; % number of repeats for computing the accuracy obj function +nUnits = objFunOptions.nUnits; % info on dataset +maxCardinality = objFunOptions.maxCardinality; % maximum cardinality + +% retrieve populations size and number of attributes +nAttrs = size(X,2); + +% transform decision variables from continuous to discrete +% 0 or 1 assigned depending on ratio of maxCardinality/nAttrs +% (This has no effect if the search algorithm is binary-coded already!) +varRatio = maxCardinality/nAttrs; +if varRatio > 0.5 + X = X>0.5; +else + X = X>(1 - varRatio); +end + +% get selected features from genotype +featIxes = find(X); + +% get cardinality +cardinality = numel(featIxes); + + +% check if this combination of inputs is already in archive +% if so, assign existing fitness values to this genotype +temp = cellfun(@(x) isequal(x,featIxes),archive,'UniformOutput',false); +archiveIx = find([temp{:}]); +if ~isempty(archiveIx); + % get fval from lookup table + fval = fvals(archiveIx,[1,2,4]); + ix_solutions(archiveIx) = 1; +else + if cardinality > maxCardinality + % if cardinality > maxCardinality do not evaluate and assign very + % high values of the obj functions + fval = [Inf,Inf,numel(featIxes)]; + elseif cardinality == 0 + % no inputs selected, irregular solution + fval = [Inf,Inf,numel(featIxes)]; + else + % found new combination, compute values of obj. 
functions + + % relevance + REL = sum(suREL(featIxes)); + + % redundancy + if cardinality == 1 + % 1 input selected, no redundancy + RED = 0; + else + temp = nchoosek(featIxes,2); + ixes = (temp(:,2)-1)*nAttrs+temp(:,1); + RED = sum(suRED(ixes)); + end + + % compute ELM classifier accuracy + ACC = trainAndValidateELM(PHI,Y,featIxes,nFolds,nELM,nUnits); + + % fitness values (- for those obj. functions to maximize) + fval = [-REL,RED,cardinality]; + % add solution to archive and fvals + archive = cat(1,archive,featIxes); + fvals = cat(1,fvals,[-REL,RED,-ACC,cardinality]); + ix_solutions = cat(1,ix_solutions,1); + end +end + +dummy = []; \ No newline at end of file diff --git a/objFunWMOSS.m b/objFunWMOSS.m new file mode 100644 index 0000000..74476f3 --- /dev/null +++ b/objFunWMOSS.m @@ -0,0 +1,107 @@ +function [fval,dummy] = objFunWMOSS(X,varargin) +global archive fvals objFunOptions suREL suRED ix_solutions +% objective function for developing WMOSS wrappers +% +% +% +% Copyright 2015 Riccardo Taormina (riccardo_taormina@sutd.edu.sg), +% Gulsah Karakaya (gulsahkilickarakaya@gmail.com;), +% Stefano Galelli (stefano_galelli@sutd.edu.sg), +% and Selin Damla Ahipasaoglu (ahipasaoglu@sutd.edu.sg;. +% +% Please refer to README.txt for further information. +% +% +% This file is part of Matlab-Multi-objective-Feature-Selection. +% +% Matlab-Multi-objective-Feature-Selection is free software: you can redistribute +% it and/or modify it under the terms of the GNU General Public License +% as published by the Free Software Foundation, either version 3 of the +% License, or (at your option) any later version. +% +% This code is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with MATLAB_IterativeInputSelection. 
+% If not, see . +% + +% initialize fitness values +fval = zeros(1,2); + +% unpack data and parameters +Y = objFunOptions.Y; % targets +PHI = objFunOptions.PHI; % inputs +nFolds = objFunOptions.nFolds; % nFolds for k-fold cross validation +nELM = objFunOptions.nELM; % number of repeats for computing the accuracy obj function +nUnits = objFunOptions.nUnits; % info on dataset +maxCardinality = objFunOptions.maxCardinality; % maximum cardinality + +% retrieve populations size and number of attributes +nAttrs = size(X,2); + +% transform decision variables from continuous to discrete +% 0 or 1 assigned depending on ratio of maxCardinality/nAttrs +% (This has no effect if the search algorithm is binary-coded already!) +varRatio = maxCardinality/nAttrs; +if varRatio > 0.5 + X = X>0.5; +else + X = X>(1 - varRatio); +end + +% get selected features from genotype +featIxes = find(X); + +% get cardinality +cardinality = numel(featIxes); + + +% check if this combination of inputs is already in archive +% if so, assign existing fitness values to this genotype +temp = cellfun(@(x) isequal(x,featIxes),archive,'UniformOutput',false); +archiveIx = find([temp{:}]); +if ~isempty(archiveIx); + % get fval from lookup table + fval = fvals(archiveIx,3:4); + ix_solutions(archiveIx) = 1; +else + if cardinality > maxCardinality + % if cardinality > maxCardinality do not evaluate and assign very + % high values of the obj functions + fval = [Inf,numel(featIxes)]; + elseif cardinality == 0 + % no inputs selected, irregular solution + fval = [Inf,numel(featIxes)]; + else + % found new combination, compute values of obj. 
functions + + % relevance + REL = sum(suREL(featIxes)); + + % redundancy + if cardinality == 1 + % 1 input selected, no redundancy + RED = 0; + else + temp = nchoosek(featIxes,2); + ixes = (temp(:,2)-1)*nAttrs+temp(:,1); + RED = sum(suRED(ixes)); + end + + % compute ELM classifier accuracy + ACC = trainAndValidateELM(PHI,Y,featIxes,nFolds,nELM,nUnits); + + % fitness values (- for those obj. functions to maximize) + fval = [-ACC,cardinality]; + % add solution to archive and fvals + archive = cat(1,archive,featIxes); + fvals = cat(1,fvals,[-REL,RED,-ACC,cardinality]); + ix_solutions = cat(1,ix_solutions,1); + end +end + +dummy = []; \ No newline at end of file
diff --git a/objFunWQEISS.m b/objFunWQEISS.m
new file mode 100644
index 0000000..e8df55d
--- /dev/null
+++ b/objFunWQEISS.m
@@ -0,0 +1,107 @@
+function [fval,dummy] = objFunWQEISS(X,varargin)
+% Objective function for developing WQEISS wrappers. For the genotype X (one
+% decision variable per column) it returns fval = [-relevance, redundancy,
+% -accuracy, cardinality]; dummy is always empty.
+%
+% Copyright 2015 Riccardo Taormina (riccardo_taormina@sutd.edu.sg),
+%      Gulsah Karakaya (gulsahkilickarakaya@gmail.com),
+%      Stefano Galelli (stefano_galelli@sutd.edu.sg),
+%      and Selin Damla Ahipasaoglu (ahipasaoglu@sutd.edu.sg).
+%
+% Please refer to README.txt for further information.
+%
+%
+% This file is part of Matlab-Multi-objective-Feature-Selection.
+%
+%     Matlab-Multi-objective-Feature-Selection is free software: you can redistribute
+%     it and/or modify it under the terms of the GNU General Public License
+%     as published by the Free Software Foundation, either version 3 of the
+%     License, or (at your option) any later version.
+%
+%     This code is distributed in the hope that it will be useful,
+%     but WITHOUT ANY WARRANTY; without even the implied warranty of
+%     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+%     GNU General Public License for more details.
+%
+%     You should have received a copy of the GNU General Public License
+%     along with MATLAB_IterativeInputSelection.
+%     If not, see http://www.gnu.org/licenses/.
+%
+
+global archive fvals objFunOptions suREL suRED ix_solutions
+% initialize fitness values
+fval = zeros(1,4);
+
+% unpack the data and the settings shared through objFunOptions
+Y = objFunOptions.Y; % targets
+PHI = objFunOptions.PHI; % inputs
+nFolds = objFunOptions.nFolds; % number of folds for k-fold cross validation
+nELM = objFunOptions.nELM; % number of repeats for computing the accuracy obj function
+nUnits = objFunOptions.nUnits; % passed on to trainAndValidateELM as nUnits
+maxCardinality = objFunOptions.maxCardinality; % maximum cardinality
+
+% the number of attributes is the number of columns of the genotype
+nAttrs = size(X,2);
+
+% transform the decision variables from continuous to 0/1: the cut-off is
+% 0.5 if maxCardinality/nAttrs > 0.5, and 1 - maxCardinality/nAttrs otherwise
+% (This has no effect if the search algorithm is binary-coded already!)
+varRatio = maxCardinality/nAttrs;
+threshold = 1 - varRatio;
+if varRatio > 0.5
+    threshold = 0.5;
+end
+X = X>threshold;
+
+% indexes of the selected features
+featIxes = find(X);
+
+% number of selected features
+cardinality = numel(featIxes);
+
+
+% look for this combination of inputs in the archive of the combinations
+% evaluated so far
+isKnown = cellfun(@(x) isequal(x,featIxes),archive);
+archiveIx = find(isKnown);
+if ~isempty(archiveIx)
+    % already evaluated: take fval from the lookup table and flag the entry
+    fval = fvals(archiveIx,:);
+    ix_solutions(archiveIx) = 1;
+elseif cardinality == 0 || cardinality > maxCardinality
+    % no inputs selected, or more inputs than maxCardinality: irregular
+    % solution, not evaluated and not archived; Inf is assigned to the
+    % first three obj functions
+    fval = [Inf,Inf,Inf,cardinality];
+else
+    % found new combination, compute values of obj. functions
+
+    % relevance
+    REL = sum(suREL(featIxes));
+
+    % redundancy, summed over all pairs of selected inputs
+    % (no redundancy when only 1 input is selected)
+    RED = 0;
+    if cardinality > 1
+        pairs = nchoosek(featIxes,2);
+        % linear indexes into suRED of the entries (pairs(:,1),pairs(:,2));
+        % this presumes that suRED has nAttrs rows -- TODO confirm
+        pairIxes = (pairs(:,2)-1)*nAttrs+pairs(:,1);
+        RED = sum(suRED(pairIxes));
+    end
+
+    % compute ELM classifier accuracy
+    ACC = trainAndValidateELM(PHI,Y,featIxes,nFolds,nELM,nUnits);
+
+    % fitness values (- for those obj. functions to maximize)
+    newFvals = [-REL,RED,-ACC,cardinality];
+    fval = newFvals;
+
+    % add solution to archive and fvals, and flag it as found
+    archive = cat(1,archive,featIxes);
+    fvals = cat(1,fvals,newFvals);
+    ix_solutions = cat(1,ix_solutions,1);
+end
+
+% the second output is always empty
+dummy = [];
\ No newline at end of file
diff --git a/plotFrequencyMatrix.m b/plotFrequencyMatrix.m
new file mode 100644
index 0000000..78ebdc0
--- /dev/null
+++ b/plotFrequencyMatrix.m
@@ -0,0 +1,109 @@
+function [] = plotFrequencyMatrix(QEISS,nFeat,featNames)
+% Plot the frequency matrix of a set of QEISS
+%
+% Inputs: QEISS <- struct containing QEISS archive and values of the obj. functions
+%         nFeat <- number of features
+%         featNames <- cell array of nFeat strings with feature names
+%
+% Reference: Karakaya, G., Galelli, S., Ahipasaoglu, S.D., Taormina, R., 2015.
+%            Identifying (Quasi) Equally Informative Subsets in Feature Selection Problems
+%            for Classification: A Max-Relevance Min-Redundancy Approach.
+%            IEEE Trans. Cybern. doi:10.1109/TCYB.2015.2444435
+%
+%
+% Copyright 2015 Riccardo Taormina (riccardo_taormina@sutd.edu.sg),
+%      Gulsah Karakaya (gulsahkilickarakaya@gmail.com),
+%      Stefano Galelli (stefano_galelli@sutd.edu.sg),
+%      and Selin Damla Ahipasaoglu (ahipasaoglu@sutd.edu.sg).
+%
+% Please refer to README.txt for further information.
+%
+%
+% This file is part of Matlab-Multi-objective-Feature-Selection.
+%
+%     Matlab-Multi-objective-Feature-Selection is free software: you can redistribute
+%     it and/or modify it under the terms of the GNU General Public License
+%     as published by the Free Software Foundation, either version 3 of the
+%     License, or (at your option) any later version.
+%
+%     This code is distributed in the hope that it will be useful,
+%     but WITHOUT ANY WARRANTY; without even the implied warranty of
+%     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+%     GNU General Public License for more details.
+%
+%     You should have received a copy of the GNU General Public License
+%     along with MATLAB_IterativeInputSelection.
+%     If not, see http://www.gnu.org/licenses/.
+%
+
+
+% get archive, accuracies and cardinalities of the subsets
+archive = QEISS.archive;
+nSubsets = numel(archive);
+accuracies = -QEISS.fvals_ext(:,3); % column 3 is read with the sign flipped
+cardinalities = QEISS.fvals_ext(:,4);
+
+% initialize frequency matrix (one row per subset, one column per feature)
+fMat = zeros(nSubsets,nFeat);
+
+% fill frequency matrix
+for i = 1 : nSubsets
+    thisSubsetIxes = archive{i};
+    fMat(i,thisSubsetIxes) = 1;
+end
+
+% plot the matrix as an image, one row per subset and one column per
+% feature; the entries of the selected features are scaled with the
+% cardinality of the subset, all the other entries are zero
+temp = fMat.*repmat(cardinalities*nFeat,1,nFeat); % colour by cardinality
+temp2 = [cardinalities,accuracies];
+[~,sIxes] = sortrows(temp2,[1,2]); % sort by cardinality, then accuracy
+temp = temp(sIxes,:);
+imagesc(temp);
+myColorMap = flipud(gray(max(unique(temp))+1)); % flipud so that blanks are white
+colormap(myColorMap)
+
+% title and labels
+% xlabel('feature id.') % x-axis label
+ylabel('subset id.') % y-axis label
+% check if feature names are available. if so, prints them
+if (nargin >= 3) && ~isempty(featNames)
+    set(gca,'XTick',1:nFeat)
+    % rotate labels
+    hx = get(gca,'XLabel'); % Handle to xlabel
+    pos = get(hx,'Position');
+    y = pos(2);
+    set(gca,'xticklabel',{[]}); % clear labels
+
+    % Place the new labels
+    for i = 1:nFeat
+        t(i) = text(i,y,featNames{i});
+        set(t(i),'Rotation',45,'HorizontalAlignment','right','FontSize',10)
+    end
+else
+    set(gca,'XTick',1:nFeat,'XTickLabel',1:nFeat);
+end
+
+set(gca,'YTick',1:nSubsets)
+
+% add secondary x axis: % of subsets containing each feature (column sums)
+temp = sum(fMat,1)/nSubsets*100; % dim 1 is explicit: fMat may have a single row
+labels = arrayfun(@(x) sprintf('%3.2f%%',x),temp,...
+    'UniformOutput',false);
+text(0.875:1:nFeat, zeros(1,nFeat),labels,...
+    'HorizontalAlignment','left','Rotation',90);
+
+% add secondary y axis: accuracy of each subset, in the same order as the
+% rows of the image, with the highest accuracy printed again in red
+sortedAccuracies = accuracies(sIxes);
+
+labels = arrayfun(@(x) sprintf('%3.3f',x),sortedAccuracies,...
+    'UniformOutput',false);
+xs = repmat(nFeat+0.625,1,nSubsets+1);
+ys = [0,1:nSubsets];
+text(xs,ys,cat(1,'accuracy',labels));
+[~,ixBest] = max(sortedAccuracies);
+text(xs(ixBest),ys(ixBest+1),labels(ixBest),'Color','red');
+
+axis square
+
diff --git a/quantizeVariable.m b/quantizeVariable.m
new file mode 100644
index 0000000..6b16e93
--- /dev/null
+++ b/quantizeVariable.m
@@ -0,0 +1,71 @@
+function quantY = quantizeVariable(Y,nBins,type)
+% Unsupervised quantization of continuous variable
+%
+% Inputs: Y <- the variable to discretize
+%         nBins <- number of bins employed for quantization
+%         type <- "equalfreq" --> each bin has same num. of observations
+%              <- "equalwidth" --> each bin has same width
+%
+%
+% Output:
+%         quantY <- the quantized variable (mean values of all observations in the bin)
+%
+%
+%
+% Copyright 2015 Riccardo Taormina (riccardo_taormina@sutd.edu.sg),
+%      Gulsah Karakaya (gulsahkilickarakaya@gmail.com),
+%      Stefano Galelli (stefano_galelli@sutd.edu.sg),
+%      and Selin Damla Ahipasaoglu (ahipasaoglu@sutd.edu.sg).
+%
+% Please refer to README.txt for further information.
+%
+%
+% This file is part of Matlab-Multi-objective-Feature-Selection.
+%
+%     Matlab-Multi-objective-Feature-Selection is free software: you can redistribute
+%     it and/or modify it under the terms of the GNU General Public License
+%     as published by the Free Software Foundation, either version 3 of the
+%     License, or (at your option) any later version.
+%
+%     This code is distributed in the hope that it will be useful,
+%     but WITHOUT ANY WARRANTY; without even the implied warranty of
+%     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+%     GNU General Public License for more details.
+%
+%     You should have received a copy of the GNU General Public License
+%     along with MATLAB_IterativeInputSelection.
+%     If not, see http://www.gnu.org/licenses/.
+%
+
+
+% edges of the bins: only this step depends on the quantization type, the
+% binning itself is shared by both types
+if strcmp(type,'equalfreq')
+    % bins with same height: the edges are the sorted observations found
+    % at nBins+1 evenly spaced (rounded up) ranks
+    sortedY = sort(Y);
+    steps = sortedY(ceil(linspace(1,numel(Y),nBins+1)));
+elseif strcmp(type,'equalwidth')
+    % bins with same width: the edges are evenly spaced between the
+    % minimum and the maximum of Y
+    steps = linspace(min(Y),max(Y),nBins+1);
+else
+    error('Type not recognized!!')
+end
+
+% replace each observation with the mean of the observations that fall in
+% the same bin
+quantY = Y;
+for i = 1 : nBins
+    if i == 1
+        % the first bin is closed on both sides, so that steps(1) is included
+        inBin = (Y>=steps(1)) & (Y<=steps(2));
+    else
+        % the other bins are open on the left
+        inBin = (Y>steps(i)) & (Y<=steps(i+1));
+    end
+    % when the bin is empty (e.g. repeated edges) the mask selects nothing
+    % and quantY is left unchanged
+    quantY(inBin) = mean(Y(inBin));
+end
+
diff --git a/readData.m b/readData.m
new file mode 100644
index 0000000..46a413a
--- /dev/null
+++ b/readData.m
@@ -0,0 +1,49 @@
+function [data,varNames,varTypes] = readData(filePath)
+% This function reads experiment data from .csv file with n columns
+%
+% The .csv file has to be structured as follows
+%   Column 1 to n-1 = attributes
+%   Last column = variable to be predicted
+%
+%   1st row = name of attributes and variable to be predicted
+%   2nd row = attribute and variable types (0 = Real, 1 = Categorical)
+%   remaining rows = the
data samples
+%
+%
+%
+% Copyright 2015 Riccardo Taormina (riccardo_taormina@sutd.edu.sg),
+%      Gulsah Karakaya (gulsahkilickarakaya@gmail.com),
+%      Stefano Galelli (stefano_galelli@sutd.edu.sg),
+%      and Selin Damla Ahipasaoglu (ahipasaoglu@sutd.edu.sg).
+%
+% Please refer to README.txt for further information.
+%
+%
+% This file is part of Matlab-Multi-objective-Feature-Selection.
+%
+%     Matlab-Multi-objective-Feature-Selection is free software: you can redistribute
+%     it and/or modify it under the terms of the GNU General Public License
+%     as published by the Free Software Foundation, either version 3 of the
+%     License, or (at your option) any later version.
+%
+%     This code is distributed in the hope that it will be useful,
+%     but WITHOUT ANY WARRANTY; without even the implied warranty of
+%     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+%     GNU General Public License for more details.
+%
+%     You should have received a copy of the GNU General Public License
+%     along with MATLAB_IterativeInputSelection.
+%     If not, see http://www.gnu.org/licenses/.
+%
+
+% load file using xlsread (1st output: numeric content, 2nd output: text content)
+[numericPart,textPart] = xlsread(filePath);
+
+% attribute and variable types are in the first numeric row
+varTypes = numericPart(1,:);
+
+% the data samples are in the remaining numeric rows
+data = numericPart(2:end,:);
+
+% the names are in the first row of the text content
+varNames = textPart(1,:);
diff --git a/readme.txt b/readme.txt
new file mode 100644
index 0000000..1fa351f
--- /dev/null
+++ b/readme.txt
@@ -0,0 +1,68 @@
+This toolbox implements the WQEISS/WMOSS/FQEISS feature selection techniques
+for classification problems presented in Karakaya et al. (2015).
+
+Currently only the NSGA-II algorithm (Deb et al., 2002) is supported, but additional scripts
+for the Borg version (Hadka and Reed, 2012) will be uploaded shortly.
+
+This toolbox depends on the following contributions which have to be
+downloaded and referenced.
+
+1) Song Lin's NGPM toolbox for NSGA-II,
+   http://in.mathworks.com/matlabcentral/fileexchange/31166-ngpm-a-nsga-ii-program-in-matlab-v1-4
+
+2) Hanchuan Peng's Mutual Information computation toolbox
+   http://www.mathworks.com/matlabcentral/fileexchange/14888-mutual-information-computation
+
+3) Yi Cao's Pareto-front toolbox
+   http://www.mathworks.com/matlabcentral/fileexchange/17251-pareto-front
+
+The workings of the NSGA-II version of the algorithms are illustrated in "script_example_NSGAII.m"
+for the "Heart" dataset of the UCI Repository (Lichman, 2013).
+
+
+
+
+References:
+
+Deb, K., Pratap, A., Agarwal, S., Meyarivan, T., 2002.
+A Fast and Elitist Multiobjective Genetic Algorithm.
+IEEE Trans. Evol. Comput. 6, 182–197. doi:10.1109/4235.996017
+
+Hadka, D., Reed, P., 2012.
+Borg: An Auto-Adaptive Many-Objective Evolutionary Computing Framework.
+Evol. Comput. 21, 1–30. doi:10.1162/EVCO_a_00075
+
+Lichman, M. (2013). UCI Machine Learning Repository [http://archive.ics.uci.edu/ml].
+Irvine, CA: University of California, School of Information and Computer Science.
+
+Karakaya, G., Galelli, S., Ahipasaoglu, S.D., Taormina, R., 2015.
+Identifying (Quasi) Equally Informative Subsets in Feature Selection Problems for Classification:
+A Max-Relevance Min-Redundancy Approach.
+IEEE Trans. Cybern. doi:10.1109/TCYB.2015.2444435
+
+
+
+Copyright 2015 Riccardo Taormina (riccardo_taormina@sutd.edu.sg),
+     Gulsah Karakaya (gulsahkilickarakaya@gmail.com),
+     Stefano Galelli (stefano_galelli@sutd.edu.sg),
+     and Selin Damla Ahipasaoglu (ahipasaoglu@sutd.edu.sg).
+
+Please refer to README.txt for further information.
+
+
+This file is part of Matlab-Multi-objective-Feature-Selection.
+
+    Matlab-Multi-objective-Feature-Selection is free software: you can redistribute
+    it and/or modify it under the terms of the GNU General Public License
+    as published by the Free Software Foundation, either version 3 of the
+    License, or (at your option) any later version.
+
+    This code is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with MATLAB_IterativeInputSelection.
+    If not, see http://www.gnu.org/licenses/.
+
diff --git a/script_example_NSGAII.m b/script_example_NSGAII.m
new file mode 100644
index 0000000..a22fcd4
--- /dev/null
+++ b/script_example_NSGAII.m
@@ -0,0 +1,193 @@
+% This script illustrates the NSGA-II implementation of the
+% WQEISS/WMOSS/FQEISS feature selection techniques described in:
+%
+% Karakaya, G., Galelli, S., Ahipasaoglu, S.D., Taormina, R., 2015.
+% Identifying (Quasi) Equally Informative Subsets in Feature Selection Problems
+% for Classification: A Max-Relevance Min-Redundancy Approach.
+% IEEE Trans. Cybern. doi:10.1109/TCYB.2015.2444435
+%
+%
+% Copyright 2015 Riccardo Taormina (riccardo_taormina@sutd.edu.sg),
+%      Gulsah Karakaya (gulsahkilickarakaya@gmail.com),
+%      Stefano Galelli (stefano_galelli@sutd.edu.sg),
+%      and Selin Damla Ahipasaoglu (ahipasaoglu@sutd.edu.sg).
+%
+% Please refer to README.txt for further information.
+%
+%
+% This file is part of Matlab-Multi-objective-Feature-Selection.
+%
+%     Matlab-Multi-objective-Feature-Selection is free software: you can redistribute
+%     it and/or modify it under the terms of the GNU General Public License
+%     as published by the Free Software Foundation, either version 3 of the
+%     License, or (at your option) any later version.
+%
+%     This code is distributed in the hope that it will be useful,
+%     but WITHOUT ANY WARRANTY; without even the implied warranty of
+%     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+%     GNU General Public License for more details.
+%
+%     You should have received a copy of the GNU General Public License
+%     along with MATLAB_IterativeInputSelection.
+%     If not, see http://www.gnu.org/licenses/.
+
+
+clc; clear;
+
+%% include paths (built with fullfile so the separators fit every platform; adjust to your setup)
+addpath(fullfile('..','..','Work','Code','toolboxes','mi')); % Peng's mutual information
+addpath(fullfile('..','toolboxes','nsga2_MATLAB_alternative')); % Lin's NGPM (for NSGA-II)
+addpath(fullfile('..','toolboxes','pareto_front')); % Yi Cao's paretofront toolbox
+
+
+
+%% Load and prepare dataset
+
+% load dataset
+filePath = 'Heart.csv';
+[orig_data,varNames,varTypes] = readData(filePath);
+
+% transform data
+transf_data = transformData(orig_data,varTypes);
+
+% normalize data
+norm_data = normalizeData(transf_data);
+
+% compute relevance and redundancy
+global suRED suREL
+[suRED,suREL] = computeRelevanceRedundancy(norm_data);
+
+
+%% Prepare for launching the algorithms
+
+% specify GO algorithm to use (BORG or NSGA2)
+GOalgorithm = 'NSGA2';
+
+% get algorithm options
+global objFunOptions
+
+[options,objFunOptions] = ...
+    getAlgorithmOptions(GOalgorithm,norm_data);
+
+% initialize overall archive and array containing the values of the
+% objective functions (fvals)
+global archive fvals ix_solutions
+archive = {}; % archive of all solutions explored
+fvals = []; % values of the obj function explored
+            % RELEVANCE - REDUNDANCY - ACCURACY - #INPUTS
+
+ix_solutions = []; % this will track which solutions are found by each algorithm
+
+%% launch WQEISS
+fprintf ('Launching WQEISS\n')
+
+% define number of obj functions and the matlab function coding them
+options.numObj = 4;
+options.objfun = @objFunWQEISS;
+
+% launch
+nsga2(options);
+
+% get solutions indexes for WQEISS
+ixWQEISS = find(ix_solutions);
+
+
+% compute final pareto front
+ixesPF = find(paretofront(fvals(ixWQEISS,:)));
+PF_WQEISS.archive = archive(ixWQEISS(ixesPF));
+PF_WQEISS.fvals = fvals(ixWQEISS(ixesPF),:);
+PF_WQEISS.fvals_ext = fvals(ixWQEISS(ixesPF),:);
+
+
+
+%% launch WMOSS
+fprintf ('Launching WMOSS\n')
+
+% define number of obj functions and the matlab function coding them
+options.numObj = 2;
+options.objfun = @objFunWMOSS;
+
+% launch
+ix_solutions = zeros(numel(archive),1); % re-initialize ix_solutions.
+                                        % at the start of the algorithm, none
+                                        % of solutions in the archive have been
+                                        % found yet;
+nsga2(options);
+% get solutions indexes for WMOSS
+ixWMOSS = find(ix_solutions);
+
+% compute final pareto front (accuracy and cardinality only)
+ixesPF = find(paretofront(fvals(ixWMOSS,3:4)));
+PF_WMOSS.archive = archive(ixWMOSS(ixesPF));
+PF_WMOSS.fvals = fvals(ixWMOSS(ixesPF),[3,4]);
+PF_WMOSS.fvals_ext = fvals(ixWMOSS(ixesPF),:);
+
+
+%% launch FQEISS
+fprintf ('Launching FQEISS\n')
+
+% define number of obj functions and the matlab function coding them
+options.numObj = 3;
+options.objfun = @objFunFQEISS;
+
+% launch
+ix_solutions = zeros(numel(archive),1); % re-initialize ix_solutions.
+                                        % at the start of the algorithm, none
+                                        % of solutions in the archive have been
+                                        % found yet;
+nsga2(options);
+% get solutions indexes for FQEISS
+ixFQEISS = find(ix_solutions);
+
+% compute final pareto front (relevance, redundancy and cardinality only)
+ixesPF = find(paretofront(fvals(ixFQEISS,[1,2,4])));
+PF_FQEISS.archive = archive(ixFQEISS(ixesPF));
+PF_FQEISS.fvals = fvals(ixFQEISS(ixesPF),[1,2,4]);
+PF_FQEISS.fvals_ext = fvals(ixFQEISS(ixesPF),:);
+
+%% delta elimination for WQEISS and FQEISS
+delta = 5;
+PFdelta_WQEISS = deltaElimination(PF_WQEISS,delta);
+PFdelta_FQEISS = deltaElimination(PF_FQEISS,delta);
+
+%% Plot WMOSS vs PFdeltas
+figure;
+subplot(1,2,1);
+plot(PF_WMOSS.fvals_ext(:,4), -PF_WMOSS.fvals_ext(:,3),'ro');
+hold on
+plot(PFdelta_WQEISS.fvals_ext(:,4), -PFdelta_WQEISS.fvals_ext(:,3),'k.');
+legend({'WMOSS','WQEISS'})
+title('WMOSS vs WQEISS')
+xlabel('Cardinality')
+ylabel('Accuracy')
+axis square
+
+subplot(1,2,2);
+plot(PF_WMOSS.fvals_ext(:,4), -PF_WMOSS.fvals_ext(:,3),'ro');
+hold on
+plot(PFdelta_FQEISS.fvals_ext(:,4), -PFdelta_FQEISS.fvals_ext(:,3),'k.');
+legend({'WMOSS','FQEISS'})
+title('WMOSS vs FQEISS')
+xlabel('Cardinality')
+ylabel('Accuracy')
+axis square
+%% Plot Frequency matrices
+figure('name','WQEISS (left) and FQEISS (right) frequency matrices');
+subplot(1,2,1);
+plotFrequencyMatrix(PFdelta_WQEISS,options.numVar,varNames)
+
+subplot(1,2,2);
+plotFrequencyMatrix(PFdelta_FQEISS,options.numVar,varNames)
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/tanhActFun.m b/tanhActFun.m
new file mode 100644
index 0000000..b220bce
--- /dev/null
+++ b/tanhActFun.m
@@ -0,0 +1,31 @@
+function y = tanhActFun(x)
+% hyperbolic tangent activation function for ELM
+%
+% Evaluated element-wise as y = 1 - 2/(1 + exp(2x)).
+%
+% Copyright 2015 Riccardo Taormina (riccardo_taormina@sutd.edu.sg),
+%      Gulsah Karakaya (gulsahkilickarakaya@gmail.com),
+%      Stefano Galelli (stefano_galelli@sutd.edu.sg),
+%      and Selin Damla Ahipasaoglu (ahipasaoglu@sutd.edu.sg).
+%
+% Please refer to README.txt for further information.
+%
+%
+% This file is part of Matlab-Multi-objective-Feature-Selection.
+%
+%     Matlab-Multi-objective-Feature-Selection is free software: you can redistribute
+%     it and/or modify it under the terms of the GNU General Public License
+%     as published by the Free Software Foundation, either version 3 of the
+%     License, or (at your option) any later version.
+%
+%     This code is distributed in the hope that it will be useful,
+%     but WITHOUT ANY WARRANTY; without even the implied warranty of
+%     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+%     GNU General Public License for more details.
+%
+%     You should have received a copy of the GNU General Public License
+%     along with MATLAB_IterativeInputSelection.
+%     If not, see http://www.gnu.org/licenses/.
+%
+
+y = 1 - 2./(1 + exp(2*x));
\ No newline at end of file
diff --git a/trainAndValidateELM.m b/trainAndValidateELM.m
new file mode 100644
index 0000000..f29a0c3
--- /dev/null
+++ b/trainAndValidateELM.m
@@ -0,0 +1,78 @@
+function accuracy = trainAndValidateELM(PHI,Y,featIxes,nFolds,nELM,nUnits)
+% This function trains and validate an ELM classifier with k-fold
+% cross-validation
+%
+% Inputs: PHI <- array of training inputs with size = num. patterns x num. features
+%         Y <- array of training targets with size = num. patterns x num.
categories
+%              (for each i-th row of Y only the entry relative to the correct category is 1)
+%         featIxes <- features selected (they are columns of PHI)
+%         nFolds <- num. folds for cross validation
+%         nELM <- num. ELM in the ensemble
+%         nUnits <- num. hidden units of ELM
+%
+% Output:
+%         accuracy <- accuracy of the predictions of the cross-validated ELM ensemble
+%
+%
+%
+% Copyright 2015 Riccardo Taormina (riccardo_taormina@sutd.edu.sg),
+%      Gulsah Karakaya (gulsahkilickarakaya@gmail.com),
+%      Stefano Galelli (stefano_galelli@sutd.edu.sg),
+%      and Selin Damla Ahipasaoglu (ahipasaoglu@sutd.edu.sg).
+%
+% Please refer to README.txt for further information.
+%
+%
+% This file is part of Matlab-Multi-objective-Feature-Selection.
+%
+%     Matlab-Multi-objective-Feature-Selection is free software: you can redistribute
+%     it and/or modify it under the terms of the GNU General Public License
+%     as published by the Free Software Foundation, either version 3 of the
+%     License, or (at your option) any later version.
+%
+%     This code is distributed in the hope that it will be useful,
+%     but WITHOUT ANY WARRANTY; without even the implied warranty of
+%     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+%     GNU General Public License for more details.
+%
+%     You should have received a copy of the GNU General Public License
+%     along with MATLAB_IterativeInputSelection.
+%     If not, see http://www.gnu.org/licenses/.
+%
+
+% sizes used for the k-fold split (they do not change across the repeats)
+lData = size(Y,1);
+lFold = floor(lData/nFolds);
+
+% target category of each pattern; dim 2 is explicit so a one-column Y also works
+[~,Y_] = max(Y,[],2);
+
+% initialize
+Yhat = zeros(lData,1);
+accuracies = Inf(1,nELM);
+for j = 1 : nELM
+
+    % k-fold cross validation
+    for i = 1 : nFolds
+        % select training and validation data (the last fold takes the leftovers)
+        ix1 = (i-1)*lFold+1;
+        if i == nFolds
+            ix2 = lData;
+        else
+            ix2 = i*lFold;
+        end
+        valIxes = ix1:ix2; % select the validation chunk
+        trIxes = setdiff(1:lData,valIxes); % obtain training indexes by set difference
+
+        % create datasets (only the selected features are used)
+        trX = PHI(trIxes,featIxes); trY = Y(trIxes,:);
+        valX = PHI(valIxes,featIxes);
+
+        % train and test ELM (ELMclassifier expects one pattern per column)
+        [~,Yhat(valIxes)] =...
+            ELMclassifier(trX', trY', valX', nUnits);
+    end
+    % compute accuracy after cross-validation
+    accuracies(j) = computeAccuracy(Y_,Yhat);
+end
+accuracy = mean(accuracies);
\ No newline at end of file
diff --git a/transformData.m b/transformData.m
new file mode 100644
index 0000000..53fec6d
--- /dev/null
+++ b/transformData.m
@@ -0,0 +1,68 @@
+function transf_data = transformData(data,varTypes)
+% This function transforms the data by
+% 1) map the values of categorical attributes between 0 and the number of categories - 1
+%
+% 2) quantize real valued attributes using nBins number of bins
+%    modify the value of nBins for sparser/denser discretization
+%    quantType = 'equalfreq' <-- each bin has same num. observations
+%    quantType = 'equalwidth' <-- each bin has same width
+%
+%
+%
+% Copyright 2015 Riccardo Taormina (riccardo_taormina@sutd.edu.sg),
+%      Gulsah Karakaya (gulsahkilickarakaya@gmail.com),
+%      Stefano Galelli (stefano_galelli@sutd.edu.sg),
+%      and Selin Damla Ahipasaoglu (ahipasaoglu@sutd.edu.sg).
+%
+% Please refer to README.txt for further information.
+%
+%
+% This file is part of Matlab-Multi-objective-Feature-Selection.
+%
+%     Matlab-Multi-objective-Feature-Selection is free software: you can redistribute
+%     it and/or modify it under the terms of the GNU General Public License
+%     as published by the Free Software Foundation, either version 3 of the
+%     License, or (at your option) any later version.
+%
+%     This code is distributed in the hope that it will be useful,
+%     but WITHOUT ANY WARRANTY; without even the implied warranty of
+%     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+%     GNU General Public License for more details.
+%
+%     You should have received a copy of the GNU General Public License
+%     along with MATLAB_IterativeInputSelection.
+%     If not, see http://www.gnu.org/licenses/.
+%
+
+% discretization options
+nBins = 10;
+quantType = 'equalfreq';
+
+% initialize output array (same size as data)
+[nObs,nVars] = size(data);
+transf_data = zeros(nObs,nVars);
+
+
+% loop through all variables (columns of data)
+for i = 1 : nVars
+    % get current attribute
+    attr = data(:,i);
+    attrType = varTypes(i);
+    % transform accordingly with its varType
+    if attrType == 0
+        % real-valued, discretize
+        transf_data(:,i) = quantizeVariable(attr,nBins,quantType);
+    elseif attrType == 1
+        % categorical data, recode the sorted categories as 0,1,...,num. categories-1
+
+        % the codes come from the index output of unique: rewriting attr in
+        % place, one category at a time, merges categories when a new code
+        % equals a value still to be processed (e.g. -1 and 0); NaNs are kept
+        isValid = ~isnan(attr);
+        [~,~,categoryIxes] = unique(attr(isValid));
+        attr(isValid) = categoryIxes-1;
+        transf_data(:,i) = attr;
+    else
+        error('Attribute num#%d, type not recognized!',i);
+    end
+end
\ No newline at end of file