Add WQEISS for regression problems
rtaormina committed Apr 16, 2016
1 parent ab6d031 commit 3455fc1
Showing 9 changed files with 1,636 additions and 19 deletions.
1,032 changes: 1,032 additions & 0 deletions Concrete_Data.csv

Large diffs are not rendered by default.

74 changes: 74 additions & 0 deletions ELMregression.m
@@ -0,0 +1,74 @@
function [trYhat, valYhat,W1,W2,bias] =...
ELMregression(trX, trY, valX, nUnits)

% This function implements an ELM regressor with sigmoid activation function.
%
% Inputs: trX    <- array of training inputs with size = num. features x num. training patterns
%         trY    <- array of training targets with size = 1 x num. training patterns
%         valX   <- array of validation inputs with size = num. features x num. validation patterns
%         nUnits <- num. hidden units of ELM
%
% Output:
%         trYhat  <- array of training target predictions with size = 1 x num. training patterns
%         valYhat <- array of validation target predictions with size = 1 x num. validation patterns
%         W1,W2,bias <- the trained parameters of the ELM
%
% Reference: Huang, G.-B., Zhu, Q.-Y., Siew, C.-K., 2006. Extreme learning machine: Theory and applications.
% Neurocomputing 70, 489–501. doi:10.1016/j.neucom.2005.12.126
%
% Copyright 2016 Riccardo Taormina (riccardo_taormina@sutd.edu.sg),
% Gulsah Karakaya (gulsahkilickarakaya@gmail.com),
% Stefano Galelli (stefano_galelli@sutd.edu.sg),
% and Selin Damla Ahipasaoglu (ahipasaoglu@sutd.edu.sg).
%
% Please refer to README.txt for further information.
%
%
% This file is part of Matlab-Multi-objective-Feature-Selection.
%
% Matlab-Multi-objective-Feature-Selection is free software: you can redistribute
% it and/or modify it under the terms of the GNU General Public License
% as published by the Free Software Foundation, either version 3 of the
% License, or (at your option) any later version.
%
% This code is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with Matlab-Multi-objective-Feature-Selection.
% If not, see <http://www.gnu.org/licenses/>.
%

% get number of features and number of patterns for training and validation
[nFeatures,nPatternsTr] = size(trX);
nPatternsVal = size(valX,2);

% generate random input->hidden weights W1 (between -1 and 1)
W1 = rand(nUnits,nFeatures)*2-1;

% generate random biases (between 0 and 1)
bias = rand(nUnits,1);

% compute hidden neuron output matrix H
H = sigActFun(W1*trX + repmat(bias,[1,nPatternsTr]));

% compute hidden->output weights W2
Hinv = pinv(H');
W2 = Hinv * trY';

% get ELM response on training ...
trYhat = (H' * W2)';

% ... and validation dataset
Hval = sigActFun(W1*valX + repmat(bias,[1,nPatternsVal]));
valYhat = (Hval' * W2)';
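
For reference, here is a minimal usage sketch with hypothetical data; it assumes sigActFun, the repository's sigmoid helper, is on the MATLAB path:

% --- usage sketch with hypothetical data ---
rng(1);                                  % reproducible random weights
trX  = rand(8,100);                      % 8 features x 100 training patterns
trY  = sum(trX,1) + 0.1*randn(1,100);    % 1 x 100 real-valued targets
valX = rand(8,25);                       % 8 features x 25 validation patterns
[trYhat,valYhat,W1,W2,bias] = ELMregression(trX,trY,valX,20);
fprintf('training RMSE: %.3f\n', sqrt(mean((trYhat - trY).^2)));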
48 changes: 48 additions & 0 deletions computeSU.m
@@ -0,0 +1,48 @@
function SU = computeSU(x,y)
% Computes the symmetric uncertainty between two variables
%
%
% Copyright 2016 Riccardo Taormina (riccardo_taormina@sutd.edu.sg),
% Gulsah Karakaya (gulsahkilickarakaya@gmail.com),
% Stefano Galelli (stefano_galelli@sutd.edu.sg),
% and Selin Damla Ahipasaoglu (ahipasaoglu@sutd.edu.sg).
%
% Please refer to README.txt for further information.
%
%
% This file is part of Matlab-Multi-objective-Feature-Selection.
%
% Matlab-Multi-objective-Feature-Selection is free software: you can redistribute
% it and/or modify it under the terms of the GNU General Public License
% as published by the Free Software Foundation, either version 3 of the
% License, or (at your option) any later version.
%
% This code is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with Matlab-Multi-objective-Feature-Selection.
% If not, see <http://www.gnu.org/licenses/>.

% discretization options
nBins = 20;
quantType = 'equalwidth';

% quantize variables
x = quantizeVariable(x,nBins,quantType);
y = quantizeVariable(y,nBins,quantType);

% compute entropies
hX  = entropy(x);
hY  = entropy(y);
hXY = jointentropy(x, y);

% compute mutual information
MI = hX + hY - hXY;

% compute symmetric uncertainty
SU = 2*MI/(hX + hY);
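
Since SU = 2*MI/(H(X)+H(Y)) is normalized to [0,1], a quick sanity check is that strongly dependent variables score near 1 and independent ones near 0. A sketch with hypothetical data (it relies on the repository's quantizeVariable, entropy and jointentropy helpers):

% --- sanity-check sketch with hypothetical data ---
x = randn(1000,1);
SU_dep   = computeSU(x, 2*x + 0.01*randn(1000,1));   % close to 1 (strong dependence)
SU_indep = computeSU(x, randn(1000,1));              % close to 0 (independence)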


49 changes: 32 additions & 17 deletions getAlgorithmOptions.m
@@ -1,9 +1,9 @@
-function [options,objFunOptions] = getAlgorithmOptions(algorithm,data)
+function [options,objFunOptions] = getAlgorithmOptions(algorithm,data,varargin)
% Options for the algorithms (NSGAII/BORG) and the objective function
%
%
%
-% Copyright 2015 Riccardo Taormina (riccardo_taormina@sutd.edu.sg),
+% Copyright 2016 Riccardo Taormina (riccardo_taormina@sutd.edu.sg),
% Gulsah Karakaya (gulsahkilickarakaya@gmail.com),
% Stefano Galelli (stefano_galelli@sutd.edu.sg),
% and Selin Damla Ahipasaoglu (ahipasaoglu@sutd.edu.sg).
@@ -28,29 +28,44 @@
% If not, see <http://www.gnu.org/licenses/>.
%

+% check nargin
+if nargin == 2
+    problem_type = 'CLASSIFICATION';
+elseif (nargin == 3) && (varargin{1} == true)
+    problem_type = 'REGRESSION';
+else
+    error('Problem type not recognized!')
+end



% extract attributes (PHI) and predictand (Y)
PHI = data(:,1:end-1);
[nPatterns,nAttrs] = size(PHI);
tempY = data(:,end);

-% restructure predictand (array with same number of columns as number of classes)
-classes = unique(tempY);
-nClasses = numel(classes);
-Y = zeros(nPatterns,nClasses);
-for i = 1 : nClasses
-    thisClass = classes(i);
-    ixes = (tempY == thisClass);
-    Y(ixes,i) = 1;
-end
+if strcmp(problem_type, 'CLASSIFICATION')
+    % restructure predictand (array with same number of columns as number of classes)
+    classes = unique(tempY);
+    nClasses = numel(classes);
+    Y = zeros(nPatterns,nClasses);
+    for i = 1 : nClasses
+        thisClass = classes(i);
+        ixes = (tempY == thisClass);
+        Y(ixes,i) = 1;
+    end
+else
+    Y = tempY;
+end


% Objective Function options
-objFunOptions.Y = Y;                  % predictand
-objFunOptions.PHI = PHI;              % attributes
-objFunOptions.nFolds = 10;            % folds for k-fold cross-validation
-objFunOptions.nELM = 5;               % size of ELM ensemble
-objFunOptions.nUnits = 10;            % number of units in ELM
-objFunOptions.maxCardinality = 20;    % maximum cardinality (important for large datasets)
+objFunOptions.Y = Y;                  % predictand
+objFunOptions.PHI = PHI;              % attributes
+objFunOptions.nFolds = 10;            % folds for k-fold cross-validation
+objFunOptions.nELM = 10;              % size of ELM ensemble
+objFunOptions.nUnits = 50;            % number of units in ELM
+objFunOptions.maxCardinality = 20;    % maximum cardinality (important for large datasets)

% Algorithm options
if strcmp(algorithm,'NSGA2')
@@ -73,7 +88,7 @@
options.nvars = nAttrs; % number of design variables
options.nconstrs = 0;                    % number of constraints
options.NFE = 5000; % number of functions evaluations
-options.lowerBounds = zeros(1,nAttrs);   % lower bound of design variables (0)
+options.lowerBounds = -ones(1,nAttrs);   % lower bound of design variables (-1)
options.upperBounds = ones(1,nAttrs); % upper bound of design variables (1)
else
error('Algorithm not supported!')
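
With the new optional third argument, callers pick the regression variant explicitly. A sketch, assuming data is an n-patterns x (n-attributes+1) matrix with the predictand in the last column, and using one of the algorithm strings supported above:

% classification, unchanged behaviour
[options,objFunOptions] = getAlgorithmOptions('NSGA2',data);
% regression: pass true as the third argument
[options,objFunOptions] = getAlgorithmOptions('NSGA2',data,true);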
108 changes: 108 additions & 0 deletions objFunWQEISS_regression.m
@@ -0,0 +1,108 @@
function [fval,dummy] = objFunWQEISS_regression(X,varargin)
global archive fvals objFunOptions suREL suRED ix_solutions
% objective function for developing WQEISS wrappers (regression problems)
%
%
%
% Copyright 2016 Riccardo Taormina (riccardo_taormina@sutd.edu.sg),
% Gulsah Karakaya (gulsahkilickarakaya@gmail.com),
% Stefano Galelli (stefano_galelli@sutd.edu.sg),
% and Selin Damla Ahipasaoglu (ahipasaoglu@sutd.edu.sg).
%
% Please refer to README.txt for further information.
%
%
% This file is part of Matlab-Multi-objective-Feature-Selection.
%
% Matlab-Multi-objective-Feature-Selection is free software: you can redistribute
% it and/or modify it under the terms of the GNU General Public License
% as published by the Free Software Foundation, either version 3 of the
% License, or (at your option) any later version.
%
% This code is distributed in the hope that it will be useful,
% but WITHOUT ANY WARRANTY; without even the implied warranty of
% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
% GNU General Public License for more details.
%
% You should have received a copy of the GNU General Public License
% along with Matlab-Multi-objective-Feature-Selection.
% If not, see <http://www.gnu.org/licenses/>.
%


% initialize fitness values
fval = zeros(1,4);

% unpack data and parameters
Y = objFunOptions.Y; % targets
PHI = objFunOptions.PHI; % inputs
nFolds = objFunOptions.nFolds;                  % folds for k-fold cross-validation
nELM = objFunOptions.nELM;                      % size of the ELM ensemble for the accuracy obj. function
nUnits = objFunOptions.nUnits;                  % number of hidden units in each ELM
maxCardinality = objFunOptions.maxCardinality;  % maximum cardinality

% retrieve number of attributes
nAttrs = size(X,2);

% transform decision variables from continuous to discrete:
% 0 or 1 assigned depending on the ratio maxCardinality/nAttrs
% (this has no effect if the search algorithm is binary-coded already!)
varRatio = maxCardinality/nAttrs;
if varRatio > 0.5
    X = X > 0.5;
else
    X = X > (1 - varRatio);
end

% get selected features from genotype
featIxes = find(X);

% get cardinality
cardinality = numel(featIxes);


% check if this combination of inputs is already in the archive;
% if so, assign the existing fitness values to this genotype
temp = cellfun(@(x) isequal(x,featIxes),archive,'UniformOutput',false);
archiveIx = find([temp{:}]);
if ~isempty(archiveIx)
    % get fval from lookup table
    fval = fvals(archiveIx,:);
    ix_solutions(archiveIx) = 1;
else
    if cardinality > maxCardinality
        % cardinality > maxCardinality: do not evaluate, and assign very
        % high values to the obj. functions
        fval = [Inf,Inf,Inf,numel(featIxes)];
    elseif cardinality == 0
        % no inputs selected, irregular solution
        fval = [Inf,Inf,Inf,numel(featIxes)];
    else
        % found a new combination, compute the values of the obj. functions

        % relevance
        REL = sum(suREL(featIxes));

        % redundancy
        if cardinality == 1
            % 1 input selected, no redundancy
            RED = 0;
        else
            % map each (i,j) feature pair to the linear (column-major)
            % index of entry (i,j) in the nAttrs-by-nAttrs suRED matrix
            temp = nchoosek(featIxes,2);
            ixes = (temp(:,2)-1)*nAttrs + temp(:,1);
            RED = sum(suRED(ixes));
        end

        % compute ELM regression accuracy
        SU = trainAndValidateELM_regression(PHI,Y,featIxes,nFolds,nELM,nUnits);

        % fitness values (negated for the obj. functions to maximize)
        fval = [-REL,RED,-SU,cardinality];

        % add solution to archive and fvals
        archive = cat(1,archive,featIxes);
        fvals = cat(1,fvals,[-REL,RED,-SU,cardinality]);
        ix_solutions = cat(1,ix_solutions,1);
    end
end

dummy = [];
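
The continuous-to-binary step above is worth a note: with real-coded genotypes, the threshold 1 - maxCardinality/nAttrs makes a uniformly random genotype select about maxCardinality features on average, which keeps early populations within the cardinality constraint. A small illustration with hypothetical numbers, assuming genotype entries in [0,1]:

% --- thresholding sketch with hypothetical numbers ---
nAttrs = 100; maxCardinality = 20;
varRatio = maxCardinality/nAttrs;   % 0.2 -> threshold at 0.8
X = rand(1,nAttrs);                 % random real-coded genotype in [0,1]
sel = X > (1 - varRatio);           % ~20 selected features on average
fprintf('selected %d features\n', nnz(sel));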
