1 function [idx,score] = fss(D,cl,N,MODE)
2 % FSS - feature subset selection and feature ranking
3 % the method is motivated by the max-relevance-min-redundancy (mRMR)
4 % approach [1]. However, the default method uses partial correlation,
5 % which has been developed from scratch. PCCM [3] describes
6 % a similar idea, but is more complicated.
7 % An alternative method based on FSDD is implemented, too.
9 % [idx,score] = fss(D,cl)
10 % [idx,score] = fss(D,cl,MODE)
11 % [idx,score] = fss(D,cl,MODE)
13 % D data - each column represents a feature
15 % Mode 'Pearson' [default] correlation
17 % 'FSDD' feature selection algorithm based on a distance discriminant [2]
18 % %%% 'MRMR','MID','MIQ' max-relevance, min redundancy [1] - not supported yet.
20 % score score of the feature
21 % idx ranking of the feature
22 % [tmp,idx]=sort(-score)
24 % see also: TRAIN_SC, XVAL, ROW_COL_DELETION
27 % [1] Peng, H.C., Long, F., and Ding, C.,
28 % Feature selection based on mutual information: criteria of max-dependency, max-relevance, and min-redundancy,
29 % IEEE Transactions on Pattern Analysis and Machine Intelligence,
30 % Vol. 27, No. 8, pp.1226-1238, 2005.
31 % [2] Jianning Liang, Su Yang, Adam Winstanley,
32 % Invariant optimal feature selection: A distance discriminant and feature ranking based solution,
33 % Pattern Recognition, Volume 41, Issue 5, May 2008, Pages 1429-1439.
34 % ISSN 0031-3203, DOI: 10.1016/j.patcog.2007.10.018.
35 % [3] K. Raghuraj Rao and S. Lakshminarayanan
36 % Partial correlation based variable selection approach for multivariate data classification methods
37 % Chemometrics and Intelligent Laboratory Systems
38 % Volume 86, Issue 1, 15 March 2007, Pages 68-81
39 % http://dx.doi.org/10.1016/j.chemolab.2006.08.007
41 % $Id: fss.m 9104 2011-11-15 15:14:10Z carandraug $
42 % Copyright (C) 2009,2010 by Alois Schloegl <alois.schloegl@gmail.com>
43 % This function is part of the NaN-toolbox
44 % http://pub.ist.ac.at/~schloegl/matlab/NaN/
46 % This program is free software; you can redistribute it and/or modify
47 % it under the terms of the GNU General Public License as published by
48 % the Free Software Foundation; either version 3 of the License, or
49 % (at your option) any later version.
51 % This program is distributed in the hope that it will be useful,
52 % but WITHOUT ANY WARRANTY; without even the implied warranty of
53 % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
54 % GNU General Public License for more details.
56 % You should have received a copy of the GNU General Public License
57 % along with this program; If not, see <http://www.gnu.org/licenses/>.
69 if isempty(N), N = size(D,2); end
70 score = repmat(NaN,1,size(D,2));
72 if 0, %strcmpi(MODE,'MRMR') || strcmpi(MODE,'MID') || strcmpi(MODE,'MIQ');
73 %% RMRM/MID/MIQ is not supported
76 [tmp,t] = sort([cl,D]);
77 cl = t(:,1:size(cl,2));
80 V(k) = mi(cl, D(:,k));
83 W(k,m) = mi(D(:,m), D(:,k));
85 MID(k) = V(k) - mean(W(k,:));
86 MIQ(k) = V(k) / mean(W(k,:));
89 if strcmpi(MODE,'MIQ')
90 [score,idx] = sort(MIQ,[],'descend');
92 [score,idx] = sort(MID,[],'descend');
95 elseif strcmpi(MODE,'FSDD');
99 m(k,:) = mean(D(j==k,:),1);
100 v(k,:) = var(D(j==k,:),1);
104 s2 = mean(m.^2,1,n) - m0.^2;
105 score = (s2 - 2*mean(v,1,n)) ./ v0;
106 [t,idx] = sort(-score);
108 elseif isempty(MODE) || strcmpi(MODE,'rank') || strcmpi(MODE,'Pearson')
110 if strcmpi(MODE,'rank'),
113 idx = repmat(NaN,1,N);
117 %%%%% compute partial correlation (X,Y|Z)
118 % r = partcorrcoef(cl, D(:,f), D(:,~f)); % obsolete, not very robust
120 %% this is a more robust version
121 X = cl; Y = D(:,f); Z = D(:,~f);
128 [s,ix] = max(sumsq(r,1));
138 ix = ~any(isnan([x,y]),2);
139 H = sparse(x(ix),y(ix));
141 Iij = pij.*log2(pij./(sum(pij,2)*sum(pij,1)));