X-Git-Url: https://git.creatis.insa-lyon.fr/pubgit/?p=CreaPhase.git;a=blobdiff_plain;f=octave_packages%2Fnan-2.5.5%2Ffss.m;fp=octave_packages%2Fnan-2.5.5%2Ffss.m;h=f858b32253e5c532556f4ac5bd269ccf1c49138c;hp=0000000000000000000000000000000000000000;hb=f5f7a74bd8a4900f0b797da6783be80e11a68d86;hpb=1705066eceaaea976f010f669ce8e972f3734b05 diff --git a/octave_packages/nan-2.5.5/fss.m b/octave_packages/nan-2.5.5/fss.m new file mode 100644 index 0000000..f858b32 --- /dev/null +++ b/octave_packages/nan-2.5.5/fss.m @@ -0,0 +1,144 @@ +function [idx,score] = fss(D,cl,N,MODE) +% FSS - feature subset selection and feature ranking +% the method is motivated by the max-relevance-min-redundancy (mRMR) +% approach [1]. However, the default method uses partial correlation, +% which has been developed from scratch. PCCM [3] describes +% a similar idea, but is more complicated. +% An alternative method based on FSDD is implemented, too. +% +% [idx,score] = fss(D,cl) +% [idx,score] = fss(D,cl,MODE) +% [idx,score] = fss(D,cl,MODE) +% +% D data - each column represents a feature +% cl classlabel +% Mode 'Pearson' [default] correlation +% 'rank' correlation +% 'FSDD' feature selection algorithm based on a distance discriminant [2] +% %%% 'MRMR','MID','MIQ' max-relevance, min redundancy [1] - not supported yet. +% +% score score of the feature +% idx ranking of the feature +% [tmp,idx]=sort(-score) +% +% see also: TRAIN_SC, XVAL, ROW_COL_DELETION +% +% REFERENCES: +% [1] Peng, H.C., Long, F., and Ding, C., +% Feature selection based on mutual information: criteria of max-dependency, max-relevance, and min-redundancy, +% IEEE Transactions on Pattern Analysis and Machine Intelligence, +% Vol. 27, No. 8, pp.1226-1238, 2005. +% [2] Jianning Liang, Su Yang, Adam Winstanley, +% Invariant optimal feature selection: A distance discriminant and feature ranking based solution, +% Pattern Recognition, Volume 41, Issue 5, May 2008, Pages 1429-1439. +% ISSN 0031-3203, DOI: 10.1016/j.patcog.2007.10.018. +% [3] K. Raghuraj Rao and S. Lakshminarayanan +% Partial correlation based variable selection approach for multivariate data classification methods +% Chemometrics and Intelligent Laboratory Systems +% Volume 86, Issue 1, 15 March 2007, Pages 68-81 +% http://dx.doi.org/10.1016/j.chemolab.2006.08.007 + +% $Id: fss.m 9104 2011-11-15 15:14:10Z carandraug $ +% Copyright (C) 2009,2010 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software; you can redistribute it and/or modify +% it under the terms of the GNU General Public License as published by +% the Free Software Foundation; either version 3 of the License, or +% (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; If not, see . + + if nargin<3 + MODE = []; + N = []; + elseif ischar(N) + MODE = N; + N = []; + elseif nargin<4, + MODE = []; + end + + if isempty(N), N = size(D,2); end + score = repmat(NaN,1,size(D,2)); + + if 0, %strcmpi(MODE,'MRMR') || strcmpi(MODE,'MID') || strcmpi(MODE,'MIQ'); + %% RMRM/MID/MIQ is not supported + %% TODO: FIXME + + [tmp,t] = sort([cl,D]); + cl = t(:,1:size(cl,2)); + D = t(:,1:size(D,2)); + for k = 1:N, + V(k) = mi(cl, D(:,k)); + + for m = 1:N, + W(k,m) = mi(D(:,m), D(:,k)); + end + MID(k) = V(k) - mean(W(k,:)); + MIQ(k) = V(k) / mean(W(k,:)); + end + + if strcmpi(MODE,'MIQ') + [score,idx] = sort(MIQ,[],'descend'); + else + [score,idx] = sort(MID,[],'descend'); + end + + elseif strcmpi(MODE,'FSDD'); + [b,i,j]=unique(cl); + for k=1:length(b) + n(k,1) = sum(j==k); + m(k,:) = mean(D(j==k,:),1); + v(k,:) = var(D(j==k,:),1); + end + m0 = mean(m,1,n); + v0 = var(D,[],1); + s2 = mean(m.^2,1,n) - m0.^2; + score = (s2 - 2*mean(v,1,n)) ./ v0; + [t,idx] = sort(-score); + + elseif isempty(MODE) || strcmpi(MODE,'rank') || strcmpi(MODE,'Pearson') + cl = cat2bin(cl); + if strcmpi(MODE,'rank'), + [tmp,D] = sort(D,1); + end + idx = repmat(NaN,1,N); + for k = 1:N, + f = isnan(score); + + %%%%% compute partial correlation (X,Y|Z) + % r = partcorrcoef(cl, D(:,f), D(:,~f)); % obsolete, not very robust + + %% this is a more robust version + X = cl; Y = D(:,f); Z = D(:,~f); + if (k>1) + X = X-Z*(Z\X); + Y = Y-Z*(Z\Y); + end + r = corrcoef(X,Y); + + [s,ix] = max(sumsq(r,1)); + f = find(f); + idx(k) = f(ix); + score(idx(k)) = s; + end + + end +end + +function I = mi(x,y) + ix = ~any(isnan([x,y]),2); + H = sparse(x(ix),y(ix)); + pij = H./sum(ix); + Iij = pij.*log2(pij./(sum(pij,2)*sum(pij,1))); + Iij(isnan(Iij)) = 0; + I = sum(Iij(:)); +end