X-Git-Url: https://git.creatis.insa-lyon.fr/pubgit/?a=blobdiff_plain;f=octave_packages%2Fnan-2.5.5%2Frow_col_deletion.m;fp=octave_packages%2Fnan-2.5.5%2Frow_col_deletion.m;h=578687bd54fc0e65a69f9c8ca314b6bfe7b706c4;hb=c880e8788dfc484bf23ce13fa2787f2c6bca4863;hp=0000000000000000000000000000000000000000;hpb=1705066eceaaea976f010f669ce8e972f3734b05;p=CreaPhase.git diff --git a/octave_packages/nan-2.5.5/row_col_deletion.m b/octave_packages/nan-2.5.5/row_col_deletion.m new file mode 100644 index 0000000..578687b --- /dev/null +++ b/octave_packages/nan-2.5.5/row_col_deletion.m @@ -0,0 +1,113 @@ +function [rix,cix] = row_col_deletion(d,c,w) +% ROW_COL_DELETION selects the rows and columns for removing any missing values. +% A heuristic based on maximizing the number of remaining sample values +% is used. In other words, if there are more rows than columns, it is +% more likely that a row-wise deletion will be applied and vice versa. +% +% [rix,cix] = row_col_deletion(d) +% [rix,cix] = row_col_deletion(d,c,w) +% +% Input: +% d data (each row is a sample, each column a feature) +% c classlabels (not really used) [OPTIONAL] +% w weight for each sample vector [OPTIONAL] +% Output: +% rix selected samples +% cix selected columns +% +% d(rix,cix) does not contain any NaN's i.e. missing values +% +% see also: TRAIN_SC, TEST_SC + +% $Id$ +% Copyright (C) 2009,2010 by Alois Schloegl +% This function is part of the NaN-toolbox +% http://pub.ist.ac.at/~schloegl/matlab/NaN/ + +% This program is free software; you can redistribute it and/or +% modify it under the terms of the GNU General Public License +% as published by the Free Software Foundation; either version 3 +% of the License, or (at your option) any later version. +% +% This program is distributed in the hope that it will be useful, +% but WITHOUT ANY WARRANTY; without even the implied warranty of +% MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +% GNU General Public License for more details. +% +% You should have received a copy of the GNU General Public License +% along with this program; if not, write to the Free Software +% Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA. + + +if nargin > 2, + if isempty(w) || all(w==w(1)), + ix = ~isnan(c); + else + ix = ~any(isnan(c) | isnan(w)); + end; + d = d(ix,:); %% ignore samples with invalid c or w + w = w(ix,:); + +elseif nargin > 1, + d = d(~isnan(c),:); %% ignore samples with invalid c or w + w = []; +else + w = []; +end; + + +if 0, + % decides whether row-wise or column-wise deletion removes less data. + % rix and cix are the resulting index vectors + % either row-wise or column-wise deletion, but not a combination of both, is used. + % this is obsolete + + n = numel(d); + cix = find(~any(isnan(d),1)); + rix = find(~any(isnan(d),2)); + nr = length(rix)*size(d,2); % number of elements after row-wise deletion + nc = length(cix)*size(d,1); % number of elements after column-wise deletion + + if (nr>nc) + cix = 1:size(d,2); % select all columns + %fprintf(1,'row-wise deletion (%i,%i,%i)\n',n,nr,nc); + else + rix = 1:size(d,1); % select all rows + %fprintf(1,'column-wise deletion (%i,%i,%i)\n',n,nr,nc); + end; + +else + + %% a mix of row- and column-wise deletion is possible + if ~isempty(w) && (abs(sum(w)-1) > log2(N)*eps || any(w<0) || any(~isfinite(w))) + error('weight vector must contain only non-negative and finite values'); + end; + [N,M] = size(d); + rix = ones(N,1); cix = ones(1,M); + while 1; + e = ~isnan(d(rix>0,cix>0)); + if ~isempty(w), + colCost = mean(e, 1, w(rix>0)/sum(w(rix>0)))'; % cost of deleting columns + else + colCost = mean(e, 1)'; % cost of deleting columns + end; + rowCost = mean(e, 2); % cost of deleting rows + [tmp,ix] = sort([colCost; rowCost]); + + if abs(tmp(1)-1) < log2(N)*eps, break; end; % stopping criterion + + if diff(tmp(1:2))==0, warning('row/col deletion: arbitrary selection [%i,%i]',ix(1:2)); end; + ix = ix(1); + if (ix<=sum(cix)) + tmp = find(cix>0); + cix(tmp(ix)) = 0; + else + tmp = find(rix>0); + rix(tmp(ix-sum(cix))) = 0; + end; + end; + rix = find(rix); + cix = find(cix); + +end +