1 function [R,CC]=xval(D,classlabel,MODE,arg4)
2 % XVAL is used for crossvalidation
4 % [R,CC] = xval(D,classlabel)
5 % .. = xval(D,classlabel,CLASSIFIER)
6 % .. = xval(D,classlabel,CLASSIFIER,type)
7 % .. = xval(D,{classlabel,W},CLASSIFIER)
8 % .. = xval(D,{classlabel,W,NG},CLASSIFIER)
11 % load_fisheriris; %builtin iris dataset
13 % K = 5; NG = [1:length(C)]'*K/length(C);
14 % [R,CC] = xval(meas,{C,[],NG},'NBC');
17 % D: data features (one feature per column, one sample per row)
18 % classlabel labels of each sample, must have the same number of rows as D.
19 % Two different encodings are supported:
20 % {-1,1}-encoding (multiple classes with separate columns for each class) or
22 % So [1;2;3;1;4] is equivalent to
28 % Note, samples with classlabel=0 are ignored.
30 % CLASSIFIER can be any classifier supported by train_sc (default='LDA')
31 % {'REG','MDA','MD2','QDA','QDA2','LD2','LD3','LD4','LD5','LD6','NBC','aNBC','WienerHopf', 'RDA','GDBC',
32 % 'SVM','RBF','PSVM','SVM11','SVM:LIN4','SVM:LIN0','SVM:LIN1','SVM:LIN2','SVM:LIN3','WINNOW'}
33 % these can be modified by ###/GSVD, ###/sparse and ###/DELETION.
34 % /DELETION removes, in case of NaNs, either the rows or the columns containing any NaN (whichever removes fewer data values)
35 % /sparse and /GSVD preprocess the data and reduce it to some lower-dimensional space.
36 % Hyperparameters (like alpha for PLA, gamma/lambda for RDA, c_value for SVM, etc) can be defined as
37 % CLASSIFIER.hyperparameter.alpha, etc. and
38 % CLASSIFIER.TYPE = 'PLA' (as listed above).
39 % See train_sc for details.
40 % W: weights for each sample (row) in D.
41 % default: [] (i.e. all weights are 1)
42 % number of elements in W must match the number of rows of D
43 % NG: used to define the type of cross-validation
44 % Leave-One-Out-Method (LOOM): NG = [1:length(classlabel)]' (default)
45 % Leave-K-Out-Method: NG = ceil([1:length(classlabel)]'/K)
46 % K-fold XV: NG = ceil([1:length(classlabel)]'*K/length(classlabel))
47 % group-wise XV (if samples are not independent) can also be defined here
48 % samples from the same group (dependent samples) get the same identifier
49 % samples from different groups get different identifiers
50 % TYPE: defines the type of cross-validation procedure if NG is not specified
51 % 'LOOM' leave-one-out-method
52 % k k-fold crossvalidation
55 % R contains the resulting performance metric
56 % CC contains the classifier
58 % plota(R) shows the confusion matrix of the results
60 % see also: TRAIN_SC, TEST_SC, CLASSIFY, PLOTA
63 % [1] R. Duda, P. Hart, and D. Stork, Pattern Classification, second ed.
64 % John Wiley & Sons, 2001.
65 % [2] A. Schlögl, J. Kronegg, J.E. Huggins, S. G. Mason;
66 % Evaluation criteria in BCI research.
67 % (Eds.) G. Dornhege, J.R. Millan, T. Hinterberger, D.J. McFarland, K.-R.Müller;
68 % Towards Brain-Computer Interfacing, MIT Press, 2007, p.327-342
71 % Copyright (C) 2008,2009,2010 by Alois Schloegl <alois.schloegl@gmail.com>
72 % This function is part of the NaN-toolbox
73 % http://pub.ist.ac.at/~schloegl/matlab/NaN/
75 % This program is free software; you can redistribute it and/or
76 % modify it under the terms of the GNU General Public License
77 % as published by the Free Software Foundation; either version 3
78 % of the License, or (at your option) any later version.
80 % This program is distributed in the hope that it will be useful,
81 % but WITHOUT ANY WARRANTY; without even the implied warranty of
82 % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
83 % GNU General Public License for more details.
85 % You should have received a copy of the GNU General Public License
86 % along with this program; if not, write to the Free Software
87 % Foundation, Inc., 51 Franklin Street - Fifth Floor, Boston, MA 02110-1301, USA.
89 if (nargin<3) || isempty(MODE),
96 elseif ~isfield(MODE,'TYPE')
104 if iscell(classlabel)
105 [b,i,C] = unique(classlabel{:,1});
106 if size(classlabel,2)>1,
107 W = [classlabel{:,2}];
109 if size(classlabel,2)>2,
110 [Label,tmp1,NG] = unique(classlabel{:,3});
112 elseif size(classlabel,2)>1,
113 %% group-wise cross-validation
116 if size(classlabel,2)==2,
117 warning('This option defines W and NG in an ambigous way - use instead xval(D,{C,[],NG},...) or xval(D,{C,W},...)');
119 [Label,tmp1,NG] = unique(classlabel(:,3));
124 if all(W==1), W = []; end;
126 error('length of data and classlabel does not fit');
129 % use only valid samples
130 ix0 = find(~any(isnan(C),2));
133 if (nargin<4) || strcmpi(arg4,'LOOM')
137 elseif isnumeric(arg4)
140 NG = ceil((1:length(C))'*arg4/length(C));
141 elseif length(arg4)==2,
142 NG = ceil((1:length(C))'*arg4(1)/length(C));
150 error('length of data and classlabel does not fit');
152 if ~isfield(MODE,'hyperparameter')
153 MODE.hyperparameter = [];
156 cl = repmat(NaN,size(classlabel,1),1);
158 ix = ix0(NG(ix0)~=k);
160 CC = train_sc(D(ix,:), C(ix), MODE);
162 CC = train_sc(D(ix,:), C(ix), MODE, W(ix));
164 ix = ix0(NG(ix0)==k);
165 r = test_sc(CC, D(ix,:));
166 cl(ix,1) = r.classlabel;
169 %R = kappa(C,cl,'notIgnoreNAN',W);
170 R = kappa(C,cl,[],W);
174 if isnumeric(R.Label)
175 R.Label = cellstr(int2str(R.Label));
181 CC = train_sc(D,C,MODE);
183 CC = train_sc(D,C,MODE,W);
185 CC.Labels = 1:max(C);
186 %CC.Labels = unique(C);