octave_packages/nan-2.5.5/kappa.m

   1 function [kap,se,H,z,p0,SA,R]=kappa(d,c,arg3,w)
   2 % KAPPA estimates Cohen's kappa coefficient
   3 %   and related statistics
   4 %
   5 % [...] = kappa(d1,d2);
   6 %       NaN's are handled as missing values and are ignored
   7 % [...] = kappa(d1,d2,'notIgnoreNAN');
   8 %       NaN's are handled as just another Label.
   9 % [kap,sd,H,z,ACC,sACC,MI] = kappa(...);
  10 % X = kappa(...);
  11 %
  12 % d1    data of scorer 1
  13 % d2    data of scorer 2
  14 %
  15 % kap   Cohen's kappa coefficient point
  16 % se    standard error of the kappa estimate
  17 % H     Concordance matrix, i.e. confusion matrix
  18 % z     z-score
  19 % ACC   overall agreement (accuracy)
  20 % sACC  specific accuracy
  21 % MI    Mutual information or transfer information (in [bits])
  22 % X     is a struct containing all the fields above
  23 %       For two classes, a number of additional summary statistics including
  24 %         TPR, FPR, FDR, PPV, NPF, F1, dprime, Matthews Correlation coefficient (MCC) or
  25 %       Phi coefficient (PHI=MCC), Specificity and Sensitivity
  26 %       are provided. Note, the positive category must the larger label (in d and c), otherwise
  27 %       the confusion matrix becomes transposed and the summary statistics are messed up.
  28 %
  29 %
  30 % Reference(s):
  31 % [1] Cohen, J. (1960). A coefficient of agreement for nominal scales. Educational and Psychological Measurement, 20, 37-46.
  32 % [2] J Bortz, GA Lienert (1998) Kurzgefasste Statistik f|r die klassische Forschung, Springer Berlin - Heidelberg.
  33 %        Kapitel 6: Uebereinstimmungsmasze fuer subjektive Merkmalsurteile. p. 265-270.
  34 % [3] http://www.cmis.csiro.au/Fiona.Evans/personal/msc/html/chapter3.html
  35 % [4] Kraemer, H. C. (1982). Kappa coefficient. In S. Kotz and N. L. Johnson (Eds.),
  36 %        Encyclopedia of Statistical Sciences. New York: John Wiley & Sons.
  37 % [5] http://ourworld.compuserve.com/homepages/jsuebersax/kappa.htm
  38 % [6] http://en.wikipedia.org/wiki/Receiver_operating_characteristic
  39
  40 %       $Id: kappa.m 9608 2012-02-10 09:56:25Z schloegl $
  41 %       Copyright (c) 1997-2006,2008,2009,2011 by Alois Schloegl <alois.schloegl@gmail.com>
  42 %       This function is part of the NaN-toolbox
  43 %       http://pub.ist.ac.at/~schloegl/matlab/NaN/
  44 %
  45 %    BioSig is free software: you can redistribute it and/or modify
  46 %    it under the terms of the GNU General Public License as published by
  47 %    the Free Software Foundation, either version 3 of the License, or
  48 %    (at your option) any later version.
  49 %
  50 %    BioSig is distributed in the hope that it will be useful,
  51 %    but WITHOUT ANY WARRANTY; without even the implied warranty of
  52 %    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  53 %    GNU General Public License for more details.
  54 %
  55 %    You should have received a copy of the GNU General Public License
  56 %    along with BioSig.  If not, see <http://www.gnu.org/licenses/>.
  57
  58
  59 mode.ignoreNAN = 1;
  60 kk = [];
  61 if nargin>2
  62         if ischar(arg3)
  63                 if strcmpi(arg3,'notIgnoreNAN')
  64                         mode.ignoreNAN = 0;
  65                  end
  66         else
  67                 kk = arg3;
  68         end
  69 end;
  70 if nargin<4
  71         w = [];
  72 end;
  73
  74 if nargin>1,
  75         d = d(:);
  76         c = c(:);
  77
  78         tmp    = [d;c];
  79         maxtmp = max(tmp);
  80         tmp(isnan(tmp)) = maxtmp+1;
  81         [X.Label,i,j]   = unique(tmp);
  82         c = j(1+numel(d):end);
  83         d = j(1:numel(d));
  84         kk = max(j);
  85         maxCLASS = kk - any(tmp>maxtmp);
  86
  87         if mode.ignoreNAN,
  88                 if any(j > maxCLASS)
  89 %                       fprintf(2,'Warning KAPPA: some elements are NaN. These are handled as missing values and are ignored.\n');
  90 %                       fprintf(2,'If NaN should be handled as just another label, use kappa(..,''notIgnoreNaN'').\n');
  91                         ix = find((c<=maxCLASS) & (d<=maxCLASS));
  92                         d = d(ix); c=c(ix);
  93                         if ~isempty(w), w = w(ix); end;
  94                         kk = kk - 1;
  95                 end;
  96                 X.Label(X.Label>maxtmp) = [];
  97         else
  98                 X.Label(X.Label>maxtmp) = NaN;
  99         end;
 100
 101         if isempty(w)
 102                 H = full( sparse (d, c, 1, kk, kk) );
 103         elseif ~isempty(w),
 104                 H = full( sparse (d, c, w, kk, kk) );
 105         end;
 106
 107 else
 108         X.Label = 1:min(size(d));
 109         H = d(X.Label,X.Label);
 110
 111 end;
 112
 113 s = warning;
 114 warning('off');
 115
 116 N = sum(H(:));
 117 p0  = sum(diag(H))/N;  %accuracy of observed agreement, overall agreement
 118 %OA = sum(diag(H))/N);
 119
 120 p_i = sum(H,1);
 121 pi_ = sum(H,2)';
 122
 123 SA  = 2*diag(H)'./(p_i+pi_); % specific agreement
 124
 125 pe  = (p_i*pi_')/(N*N);  % estimate of change agreement
 126
 127 px  = sum(p_i.*pi_.*(p_i+pi_))/(N*N*N);
 128
 129 %standard error
 130 kap = (p0-pe)/(1-pe);
 131 sd  = sqrt((pe+pe*pe-px)/(N*(1-pe*pe)));
 132
 133 %standard error
 134 se  = sqrt((p0+pe*pe-px)/N)/(1-pe);
 135 if ~isreal(se),
 136         z = NaN;
 137 else
 138         z = kap/se;
 139 end
 140
 141 if ((1 < nargout) && (nargout<7))
 142         warning(s);
 143         return;
 144 end;
 145
 146 % Nykopp's entropy
 147 pwi = sum(H,2)/N;                       % p(x_i)
 148 pwj = sum(H,1)/N;                       % p(y_j)
 149 pji = H./repmat(sum(H,2),1,size(H,2));  % p(y_j | x_i)
 150 R   = - sumskipnan(pwj.*log2(pwj)) + sumskipnan(pwi'*(pji.*log2(pji)));
 151
 152 if (nargout>1), return; end;
 153
 154 X.kappa = kap;
 155 X.kappa_se = se;
 156 X.data = H;
 157 X.H    = X.data;
 158 X.z    = z;
 159 X.ACC  = p0;
 160 X.sACC = SA;
 161 X.MI   = R;
 162 X.datatype = 'confusion';
 163
 164 if length(H)==2,
 165         % see http://en.wikipedia.org/wiki/Receiver_operating_characteristic
 166         % Note that the confusion matrix used here is has positive values in
 167         % the 2nd row and column, moreover the true values are indicated by
 168         % rows (transposed). Thus, in summary H(1,1) and H(2,2) are exchanged
 169         % as compared to the wikipedia article.
 170         X.TP  = H(2,2);
 171         X.TN  = H(1,1);
 172         X.FP  = H(1,2);
 173         X.FN  = H(2,1);
 174         X.FNR = H(2,1) / sum(H(2,:));
 175         X.FPR = H(1,2) / sum(H(1,:));
 176         X.TPR = H(2,2) / sum(H(2,:));
 177         X.PPV = H(2,2) / sum(H(:,2));
 178         X.NPV = H(1,1) / sum(H(:,1));
 179         X.FDR = H(1,2) / sum(H(:,2));
 180         X.MCC = det(H) / sqrt(prod([sum(H), sum(H')]));
 181         X.PHI = X.MCC;
 182         X.F1  = 2 * X.TP / (sum(H(2,:)) + sum(H(:,2)));
 183         X.Sensitivity = X.TPR;  %% hit rate, recall
 184         X.Specificity = 1 - X.FPR;
 185         X.Precision   = X.PPV;
 186         X.dprime = norminv(X.TPR) - norminv(X.FDR);
 187 end;
 188
 189 kap = X;
 190 warning(s);