octave_packages/nan-2.5.5/partcorrcoef.m

   1 function [R,sig,ci1,ci2] = partcorrcoef(X,Y,Z,Mode)
   2 % PARTCORRCOEF calculates the partial correlation between X and Y
   3 % after removing the influence of Z.
   4 % X, Y and Z can contain missing values encoded with NaN.
   5 % NaN's are skipped, NaN do not result in a NaN output.
   6 % (Its assumed that the occurence of NaN's is uncorrelated)
   7 % The output gives NaN, only if there are insufficient input data.
   8 %
   9 %  The partial correlation  is defined as
  10 %  pcc(xy|z)=(cc(x,y)-cc(x,z)*cc(y,z))/sqrt((1-cc(x,y)�)*((1-cc(x,z)�)))
  11 %
  12 %
  13 % PARTCORRCOEF(X [,Mode]);
  14 %      calculates the (auto-)correlation matrix of X
  15 % PARTCORRCOEF(X,Y,Z);
  16 % PARTCORRCOEF(X,Y,Z,[]);
  17 % PARTCORRCOEF(X,Y,Z,'Pearson');
  18 % PARTCORRCOEF(X,Y,Z,'Rank');
  19 % PARTCORRCOEF(X,Y,Z,'Spearman');
  20 %
  21 % Mode=[] [default]
  22 %       removes from X and Y the part that can be explained by Z
  23 %       and computes the correlation of the remaining part.
  24 %       Ideally, this is equivalent to Mode='Pearson', however, in practice
  25 %       this is more accurate.
  26 % Mode='Pearson' or 'parametric'
  27 % Mode='Spearman'
  28 % Mode='Rank'
  29 %       computes the partial correlation based on cc(x,y),cc(x,z) and cc(y,z)
  30 %       with the respective mode.
  31 %
  32 % [R,p,ci1,ci2] = PARTCORRCOEF(...);
  33 %  r is the partialcorrelation matrix
  34 %       r(i,j) is the partial correlation coefficient r between X(:,i) and Y(:,j)
  35 %       when influence of Z is removed.
  36 %  p    gives the significance of PCC
  37 %       It tests the null hypothesis that the product moment correlation coefficient is zero
  38 %       using Student's t-test on the statistic t = r sqrt(N-Nz-2)/sqrt(1-r^2)
  39 %       where N is the number of samples (Statistics, M. Spiegel, Schaum series).
  40 %  p > alpha: do not reject the Null hypothesis: "R is zero".
  41 %  p < alpha: The alternative hypothesis "R2 is larger than zero" is true with probability (1-alpha).
  42 %  ci1  lower 0.95 confidence interval
  43 %  ci2  upper 0.95 confidence interval
  44 %
  45 % see also: SUMSKIPNAN, COVM, COV, COR, SPEARMAN, RANKCORR, RANKS, CORRCOEF
  46 %
  47 % REFERENCES:
  48 % on the partial correlation coefficient
  49 % [1] http://www.tufts.edu/~gdallal/partial.htm
  50 % [2] http://www.nag.co.uk/numeric/fl/manual/pdf/G02/g02byf.pdf
  51
  52 %    $Id: partcorrcoef.m 8351 2011-06-24 17:35:07Z carandraug $
  53 %    Copyright (C) 2000-2002,2009 by Alois Schloegl <alois.schloegl@gmail.com>
  54 %    This function is part of the NaN-toolbox
  55 %    http://pub.ist.ac.at/~schloegl/matlab/NaN/
  56
  57 %    This program is free software; you can redistribute it and/or modify
  58 %    it under the terms of the GNU General Public License as published by
  59 %    the Free Software Foundation; either version 3 of the License, or
  60 %    (at your option) any later version.
  61 %
  62 %    This program is distributed in the hope that it will be useful,
  63 %    but WITHOUT ANY WARRANTY; without even the implied warranty of
  64 %    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  65 %    GNU General Public License for more details.
  66 %
  67 %    You should have received a copy of the GNU General Public License
  68 %    along with this program; if not, write to the Free Software
  69 %    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
  70
  71 % Features:
  72 % + interprets NaN's as missing value
  73 % + Pearson's correlation
  74 % + Spearman's rank correlation
  75 % + Rank correlation (non-parametric, non-Spearman)
  76 % + is fast, using an efficient algorithm O(n.log(n)) for calculating the ranks
  77 % + significance test for null-hypthesis: r=0
  78 % + confidence interval (0.99) included
  79 % - rank correlation works for cell arrays, too (no check for missing values).
  80 % + compatible with Octave and Matlab
  81
  82
  83 if nargin==3
  84         Mode=[];
  85 elseif nargin==4,
  86 else
  87         error('Error PARTCORRCOEF: Missing argument(s)\n');
  88 end;
  89
  90 if isempty(Z)
  91         R = corrcoef(X,Y,Mode);
  92
  93 elseif isempty(Mode)
  94         if ~isempty(Z)
  95         for j=1:size(X,2)
  96                 ix = ~any(isnan(Z),2) & ~isnan(X(:,j));
  97                 X(:,j) = X(:,j) - Z*(Z(ix,:)\X(ix,j));
  98         end;
  99         for j=1:size(Y,2)
 100                 ix = ~any(isnan(Z),2) & ~isnan(Y(:,j));
 101                 Y(:,j) = Y(:,j) - Z*(Z(ix,:)\Y(ix,j));
 102         end;
 103         end;
 104         R = corrcoef(X,Y,Mode);
 105
 106 else
 107         rxy = corrcoef(X,Y,Mode);
 108         rxz = corrcoef(X,Z,Mode);
 109         if isempty(Y),
 110                 ryz = rxz;
 111         else
 112                 ryz = corrcoef(Y,Z,Mode);
 113         end;
 114
 115         %rxy,rxz,ryz
 116         R = (rxy-rxz*ryz')./sqrt((1-rxz.^2)*(1-ryz.^2)');
 117
 118 end;
 119
 120 if nargout<2,
 121         return,
 122 end;
 123
 124 % SIGNIFICANCE TEST
 125 %warning off;   % prevent division-by-zero warnings in Matlab.
 126 NN=size(X,1)-size(Z,2);
 127
 128 tmp = 1 - R.*R;
 129 tmp(tmp<0) = 0;         % prevent tmp<0 i.e. imag(t)~=0
 130 t   = R.*sqrt(max(NN-2,0)./tmp);
 131
 132 if exist('t_cdf','file')
 133         sig = t_cdf(t,NN-2);
 134 elseif exist('tcdf','file')
 135         sig = tcdf(t,NN-2);
 136 else
 137         fprintf('Warning CORRCOEF: significance test not completed because of missing TCDF-function\n')
 138         sig = repmat(nan,size(R));
 139 end;
 140 sig  = 2 * min(sig,1 - sig);
 141
 142 if nargout<3,
 143         return,
 144 end;
 145
 146
 147 % CONFIDENCE INTERVAL
 148 if exist('flag_implicit_significance','file'),
 149         alpha = flag_implicit_significance;
 150 else
 151         alpha = 0.01;
 152 end;
 153
 154 fprintf(1,'CORRCOEF: confidence interval is based on alpha=%f\n',alpha);
 155
 156 tmp = R;
 157 %tmp(ix1 | ix2) = nan;          % avoid division-by-zero warning
 158 z   = log((1+tmp)./(1-tmp))/2;  % Fisher's z-transform;
 159 %sz  = 1./sqrt(NN-3);           % standard error of z
 160 sz  = sqrt(2)*erfinv(1-2*alpha)./sqrt(NN-3);            % confidence interval for alpha of z
 161
 162 ci1 = tanh(z-sz);
 163 ci2 = tanh(z+sz);
 164
 165
 166