X-Git-Url: https://git.creatis.insa-lyon.fr/pubgit/?p=CreaPhase.git;a=blobdiff_plain;f=octave_packages%2Fstatistics-1.1.3%2Fanovan.m;fp=octave_packages%2Fstatistics-1.1.3%2Fanovan.m;h=4c936b940f8e57f4065e5d64ee36e9cc1570ed0b;hp=0000000000000000000000000000000000000000;hb=f5f7a74bd8a4900f0b797da6783be80e11a68d86;hpb=1705066eceaaea976f010f669ce8e972f3734b05 diff --git a/octave_packages/statistics-1.1.3/anovan.m b/octave_packages/statistics-1.1.3/anovan.m new file mode 100644 index 0000000..4c936b9 --- /dev/null +++ b/octave_packages/statistics-1.1.3/anovan.m @@ -0,0 +1,359 @@ +## Copyright (C) 2003-2005 Andy Adler +## +## This program is free software; you can redistribute it and/or modify it under +## the terms of the GNU General Public License as published by the Free Software +## Foundation; either version 3 of the License, or (at your option) any later +## version. +## +## This program is distributed in the hope that it will be useful, but WITHOUT +## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or +## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more +## details. +## +## You should have received a copy of the GNU General Public License along with +## this program; if not, see . + +## -*- texinfo -*- +## @deftypefn {Function File} {[@var{pval}, @var{f}, @var{df_b}, @var{df_e}] =} anovan (@var{data}, @var{grps}) +## @deftypefnx {Function File} {[@var{pval}, @var{f}, @var{df_b}, @var{df_e}] =} anovan (@var{data}, @var{grps}, 'param1', @var{value1}) +## Perform a multi-way analysis of variance (ANOVA). The goal is to test +## whether the population means of data taken from @var{k} different +## groups are all equal. +## +## Data is a single vector @var{data} with groups specified by +## a corresponding matrix of group labels @var{grps}, where @var{grps} +## has the same number of rows as @var{data}. For example, if +## @var{data} = [1.1;1.2]; @var{grps}= [1,2,1; 1,5,2]; +## then data point 1.1 was measured under conditions 1,2,1 and +## data point 1.2 was measured under conditions 1,5,2. +## Note that groups do not need to be sequentially numbered. +## +## By default, a 'linear' model is used, computing the N main effects +## with no interactions. this may be modified by param 'model' +## +## p= anovan(data,groups, 'model', modeltype) +## - modeltype = 'linear': compute N main effects +## - modeltype = 'interaction': compute N effects and +## N*(N-1) two-factor interactions +## - modeltype = 'full': compute interactions at all levels +## +## Under the null of constant means, the statistic @var{f} follows an F +## distribution with @var{df_b} and @var{df_e} degrees of freedom. +## +## The p-value (1 minus the CDF of this distribution at @var{f}) is +## returned in @var{pval}. +## +## If no output argument is given, the standard one-way ANOVA table is +## printed. +## +## BUG: DFE is incorrect for modeltypes != full +## @end deftypefn + +## Author: Andy Adler +## Based on code by: KH +## $Id: anovan.m 10203 2012-04-12 13:47:32Z carandraug $ +## +## TESTING RESULTS: +## 1. ANOVA ACCURACY: www.itl.nist.gov/div898/strd/anova/anova.html +## Passes 'easy' test. Comes close on 'Average'. Fails 'Higher'. +## This could be fixed with higher precision arithmetic +## 2. Matlab anova2 test +## www.mathworks.com/access/helpdesk/help/toolbox/stats/anova2.html +## % From web site: +## popcorn= [ 5.5 4.5 3.5; 5.5 4.5 4.0; 6.0 4.0 3.0; +## 6.5 5.0 4.0; 7.0 5.5 5.0; 7.0 5.0 4.5]; +## % Define groups so reps = 3 +## groups = [ 1 1;1 2;1 3;1 1;1 2;1 3;1 1;1 2;1 3; +## 2 1;2 2;2 3;2 1;2 2;2 3;2 1;2 2;2 3 ]; +## anovan( vec(popcorn'), groups, 'model', 'full') +## % Results same as Matlab output +## 3. Matlab anovan test +## www.mathworks.com/access/helpdesk/help/toolbox/stats/anovan.html +## % From web site +## y = [52.7 57.5 45.9 44.5 53.0 57.0 45.9 44.0]'; +## g1 = [1 2 1 2 1 2 1 2]; +## g2 = {'hi';'hi';'lo';'lo';'hi';'hi';'lo';'lo'}; +## g3 = {'may'; 'may'; 'may'; 'may'; 'june'; 'june'; 'june'; 'june'}; +## anovan( y', [g1',g2',g3']) +## % Fails because we always do interactions + +function [PVAL, FSTAT, DF_B, DFE] = anovan (data, grps, varargin) + + if nargin <= 1 + usage ("anovan (data, grps)"); + end + + # test supplied parameters + modeltype= 'linear'; + for idx= 3:2:nargin + param= varargin{idx-2}; + value= varargin{idx-1}; + + if strcmp(param, 'model') + modeltype= value; +# elseif strcmp(param # add other parameters here + else + error(sprintf('parameter %s is not supported', param)); + end + end + + if ~isvector (data) + error ("anova: for `anova (data, grps)', data must be a vector"); + endif + + nd = size (grps,1); # number of data points + nw = size (grps,2); # number of anova "ways" + if (~ isvector (data) || (length(data) ~= nd)) + error ("anova: grps must be a matrix of the same number of rows as data"); + endif + + [g,grp_map] = relabel_groups (grps); + if strcmp(modeltype, 'linear') + max_interact = 1; + elseif strcmp(modeltype,'interaction') + max_interact = 2; + elseif strcmp(modeltype,'full') + max_interact = rows(grps); + else + error(sprintf('modeltype %s is not supported', modeltype)); + end + ng = length(grp_map); + int_tbl = interact_tbl (nw, ng, max_interact ); + [gn, gs, gss] = raw_sums(data, g, ng, int_tbl); + + stats_tbl = int_tbl(2:size(int_tbl,1),:)>0; + nstats= size(stats_tbl,1); + stats= zeros( nstats+1, 5); # SS, DF, MS, F, p + for i= 1:nstats + [SS, DF, MS]= factor_sums( gn, gs, gss, stats_tbl(i,:), ng, nw); + stats(i,1:3)= [SS, DF, MS]; + end + + # The Mean squared error is the data - avg for each possible measurement + # This calculation doesn't work unless there is replication for all grps +# SSE= sum( gss(sel) ) - sum( gs(sel).^2 ./ gn(sel) ); + SST= gss(1) - gs(1)^2/gn(1); + SSE= SST - sum(stats(:,1)); + sel = select_pat( ones(1,nw), ng, nw); %incorrect for modeltypes != full + DFE= sum( (gn(sel)-1).*(gn(sel)>0) ); + MSE= SSE/DFE; + stats(nstats+1,1:3)= [SSE, DFE, MSE]; + + for i= 1:nstats + MS= stats(i,3); + DF= stats(i,2); + F= MS/MSE; + pval = 1 - fcdf (F, DF, DFE); + stats(i,4:5)= [F, pval]; + end + + if nargout==0; + printout( stats, stats_tbl ); + else + PVAL= stats(1:nstats,5); + FSTAT=stats(1:nstats,4); + DF_B= stats(1:nstats,2); + DF_E= DFE; + end +endfunction + + +# relabel groups to a mapping from 1 to ng +# Input +# grps input grouping +# Output +# g relabelled grouping +# grp_map map from output to input grouping +function [g,grp_map] = relabel_groups(grps) + grp_vec= vec(grps); + s= sort (grp_vec); + uniq = 1+[0;find(diff(s))]; + # mapping from new grps to old groups + grp_map = s(uniq); + # create new group g + ngroups= length(uniq); + g= zeros(size(grp_vec)); + for i = 1:ngroups + g( find( grp_vec== grp_map(i) ) ) = i; + end + g= reshape(g, size(grps)); +endfunction + +# Create interaction table +# +# Input: +# nw number of "ways" +# ng number of ANOVA groups +# max_interact maximum number of interactions to consider +# default is nw +function int_tbl =interact_tbl(nw, ng, max_interact) + combin= 2^nw; + inter_tbl= zeros( combin, nw); + idx= (0:combin-1)'; + for i=1:nw; + inter_tbl(:,i) = ( rem(idx,2^i) >= 2^(i-1) ); + end + + # find elements with more than max_interact 1's + idx = ( sum(inter_tbl',1) > max_interact ); + inter_tbl(idx,:) =[]; + combin= size(inter_tbl,1); # update value + + #scale inter_tbl + # use ng+1 to map combinations of groups to integers + # this would be lots easier with a hash data structure + int_tbl = inter_tbl .* (ones(combin,1) * (ng+1).^(0:nw-1) ); +endfunction + +# Calculate sums for each combination +# +# Input: +# g relabelled grouping matrix +# ng number of ANOVA groups +# max_interact +# +# Output (virtual (ng+1)x(nw) matrices): +# gn number of data sums in each group +# gs sum of data in each group +# gss sumsqr of data in each group +function [gn, gs, gss] = raw_sums(data, g, ng, int_tbl); + nw= size(g,2); + ndata= size(g,1); + gn= gs= gss= zeros((ng+1)^nw, 1); + for i=1:ndata + # need offset by one for indexing + datapt= data(i); + idx = 1+ int_tbl*g(i,:)'; + gn(idx) +=1; + gs(idx) +=datapt; + gss(idx) +=datapt^2; + end +endfunction + +# Calcualte the various factor sums +# Input: +# gn number of data sums in each group +# gs sum of data in each group +# gss sumsqr of data in each group +# select binary vector of factor for this "way"? +# ng number of ANOVA groups +# nw number of ways + +function [SS,DF]= raw_factor_sums( gn, gs, gss, select, ng, nw); + sel= select_pat( select, ng, nw); + ss_raw= gs(sel).^2 ./ gn(sel); + SS= sum( ss_raw( ~isnan(ss_raw) )); + if length(find(select>0))==1 + DF= sum(gn(sel)>0)-1; + else + DF= 1; #this isn't the real DF, but needed to multiply + end +endfunction + +function [SS, DF, MS]= factor_sums( gn, gs, gss, select, ng, nw); + SS=0; + DF=1; + + ff = find(select); + lff= length(ff); + # zero terms added, one term subtracted, two added, etc + for i= 0:2^lff-1 + remove= find( rem( floor( i * 2.^(-lff+1:0) ), 2) ); + sel1= select; + if ~isempty(remove) + sel1( ff( remove ) )=0; + end + [raw_sum,raw_df]= raw_factor_sums(gn,gs,gss,sel1,ng,nw); + + add_sub= (-1)^length(remove); + SS+= add_sub*raw_sum; + DF*= raw_df; + end + + MS= SS/DF; +endfunction + +# Calcualte the various factor sums +# Input: +# select binary vector of factor for this "way"? +# ng number of ANOVA groups +# nw number of ways +function sel= select_pat( select, ng, nw); + # if select(i) is zero, remove nonzeros + # if select(i) is zero, remove zero terms for i + field=[]; + + if length(select) ~= nw; + error("length of select must be = nw"); + end + ng1= ng+1; + + if isempty(field) + # expand 0:(ng+1)^nw in base ng+1 + field= (0:(ng1)^nw-1)'* ng1.^(-nw+1:0); + field= rem( floor( field), ng1); + # select zero or non-zero elements + field= field>0; + end + sel= find( all( field == ones(ng1^nw,1)*select(:)', 2) ); +endfunction + + +function printout( stats, stats_tbl ); + nw= size( stats_tbl,2); + [jnk,order]= sort( sum(stats_tbl,2) ); + + printf('\n%d-way ANOVA Table (Factors A%s):\n\n', nw, ... + sprintf(',%c',toascii('A')+(1:nw-1)) ); + printf('Source of Variation Sum Sqr df MeanSS Fval p-value\n'); + printf('*********************************************************************\n'); + printf('Error %10.2f %4d %10.2f\n', stats( size(stats,1),1:3)); + + for i= order(:)' + str= sprintf(' %c x',toascii('A')+find(stats_tbl(i,:)>0)-1 ); + str= str(1:length(str)-2); # remove x + printf('Factor %15s %10.2f %4d %10.2f %7.3f %7.6f\n', ... + str, stats(i,:) ); + end + printf('\n'); +endfunction + +#{ +# Test Data from http://maths.sci.shu.ac.uk/distance/stats/14.shtml +data=[7 9 9 8 12 10 ... + 9 8 10 11 13 13 ... + 9 10 10 12 10 12]'; +grp = [1,1; 1,1; 1,2; 1,2; 1,3; 1,3; + 2,1; 2,1; 2,2; 2,2; 2,3; 2,3; + 3,1; 3,1; 3,2; 3,2; 3,3; 3,3]; +data=[7 9 9 8 12 10 9 8 ... + 9 8 10 11 13 13 10 11 ... + 9 10 10 12 10 12 10 12]'; +grp = [1,4; 1,4; 1,5; 1,5; 1,6; 1,6; 1,7; 1,7; + 2,4; 2,4; 2,5; 2,5; 2,6; 2,6; 2,7; 2,7; + 3,4; 3,4; 3,5; 3,5; 3,6; 3,6; 3,7; 3,7]; +# Test Data from http://maths.sci.shu.ac.uk/distance/stats/9.shtml +data=[9.5 11.1 11.9 12.8 ... + 10.9 10.0 11.0 11.9 ... + 11.2 10.4 10.8 13.4]'; +grp= [1:4,1:4,1:4]'; +# Test Data from http://maths.sci.shu.ac.uk/distance/stats/13.shtml +data=[7.56 9.68 11.65 ... + 9.98 9.69 10.69 ... + 7.23 10.49 11.77 ... + 8.22 8.55 10.72 ... + 7.59 8.30 12.36]'; +grp = [1,1;1,2;1,3; + 2,1;2,2;2,3; + 3,1;3,2;3,3; + 4,1;4,2;4,3; + 5,1;5,2;5,3]; +# Test Data from www.mathworks.com/ +# access/helpdesk/help/toolbox/stats/linear10.shtml +data=[23 27 43 41 15 17 3 9 20 63 55 90]; +grp= [ 1 1 1 1 2 2 2 2 3 3 3 3; + 1 1 2 2 1 1 2 2 1 1 2 2]'; +#} + + +