X-Git-Url: https://git.creatis.insa-lyon.fr/pubgit/?p=CreaPhase.git;a=blobdiff_plain;f=octave_packages%2Fstatistics-1.1.3%2Fanderson_darling_test.m;fp=octave_packages%2Fstatistics-1.1.3%2Fanderson_darling_test.m;h=802d0d003d34627e9a3f686e54e190ec98971d67;hp=0000000000000000000000000000000000000000;hb=c880e8788dfc484bf23ce13fa2787f2c6bca4863;hpb=1705066eceaaea976f010f669ce8e972f3734b05 diff --git a/octave_packages/statistics-1.1.3/anderson_darling_test.m b/octave_packages/statistics-1.1.3/anderson_darling_test.m new file mode 100644 index 0000000..802d0d0 --- /dev/null +++ b/octave_packages/statistics-1.1.3/anderson_darling_test.m @@ -0,0 +1,153 @@ +## Author: Paul Kienzle +## This program is granted to the public domain. + +## -*- texinfo -*- +## @deftypefn {Function File} {[@var{q}, @var{Asq}, @var{info}] = } = @ +## anderson_darling_test (@var{x}, @var{distribution}) +## +## Test the hypothesis that @var{x} is selected from the given distribution +## using the Anderson-Darling test. If the returned @var{q} is small, reject +## the hypothesis at the @var{q}*100% level. +## +## The Anderson-Darling @math{@var{A}^2} statistic is calculated as follows: +## +## @example +## @iftex +## A^2_n = -n - \sum_{i=1}^n (2i-1)/n log(z_i (1-z_{n-i+1})) +## @end iftex +## @ifnottex +## n +## A^2_n = -n - SUM (2i-1)/n log(@math{z_i} (1 - @math{z_@{n-i+1@}})) +## i=1 +## @end ifnottex +## @end example +## +## where @math{z_i} is the ordered position of the @var{x}'s in the CDF of the +## distribution. Unlike the Kolmogorov-Smirnov statistic, the +## Anderson-Darling statistic is sensitive to the tails of the +## distribution. +## +## The @var{distribution} argument must be a either @t{"uniform"}, @t{"normal"}, +## or @t{"exponential"}. +## +## For @t{"normal"}' and @t{"exponential"} distributions, estimate the +## distribution parameters from the data, convert the values +## to CDF values, and compare the result to tabluated critical +## values. This includes an correction for small @var{n} which +## works well enough for @var{n} >= 8, but less so from smaller @var{n}. The +## returned @code{info.Asq_corrected} contains the adjusted statistic. +## +## For @t{"uniform"}, assume the values are uniformly distributed +## in (0,1), compute @math{@var{A}^2} and return the corresponding @math{p}-value from +## @code{1-anderson_darling_cdf(A^2,n)}. +## +## If you are selecting from a known distribution, convert your +## values into CDF values for the distribution and use @t{"uniform"}. +## Do not use @t{"uniform"} if the distribution parameters are estimated +## from the data itself, as this sharply biases the @math{A^2} statistic +## toward smaller values. +## +## [1] Stephens, MA; (1986), "Tests based on EDF statistics", in +## D'Agostino, RB; Stephens, MA; (eds.) Goodness-of-fit Techinques. +## New York: Dekker. +## +## @seealso{anderson_darling_cdf} +## @end deftypefn + +function [q,Asq,info] = anderson_darling_test(x,dist) + + if size(x,1) == 1, x=x(:); end + x = sort(x); + n = size(x,1); + use_cdf = 0; + + # Compute adjustment and critical values to use for stats. + switch dist + case 'normal', + # This expression for adj is used in R. + # Note that the values from NIST dataplot don't work nearly as well. + adj = 1 + (.75 + 2.25/n)/n; + qvals = [ 0.1, 0.05, 0.025, 0.01 ]; + Acrit = [ 0.631, 0.752, 0.873, 1.035]; + x = stdnormal_cdf(zscore(x)); + + case 'uniform', + ## Put invalid data at the limits of the distribution + ## This will drive the statistic to infinity. + x(x<0) = 0; + x(x>1) = 1; + adj = 1.; + qvals = [ 0.1, 0.05, 0.025, 0.01 ]; + Acrit = [ 1.933, 2.492, 3.070, 3.857 ]; + use_cdf = 1; + + case 'XXXweibull', + adj = 1 + 0.2/sqrt(n); + qvals = [ 0.1, 0.05, 0.025, 0.01 ]; + Acrit = [ 0.637, 0.757, 0.877, 1.038]; + ## XXX FIXME XXX how to fit alpha and sigma? + x = wblcdf (x, ones(n,1)*sigma, ones(n,1)*alpha); + + case 'exponential', + adj = 1 + 0.6/n; + qvals = [ 0.1, 0.05, 0.025, 0.01 ]; + # Critical values depend on n. Choose the appropriate critical set. + # These values come from NIST dataplot/src/dp8.f. + Acritn = [ + 0, 1.022, 1.265, 1.515, 1.888 + 11, 1.045, 1.300, 1.556, 1.927; + 21, 1.062, 1.323, 1.582, 1.945; + 51, 1.070, 1.330, 1.595, 1.951; + 101, 1.078, 1.341, 1.606, 1.957; + ]; + # FIXME: consider interpolating in the critical value table. + Acrit = Acritn(lookup(Acritn(:,1),n),2:5); + + lambda = 1./mean(x); # exponential parameter estimation + x = expcdf(x, 1./(ones(n,1)*lambda)); + + otherwise + # FIXME consider implementing more of distributions; a number + # of them are defined in NIST dataplot/src/dp8.f. + error("Anderson-Darling test for %s not implemented", dist); + endswitch + + if any(x<0 | x>1) + error('Anderson-Darling test requires data in CDF form'); + endif + + i = [1:n]'*ones(1,size(x,2)); + Asq = -n - sum( (2*i-1) .* (log(x) + log(1-x(n:-1:1,:))) )/n; + + # Lookup adjusted critical value in the cdf (if uniform) or in the + # the critical table. + if use_cdf + q = 1-anderson_darling_cdf(Asq*adj, n); + else + idx = lookup([-Inf,Acrit],Asq*adj); + q = [1,qvals](idx); + endif + + if nargout > 2, + info.Asq = Asq; + info.Asq_corrected = Asq*adj; + info.Asq_critical = [100*(1-qvals); Acrit]'; + info.p = 1-q; + info.p_is_precise = use_cdf; + endif +endfunction + +%!demo +%! c = anderson_darling_test(10*rande(12,10000),'exponential'); +%! tabulate(100*c,100*[unique(c),1]); +%! % The Fc column should report 100, 250, 500, 1000, 10000 more or less. + +%!demo +%! c = anderson_darling_test(randn(12,10000),'normal'); +%! tabulate(100*c,100*[unique(c),1]); +%! % The Fc column should report 100, 250, 500, 1000, 10000 more or less. + +%!demo +%! c = anderson_darling_test(rand(12,10000),'uniform'); +%! hist(100*c,1:2:99); +%! % The histogram should be flat more or less.