octave_packages/statistics-1.1.3/cl_multinom.m

   1 ## Copyright (C) 2009 Levente Torok <TorokLev@gmail.com>
   2 ##
   3 ## This program is free software; you can redistribute it and/or modify it under
   4 ## the terms of the GNU General Public License as published by the Free Software
   5 ## Foundation; either version 3 of the License, or (at your option) any later
   6 ## version.
   7 ##
   8 ## This program is distributed in the hope that it will be useful, but WITHOUT
   9 ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10 ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
  11 ## details.
  12 ##
  13 ## You should have received a copy of the GNU General Public License along with
  14 ## this program; if not, see <http://www.gnu.org/licenses/>.
  15
  16 ## -*- texinfo -*-
  17 ##
  18 ## @deftypefn {Function File} {@var{CL} =} cl_multinom (@var{x}, @var{N}, @var{b}, @var{calculation_type} ) - Confidence level of multinomial portions
  19 ##    Returns confidence level of multinomial parameters estimated @math{ p = x / sum(x) } with predefined confidence interval @var{b}.
  20 ##    Finite population is also considered.
  21 ##
  22 ## This function calculates the level of confidence at which the samples represent the true distribution
  23 ## given that there is a predefined tolerance (confidence interval).
  24 ## This is the upside-down case of the typical exercises, in which we want to get the confidence interval
  25 ## given the confidence level (and the estimated parameters of the underlying distribution).
  26 ## But once we accept (let's say at elections) that we have a standard predefined
  27 ## maximal acceptable error rate (e.g. @var{b}=0.02) in the estimation, and we just want to know how sure we
  28 ## can be that the measured proportions are the same as in the
  29 ## entire population (i.e. the expected value and mean of the samples are roughly the same), we need to use this function.
  30 ##
  31 ## @subheading Arguments
  32 ## @itemize @bullet
  33 ## @item @var{x}  : int vector  : sample frequencies bins
  34 ## @item @var{N}  : int         : Population size that was sampled by x. If N<sum(x), infinite number assumed
  35 ## @item @var{b}  : real, vector :  confidence interval
  36 ##            if vector, it should be the size of x, containing the confidence interval for each cell
  37 ##            if scalar, each cell will have the same value of b unless it is zero or negative (e.g. -1)
  38 ##            if value is 0, b=.02 is assumed, which is the standard choice at elections
  39 ##            if value is negative, it is calculated so that an alteration of one sample in a cell defines the confidence interval
  40 ## @item @var{calculation_type}  : string    : (Optional), described below
  41 ##           "bromaghin"     (default) - do not change it unless you have a good reason to do so
  42 ##           "cochran"
  43 ##           "agresti_cull"  this is not exactly the solution at reference given below but an adjustment of the solutions above
  44 ## @end itemize
  45 ##
  46 ## @subheading Returns
  47 ##   Confidence level.
  48 ##
  49 ## @subheading Example
  50 ##   CL = cl_multinom( [27;43;19;11], 10000, 0.05 )
  51 ##     returns 0.69 confidence level.
  52 ##
  53 ## @subheading References
  54 ##
  55 ## "bromaghin" calculation type (default)
  56 ## is based on the article
  57 ##   Jeffrey F. Bromaghin, "Sample Size Determination for Interval Estimation of Multinomial Probabilities", The American Statistician  vol 47, 1993, pp 203-206.
  58 ##
  59 ## "cochran" calculation type
  60 ## is based on article
  61 ##   Robert T. Tortora, "A Note on Sample Size Estimation for Multinomial Populations", The American Statistician, , Vol 32. 1978,  pp 100-102.
  62 ##
  63 ## "agresti_cull" calculation type
  64 ## is based on article in which Quesenberry Hurst and Goodman result is combined
  65 ##   A. Agresti and B.A. Coull, "Approximate is better than \"exact\" for interval estimation of binomial proportions", The American Statistician, Vol. 52, 1998, pp 119-126
  66 ##
  67 ## @end deftypefn
  68
  function CL = cl_multinom( x, N, b = .05, calculation_type = "bromaghin")
      ## Confidence level at which the sample proportions p = x / sum(x) lie
      ## within the tolerance b.
      ##
      ## x                : sample frequencies per cell (row or column vector)
      ## N                : population size; N < sum(x) disables the finite
      ##                    population correction, N == sum(x) returns CL = 1
      ## b                : tolerance, scalar or one value per cell of x;
      ##                    scalar 0 selects 0.02, a negative scalar selects
      ##                    1 ./ max(x, 1)
      ## calculation_type : "bromaghin" (default), "cochran" or "agresti_cull"
      ##
      ## Raises an error for a non-string or unknown calculation_type and for
      ## a vector b whose length differs from the number of cells.

      if (nargin < 2 || nargin > 4)
          print_usage;
      elseif (!ischar (calculation_type))
          error ("Argument calculation_type must be a string");
      endif

      ## Work on column vectors so that a row-vector x gives the same result
      ## as a column vector; counting cells with rows(x) would yield 1 for a
      ## row vector and select the wrong "agresti_cull" branch.
      x = x(:);
      k = numel(x);
      nn = sum(x);
      p = x / nn;

      if (isscalar( b ))
          ## Resolve the scalar sentinels first, then expand to one value per cell.
          if (b == 0)
              b = 0.02 * ones( k, 1 );
          elseif (b < 0)
              b = 1 ./ max( x, 1 );
          else
              b = b * ones( k, 1 );
          endif
      else
          b = b(:);
          if (numel(b) != k)
              error ("Argument b must be a scalar or have one element per cell of x");
          endif
      endif
      bb = b .* b;

      ## The sample covers the whole population: nothing is left to estimate.
      if (N == nn)
          CL = 1;
          return;
      endif

      if (N < nn)
          fpc = 1;                  # no finite population correction
      else
          fpc = (N-1) / (N-nn);     # finite population correction
      endif

      beta = p .* (1-p);

      switch calculation_type
        case "cochran"
          ## Statements end with ';' so that intermediate values are not
          ## echoed to the console.
          t = sqrt( fpc * nn * bb ./ beta );
          alpha = ( 1 - normcdf( t )) * 2;

        case "bromaghin"
          t = sqrt(  fpc * (nn * 2 * bb )./ ( beta - 2 * bb + sqrt( beta .* beta - bb .* ( 4*beta - 1 ))) );
          alpha = ( 1 - normcdf( t )) * 2;

        case "agresti_cull"
          ts = fpc * nn * bb ./ beta;
          if (k <= 2)
              alpha = 1 - chi2cdf( ts, k-1 );   # adjusted Wilson interval
          else
              alpha = 1 - chi2cdf( ts/k, 1 );   # Goodman interval with Bonferroni argument
          endif

        otherwise
          error ("Unknown calculation type '%s'", calculation_type);
      endswitch

      ## The cell with the largest alpha determines the overall confidence level.
      CL = 1 - max( alpha );

  endfunction