octave_packages/m/statistics/models/logistic_regression.m

   1 ## Copyright (C) 1995-2012 Kurt Hornik
   2 ##
   3 ## This file is part of Octave.
   4 ##
   5 ## Octave is free software; you can redistribute it and/or modify it
   6 ## under the terms of the GNU General Public License as published by
   7 ## the Free Software Foundation; either version 3 of the License, or (at
   8 ## your option) any later version.
   9 ##
  10 ## Octave is distributed in the hope that it will be useful, but
  11 ## WITHOUT ANY WARRANTY; without even the implied warranty of
  12 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13 ## General Public License for more details.
  14 ##
  15 ## You should have received a copy of the GNU General Public License
  16 ## along with Octave; see the file COPYING.  If not, see
  17 ## <http://www.gnu.org/licenses/>.
  18
  19 ## -*- texinfo -*-
  20 ## @deftypefn {Function File} {[@var{theta}, @var{beta}, @var{dev}, @var{dl}, @var{d2l}, @var{p}] =} logistic_regression (@var{y}, @var{x}, @var{print}, @var{theta}, @var{beta})
  21 ## Perform ordinal logistic regression.
  22 ##
  23 ## Suppose @var{y} takes values in @var{k} ordered categories, and let
  24 ## @code{gamma_i (@var{x})} be the cumulative probability that @var{y}
  25 ## falls in one of the first @var{i} categories given the covariate
  26 ## @var{x}.  Then
  27 ##
  28 ## @example
  29 ## [theta, beta] = logistic_regression (y, x)
  30 ## @end example
  31 ##
  32 ## @noindent
  33 ## fits the model
  34 ##
  35 ## @example
  36 ## logit (gamma_i (x)) = theta_i - beta' * x,   i = 1 @dots{} k-1
  37 ## @end example
  38 ##
  39 ## The number of ordinal categories, @var{k}, is taken to be the number
  40 ## of distinct values of @code{round (@var{y})}.  If @var{k} equals 2,
  41 ## @var{y} is binary and the model is ordinary logistic regression.  The
  42 ## matrix @var{x} is assumed to have full column rank.
  43 ##
  44 ## Given @var{y} only, @code{theta = logistic_regression (y)}
  45 ## fits the model with baseline logit odds only.
  46 ##
  47 ## The full form is
  48 ##
  49 ## @example
  50 ## @group
  51 ## [theta, beta, dev, dl, d2l, gamma]
  52 ##    = logistic_regression (y, x, print, theta, beta)
  53 ## @end group
  54 ## @end example
  55 ##
  56 ## @noindent
  57 ## in which all output arguments and all input arguments except @var{y}
  58 ## are optional.
  59 ##
  60 ## Setting @var{print} to 1 requests summary information about the fitted
  61 ## model to be displayed.  Setting @var{print} to 2 requests information
  62 ## about convergence at each iteration.  Other values request no
  63 ## information to be displayed.  The input arguments @var{theta} and
  64 ## @var{beta} give initial estimates for @var{theta} and @var{beta}.
  65 ##
  66 ## The returned value @var{dev} holds minus twice the log-likelihood.
  67 ##
  68 ## The returned values @var{dl} and @var{d2l} are the vector of first
  69 ## and the matrix of second derivatives of the log-likelihood with
  70 ## respect to @var{theta} and @var{beta}.
  71 ##
  72 ## @var{p} holds estimates for the conditional distribution of @var{y}
  73 ## given @var{x}.
  74 ## @end deftypefn
  75
  76 ## Original for MATLAB written by Gordon K Smyth <gks@maths.uq.oz.au>,
  77 ## U of Queensland, Australia, on Nov 19, 1990.  Last revision Aug 3,
  78 ## 1992.
  79
  80 ## Author: Gordon K Smyth <gks@maths.uq.oz.au>,
  81 ## Adapted-By: KH <Kurt.Hornik@wu-wien.ac.at>
  82 ## Description: Ordinal logistic regression
  83
  84 ## Uses the auxiliary functions logistic_regression_derivatives and
  85 ## logistic_regression_likelihood.
  86
  87 function [theta, beta, dev, dl, d2l, p] = logistic_regression (y, x, print, theta, beta)
  88
  89   ## check input
  90   y = round (vec (y));
  91   [my, ny] = size (y);
  92   if (nargin < 2)
  93     x = zeros (my, 0);
  94   endif;
  95   [mx, nx] = size (x);
  96   if (mx != my)
  97     error ("logistic_regression: X and Y must have the same number of observations");
  98   endif
  99
 100   ## initial calculations
 101   x = -x;
 102   tol = 1e-6; incr = 10; decr = 2;
 103   ymin = min (y); ymax = max (y); yrange = ymax - ymin;
 104   z  = (y * ones (1, yrange)) == ((y * 0 + 1) * (ymin : (ymax - 1)));
 105   z1 = (y * ones (1, yrange)) == ((y * 0 + 1) * ((ymin + 1) : ymax));
 106   z  = z(:, any (z));
 107   z1 = z1 (:, any(z1));
 108   [mz, nz] = size (z);
 109
 110   ## starting values
 111   if (nargin < 3)
 112     print = 0;
 113   endif;
 114   if (nargin < 4)
 115     beta = zeros (nx, 1);
 116   endif;
 117   if (nargin < 5)
 118     g = cumsum (sum (z))' ./ my;
 119     theta = log (g ./ (1 - g));
 120   endif;
 121   tb = [theta; beta];
 122
 123   ## likelihood and derivatives at starting values
 124   [g, g1, p, dev] = logistic_regression_likelihood (y, x, tb, z, z1);
 125   [dl, d2l] = logistic_regression_derivatives (x, z, z1, g, g1, p);
 126   epsilon = std (vec (d2l)) / 1000;
 127
 128   ## maximize likelihood using Levenberg modified Newton's method
 129   iter = 0;
 130   while (abs (dl' * (d2l \ dl) / length (dl)) > tol)
 131     iter = iter + 1;
 132     tbold = tb;
 133     devold = dev;
 134     tb = tbold - d2l \ dl;
 135     [g, g1, p, dev] = logistic_regression_likelihood (y, x, tb, z, z1);
 136     if ((dev - devold) / (dl' * (tb - tbold)) < 0)
 137       epsilon = epsilon / decr;
 138     else
 139       while ((dev - devold) / (dl' * (tb - tbold)) > 0)
 140         epsilon = epsilon * incr;
 141          if (epsilon > 1e+15)
 142            error ("logistic_regression: epsilon too large");
 143          endif
 144          tb = tbold - (d2l - epsilon * eye (size (d2l))) \ dl;
 145          [g, g1, p, dev] = logistic_regression_likelihood (y, x, tb, z, z1);
 146          disp ("epsilon"); disp (epsilon);
 147       endwhile
 148     endif
 149     [dl, d2l] = logistic_regression_derivatives (x, z, z1, g, g1, p);
 150     if (print == 2)
 151       disp ("Iteration"); disp (iter);
 152       disp ("Deviance"); disp (dev);
 153       disp ("First derivative"); disp (dl');
 154       disp ("Eigenvalues of second derivative"); disp (eig (d2l)');
 155     endif
 156   endwhile
 157
 158   ## tidy up output
 159
 160   theta = tb (1 : nz, 1);
 161   beta  = tb ((nz + 1) : (nz + nx), 1);
 162
 163   if (print >= 1)
 164     printf ("\n");
 165     printf ("Logistic Regression Results:\n");
 166     printf ("\n");
 167     printf ("Number of Iterations: %d\n", iter);
 168     printf ("Deviance:             %f\n", dev);
 169     printf ("Parameter Estimates:\n");
 170     printf ("     Theta         S.E.\n");
 171     se = sqrt (diag (inv (-d2l)));
 172     for i = 1 : nz
 173       printf ("   %8.4f     %8.4f\n", tb (i), se (i));
 174     endfor
 175     if (nx > 0)
 176       printf ("      Beta         S.E.\n");
 177       for i = (nz + 1) : (nz + nx)
 178         printf ("   %8.4f     %8.4f\n", tb (i), se (i));
 179       endfor
 180     endif
 181   endif
 182
 183   if (nargout == 6)
 184     if (nx > 0)
 185       e = ((x * beta) * ones (1, nz)) + ((y * 0 + 1) * theta');
 186     else
 187       e = (y * 0 + 1) * theta';
 188     endif
 189     gamma = diff ([(y * 0), (exp (e) ./ (1 + exp (e))), (y * 0 + 1)]')';
 190   endif
 191
 192 endfunction