octave_packages/statistics-1.1.3/monotone_smooth.m

   1 ## Copyright (C) 2011 Nir Krakauer <nkrakauer@ccny.cuny.edu>
   2 ## Copyright (C) 2011 Carnë Draug <carandraug+dev@gmail.com>
   3 ##
   4 ## This program is free software; you can redistribute it and/or modify it under
   5 ## the terms of the GNU General Public License as published by the Free Software
   6 ## Foundation; either version 3 of the License, or (at your option) any later
   7 ## version.
   8 ##
   9 ## This program is distributed in the hope that it will be useful, but WITHOUT
  10 ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
  12 ## details.
  13 ##
  14 ## You should have received a copy of the GNU General Public License along with
  15 ## this program; if not, see <http://www.gnu.org/licenses/>.
  16
  17 ## -*- texinfo -*-
  18 ## @deftypefn {Function File} {@var{yy} =} monotone_smooth (@var{x}, @var{y}, @var{h})
  19 ## Produce a smooth monotone increasing approximation to a sampled functional
  20 ## dependence y(x) using a kernel method (an Epanechnikov smoothing kernel is
  21 ## applied to y(x); this is integrated to yield the monotone increasing form.
  22 ## See Reference 1 for details.)
  23 ##
  24 ## @subheading Arguments
  25 ##
  26 ## @itemize @bullet
  27 ## @item
  28 ## @var{x} is a vector of values of the independent variable.
  29 ##
  30 ## @item
  31 ## @var{y} is a vector of values of the dependent variable, of the same size as
  32 ## @var{x}. For best performance, it is recommended that the @var{y} already be
  33 ## fairly smooth, e.g. by applying a kernel smoothing to the original values if
  34 ## they are noisy.
  35 ##
  36 ## @item
  37 ## @var{h} is the kernel bandwidth to use. If @var{h} is not given, a "reasonable"
  38 ## value is computed.
  39 ##
  40 ## @end itemize
  41 ##
  42 ## @subheading Return values
  43 ##
  44 ## @itemize @bullet
  45 ## @item
  46 ## @var{yy} is the vector of smooth monotone increasing function values at @var{x}.
  47 ##
  48 ## @end itemize
  49 ##
  50 ## @subheading Examples
  51 ##
  52 ## @example
  53 ## @group
  54 ## x = 0:0.1:10;
  55 ## y = (x .^ 2) + 3 * randn(size(x)); %typically non-monotonic from the added noise
  56 ## ys = ([y(1) y(1:(end-1))] + y + [y(2:end) y(end)])/3; %crudely smoothed via
  57 ## moving average, but still typically non-monotonic
  58 ## yy = monotone_smooth(x, ys); %yy is monotone increasing in x
  59 ## plot(x, y, '+', x, ys, x, yy)
  60 ## @end group
  61 ## @end example
  62 ##
  63 ## @subheading References
  64 ##
  65 ## @enumerate
  66 ## @item
  67 ## Holger Dette, Natalie Neumeyer and Kay F. Pilz (2006), A simple nonparametric
  68 ## estimator of a strictly monotone regression function, @cite{Bernoulli}, 12:469-490
  69 ## @item
  70 ## Regine Scheder (2007), R Package 'monoProc', Version 1.0-6,
  71 ## @url{http://cran.r-project.org/web/packages/monoProc/monoProc.pdf} (The
  72 ## implementation here is based on the monoProc function mono.1d)
  73 ## @end enumerate
  74 ## @end deftypefn
  75
  76 ## Author: Nir Krakauer <nkrakauer@ccny.cuny.edu>
  77 ## Description: Nonparametric monotone increasing regression
  78
  79 function yy = monotone_smooth (x, y, h)
  80
  81   if (nargin < 2 || nargin > 3)
  82     print_usage ();
  83   elseif (!isnumeric (x) || !isvector (x))
  84     error ("first argument x must be a numeric vector")
  85   elseif (!isnumeric (y) || !isvector (y))
  86     error ("second argument y must be a numeric vector")
  87   elseif (numel (x) != numel (y))
  88     error ("x and y must have the same number of elements")
  89   elseif (nargin == 3 && (!isscalar (h) || !isnumeric (h)))
  90     error ("third argument 'h' (kernel bandwith) must a numeric scalar")
  91   endif
  92
  93   n = numel(x);
  94
  95   %set filter bandwidth at a reasonable default value, if not specified
  96   if (nargin != 3)
  97     s = std(x);
  98     h = s / (n^0.2);
  99   end
 100
 101   x_min = min(x);
 102   x_max = max(x);
 103
 104   y_min = min(y);
 105   y_max = max(y);
 106
 107   %transform range of x to [0, 1]
 108   xl = (x - x_min) / (x_max - x_min);
 109
 110   yy = ones(size(y));
 111
 112   %Epanechnikov smoothing kernel (with finite support)
 113   %K_epanech_kernel = @(z) (3/4) * ((1 - z).^2) .* (abs(z) < 1);
 114
 115   K_epanech_int = @(z) mean(((abs(z) < 1)/2) - (3/4) * (z .* (abs(z) < 1) - (1/3) * (z.^3) .* (abs(z) < 1)) + (z < -1));
 116
 117   %integral of kernels up to t
 118   monotone_inverse = @(t) K_epanech_int((y - t) / h);
 119
 120   %find the value of the monotone smooth function at each point in x
 121   niter_max = 150; %maximum number of iterations for estimating each value (should not be reached in most cases)
 122   for l = 1:n
 123
 124     tmax = y_max;
 125     tmin = y_min;
 126     wmin = monotone_inverse(tmin);
 127     wmax = monotone_inverse(tmax);
 128     if (wmax == wmin)
 129       yy(l) = tmin;
 130     else
 131       wt = xl(l);
 132       iter_max_reached = 1;
 133       for i = 1:niter_max
 134         wt_scaled = (wt - wmin) / (wmax - wmin);
 135         tn        = tmin + wt_scaled * (tmax - tmin) ;
 136         wn        = monotone_inverse(tn);
 137         wn_scaled = (wn - wmin) / (wmax - wmin);
 138
 139         %if (abs(wt-wn) < 1E-4) || (tn < (y_min-0.1)) || (tn > (y_max+0.1))
 140         %% criterion for break in the R code -- replaced by the following line to
 141         %% hopefully be less dependent on the scale of y
 142         if (abs(wt_scaled-wn_scaled) < 1E-4) || (wt_scaled < -0.1) || (wt_scaled > 1.1)
 143           iter_max_reached = 0;
 144           break
 145         endif
 146         if wn > wt
 147           tmax = tn;
 148           wmax = wn;
 149         else
 150           tmin = tn;
 151           wmin = wn;
 152         endif
 153       endfor
 154       if iter_max_reached
 155         warning("at x = %g, maximum number of iterations %d reached without convergence; approximation may not be optimal", x(l), niter_max)
 156       endif
 157       yy(l) = tmin + (wt - wmin) * (tmax - tmin) / (wmax - wmin);
 158     endif
 159   endfor
 160 endfunction