octave_packages/optim-1.2.0/cg_min.m

   1 ## Copyright (C) 2002 Etienne Grossmann <etienne@egdn.net>
   2 ## Copyright (C) 2009 Levente Torok <TorokLev@gmail.com>
   3 ##
   4 ## This program is free software; you can redistribute it and/or modify it under
   5 ## the terms of the GNU General Public License as published by the Free Software
   6 ## Foundation; either version 3 of the License, or (at your option) any later
   7 ## version.
   8 ##
   9 ## This program is distributed in the hope that it will be useful, but WITHOUT
  10 ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  11 ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
  12 ## details.
  13 ##
  14 ## You should have received a copy of the GNU General Public License along with
  15 ## this program; if not, see <http://www.gnu.org/licenses/>.
  16
  17 ## -*- texinfo -*-
  18 ## @deftypefn {Function File} {[@var{x0},@var{v},@var{nev}]} cg_min ( @var{f},@var{df},@var{args},@var{ctl} )
  19 ## NonLinear Conjugate Gradient method to minimize function @var{f}.
  20 ##
  21 ## @subheading Arguments
  22 ## @itemize @bullet
  23 ## @item @var{f}   : string   : Name of function. Return a real value
  24 ## @item @var{df}  : string   : Name of f's derivative. Returns a (R*C) x 1 vector
  25 ## @item @var{args}: cell     : Arguments passed to f.@*
  26 ## @item @var{ctl}   : 5-vec    : (Optional) Control variables, described below
  27 ## @end itemize
  28 ##
  29 ## @subheading Returned values
  30 ## @itemize @bullet
  31 ## @item @var{x0}    : matrix   : Local minimum of f
  32 ## @item @var{v}     : real     : Value of f in x0
  33 ## @item @var{nev}   : 1 x 2    : Number of evaluations of f and of df
  34 ## @end itemize
  35 ##
  36 ## @subheading Control Variables
  37 ## @itemize @bullet
  38 ## @item @var{ctl}(1)       : 1 or 2 : Select stopping criterion amongst :
  39 ## @item @var{ctl}(1)==0    : Default value
  40 ## @item @var{ctl}(1)==1    : Stopping criterion : Stop search when value doesn't
  41 ## improve, as tested by @math{ ctl(2) > Deltaf/max(|f(x)|,1) }
  42 ## where Deltaf is the decrease in f observed in the last iteration
  43 ## (each iteration consists R*C line searches).
  44 ## @item @var{ctl}(1)==2    : Stopping criterion : Stop search when updates are small,
  45 ## as tested by @math{ ctl(2) > max @{ dx(i)/max(|x(i)|,1) | i in 1..N @}}
  46 ## where  dx is the change in the x that occured in the last iteration.
  47 ## @item @var{ctl}(2)       : Threshold used in stopping tests.           Default=10*eps
  48 ## @item @var{ctl}(2)==0    : Default value
  49 ## @item @var{ctl}(3)       : Position of the minimized argument in args  Default=1
  50 ## @item @var{ctl}(3)==0    : Default value
  51 ## @item @var{ctl}(4)       : Maximum number of function evaluations      Default=inf
  52 ## @item @var{ctl}(4)==0    : Default value
  53 ## @item @var{ctl}(5)       : Type of optimization:
  54 ## @item @var{ctl}(5)==1    : "Fletcher-Reves" method
  55 ## @item @var{ctl}(5)==2    : "Polak-Ribiere" (Default)
  56 ## @item @var{ctl}(5)==3    : "Hestenes-Stiefel" method
  57 ## @end itemize
  58 ##
  59 ## @var{ctl} may have length smaller than 4. Default values will be used if ctl is
  60 ## not passed or if nan values are given.
  61 ## @subheading Example:
  62 ##
  63 ## function r=df( l )  b=[1;0;-1]; r = -( 2*l@{1@} - 2*b + rand(size(l@{1@}))); endfunction @*
  64 ## function r=ff( l )  b=[1;0;-1]; r = (l@{1@}-b)' * (l@{1@}-b); endfunction @*
  65 ## ll = @{ [10; 2; 3] @}; @*
  66 ## ctl(5) = 3; @*
  67 ## [x0,v,nev]=cg_min( "ff", "df", ll, ctl ) @*
  68 ##
  69 ## Comment:  In general, BFGS method seems to be better performin in many cases but requires more computation per iteration
  70 ## @seealso{ bfgsmin, http://en.wikipedia.org/wiki/Nonlinear_conjugate_gradient }
  71 ## @end deftypefn
  72
  73 function [x,v,nev] = cg_min (f, dfn, args, ctl)
  74
  75 verbose = 0;
  76
  77 crit = 1;                       # Default control variables
  78 tol = 10*eps;
  79 narg = 1;
  80 maxev = inf;
  81 method = 2;
  82
  83 if nargin >= 4,                 # Read arguments
  84   if                    !isnan (ctl(1)) && ctl(1) ~= 0, crit  = ctl(1); end
  85   if length (ctl)>=2 && !isnan (ctl(2)) && ctl(2) ~= 0, tol   = ctl(2); end
  86   if length (ctl)>=3 && !isnan (ctl(3)) && ctl(3) ~= 0, narg  = ctl(3); end
  87   if length (ctl)>=4 && !isnan (ctl(4)) && ctl(4) ~= 0, maxev = ctl(4); end
  88   if length (ctl)>=5 && !isnan (ctl(5)) && ctl(5) ~= 0, method= ctl(5); end
  89 end
  90
  91 if iscell (args),               # List of arguments
  92   x = args{narg};
  93 else                                    # Single argument
  94   x = args;
  95   args = {args};
  96 end
  97
  98 if narg > length (args),        # Check
  99   error ("cg_min : narg==%i, length (args)==%i\n",
 100          narg, length (args));
 101 end
 102
 103 [R, C] = size(x);
 104 N = R*C;
 105 x = reshape (x,N,1) ;
 106
 107 nev = [0, 0];
 108
 109 v = feval (f, args);
 110 nev(1)++;
 111
 112 dxn = lxn = dxn_1 = -feval( dfn, args );
 113 nev(2)++;
 114
 115 done = 0;
 116
 117 ## TEMP
 118 ## tb = ts = zeros (1,100);
 119
 120                                 # Control params for line search
 121 ctlb = [10*sqrt(eps), narg, maxev];
 122 if crit == 2, ctlb(1) = tol; end
 123
 124 x0 = x;
 125 v0 = v;
 126
 127 nline = 0;
 128 while nev(1) <= maxev ,
 129   ## xprev = x ;
 130   ctlb(3) = maxev - nev(1);     # Update # of evals
 131
 132
 133   ## wiki alg 4.
 134   [alpha, vnew, nev0] = brent_line_min (f, dxn, args, ctlb);
 135
 136   nev += nev0;
 137   ## wiki alg 5.
 138   x = x + alpha * dxn;
 139
 140   if nline >= N,
 141     if crit == 1,
 142       done = tol > (v0 - vnew) / max (1, abs (v0));
 143     else
 144       done = tol > norm ((x-x0)(:));
 145     end
 146     nline = 1;
 147     x0 = x;
 148     v0 = vnew;
 149   else
 150     nline++;
 151   end
 152   if done || nev(1) >= maxev,  return  end
 153
 154   if vnew > v + eps ,
 155     printf("cg_min: step increased cost function\n");
 156     keyboard
 157   end
 158
 159   # if abs(1-(x-xprev)'*dxn/norm(dxn)/norm(x-xprev))>1000*eps,
 160   #  printf("cg_min: step is not in the right direction\n");
 161   #  keyboard
 162   # end
 163
 164   # update x at the narg'th position of args cellarray
 165   args{narg} = reshape (x, R, C);
 166
 167   v = feval (f, args);
 168   nev(1)++;
 169
 170   if verbose, printf("cg_min : nev=%4i, v=%8.3g\n",nev(1),v) ; end
 171
 172   ## wiki alg 1:
 173   dxn = -feval (dfn, args);
 174   nev(2)++;
 175
 176   # wiki alg 2:
 177   switch method
 178
 179     case 1 # Fletcher-Reenves method
 180           nu = dxn' * dxn;
 181       de  = dxn_1' * dxn_1;
 182
 183     case 2 # Polak-Ribiere method
 184       nu = (dxn-dxn_1)' * dxn;
 185       de  = dxn_1' * dxn_1;
 186
 187     case 3 # Hestenes-Stiefel method
 188       nu = (dxn-dxn_1)' * dxn;
 189           de  = (dxn-dxn_1)' * lxn;
 190
 191         otherwise
 192       error("No method like this");
 193
 194   endswitch
 195
 196   if nu == 0,
 197         return
 198   endif
 199
 200   if de == 0,
 201     error("Numerical instability!");
 202   endif
 203   beta = nu / de;
 204   beta = max( 0, beta );
 205   ## wiki alg 3.   update dxn, lxn, point
 206   dxn_1 = dxn;
 207   dxn = lxn = dxn_1 + beta*lxn ;
 208
 209 end
 210
 211 if verbose, printf ("cg_min: Too many evaluatiosn!\n"); end
 212
 213 endfunction
 214
 215 %!demo
 216 %! P = 15; # Number of parameters
 217 %! R = 20; # Number of observations (must have R >= P)
 218 %!
 219 %! obsmat = randn (R, P);
 220 %! truep = randn (P, 1);
 221 %! xinit = randn (P, 1);
 222 %! obses = obsmat * truep;
 223 %!
 224 %! msq = @(x) mean (x (!isnan(x)).^2);
 225 %! ff  = @(x) msq (obses - obsmat * x{1}) + 1;
 226 %! dff = @(x) 2 / rows (obses) * obsmat.' * (-obses + obsmat * x{1});
 227 %!
 228 %! tic;
 229 %! [xlev,vlev,nlev] = cg_min (ff, dff, xinit) ;
 230 %! toc;
 231 %!
 232 %! printf ("  Costs :     init=%8.3g, final=%8.3g, best=%8.3g\n", ...
 233 %!         ff ({xinit}), vlev, ff ({truep}));
 234 %!
 235 %! if (max (abs (xlev-truep)) > 100*sqrt (eps))
 236 %!   printf ("Error is too big : %8.3g\n", max (abs (xlev-truep)));
 237 %! else
 238 %!   printf ("All tests ok\n");
 239 %! endif
 240
 241 %!demo
 242 %! N = 1 + floor (30 * rand ());
 243 %! truemin = randn (N, 1);
 244 %! offset  = 100 * randn ();
 245 %! metric = randn (2 * N, N);
 246 %! metric = metric.' * metric;
 247 %!
 248 %! if (N > 1)
 249 %!   [u,d,v] = svd (metric);
 250 %!   d = (0.1+[0:(1/(N-1)):1]).^2;
 251 %!   metric = u * diag (d) * u.';
 252 %! endif
 253 %!
 254 %! testfunc = @(x) sum((x{1}-truemin)'*metric*(x{1}-truemin)) + offset;
 255 %! dtestf = @(x) metric' * 2*(x{1}-truemin);
 256 %!
 257 %! xinit = 10 * randn (N, 1);
 258 %!
 259 %! [x, v, niter] = cg_min (testfunc, dtestf, xinit);
 260 %!
 261 %! if (any (abs (x-truemin) > 100 * sqrt(eps)))
 262 %!   printf ("NOT OK 1\n");
 263 %! else
 264 %!   printf ("OK 1\n");
 265 %! endif
 266 %!
 267 %! if (v-offset > 1e-8)
 268 %!   printf ("NOT OK 2\n");
 269 %! else
 270 %!   printf ("OK 2\n");
 271 %! endif
 272 %!
 273 %! printf ("nev=%d  N=%d  errx=%8.3g   errv=%8.3g\n",...
 274 %!         niter (1), N, max (abs (x-truemin)), v-offset);
 275
 276 %!demo
 277 %! P = 2; # Number of parameters
 278 %! R = 3; # Number of observations
 279 %!
 280 %! obsmat = randn (R, P);
 281 %! truep  = randn (P, 1);
 282 %! xinit  = randn (P, 1);
 283 %!
 284 %! obses = obsmat * truep;
 285 %!
 286 %! msq = @(x) mean (x (!isnan(x)).^2);
 287 %! ff = @(xx) msq (xx{3} - xx{2} * xx{1}) + 1;
 288 %! dff = @(xx) 2 / rows(xx{3}) * xx{2}.' * (-xx{3} + xx{2}*xx{1});
 289 %!
 290 %! tic;
 291 %! x = {xinit, obsmat, obses};
 292 %! [xlev, vlev, nlev] = cg_min (ff, dff, x);
 293 %! toc;
 294 %!
 295 %! xinit_ = {xinit, obsmat, obses};
 296 %! xtrue_ = {truep, obsmat, obses};
 297 %! printf ("  Costs :     init=%8.3g, final=%8.3g, best=%8.3g\n", ...
 298 %!         ff (xinit_), vlev, ff (xtrue_));
 299 %!
 300 %! if (max (abs(xlev-truep)) > 100*sqrt (eps))
 301 %!   printf ("Error is too big : %8.3g\n", max (abs (xlev-truep)));
 302 %! else
 303 %!   printf ("All tests ok\n");
 304 %! endif
 305