minimize.m

   1 ## Copyright (C) 2002 Etienne Grossmann <etienne@egdn.net>
   2 ##
   3 ## This program is free software; you can redistribute it and/or modify it under
   4 ## the terms of the GNU General Public License as published by the Free Software
   5 ## Foundation; either version 3 of the License, or (at your option) any later
   6 ## version.
   7 ##
   8 ## This program is distributed in the hope that it will be useful, but WITHOUT
   9 ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10 ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
  11 ## details.
  12 ##
  13 ## You should have received a copy of the GNU General Public License along with
  14 ## this program; if not, see <http://www.gnu.org/licenses/>.
  15
  16 ## [x,v,nev,...] = minimize (f,args,...) - Minimize f
  17 ##
  18 ## ARGUMENTS
  19 ## f    : string  : Name of function. Must return a real value
  20 ## args : list or : List of arguments to f (by default, minimize the first)
  21 ##        matrix  : f's only argument
  22 ##
  23 ## RETURNED VALUES
  24 ## x   : matrix  : Local minimum of f. Let's suppose x is M-by-N.
  25 ## v   : real    : Value of f in x0
  26 ## nev : integer : Number of function evaluations
  27 ##     or 1 x 2  : Number of function and derivative evaluations (if
  28 ##                 derivatives are used)
  29 ##
  30 ##
  31 ## Extra arguments are either a succession of option-value pairs or a single
  32 ## list or struct of option-value pairs (for unary options, the value in the
  33 ## struct is ignored).
  34 ##
  35 ## OPTIONS : DERIVATIVES   Derivatives may be used if one of these options
  36 ## ---------------------   uesd. Otherwise, the Nelder-Mean (see
  37 ##                         nelder_mead_min) method is used.
  38 ##
  39 ## 'd2f', d2f     : Name of a function that returns the value of f, of its
  40 ##                  1st and 2nd derivatives : [fx,dfx,d2fx] = feval (d2f, x)
  41 ##                  where fx is a real number, dfx is 1x(M*N) and d2fx is
  42 ##                  (M*N)x(M*N). A Newton-like method (d2_min) will be used.
  43 ##
  44 ## 'hess'         : Use [fx,dfx,d2fx] = leval (f, args) to compute 1st and
  45 ##                  2nd derivatives, and use a Newton-like method (d2_min).
  46 ##
  47 ## 'd2i', d2i     : Name of a function that returns the value of f, of its
  48 ##                  1st and pseudo-inverse of second derivatives :
  49 ##                  [fx,dfx,id2fx] = feval (d2i, x) where fx is a real
  50 ##                  number, dfx is 1x(M*N) and d2ix is (M*N)x(M*N).
  51 ##                  A Newton-like method will be used (see d2_min).
  52 ##
  53 ## 'ihess'        : Use [fx,dfx,id2fx] = leval (f, args) to compute 1st
  54 ##                  derivative and the pseudo-inverse of 2nd derivatives,
  55 ##                  and use a Newton-like method (d2_min).
  56 ##
  57 ##            NOTE : df, d2f or d2i take the same arguments as f.
  58 ##
  59 ## 'order', n     : Use derivatives of order n. If the n'th order derivative
  60 ##                  is not specified by 'df', 'd2f' or 'd2i', it will be
  61 ##                  computed numerically. Currently, only order 1 works.
  62 ##
  63 ## 'ndiff'        : Use a variable metric method (bfgs) using numerical
  64 ##                  differentiation.
  65 ##
  66 ## OPTIONS : STOPPING CRITERIA  Default is to use 'tol'
  67 ## ---------------------------
  68 ## 'ftol', ftol   : Stop search when value doesn't improve, as tested by
  69 ##
  70 ##              ftol > Deltaf/max(|f(x)|,1)
  71 ##
  72 ##                 where Deltaf is the decrease in f observed in the last
  73 ##                 iteration.                                 Default=10*eps
  74 ##
  75 ## 'utol', utol   : Stop search when updates are small, as tested by
  76 ##
  77 ##              tol > max { dx(i)/max(|x(i)|,1) | i in 1..N }
  78 ##
  79 ##                 where  dx is the change in the x that occured in the last
  80 ##                 iteration.
  81 ##
  82 ## 'dtol',dtol    : Stop search when derivatives are small, as tested by
  83 ##
  84 ##              dtol > max { df(i)*max(|x(i)|,1)/max(v,1) | i in 1..N }
  85 ##
  86 ##                 where x is the current minimum, v is func(x) and df is
  87 ##                 the derivative of f in x. This option is ignored if
  88 ##                 derivatives are not used in optimization.
  89 ##
  90 ## MISC. OPTIONS
  91 ## -------------
  92 ## 'maxev', m     : Maximum number of function evaluations             <inf>
  93 ##
  94 ## 'narg' , narg  : Position of the minimized argument in args           <1>
  95 ## 'isz'  , step  : Initial step size (only for 0 and 1st order method)  <1>
  96 ##                  Should correspond to expected distance to minimum
  97 ## 'verbose'      : Display messages during execution
  98 ##
  99 ## 'backend'      : Instead of performing the minimization itself, return
 100 ##                  [backend, control], the name and control argument of the
 101 ##                  backend used by minimize(). Minimimzation can then be
 102 ##                  obtained without the overhead of minimize by calling, if
 103 ##                  a 0 or 1st order method is used :
 104 ##
 105 ##              [x,v,nev] = feval (backend, args, control)
 106 ##
 107 ##                  or, if a 2nd order method is used :
 108 ##
 109 ##              [x,v,nev] = feval (backend, control.d2f, args, control)
 110
 111 function [x,v,nev,varargout] = minimize (f,args,varargin)
 112
 113 ## Oldies
 114 ##
 115 ## 'df' , df      : Name of a function that returns the derivatives of f
 116 ##                  in x : dfx = feval (df, x) where dfx is 1x(M*N). A
 117 ##                  variable metric method (see bfgs) will be used.
 118 ##
 119 ## 'jac'          : Use [fx, dfx] = leval(f, args) to compute derivatives
 120 ##                  and use a variable metric method (bfgs).
 121 ##
 122
 123
 124 # ####################################################################
 125 # Read the options ###################################################
 126 # ####################################################################
 127 # Options with a value
 128 op1 = "ftol utol dtol df d2f d2i order narg maxev isz";
 129 # Boolean options
 130 op0 = "verbose backend jac hess ihess ndiff" ;
 131
 132 default = struct ("backend",0,"verbose",0,\
 133                     "df","",  "df", "","d2f","","d2i","",  \
 134                     "hess", 0,  "ihess", 0,  "jac", 0,"ndiff", 0,  \
 135                     "ftol" ,nan, "utol",nan, "dtol", nan,\
 136                     "order",nan, "narg",nan, "maxev",nan,\
 137                     "isz",  nan);
 138
 139 if nargin == 3                  # Accomodation to struct and list optional
 140   tmp = varargin{1};
 141
 142   if isstruct (tmp)
 143     opls = {};
 144     for [v,k] = tmp             # Treat separately unary and binary opts
 145       if findstr ([" ",k," "],op0)
 146         opls (end+1) = {k};        # append k
 147       else
 148         opls (end+[1:2]) = {k, v}; # append k and v
 149       end
 150     end
 151   elseif iscell (tmp)
 152     opls = tmp;
 153   else
 154     opls = {tmp};
 155   end
 156 else
 157   opls = varargin;
 158 end
 159 ops = read_options (opls,\
 160                     "op0",op0, "op1",op1, "default",default);
 161
 162 backend=ops.backend; verbose=ops.verbose;
 163 df=ops.df; d2f=ops.d2f; d2i=ops.d2i;
 164 hess=ops.hess; ihess=ops.ihess; jac=ops.jac;
 165 ftol=ops.ftol; utol=ops.utol; dtol=ops.dtol;
 166 order=ops.order; narg=ops.narg; maxev=ops.maxev;
 167 isz=ops.isz; ndiff=ops.ndiff;
 168
 169 if length (df), error ("Option 'df' doesn't exist any more. Sorry.\n");end
 170 if jac, error ("Option 'jac' doesn't exist any more. Sorry.\n");end
 171
 172                                 # Basic coherence checks #############
 173
 174 ws = "";                        # Warning string
 175 es = "";                        # Error string
 176
 177                                 # Warn if more than 1 differential is given
 178 if !!length (df) + !!length (d2f) + !!length (d2i) + jac + hess + ihess + \
 179       ndiff > 1
 180                                 # Order of preference of
 181   if length (d2i), ws = [ws,"d2i='",d2i,"', "]; end
 182   if length (d2f), ws = [ws,"d2f='",d2f,"', "]; end
 183   if length (df),  ws = [ws,"df='",df,"', "]; end
 184   if hess       ,  ws = [ws,"hess, "]; end
 185   if ihess      ,  ws = [ws,"ihess, "]; end
 186   if jac        ,  ws = [ws,"jac, "]; end
 187   if ndiff      ,  ws = [ws,"ndiff, "]; end
 188   ws = ws(1:length(ws)-2);
 189   ws = ["Options ",ws," were passed. Only one will be used\n"]
 190 end
 191
 192                                 # Check that enough args are passed to call
 193                                 # f(), unless backend is specified, in which
 194                                 # case I don't need to call f()
 195 if ! isnan (narg) && ! backend
 196   if narg > 1
 197     es = [es,sprintf("narg=%i, but a single argument was passed\n",narg)];
 198   end
 199 end
 200
 201 if length (ws), warn (ws); end
 202 if length (es), error (es); end # EOF Basic coherence checks #########
 203
 204
 205 op = 0;                         # Set if any option is passed and should be
 206                                 # passed to backend
 207
 208 if ! isnan (ftol)   , ctls.ftol    = ftol;  op = 1; end
 209 if ! isnan (utol)   , ctls.utol    = utol;  op = 1; end
 210 if ! isnan (dtol)   , ctls.dtol    = dtol;  op = 1; end
 211 if ! isnan (maxev)  , ctls.maxev   = maxev; op = 1; end
 212 if ! isnan (narg)   , ctls.narg    = narg;  op = 1; end
 213 if ! isnan (isz)    , ctls.isz     = isz;   op = 1; end
 214 if         verbose  , ctls.verbose = 1;     op = 1; end
 215
 216                                 # defaults That are used in this function :
 217 if isnan (narg), narg = 1; end
 218
 219                                 # ##########################################
 220                                 # Choose one optimization method ###########
 221                                 # Choose according to available derivatives
 222 if     ihess, d2f = f;  ctls.id2f = 1; op = 1;
 223 elseif hess,  d2f = f;
 224 end
 225
 226
 227 if     length (d2i), method = "d2_min"; ctls.id2f = 1; op = 1; d2f = d2i;
 228 elseif length (d2f), method = "d2_min";
 229 ### elseif length (df) , method = "bfgsmin"; ctls.df  = df; op = 1;
 230 ### elseif jac         , method = "bfgsmin"; ctls.jac = 1 ; op = 1;
 231   ## else                 method = "nelder_mead_min";
 232   ## end
 233                                 # Choose method because ndiff is passed ####
 234 elseif ndiff       , method = "bfgsmin";
 235
 236                                 # Choose method by specifying order ########
 237 elseif ! isnan (order)
 238
 239   if     order == 0, method = "nelder_mead_min";
 240   elseif order == 1
 241     method = "bfgsmin";
 242
 243   elseif order == 2
 244     if ! (length (d2f) || length (d2i))
 245       error ("minimize(): 'order' is 2, but 2nd differential is missing\n");
 246     end
 247   else
 248     error ("minimize(): 'order' option only implemented for order<=2\n");
 249   end
 250 else                            # Default is nelder_mead_min
 251   method = "nelder_mead_min";
 252 end                             # EOF choose method ########################
 253
 254 if verbose
 255   printf ("minimize(): Using '%s' as back-end\n",method);
 256 end
 257
 258                                 # More checks ##############################
 259 ws = "";
 260 if !isnan (isz) && strcmp (method,"d2_min")
 261   ws = [ws,"option 'isz' is passed to method that doesn't use it"];
 262 end
 263 if length (ws), warn (ws); end
 264                                 # EOF More checks ##########################
 265
 266 if     strcmp (method, "d2_min"), all_args = {f, d2f, args};
 267 elseif strcmp (method, "bfgsmin"),all_args = {f, args};
 268 else                              all_args = {f, args};
 269 end
 270                                 # Eventually add ctls to argument list
 271 if op, all_args{end+1} = ctls; end
 272
 273 if ! backend                    # Call the backend ###################
 274   if strcmp (method, "d2_min"),
 275     [x,v,nev,h] = d2_min(all_args{:});
 276                                 # Eventually return inverse of Hessian
 277     if nargout > 3, varargout{1} = h; vr_val_cnt=2; end
 278   elseif strcmp (method, "bfgsmin")
 279     nev = nan;
 280     if !iscell(args), args = {args}; end
 281     if isnan (ftol), ftol = 1e-12; end # Use bfgsmin's defaults
 282     if isnan (utol), utol = 1e-6; end
 283     if isnan (dtol), dtol = 1e-5; end
 284     if isnan (maxev), maxev = inf; end
 285     [x, v, okcv] = bfgsmin (f, args, {maxev,verbose,1,narg,0,ftol,utol,dtol});
 286   else
 287     [x,v,nev] = feval (method, all_args{:});
 288   end
 289
 290 else                            # Don't call backend, just return its name
 291                                 # and arguments.
 292
 293   x = method;
 294   if op, v = ctls; else v = []; end
 295 end
 296
 297