octave_packages/optim-1.2.0/nonlin_residmin.m

   1 ## Copyright (C) 2010, 2011 Olaf Till <olaf.till@uni-jena.de>
   2 ##
   3 ## This program is free software; you can redistribute it and/or modify it under
   4 ## the terms of the GNU General Public License as published by the Free Software
   5 ## Foundation; either version 3 of the License, or (at your option) any later
   6 ## version.
   7 ##
   8 ## This program is distributed in the hope that it will be useful, but WITHOUT
   9 ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
  10 ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
  11 ## details.
  12 ##
  13 ## You should have received a copy of the GNU General Public License along with
  14 ## this program; if not, see <http://www.gnu.org/licenses/>.
  15
  16 ## -*- texinfo -*-
  17 ## @deftypefn {Function File} {[@var{p}, @var{resid}, @var{cvg}, @var{outp}] =} nonlin_residmin (@var{f}, @var{pin})
  18 ## @deftypefnx {Function File} {[@var{p}, @var{resid}, @var{cvg}, @var{outp}] =} nonlin_residmin (@var{f}, @var{pin}, @var{settings})
  19 ## Frontend for nonlinear minimization of residuals returned by a model
  20 ## function.
  21 ##
  22 ## The functions supplied by the user have a minimal
  23 ## interface; any additionally needed constants (e.g. observed values)
  24 ## can be supplied by wrapping the user functions into anonymous
  25 ## functions.
  26 ##
  27 ## The following description applies to usage with vector-based
  28 ## parameter handling. Differences in usage for structure-based
  29 ## parameter handling will be explained in a separate section below.
  30 ##
  31 ## @var{f}: function returning the array of residuals. It gets a column
  32 ## vector of real parameters as argument. In gradient determination,
  33 ## this function may be called with an informational second argument,
  34 ## whose content depends on the function for gradient determination.
  35 ##
  36 ## @var{pin}: real column vector of initial parameters.
  37 ##
  38 ## @var{settings}: structure whose fields stand for optional settings
  39 ## referred to below. The fields can be set by @code{optimset()} with
  40 ## Octave versions 3.3.55 or greater; with older Octave versions, the
  41 ## fields must be set directly as structure-fields in the correct case.
  42 ##
  43 ## The returned values are the column vector of final parameters
  44 ## @var{p}, the final array of residuals @var{resid}, an integer
  45 ## @var{cvg} indicating if and how optimization succeeded or failed, and
  46 ## a structure @var{outp} with additional information, curently with
  47 ## only one field: @var{niter}, the number of iterations. @var{cvg} is
  48 ## greater than zero for success and less than or equal to zero for
  49 ## failure; its possible values depend on the used backend and currently
  50 ## can be @code{0} (maximum number of iterations exceeded), @code{2}
  51 ## (parameter change less than specified precision in two consecutive
  52 ## iterations), or @code{3} (improvement in objective function --- e.g.
  53 ## sum of squares --- less than specified).
  54 ##
  55 ## @var{settings}:
  56 ##
  57 ## @code{Algorithm}: String specifying the backend. Default:
  58 ## @code{"lm_svd_feasible"}. The latter is currently the only backend
  59 ## distributed with this package. It is described in a separate section
  60 ## below.
  61 ##
  62 ## @code{dfdp}: Function computing the jacobian of the residuals with
  63 ## respect to the parameters, assuming residuals are reshaped to a
  64 ## vector. Default: finite differences. Will be called with the column
  65 ## vector of parameters and an informational structure as arguments. The
  66 ## structure has the fields @code{f}: value of residuals for current
  67 ## parameters, reshaped to a column vector, @code{fixed}: logical vector
  68 ## indicating which parameters are not optimized, so these partial
  69 ## derivatives need not be computed and can be set to zero,
  70 ## @code{diffp}, @code{diff_onesided}, @code{lbound}, @code{ubound}:
  71 ## identical to the user settings of this name, @code{plabels}:
  72 ## 1-dimensional cell-array of column-cell-arrays, each column with
  73 ## labels for all parameters, the first column contains the numerical
  74 ## indices of the parameters. The default jacobian function will call
  75 ## the model function with the second argument set with fields @code{f}:
  76 ## as the @code{f} passed to the jacobian function, @code{plabels}:
  77 ## cell-array of 1x1 cell-arrays with the entries of the
  78 ## column-cell-arrays of @code{plabels} as passed to the jacobian
  79 ## function corresponding to current parameter, @code{side}: @code{0}
  80 ## for one-sided interval, @code{1} or @code{2}, respectively, for the
  81 ## sides of a two-sided interval, and @code{parallel}: logical scalar
  82 ## indicating parallel computation of partial derivatives.
  83 ##
  84 ## @code{diffp}: column vector of fractional intervals (doubled for
  85 ## central intervals) supposed to be used by jacobian functions
  86 ## performing finite differencing. Default: @code{.001 * ones (size (parameters))}. The default jacobian function will use these as
  87 ## absolute intervals for parameters with value zero.
  88 ##
  89 ## @code{diff_onesided}: logical column vector indicating that one-sided
  90 ## intervals should be used by jacobian functions performing finite
  91 ## differencing. Default: @code{false (size (parameters))}.
  92 ##
  93 ## @code{complex_step_derivative_f},
  94 ## @code{complex_step_derivative_inequc},
  95 ## @code{complex_step_derivative_equc}: logical scalars, default: false.
  96 ## Estimate Jacobian of model function, general inequality constraints,
  97 ## and general equality constraints, respectively, with complex step
  98 ## derivative approximation. Use only if you know that your model
  99 ## function, function of general inequality constraints, or function of
 100 ## general equality constraints, respectively, is suitable for this. No
 101 ## user function for the respective Jacobian must be specified.
 102 ##
 103 ## @code{cstep}: scalar step size for complex step derivative
 104 ## approximation. Default: 1e-20.
 105 ##
 106 ## @code{fixed}: logical column vector indicating which parameters
 107 ## should not be optimized, but kept to their inital value. Fixing is
 108 ## done independently of the backend, but the backend may choose to fix
 109 ## additional parameters under certain conditions.
 110 ##
 111 ## @code{lbound}, @code{ubound}: column vectors of lower and upper
 112 ## bounds for parameters. Default: @code{-Inf} and @code{+Inf},
 113 ## respectively. The bounds are non-strict, i.e. parameters are allowed
 114 ## to be exactly equal to a bound. The default jacobian function will
 115 ## respect bounds (but no further inequality constraints) in finite
 116 ## differencing.
 117 ##
 118 ## @code{inequc}: Further inequality constraints. Cell-array containing
 119 ## up to four entries, two entries for linear inequality constraints
 120 ## and/or one or two entries for general inequality constraints. Either
 121 ## linear or general constraints may be the first entries, but the two
 122 ## entries for linear constraints must be adjacent and, if two entries
 123 ## are given for general constraints, they also must be adjacent. The
 124 ## two entries for linear constraints are a matrix (say @code{m}) and a
 125 ## vector (say @code{v}), specifying linear inequality constraints of
 126 ## the form @code{m.' * parameters + v >= 0}. The first entry for
 127 ## general constraints must be a differentiable vector valued function
 128 ## (say @code{h}), specifying general inequality constraints of the form
 129 ## @code{h (p[, idx]) >= 0}; @code{p} is the column vector of optimized
 130 ## paraters and the optional argument @code{idx} is a logical index.
 131 ## @code{h} has to return the values of all constraints if @code{idx} is
 132 ## not given. It may choose to return only the indexed constraints if
 133 ## @code{idx} is given (so computation of the other constraints can be
 134 ## spared); in this case, the additional setting @code{inequc_f_idx} has
 135 ## to be set to @code{true}. In gradient determination, this function
 136 ## may be called with an informational third argument, whose content
 137 ## depends on the function for gradient determination. If a second entry
 138 ## for general inequality constraints is given, it must be a function
 139 ## computing the jacobian of the constraints with respect to the
 140 ## parameters. For this function, the description of @code{dfdp} above
 141 ## applies, with 2 exceptions: 1) it is called with 3 arguments since it
 142 ## has an additional argument @code{idx} --- a logical index --- at
 143 ## second position, indicating which rows of the jacobian must be
 144 ## returned (if the function chooses to return only indexed rows, the
 145 ## additional setting @code{inequc_df_idx} has to be set to
 146 ## @code{true}). 2) the default jacobian function calls @code{h} with 3
 147 ## arguments, since the argument @code{idx} is also supplied. Note that
 148 ## specifying linear constraints as general constraints will generally
 149 ## waste performance, even if further, non-linear, general constraints
 150 ## are also specified.
 151 ##
 152 ## @code{equc}: Equality constraints. Specified the same way as
 153 ## inequality constraints (see @code{inequc}).
 154 ##
 155 ## @code{cpiv}: Function for complementary pivoting, usable in
 156 ## algorithms for constraints. Default: @ cpiv_bard. Only the default
 157 ## function is supplied with the package.
 158 ##
 159 ## @code{weights}: Array of weights for the residuals. Dimensions must
 160 ## match.
 161 ##
 162 ## @code{TolFun}: Minimum fractional improvement in objective function
 163 ## (e.g. sum of squares) in an iteration (abortion criterium). Default:
 164 ## .0001.
 165 ##
 166 ## @code{MaxIter}: Maximum number of iterations (abortion criterium).
 167 ## Default: backend-specific.
 168 ##
 169 ## @code{fract_prec}: Column Vector, minimum fractional change of
 170 ## parameters in an iteration (abortion criterium if violated in two
 171 ## consecutive iterations). Default: backend-specific.
 172 ##
 173 ## @code{max_fract_change}: Column Vector, enforced maximum fractional
 174 ## change in parameters in an iteration. Default: backend-specific.
 175 ##
 176 ## @code{Display}: String indicating the degree of verbosity. Default:
 177 ## @code{"off"}. Possible values are currently @code{"off"} (no
 178 ## messages) and @code{"iter"} (some messages after each iteration).
 179 ## Support of this setting and its exact interpretation are
 180 ## backend-specific.
 181 ##
 182 ## @code{plot_cmd}: Function enabling backend to plot results or
 183 ## intermediate results. Will be called with current computed
 184 ## residuals. Default: plot nothing.
 185 ##
 186 ## @code{debug}: Logical scalar, default: @code{false}. Will be passed
 187 ## to the backend, which might print debugging information if true.
 188 ##
 189 ## Structure-based parameter handling
 190 ##
 191 ## The setting @code{param_order} is a cell-array with names of the
 192 ## optimized parameters. If not given, and initial parameters are a
 193 ## structure, all parameters in the structure are optimized. If initial
 194 ## parameters are a structure, it is an error if @code{param_order} is
 195 ## not given and there are any non-structure-based configuration items
 196 ## or functions.
 197 ##
 198 ## The initial parameters @var{pin} can be given as a structure
 199 ## containing at least all fields named in @code{param_order}. In this
 200 ## case the returned parameters @var{p} will also be a structure.
 201 ##
 202 ## Each user-supplied function can be called with the argument
 203 ## containing the current parameters being a structure instead of a
 204 ## column vector. For this, a corresponding setting must be set to
 205 ## @code{true}: @code{f_pstruct} (model function), @code{dfdp_pstruct}
 206 ## (jacobian of model function), @code{f_inequc_pstruct} (general
 207 ## inequality constraints), @code{df_inequc_pstruct} (jacobian of
 208 ## general inequality constraints), @code{f_equc_pstruct} (general
 209 ## equality constraints), and @code{df_equc_pstruct} (jacobian of
 210 ## general equality constraints). If a jacobian-function is configured
 211 ## in such a way, it must return the columns of the jacobian as fields
 212 ## of a structure under the respective parameter names.
 213 ##
 214 ## Similarly, for specifying linear constraints, instead of the matrix
 215 ## (called @code{m} above), a structure containing the rows of the
 216 ## matrix in fields under the respective parameter names can be given.
 217 ## In this case, rows containing only zeros need not be given.
 218 ##
 219 ## The vector-based settings @code{lbound}, @code{ubound},
 220 ## @code{fixed}, @code{diffp}, @code{diff_onesided}, @code{fract_prec},
 221 ## and @code{max_fract_change} can be replaced by the setting
 222 ## @code{param_config}. It is a structure that can contain fields named
 223 ## in @code{param_order}. For each such field, there may be subfields
 224 ## with the same names as the above vector-based settings, but
 225 ## containing a scalar value for the respective parameter. If
 226 ## @code{param_config} is specified, none of the above
 227 ## vector/matrix-based settings may be used.
 228 ##
 229 ## Additionally, named parameters are allowed to be non-scalar real
 230 ## arrays. In this case, their dimensions are given by the setting
 231 ## @code{param_dims}, a cell-array of dimension vectors, each containing
 232 ## at least two dimensions; if not given, dimensions are taken from the
 233 ## initial parameters, if these are given in a structure. Any
 234 ## vector-based settings or not structure-based linear constraints then
 235 ## must correspond to an order of parameters with all parameters
 236 ## reshaped to vectors and concatenated in the user-given order of
 237 ## parameter names. Structure-based settings or structure-based initial
 238 ## parameters must contain arrays with dimensions reshapable to those of
 239 ## the respective parameters.
 240 ##
 241 ## Description of backends (currently only one)
 242 ##
 243 ## "lm_svd_feasible"
 244 ##
 245 ## A Levenberg/Marquardt algorithm using singular value decomposition
 246 ## and featuring constraints which must be met by the initial parameters
 247 ## and are attempted to be kept met throughout the optimization.
 248 ##
 249 ## Parameters with identical lower and upper bounds will be fixed.
 250 ##
 251 ## Returned value @var{cvg} will be @code{0}, @code{2}, or @code{3}.
 252 ##
 253 ## Backend-specific defaults are: @code{MaxIter}: 20, @code{fract_prec}:
 254 ## @code{zeros (size (parameters))}, @code{max_fract_change}: @code{Inf}
 255 ## for all parameters.
 256 ##
 257 ## Interpretation of @code{Display}: if set to @code{"iter"}, currently
 258 ## @code{plot_cmd} is evaluated for each iteration, and some further
 259 ## diagnostics may be printed.
 260 ##
 261 ## Specific option: @code{lm_svd_feasible_alt_s}: if falling back to
 262 ## nearly gradient descent, do it more like original Levenberg/Marquardt
 263 ## method, with descent in each gradient component; for testing only.
 264 ##
 265 ## @seealso {nonlin_curvefit}
 266 ## @end deftypefn
 267
 268 function [p, resid, cvg, outp] = nonlin_residmin (varargin)
 269
 270   if (nargin == 1)
 271     p = __nonlin_residmin__ (varargin{1});
 272     return;
 273   endif
 274
 275   if (nargin < 2 || nargin > 3)
 276     print_usage ();
 277   endif
 278
 279   if (nargin == 2)
 280     varargin{3} = struct ();
 281   endif
 282
 283   varargin{4} = struct ();
 284
 285   [p, resid, cvg, outp] = __nonlin_residmin__ (varargin{:});
 286
 287 endfunction
 288
 289 %!demo
 290 %!  ## Example for linear inequality constraints
 291 %!  ## (see also the same example in 'demo nonlin_curvefit')
 292 %!
 293 %!  ## independents
 294 %!  indep = 1:5;
 295 %!  ## residual function:
 296 %!  f = @ (p) p(1) * exp (p(2) * indep) - [1, 2, 4, 7, 14];
 297 %!  ## initial values:
 298 %!  init = [.25; .25];
 299 %!  ## linear constraints, A.' * parametervector + B >= 0
 300 %!  A = [1; -1]; B = 0; # p(1) >= p(2);
 301 %!  settings = optimset ("inequc", {A, B});
 302 %!
 303 %!  ## start optimization
 304 %!  [p, residuals, cvg, outp] = nonlin_residmin (f, init, settings)