1 ## Copyright (C) 2008-2012 VZLU Prague, a.s.
3 ## This file is part of Octave.
5 ## Octave is free software; you can redistribute it and/or modify it
6 ## under the terms of the GNU General Public License as published by
7 ## the Free Software Foundation; either version 3 of the License, or (at
8 ## your option) any later version.
10 ## Octave is distributed in the hope that it will be useful, but
11 ## WITHOUT ANY WARRANTY; without even the implied warranty of
12 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 ## General Public License for more details.
15 ## You should have received a copy of the GNU General Public License
16 ## along with Octave; see the file COPYING. If not, see
17 ## <http://www.gnu.org/licenses/>.
19 ## Author: Jaroslav Hajek <highegg@gmail.com>
22 ## @deftypefn {Function File} {} fsolve (@var{fcn}, @var{x0}, @var{options})
23 ## @deftypefnx {Function File} {[@var{x}, @var{fvec}, @var{info}, @var{output}, @var{fjac}] =} fsolve (@var{fcn}, @dots{})
24 ## Solve a system of nonlinear equations defined by the function @var{fcn}.
25 ## @var{fcn} should accept a vector (array) defining the unknown variables,
26 ## and return a vector of left-hand sides of the equations. Right-hand sides
27 ## are defined to be zeros.
28 ## In other words, this function attempts to determine a vector @var{x} such
29 ## that @code{@var{fcn} (@var{x})} gives (approximately) all zeros.
30 ## @var{x0} determines a starting guess. The shape of @var{x0} is preserved
31 ## in all calls to @var{fcn}, but otherwise it is treated as a column vector.
32 ## @var{options} is a structure specifying additional options.
33 ## Currently, @code{fsolve} recognizes these options:
34 ## @code{"FunValCheck"}, @code{"OutputFcn"}, @code{"TolX"},
35 ## @code{"TolFun"}, @code{"MaxIter"}, @code{"MaxFunEvals"},
36 ## @code{"Jacobian"}, @code{"Updating"}, @code{"ComplexEqn"}
37 ## @code{"TypicalX"}, @code{"AutoScaling"} and @code{"FinDiffType"}.
39 ## If @code{"Jacobian"} is @code{"on"}, it specifies that @var{fcn},
40 ## called with 2 output arguments, also returns the Jacobian matrix
41 ## of right-hand sides at the requested point. @code{"TolX"} specifies
42 ## the termination tolerance in the unknown variables, while
43 ## @code{"TolFun"} is a tolerance for equations. Default is @code{1e-7}
44 ## for both @code{"TolX"} and @code{"TolFun"}.
46 ## If @code{"AutoScaling"} is on, the variables will be automatically scaled
47 ## according to the column norms of the (estimated) Jacobian. As a result,
48 ## TolF becomes scaling-independent. By default, this option is off, because
49 ## it may sometimes deliver unexpected (though mathematically correct) results.
51 ## If @code{"Updating"} is "on", the function will attempt to use Broyden
52 ## updates to update the Jacobian, in order to reduce the amount of Jacobian
54 ## If your user function always calculates the Jacobian (regardless of number
55 ## of output arguments), this option provides no advantage and should be set to
58 ## @code{"ComplexEqn"} is @code{"on"}, @code{fsolve} will attempt to solve
59 ## complex equations in complex variables, assuming that the equations possess a
60 ## complex derivative (i.e., are holomorphic). If this is not what you want,
61 ## should unpack the real and imaginary parts of the system to get a real
64 ## For description of the other options, see @code{optimset}.
66 ## On return, @var{fval} contains the value of the function @var{fcn}
67 ## evaluated at @var{x}, and @var{info} may be one of the following values:
71 ## Converged to a solution point. Relative residual error is less than
72 ## specified by TolFun.
75 ## Last relative step size was less that TolX.
78 ## Last relative decrease in residual was less than TolF.
81 ## Iteration limit exceeded.
84 ## The trust region radius became excessively small.
87 ## Note: If you only have a single nonlinear equation of one variable, using
88 ## @code{fzero} is usually a much better idea.
90 ## Note about user-supplied Jacobians:
91 ## As an inherent property of the algorithm, Jacobian is always requested for a
92 ## solution vector whose residual vector is already known, and it is the last
93 ## accepted successful step. Often this will be one of the last two calls, but
94 ## not always. If the savings by reusing intermediate results from residual
95 ## calculation in Jacobian calculation are significant, the best strategy is to
96 ## employ OutputFcn: After a vector is evaluated for residuals, if OutputFcn is
97 ## called with that vector, then the intermediate results should be saved for
98 ## future Jacobian evaluation, and should be kept until a Jacobian evaluation
99 ## is requested or until outputfcn is called with a different vector, in which
100 ## case they should be dropped in favor of this most recent vector. A short
101 ## example how this can be achieved follows:
104 ## function [fvec, fjac] = user_func (x, optimvalues, state)
105 ## persistent sav = [], sav0 = [];
107 ## ## evaluation call
109 ## sav0.x = x; # mark saved vector
110 ## ## calculate fvec, save results to sav0.
111 ## elseif (nargout == 2)
112 ## ## calculate fjac using sav.
115 ## ## outputfcn call.
116 ## if (all (x == sav0.x))
119 ## ## maybe output iteration status, etc.
125 ## fsolve (@@user_func, x0, optimset ("OutputFcn", @@user_func, @dots{}))
127 ## @seealso{fzero, optimset}
130 ## PKG_ADD: ## Discard result to avoid polluting workspace with ans at startup.
131 ## PKG_ADD: [~] = __all_opts__ ("fsolve");
133 function [x, fvec, info, output, fjac] = fsolve (fcn, x0, options = struct ())
135 ## Get default options if requested.
136 if (nargin == 1 && ischar (fcn) && strcmp (fcn, 'defaults'))
137 x = optimset ("MaxIter", 400, "MaxFunEvals", Inf, \
138 "Jacobian", "off", "TolX", 1e-7, "TolFun", 1e-7,
139 "OutputFcn", [], "Updating", "on", "FunValCheck", "off",
140 "ComplexEqn", "off", "FinDiffType", "central",
141 "TypicalX", [], "AutoScaling", "off");
145 if (nargin < 2 || nargin > 3 || ! ismatrix (x0))
150 fcn = str2func (fcn, "global");
151 elseif (iscell (fcn))
152 fcn = @(x) make_fcn_jac (x, fcn{1}, fcn{2});
158 has_jac = strcmpi (optimget (options, "Jacobian", "off"), "on");
159 cdif = strcmpi (optimget (options, "FinDiffType", "central"), "central");
160 maxiter = optimget (options, "MaxIter", 400);
161 maxfev = optimget (options, "MaxFunEvals", Inf);
162 outfcn = optimget (options, "OutputFcn");
163 updating = strcmpi (optimget (options, "Updating", "on"), "on");
164 complexeqn = strcmpi (optimget (options, "ComplexEqn", "off"), "on");
166 ## Get scaling matrix using the TypicalX option. If set to "auto", the
167 ## scaling matrix is estimated using the Jacobian.
168 typicalx = optimget (options, "TypicalX");
169 if (isempty (typicalx))
170 typicalx = ones (n, 1);
172 autoscale = strcmpi (optimget (options, "AutoScaling", "off"), "on");
177 funvalchk = strcmpi (optimget (options, "FunValCheck", "off"), "on");
180 ## Replace fcn with a guarded version.
181 fcn = @(x) guarded_eval (fcn, x, complexeqn);
184 ## These defaults are rather stringent. I think that normally, user
185 ## prefers accuracy to performance.
187 macheps = eps (class (x0));
189 tolx = optimget (options, "TolX", 1e-7);
190 tolf = optimget (options, "TolFun", 1e-7);
200 ## Initial evaluation.
201 ## Handle arbitrary shapes of x and f and remember them.
202 fvec = fcn (reshape (x, xsiz));
209 if (! isempty (outfcn))
210 optimvalues.iter = niter;
211 optimvalues.funccount = nfev;
212 optimvalues.fval = fn;
213 optimvalues.searchdirection = zeros (n, 1);
215 stop = outfcn (x, optimvalues, state);
225 while (niter < maxiter && nfev < maxfev && ! info)
227 ## Calculate function value and Jacobian (possibly via FD).
229 [fvec, fjac] = fcn (reshape (x, xsiz));
230 ## If the Jacobian is sparse, disable Broyden updating.
237 fjac = __fdjac__ (fcn, reshape (x, xsiz), fvec, typicalx, cdif);
238 nfev += (1 + cdif) * length (x);
241 ## For square and overdetermined systems, we update a QR
242 ## factorization of the Jacobian to avoid solving a full system in each
243 ## step. In this case, we pass a triangular matrix to __dogleg__.
244 useqr = updating && m >= n && n > 10;
247 ## FIXME: Currently, pivoting is mostly useless because the \ operator
248 ## cannot exploit the resulting props of the triangular factor.
249 ## Unpivoted QR is significantly faster so it doesn't seem right to pivot
250 ## just to get invariance. Original MINPACK didn't pivot either, at least
251 ## when qr updating was used.
252 [q, r] = qr (fjac, 0);
256 ## Get column norms, use them as scaling factors.
257 jcn = norm (fjac, 'columns').';
262 ## Rescale adaptively.
263 ## FIXME: the original minpack used the following rescaling strategy:
264 ## dg = max (dg, jcn);
265 ## but it seems not good if we start with a bad guess yielding Jacobian
266 ## columns with large norms that later decrease, because the corresponding
267 ## variable will still be overscaled. So instead, we only give the old
268 ## scaling a small momentum, but do not honor it.
270 dg = max (0.1*dg, jcn);
276 ## FIXME: something better?
277 delta = factor * max (xn, 1);
280 ## It also seems that in the case of fast (and inhomogeneously) changing
281 ## Jacobian, the Broyden updates are of little use, so maybe we could
282 ## skip them if a big disproportional change is expected. The question is,
283 ## of course, how to define the above terms :)
291 while (niter <= maxiter && nfev < maxfev && ! info)
293 ## Get trust-region model (dogleg) minimizer.
296 s = - __dogleg__ (r, qtf, dg, delta);
299 s = - __dogleg__ (fjac, fvec, dg, delta);
305 delta = min (delta, sn);
308 fvec1 = fcn (reshape (x + s, xsiz)) (:);
313 ## Scaled actual reduction.
314 actred = 1 - (fn1/fn)^2;
319 ## Scaled predicted reduction, and ratio.
322 prered = 1 - (t/fn)^2;
323 ratio = actred / prered;
330 if (ratio < min(max(0.1, 0.8*lastratio), 0.9))
335 if (delta <= 1e1*macheps*xn)
336 ## Trust region became uselessly small.
345 if (abs (1-ratio) <= 0.1)
347 elseif (ratio >= 0.5 || nsuc > 1)
348 delta = max (delta, 1.4142*sn);
353 ## Successful iteration.
363 ## FIXME: should outputfcn be only called after a successful iteration?
364 if (! isempty (outfcn))
365 optimvalues.iter = niter;
366 optimvalues.funccount = nfev;
367 optimvalues.fval = fn;
368 optimvalues.searchdirection = s;
370 stop = outfcn (x, optimvalues, state);
377 ## Tests for termination conditions. A mysterious place, anything
378 ## can happen if you change something here...
380 ## The rule of thumb (which I'm not sure M*b is quite following)
381 ## is that for a tolerance that depends on scaling, only 0 makes
382 ## sense as a default value. But 0 usually means uselessly long
383 ## iterations, so we need scaling-independent tolerances wherever
386 ## FIXME -- why tolf*n*xn? If abs (e) ~ abs(x) * eps is a vector
387 ## of perturbations of x, then norm (fjac*e) <= eps*n*xn, i.e. by
388 ## tolf ~ eps we demand as much accuracy as we can expect.
391 ## The following tests done only after successful step.
392 elseif (ratio >= 1e-4)
393 ## This one is classic. Note that we use scaled variables again,
394 ## but compare to scaled step, so nothing bad.
397 ## Again a classic one. It seems weird to use the same tolf
398 ## for two different tests, but that's what M*b manual appears
400 elseif (actred < tolf)
405 ## Criterion for recalculating Jacobian.
406 if (! updating || nfail == 2 || nsuciter < 2)
410 ## Compute the scaled Broyden update.
412 u = (fvec1 - q*w) / sn;
413 v = dg .* ((dg .* s) / sn);
415 ## Update the QR factorization.
416 [q, r] = qrupdate (q, r, u, v);
419 v = dg .* ((dg .* s) / sn);
421 ## update the Jacobian
427 ## Restore original shapes.
428 x = reshape (x, xsiz);
429 fvec = reshape (fvec, fsiz);
431 output.iterations = niter;
432 output.successful = nsuciter;
433 output.funcCount = nfev;
437 ## An assistant function that evaluates a function handle and checks for
439 function [fx, jx] = guarded_eval (fun, x, complexeqn)
447 if (! complexeqn && ! (isreal (fx) && isreal (jx)))
448 error ("fsolve:notreal", "fsolve: non-real value encountered");
449 elseif (complexeqn && ! (isnumeric (fx) && isnumeric(jx)))
450 error ("fsolve:notnum", "fsolve: non-numeric value encountered");
451 elseif (any (isnan (fx(:))))
452 error ("fsolve:isnan", "fsolve: NaN value encountered");
456 function [fx, jx] = make_fcn_jac (x, fcn, fjac)
463 %!function retval = __f (p)
467 %! retval = zeros (3, 1);
468 %! retval(1) = sin(x) + y**2 + log(z) - 7;
469 %! retval(2) = 3*x + 2**y -z**3 + 1;
470 %! retval(3) = x + y + z - 5;
473 %! x_opt = [ 0.599054;
477 %! [x, fval, info] = fsolve (@__f, [ 0.5; 2.0; 2.5 ]);
478 %! assert (info > 0);
479 %! assert (norm (x - x_opt, Inf) < tol);
480 %! assert (norm (fval) < tol);
482 %!function retval = __f (p)
487 %! retval = zeros (4, 1);
488 %! retval(1) = 3*x + 4*y + exp (z + w) - 1.007;
489 %! retval(2) = 6*x - 4*y + exp (3*z + w) - 11;
490 %! retval(3) = x^4 - 4*y^2 + 6*z - 8*w - 20;
491 %! retval(4) = x^2 + 2*y^3 + z - w - 4;
494 %! x_opt = [ -0.767297326653401, 0.590671081117440, 1.47190018629642, -1.52719341133957 ];
496 %! [x, fval, info] = fsolve (@__f, [-1, 1, 2, -1]);
497 %! assert (info > 0);
498 %! assert (norm (x - x_opt, Inf) < tol);
499 %! assert (norm (fval) < tol);
501 %!function retval = __f (p)
505 %! retval = zeros (3, 1);
506 %! retval(1) = sin(x) + y**2 + log(z) - 7;
507 %! retval(2) = 3*x + 2**y -z**3 + 1;
508 %! retval(3) = x + y + z - 5;
509 %! retval(4) = x*x + y - z*log(z) - 1.36;
512 %! x_opt = [ 0.599054;
516 %! [x, fval, info] = fsolve (@__f, [ 0.5; 2.0; 2.5 ]);
517 %! assert (info > 0);
518 %! assert (norm (x - x_opt, Inf) < tol);
519 %! assert (norm (fval) < tol);
521 %!function retval = __f (p)
525 %! retval = zeros (3, 1);
526 %! retval(1) = sin(x) + y**2 + log(z) - 7;
527 %! retval(2) = 3*x + 2**y -z**3 + 1;
528 %! retval(3) = x + y + z - 5;
531 %! x_opt = [ 0.599054;
535 %! opt = optimset ("Updating", "qrp");
536 %! [x, fval, info] = fsolve (@__f, [ 0.5; 2.0; 2.5 ], opt);
537 %! assert (info > 0);
538 %! assert (norm (x - x_opt, Inf) < tol);
539 %! assert (norm (fval) < tol);
545 %! noise = 1e-5 * sin (100*x);
546 %! y = exp (-a0*x) + b0 + noise;
550 %! [c, fval, info, output] = fsolve (@(c) (exp(-c(1)*x) + c(2) - y), [0, 0]);
551 %! assert (info > 0);
552 %! assert (norm (c - c_opt, Inf) < tol);
553 %! assert (norm (fval) < norm (noise));
556 %!function y = cfun (x)
557 %! y(1) = (1+i)*x(1)^2 - (1-i)*x(2) - 2;
558 %! y(2) = sqrt (x(1)*x(2)) - (1-2i)*x(3) + (3-4i);
559 %! y(3) = x(1) * x(2) - x(3)^2 + (3+2i);
563 %! x_opt = [-1+i, 1-i, 2+i];
566 %! [x, f, info] = fsolve (@cfun, x, optimset ("ComplexEqn", "on"));
568 %! assert (norm (f) < tol);
569 %! assert (norm (x - x_opt, Inf) < tol);
571 ## Solve the double dogleg trust-region least-squares problem:
572 ## Minimize norm(r*x-b) subject to the constraint norm(d.*x) <= delta,
573 ## x being a convex combination of the gauss-newton and scaled gradient.
575 ## TODO: error checks
576 ## TODO: handle singularity, or leave it up to mldivide?
578 function x = __dogleg__ (r, b, d, delta)
579 ## Get Gauss-Newton direction.
583 ## GN is too big, get scaled gradient.
587 ## Normalize and rescale.
589 ## Get the line minimizer in s direction.
591 snm = (sn / tn) / tn;
593 ## Get the dogleg path minimizer.
595 dxn = delta/xn; snmd = snm/delta;
596 t = (bn/sn) * (bn/xn) * snmd;
597 t -= dxn * snmd^2 - sqrt ((t-dxn)^2 + (1-dxn^2)*(1-snmd^2));
598 alpha = dxn*(1-snmd^2) / t;
606 ## Form the appropriate convex combination.
607 x = alpha * x + ((1-alpha) * min (snm, delta)) * s;