1 ## Copyright (C) 2002 Etienne Grossmann <etienne@egdn.net>
2 ## Copyright (C) 2009 Levente Torok <TorokLev@gmail.com>
4 ## This program is free software; you can redistribute it and/or modify it under
5 ## the terms of the GNU General Public License as published by the Free Software
6 ## Foundation; either version 3 of the License, or (at your option) any later
9 ## This program is distributed in the hope that it will be useful, but WITHOUT
10 ## ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 ## FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
14 ## You should have received a copy of the GNU General Public License along with
15 ## this program; if not, see <http://www.gnu.org/licenses/>.
18 ## @deftypefn {Function File} {[@var{x0},@var{v},@var{nev}]} cg_min ( @var{f},@var{df},@var{args},@var{ctl} )
19 ## NonLinear Conjugate Gradient method to minimize function @var{f}.
21 ## @subheading Arguments
23 ## @item @var{f} : string : Name of function. Return a real value
24 ## @item @var{df} : string : Name of f's derivative. Returns a (R*C) x 1 vector
25 ## @item @var{args}: cell : Arguments passed to f.@*
26 ## @item @var{ctl} : 5-vec : (Optional) Control variables, described below
29 ## @subheading Returned values
31 ## @item @var{x0} : matrix : Local minimum of f
32 ## @item @var{v} : real : Value of f in x0
33 ## @item @var{nev} : 1 x 2 : Number of evaluations of f and of df
36 ## @subheading Control Variables
38 ## @item @var{ctl}(1) : 1 or 2 : Select stopping criterion amongst :
39 ## @item @var{ctl}(1)==0 : Default value
40 ## @item @var{ctl}(1)==1 : Stopping criterion : Stop search when value doesn't
41 ## improve, as tested by @math{ ctl(2) > Deltaf/max(|f(x)|,1) }
42 ## where Deltaf is the decrease in f observed in the last iteration
43 ## (each iteration consists R*C line searches).
44 ## @item @var{ctl}(1)==2 : Stopping criterion : Stop search when updates are small,
45 ## as tested by @math{ ctl(2) > max @{ dx(i)/max(|x(i)|,1) | i in 1..N @}}
46 ## where dx is the change in the x that occured in the last iteration.
47 ## @item @var{ctl}(2) : Threshold used in stopping tests. Default=10*eps
48 ## @item @var{ctl}(2)==0 : Default value
49 ## @item @var{ctl}(3) : Position of the minimized argument in args Default=1
50 ## @item @var{ctl}(3)==0 : Default value
51 ## @item @var{ctl}(4) : Maximum number of function evaluations Default=inf
52 ## @item @var{ctl}(4)==0 : Default value
53 ## @item @var{ctl}(5) : Type of optimization:
54 ## @item @var{ctl}(5)==1 : "Fletcher-Reves" method
55 ## @item @var{ctl}(5)==2 : "Polak-Ribiere" (Default)
56 ## @item @var{ctl}(5)==3 : "Hestenes-Stiefel" method
59 ## @var{ctl} may have length smaller than 4. Default values will be used if ctl is
60 ## not passed or if nan values are given.
61 ## @subheading Example:
63 ## function r=df( l ) b=[1;0;-1]; r = -( 2*l@{1@} - 2*b + rand(size(l@{1@}))); endfunction @*
64 ## function r=ff( l ) b=[1;0;-1]; r = (l@{1@}-b)' * (l@{1@}-b); endfunction @*
65 ## ll = @{ [10; 2; 3] @}; @*
67 ## [x0,v,nev]=cg_min( "ff", "df", ll, ctl ) @*
69 ## Comment: In general, BFGS method seems to be better performin in many cases but requires more computation per iteration
70 ## @seealso{ bfgsmin, http://en.wikipedia.org/wiki/Nonlinear_conjugate_gradient }
73 function [x,v,nev] = cg_min (f, dfn, args, ctl)
77 crit = 1; # Default control variables
83 if nargin >= 4, # Read arguments
84 if !isnan (ctl(1)) && ctl(1) ~= 0, crit = ctl(1); end
85 if length (ctl)>=2 && !isnan (ctl(2)) && ctl(2) ~= 0, tol = ctl(2); end
86 if length (ctl)>=3 && !isnan (ctl(3)) && ctl(3) ~= 0, narg = ctl(3); end
87 if length (ctl)>=4 && !isnan (ctl(4)) && ctl(4) ~= 0, maxev = ctl(4); end
88 if length (ctl)>=5 && !isnan (ctl(5)) && ctl(5) ~= 0, method= ctl(5); end
91 if iscell (args), # List of arguments
93 else # Single argument
98 if narg > length (args), # Check
99 error ("cg_min : narg==%i, length (args)==%i\n",
100 narg, length (args));
105 x = reshape (x,N,1) ;
112 dxn = lxn = dxn_1 = -feval( dfn, args );
118 ## tb = ts = zeros (1,100);
120 # Control params for line search
121 ctlb = [10*sqrt(eps), narg, maxev];
122 if crit == 2, ctlb(1) = tol; end
128 while nev(1) <= maxev ,
130 ctlb(3) = maxev - nev(1); # Update # of evals
134 [alpha, vnew, nev0] = brent_line_min (f, dxn, args, ctlb);
142 done = tol > (v0 - vnew) / max (1, abs (v0));
144 done = tol > norm ((x-x0)(:));
152 if done || nev(1) >= maxev, return end
155 printf("cg_min: step increased cost function\n");
159 # if abs(1-(x-xprev)'*dxn/norm(dxn)/norm(x-xprev))>1000*eps,
160 # printf("cg_min: step is not in the right direction\n");
164 # update x at the narg'th position of args cellarray
165 args{narg} = reshape (x, R, C);
170 if verbose, printf("cg_min : nev=%4i, v=%8.3g\n",nev(1),v) ; end
173 dxn = -feval (dfn, args);
179 case 1 # Fletcher-Reenves method
183 case 2 # Polak-Ribiere method
184 nu = (dxn-dxn_1)' * dxn;
187 case 3 # Hestenes-Stiefel method
188 nu = (dxn-dxn_1)' * dxn;
189 de = (dxn-dxn_1)' * lxn;
192 error("No method like this");
201 error("Numerical instability!");
204 beta = max( 0, beta );
205 ## wiki alg 3. update dxn, lxn, point
207 dxn = lxn = dxn_1 + beta*lxn ;
211 if verbose, printf ("cg_min: Too many evaluatiosn!\n"); end
216 %! P = 15; # Number of parameters
217 %! R = 20; # Number of observations (must have R >= P)
219 %! obsmat = randn (R, P);
220 %! truep = randn (P, 1);
221 %! xinit = randn (P, 1);
222 %! obses = obsmat * truep;
224 %! msq = @(x) mean (x (!isnan(x)).^2);
225 %! ff = @(x) msq (obses - obsmat * x{1}) + 1;
226 %! dff = @(x) 2 / rows (obses) * obsmat.' * (-obses + obsmat * x{1});
229 %! [xlev,vlev,nlev] = cg_min (ff, dff, xinit) ;
232 %! printf (" Costs : init=%8.3g, final=%8.3g, best=%8.3g\n", ...
233 %! ff ({xinit}), vlev, ff ({truep}));
235 %! if (max (abs (xlev-truep)) > 100*sqrt (eps))
236 %! printf ("Error is too big : %8.3g\n", max (abs (xlev-truep)));
238 %! printf ("All tests ok\n");
242 %! N = 1 + floor (30 * rand ());
243 %! truemin = randn (N, 1);
244 %! offset = 100 * randn ();
245 %! metric = randn (2 * N, N);
246 %! metric = metric.' * metric;
249 %! [u,d,v] = svd (metric);
250 %! d = (0.1+[0:(1/(N-1)):1]).^2;
251 %! metric = u * diag (d) * u.';
254 %! testfunc = @(x) sum((x{1}-truemin)'*metric*(x{1}-truemin)) + offset;
255 %! dtestf = @(x) metric' * 2*(x{1}-truemin);
257 %! xinit = 10 * randn (N, 1);
259 %! [x, v, niter] = cg_min (testfunc, dtestf, xinit);
261 %! if (any (abs (x-truemin) > 100 * sqrt(eps)))
262 %! printf ("NOT OK 1\n");
264 %! printf ("OK 1\n");
267 %! if (v-offset > 1e-8)
268 %! printf ("NOT OK 2\n");
270 %! printf ("OK 2\n");
273 %! printf ("nev=%d N=%d errx=%8.3g errv=%8.3g\n",...
274 %! niter (1), N, max (abs (x-truemin)), v-offset);
277 %! P = 2; # Number of parameters
278 %! R = 3; # Number of observations
280 %! obsmat = randn (R, P);
281 %! truep = randn (P, 1);
282 %! xinit = randn (P, 1);
284 %! obses = obsmat * truep;
286 %! msq = @(x) mean (x (!isnan(x)).^2);
287 %! ff = @(xx) msq (xx{3} - xx{2} * xx{1}) + 1;
288 %! dff = @(xx) 2 / rows(xx{3}) * xx{2}.' * (-xx{3} + xx{2}*xx{1});
291 %! x = {xinit, obsmat, obses};
292 %! [xlev, vlev, nlev] = cg_min (ff, dff, x);
295 %! xinit_ = {xinit, obsmat, obses};
296 %! xtrue_ = {truep, obsmat, obses};
297 %! printf (" Costs : init=%8.3g, final=%8.3g, best=%8.3g\n", ...
298 %! ff (xinit_), vlev, ff (xtrue_));
300 %! if (max (abs(xlev-truep)) > 100*sqrt (eps))
301 %! printf ("Error is too big : %8.3g\n", max (abs (xlev-truep)));
303 %! printf ("All tests ok\n");