|
| 1 | +% minimize.m - minimize a smooth differentiable multivariate function using |
| 2 | +% LBFGS (Limited memory LBFGS) or CG (Conjugate Gradients) |
| 3 | +% Usage: [X, fX, i] = minimize(X, F, p, other, ... ) |
| 4 | +% where |
| 5 | +% X is an initial guess (any type: vector, matrix, cell array, struct) |
| 6 | +% F is the objective function (function pointer or name) |
| 7 | +% p parameters - if p is a number, it corresponds to p.length below |
| 8 | +% p.length allowed 1) # linesearches or 2) if -ve minus # func evals |
| 9 | +% p.method minimization method, 'BFGS', 'LBFGS' or 'CG' |
| 10 | +% p.verbosity 0 quiet, 1 line, 2 line + warnings (default), 3 graphical |
| 11 | +% p.mem number of directions used in LBFGS (default 100) |
| 12 | +% other, ... other parameters, passed to the function F |
| 13 | +% X returned minimizer |
| 14 | +% fX vector of function values showing minimization progress |
| 15 | +% i final number of linesearches or function evaluations |
| 16 | +% The function F must take the following syntax [f, df] = F(X, other, ...) |
| 17 | +% where f is the function value and df its partial derivatives. The types of X |
| 18 | +% and df must be identical (vector, matrix, cell array, struct, etc). |
| 19 | +% |
| 20 | +% Copyright (C) 1996 - 2011 by Carl Edward Rasmussen, 2011-10-13. |
| 21 | + |
| 22 | +% Permission is hereby granted, free of charge, to any person OBTAINING A COPY |
| 23 | +% OF THIS SOFTWARE AND ASSOCIATED DOCUMENTATION FILES (THE "SOFTWARE"), TO DEAL |
| 24 | +% IN THE SOFTWARE WITHOUT RESTRICTION, INCLUDING WITHOUT LIMITATION THE RIGHTS |
| 25 | +% to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 26 | +% copies of the Software, and to permit persons to whom the Software is |
| 27 | +% furnished to do so, subject to the following conditions: |
| 28 | +% |
| 29 | +% The above copyright notice and this permission notice shall be included in |
| 30 | +% all copies or substantial portions of the Software. |
| 31 | +% |
| 32 | +% THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 33 | +% IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 34 | +% FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 35 | +% AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 36 | +% LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 37 | +% OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 38 | +% SOFTWARE. |
| 39 | + |
| 40 | +function [X, fX, i] = minimize_new(X, F, p, varargin) |
| 41 | +if isnumeric(p), p = struct('length', p); end % convert p to struct |
| 42 | +if p.length > 0, p.S = 'linesearch #'; else p.S = 'function evaluation #'; end; |
| 43 | +x = unwrap(X); % convert initial guess to vector |
| 44 | +if ~isfield(p,'method'), if length(x) > 1000, p.method = @LBFGS; |
| 45 | + else p.method = @BFGS; end; end % set default method |
| 46 | +if ~isfield(p,'verbosity'), p.verbosity = 2; end % default 1 line text output |
| 47 | +if ~isfield(p,'MFEPLS'), p.MFEPLS = 10; end % Max Func Evals Per Line Search |
| 48 | +if ~isfield(p,'MSR'), p.MSR = 100; end % Max Slope Ratio default |
| 49 | +f(F, X, varargin{:}); % set up the function f |
| 50 | +[fx, dfx] = f(x); % initial function value and derivatives |
| 51 | +if p.verbosity, printf('Initial Function Value %4.6e\r', fx); end |
| 52 | +if p.verbosity > 2, |
| 53 | + clf; subplot(211); hold on; xlabel(p.S); ylabel('function value'); |
| 54 | + plot(p.length < 0, fx, '+'); drawnow; |
| 55 | +end |
| 56 | +[x, fX, i] = feval(p.method, x, fx, dfx, p); % minimize using direction method |
| 57 | +X = rewrap(X, x); % convert answer to original representation |
| 58 | +if p.verbosity, printf('\n'); end |
| 59 | + |
| 60 | +function [x, fx, i] = CG(x0, fx0, dfx0, p) |
| 61 | +if ~isfield(p, 'SIG'), p.SIG = 0.1; end % default for line search quality |
| 62 | +i = p.length < 0; ok = 0; % initialize resource counter |
| 63 | +r = -dfx0; s = -r'*r; b = -1/(s-1); bs = -1; fx = fx0; % steepest descent |
| 64 | +while i < abs(p.length) |
| 65 | + b = b*bs/min(b*s,bs/p.MSR); % suitable initial step size using slope ratio |
| 66 | + [x, b, fx0, dfx, i] = lineSearch(x0, fx0, dfx0, r, s, b, i, p); |
| 67 | + if i < 0 % if line search failed |
| 68 | + i = -i; if ok, ok = 0; r = -dfx; else break; end % try steepest or stop |
| 69 | + else |
| 70 | + ok = 1; bs = b*s; % record step times slope (for slope ratio method) |
| 71 | + r = (dfx'*(dfx-dfx0))/(dfx0'*dfx0)*r - dfx; % Polack-Ribiere CG |
| 72 | + end |
| 73 | + s = r'*dfx; if s >= 0, r = -dfx; s = r'*dfx; ok = 0; end % slope must be -ve |
| 74 | + x0 = x; dfx0 = dfx; fx = [fx; fx0]; % replace old values with new ones |
| 75 | +end |
| 76 | + |
| 77 | +function [x, fx, i] = BFGS(x0, fx0, dfx0, p) |
| 78 | +if ~isfield(p, 'SIG'), p.SIG = 0.5; end % default for line search quality |
| 79 | +i = p.length < 0; ok = 0; % initialize resource counter |
| 80 | +x = x0; fx = fx0; r = -dfx0; s = -r'*r; b = -1/(s-1); H = eye(length(x0)); |
| 81 | +while i < abs(p.length) |
| 82 | + [x, b, fx0, dfx, i] = lineSearch(x0, fx0, dfx0, r, s, b, i, p); |
| 83 | + if i < 0 |
| 84 | + i = -i; if ok, ok = 0; else break; end; % try steepest or stop |
| 85 | + else |
| 86 | + ok = 1; t = x - x0; y = dfx - dfx0; ty = t'*y; Hy = H*y; |
| 87 | + H = H + (ty+y'*Hy)/ty^2*t*t' - 1/ty*Hy*t' - 1/ty*t*Hy'; % BFGS update |
| 88 | + end |
| 89 | + r = -H*dfx; s = r'*dfx; x0 = x; dfx0 = dfx; fx = [fx; fx0]; |
| 90 | +end |
| 91 | + |
| 92 | +function [x, fx, i] = LBFGS(x0, fx0, dfx0, p) |
| 93 | +if ~isfield(p, 'SIG'), p.SIG = 0.5; end % default for line search quality |
| 94 | +n = length(x0); k = 0; ok = 0; x = x0; fx = fx0; bs = -1/p.MSR; |
| 95 | +if isfield(p, 'mem'), m = p.mem; else m = min(100, n); end % set memory size |
| 96 | +a = zeros(1, m); t = zeros(n, m); y = zeros(n, m); % allocate memory |
| 97 | +i = p.length < 0; % initialize resource counter |
| 98 | +while i < abs(p.length) |
| 99 | + q = dfx0; |
| 100 | + for j = rem(k-1:-1:max(0,k-m),m)+1 |
| 101 | + a(j) = t(:,j)'*q/rho(j); q = q-a(j)*y(:,j); |
| 102 | + end |
| 103 | + if k == 0, r = -q/(q'*q); else r = -t(:,j)'*y(:,j)/(y(:,j)'*y(:,j))*q; end |
| 104 | + for j = rem(max(0,k-m):k-1,m)+1 |
| 105 | + r = r-t(:,j)*(a(j)+y(:,j)'*r/rho(j)); |
| 106 | + end |
| 107 | + s = r'*dfx0; if s >= 0, r = -dfx0; s = r'*dfx0; k = 0; ok = 0; end |
| 108 | + b = bs/min(bs,s/p.MSR); % suitable initial step size (usually 1) |
| 109 | + if isnan(r) | isinf(r) % if nonsense direction |
| 110 | + i = -i; % try steepest or stop |
| 111 | + else |
| 112 | + [x, b, fx0, dfx, i] = lineSearch(x0, fx0, dfx0, r, s, b, i, p); |
| 113 | + end |
| 114 | + if i < 0 % if line search failed |
| 115 | + i = -i; if ok, ok = 0; k = 0; else break; end % try steepest or stop |
| 116 | + else |
| 117 | + j = rem(k,m)+1; t(:,j) = x-x0; y(:,j) = dfx-dfx0; rho(j) = t(:,j)'*y(:,j); |
| 118 | + ok = 1; k = k+1; bs = b*s; |
| 119 | + end |
| 120 | + x0 = x; dfx0 = dfx; fx = [fx; fx0]; % replace and add values |
| 121 | +end |
| 122 | + |
| 123 | +function [x, a, fx, df, i] = lineSearch(x0, f0, df0, d, s, a, i, p) |
| 124 | +if p.length < 0, LIMIT = min(p.MFEPLS, -i-p.length); else LIMIT = p.MFEPLS; end |
| 125 | +p0.x = 0.0; p0.f = f0; p0.df = df0; p0.s = s; p1 = p0; % init p0 and p1 |
| 126 | +j = 0; p3.x = a; wp(p0, p.SIG, 0); % set step & Wolfe-Powell conditions |
| 127 | +if p.verbosity > 2 |
| 128 | + A = [-a a]/5; nd = norm(d); |
| 129 | + subplot(212); hold off; plot(0, f0, 'k+'); hold on; plot(nd*A, f0+s*A, 'k-'); |
| 130 | + xlabel('distance in line search direction'); ylabel('function value'); |
| 131 | +end |
| 132 | +while 1 % keep extrapolating as long as necessary |
| 133 | + ok = 0; while ~ok & j < LIMIT |
| 134 | + try % try, catch and bisect to safeguard extrapolation evaluation |
| 135 | + j = j+1; [p3.f p3.df] = f(x0+p3.x*d); p3.s = p3.df'*d; ok = 1; |
| 136 | + if isnan(p3.f+p3.s) | isinf(p3.f+p3.s) |
| 137 | + error('Objective function returned Inf or NaN',''); |
| 138 | + end; |
| 139 | + catch |
| 140 | + if p.verbosity > 1, printf('\n'); warning(lasterr); end % warn or silence |
| 141 | + p3.x = (p1.x+p3.x)/2; ok = 0; p3.f = NaN; % bisect, and retry |
| 142 | + end |
| 143 | + end |
| 144 | + if p.verbosity > 2 |
| 145 | + plot(nd*p3.x, p3.f, 'b+'); plot(nd*(p3.x+A), p3.f+p3.s*A, 'b-'); drawnow |
| 146 | + end |
| 147 | + if wp(p3) | j >= LIMIT, break; end % done? |
| 148 | + p0 = p1; p1 = p3; % move points back one unit |
| 149 | + p3.x = p0.x + minCubic(p1.x-p0.x, p1.f-p0.f, p0.s, p1.s, 1); % cubic extrap |
| 150 | +end |
| 151 | +while 1 % keep interpolating as long as necessary |
| 152 | + if p1.f > p3.f, p2 = p3; else p2 = p1; end % make p2 the best so far |
| 153 | + if wp(p2) > 1 | j >= LIMIT, break; end % done? |
| 154 | + p2.x = p1.x + minCubic(p3.x-p1.x, p3.f-p1.f, p1.s, p3.s, 0); % cubic interp |
| 155 | + j = j+1; [p2.f p2.df] = f(x0+p2.x*d); p2.s = p2.df'*d; |
| 156 | + if p.verbosity > 2 |
| 157 | + plot(nd*p2.x, p2.f, 'r+'); plot(nd*(p2.x+A), p2.f+p2.s*A, 'r'); drawnow |
| 158 | + end |
| 159 | + if wp(p2) > -1 & p2.s > 0 | wp(p2) < -1, p3 = p2; else p1 = p2; end % bracket |
| 160 | +end |
| 161 | +x = x0+p2.x*d; fx = p2.f; df = p2.df; a = p2.x; % return the value found |
| 162 | +if p.length < 0, i = i+j; else i = i+1; end % count func evals or line searches |
| 163 | +if p.verbosity, printf('%s %6i; value %4.6e\r', p.S, i, fx); end |
| 164 | +if wp(p2) < 2, i = -i; end % indicate faliure |
| 165 | +if p.verbosity > 2 |
| 166 | + if i>0, plot(norm(d)*p2.x, fx, 'go'); end |
| 167 | + subplot(211); plot(abs(i), fx, '+'); drawnow; |
| 168 | +end |
| 169 | + |
| 170 | +function z = minCubic(x, df, s0, s1, extr) % minimizer of approximating cubic |
| 171 | +INT = 0.1; EXT = 5.0; % interpolate and extrapolation limits |
| 172 | +A = -6*df+3*(s0+s1)*x; B = 3*df-(2*s0+s1)*x; |
| 173 | +if B<0, z = s0*x/(s0-s1); else z = -s0*x*x/(B+sqrt(B*B-A*s0*x)); end |
| 174 | +if extr % are we extrapolating? |
| 175 | + if ~isreal(z) | ~isfinite(z) | z < x | z > x*EXT, z = EXT*x; end % fix bad z |
| 176 | + z = max(z, (1+INT)*x); % extrapolate by at least INT |
| 177 | +else % else, we are interpolating |
| 178 | + if ~isreal(z) | ~isfinite(z) | z < 0 | z > x, z = x/2; end; % fix bad z |
| 179 | + z = min(max(z, INT*x), (1-INT)*x); % at least INT away from the boundaries |
| 180 | +end |
| 181 | + |
| 182 | +function y = wp(p, SIG, RHO) |
| 183 | +persistent a b c sig rho; |
| 184 | +if nargin == 3 % if three arguments, then set up the Wolfe-Powell conditions |
| 185 | + a = RHO*p.s; b = p.f; c = -SIG*p.s; sig = SIG; rho = RHO; y= 0; |
| 186 | +else |
| 187 | + if p.f > a*p.x+b % function value too large? |
| 188 | + if a > 0, y = -1; else y = -2; end |
| 189 | + else |
| 190 | + if p.s < -c, y = 0; elseif p.s > c, y = 1; else y = 2; end |
| 191 | +% if sig*abs(p.s) > c, a = rho*p.s; b = p.f-a*p.x; c = sig*abs(p.s); end |
| 192 | + end |
| 193 | +end |
| 194 | + |
| 195 | +function [fx, dfx] = f(varargin) |
| 196 | +persistent F p; |
| 197 | +if nargout == 0 |
| 198 | + p = varargin; if ischar(p{1}), F = str2func(p{1}); else F = p{1}; end |
| 199 | +else |
| 200 | + [fx, dfx] = F(rewrap(p{2}, varargin{1}), p{3:end}); dfx = unwrap(dfx); |
| 201 | +end |
| 202 | + |
| 203 | +function v = unwrap(s) % extract num elements of s (any type) into v (vector) |
| 204 | +v = []; |
| 205 | +if isnumeric(s) |
| 206 | + v = s(:); % numeric values are recast to column vector |
| 207 | +elseif isstruct(s) |
| 208 | + v = unwrap(struct2cell(orderfields(s))); % alphabetize, conv to cell, recurse |
| 209 | +elseif iscell(s) % cell array elements are |
| 210 | + for i = 1:numel(s), v = [v; unwrap(s{i})]; end % handled sequentially |
| 211 | +end % other types are ignored |
| 212 | + |
| 213 | +function [s v] = rewrap(s, v) % map elements of v (vector) onto s (any type) |
| 214 | +if isnumeric(s) |
| 215 | + if numel(v) < numel(s) |
| 216 | + error('The vector for conversion contains too few elements') |
| 217 | + end |
| 218 | + s = reshape(v(1:numel(s)), size(s)); % numeric values are reshaped |
| 219 | + v = v(numel(s)+1:end); % remaining arguments passed on |
| 220 | +elseif isstruct(s) |
| 221 | + [s p] = orderfields(s); p(p) = 1:numel(p); % alphabetize, store ordering |
| 222 | + [t v] = rewrap(struct2cell(s), v); % convert to cell, recurse |
| 223 | + s = orderfields(cell2struct(t,fieldnames(s),1),p); % conv to struct, reorder |
| 224 | +elseif iscell(s) |
| 225 | + for i = 1:numel(s) % cell array elements are handled sequentially |
| 226 | + [s{i} v] = rewrap(s{i}, v); |
| 227 | + end |
| 228 | +end % other types are not processed |
| 229 | + |
| 230 | +function printf(varargin) |
| 231 | +fprintf(varargin{:}); if exist('fflush','builtin'), fflush(stdout); end |
0 commit comments