From e5744470196a4f6a7970187095303195af8e1c51 Mon Sep 17 00:00:00 2001 From: Sword York Date: Sun, 4 Jan 2015 01:42:09 +0800 Subject: [PATCH] VL-BFGS --- BFGS/VL-BFGS/bfgs.m | 24 ++++++++ BFGS/VL-BFGS/gradient_of_function.m | 11 ++++ BFGS/VL-BFGS/lbfgs.m | 56 +++++++++++++++++++ BFGS/VL-BFGS/strongwolfe.m | 65 +++++++++++++++++++++ BFGS/VL-BFGS/test.m | 62 ++++++++++++++++++++ BFGS/VL-BFGS/vlbfgs.m | 87 +++++++++++++++++++++++++++++ BFGS/VL-BFGS/wolfe.m | 12 ++++ 7 files changed, 317 insertions(+) create mode 100644 BFGS/VL-BFGS/bfgs.m create mode 100644 BFGS/VL-BFGS/gradient_of_function.m create mode 100644 BFGS/VL-BFGS/lbfgs.m create mode 100644 BFGS/VL-BFGS/strongwolfe.m create mode 100644 BFGS/VL-BFGS/test.m create mode 100644 BFGS/VL-BFGS/vlbfgs.m create mode 100644 BFGS/VL-BFGS/wolfe.m diff --git a/BFGS/VL-BFGS/bfgs.m b/BFGS/VL-BFGS/bfgs.m new file mode 100644 index 0000000..217a857 --- /dev/null +++ b/BFGS/VL-BFGS/bfgs.m @@ -0,0 +1,24 @@ +function [xp, v, h_x] = bfgs(f,x0) + xk = x0; + Im = eye(2); + Hk = Im; + df = gradient_of_function(f, xk); + e = 1e-4; + h_x = xk; + while norm(df) > e + pk = -Hk * df; + alpha = wolfe(f, xk, pk); + %alpha = 0.2 + sk = alpha * pk; + xk = xk + sk; %x_k+1 + yk = gradient_of_function(f, xk) - df; %df_k+1 - df_k + df = yk + df; %df_k+1 + rok = 1 / (yk' * sk); + Hk = (Im - rok*sk*yk') * Hk * (Im - rok * yk * sk') + rok * sk * sk'; + s = size(h_x); + h_x(:, s(2)+1) = xk; + end + xp = xk; + v = f(xp); + h_x; +end \ No newline at end of file diff --git a/BFGS/VL-BFGS/gradient_of_function.m b/BFGS/VL-BFGS/gradient_of_function.m new file mode 100644 index 0000000..657fe2c --- /dev/null +++ b/BFGS/VL-BFGS/gradient_of_function.m @@ -0,0 +1,11 @@ +function g = gradient_of_function(f, xk) + len = length(xk); + g = zeros(len, 1); + delta = 1e-5; + for i=1:len + t = zeros(len, 1); + t(i) = delta; + df = f(xk+t) - f(xk); + g(i) = df / delta; + end +end \ No newline at end of file diff --git a/BFGS/VL-BFGS/lbfgs.m b/BFGS/VL-BFGS/lbfgs.m new file mode 100644 index 0000000..47a80a5 --- /dev/null +++ b/BFGS/VL-BFGS/lbfgs.m @@ -0,0 +1,56 @@ +function [xp, v, h_x] = lbfgs(f, x0, m) + xk = x0; + Im = eye(2); + Hk = Im; + s = []; + y = []; + g_fk = gradient_of_function(f, xk); + pk = -Hk * g_fk; + alpha = wolfe(f, xk, pk); + k = 0; + h_x = xk; + s(:,k+1) = alpha * pk; + xk = xk + s(:,k+1); + y(:,k+1) = gradient_of_function(f, xk) - g_fk; + g_fk = y(:,k+1) + g_fk; + e = 1e-4; + sh = size(h_x); + h_x(:, sh(2)+1) = xk; + k = k + 1; + while norm(g_fk) > e + pk = - lbfgs_loop(s, y, k, m, g_fk); + %alpha = strongwolfe(f, xk, pk,2); + alpha = wolfe(f, xk, pk); + s(:,k+1) = alpha * pk; + xk = xk + s(:,k+1); + y(:,k+1) = gradient_of_function(f, xk) - g_fk; + g_fk = y(:,k+1) + g_fk; + sh = size(h_x); + h_x(:, sh(2)+1) = xk; + k = k + 1; + end + xp = xk; + v = f(xp); + +end + +function [r] = lbfgs_loop(s, y, k, m, g_fk) + q = g_fk; + Im = eye(2); + Hk = (s(:,k)' * y(:,k)) / (y(:,k)' * y(:,k)) * Im; + if k >= m + tail = k - m; + else + tail = 0; + end + a = zeros(1, m); + for i = k-1:-1:tail + a(i+1) = (1/(y(:,i+1)' * s(:,i+1))) * s(:,i+1)' * q; + q = q - a(i+1) * y(:,i+1); + end + r = Hk * q; + for i = tail:k-1 + b = (1/(y(:,i+1)' * s(:,i+1))) * y(:,i+1)'*r; + r = r + s(:,i+1) * (a(i+1) - b); + end +end \ No newline at end of file diff --git a/BFGS/VL-BFGS/strongwolfe.m b/BFGS/VL-BFGS/strongwolfe.m new file mode 100644 index 0000000..aaeccc2 --- /dev/null +++ b/BFGS/VL-BFGS/strongwolfe.m @@ -0,0 +1,65 @@ +function alphas = strongwolfe(f, x0, pk, alpham) + alphap = 0; + c1 = 1e-4; + c2 = 0.9; + alphax = alpham*rand(1); + phi = @(a)f(x0 + a * pk); + fx0 = phi(0); + g_fx0 = gradient_of_function(phi, 0); + fxp = fx0; + i = 1; + while 1 + fxx = phi(alphax); + if (fxx > fx0 + c1 * alphax * g_fx0) || ((i > 1) && (fxx > fxp)) + alphas = zoom(f,x0,pk,alphap,alphax); + return; + end + g_fxx = gradient_of_function(phi, alphax); + + if abs(g_fxx) <= -c2 * g_fx0 + alphas = alphax; + return; + end + + if g_fxx >= 0 + alphas = zoom(f,x0,pk,alphax,alphap); + return; + end + alphap = alphax; + fxp = fxx; + alphax = alphax + (alpham-alphax)*rand(1); + i = i + 1; + end +end + + +function alphas = zoom(f,x0,d,alphal,alphah) + + c1 = 1e-4; + c2 = 0.9; + + phi = @(a)f(x0 + a * d); + fx0 = phi(0); + g_fx0 = gradient_of_function(phi, 0); + while 1 + alphax = 1/2*(alphal+alphah); + xx = x0 + alphax*d; + fxx = f(xx); + g_fxx = gradient_of_function(phi, alphax); + + xl = x0 + alphal*d; + fxl = f(xl); + if ((fxx > fx0 + c1*alphax*g_fx0) || (fxx >= fxl)), + alphah = alphax; + else + if abs(g_fxx) <= -c2*g_fx0 + alphas = alphax; + return; + end + if g_fxx*(alphah-alphal) >= 0, + alphah = alphal; + end + alphal = alphax; + end + end +end \ No newline at end of file diff --git a/BFGS/VL-BFGS/test.m b/BFGS/VL-BFGS/test.m new file mode 100644 index 0000000..37b5c15 --- /dev/null +++ b/BFGS/VL-BFGS/test.m @@ -0,0 +1,62 @@ +f = (@(X) (exp(X(1,:)-1) + exp(1-X(2,:)) + (X(1,:) - X(2,:)).^2)); + +%f = (@(X) (sin(0.5*X(1,:).^2 - 0.25 * X(2,:).^2 + 3) .* cos(2*X(1,:) + 1 - exp(X(2,:))) )) +%f = (@(X) (1-X(1,:)).^2 + 100 * (X(2,:) - X(1,:).^2).^2); + +plot_type = 1; +[X, Y] = meshgrid([-2:0.1:2]); +XX = [reshape(X, 1, numel(X)); reshape(Y, 1, numel(Y))]; + +if plot_type == 0 + contour(X, Y, reshape(f(XX), length(X), length(X)), 50) +else + surf(X, Y, reshape(f(XX), length(X), length(X))) +end + +hold on; + + +x0 = [-1; -1]; +[x, v, h] = lbfgs(f, x0, 2); + +% plot step +for i=2:length(h) + tmp1 = h(:,i-1); + tmp2 = h(:,i); + if plot_type == 0 + quiver(tmp1(1),tmp1(2),tmp2(1)-tmp1(1),tmp2(2)-tmp1(2), 0, 'r','LineWidth',3) + else + quiver3(tmp1(1),tmp1(2),(f(tmp1) + 0.5)*1.1, tmp2(1)-tmp1(1),tmp2(2)-tmp1(2), (f(tmp2) - f(tmp1))*1.1 , 0, 'r','LineWidth',4) + end +end + + +[x, v, h] = bfgs(f, x0); + +% plot step +for i=2:length(h) + tmp1 = h(:,i-1); + tmp2 = h(:,i); + if plot_type == 0 + quiver(tmp1(1),tmp1(2),tmp2(1)-tmp1(1),tmp2(2)-tmp1(2), 0, 'g','LineWidth',2) + else + quiver3(tmp1(1),tmp1(2),(f(tmp1) + 0.5)*1.1, tmp2(1)-tmp1(1),tmp2(2)-tmp1(2), (f(tmp2) - f(tmp1))*1.1 , 0, 'g','LineWidth',3) + end +end + + +disp 'vlbfgs' + +x0 = [-1; -1]; +[x, v, h] = vlbfgs(f, x0, 2); + +% plot step +for i=2:length(h) + tmp1 = h(:,i-1); + tmp2 = h(:,i); + if plot_type == 0 + quiver(tmp1(1),tmp1(2),tmp2(1)-tmp1(1),tmp2(2)-tmp1(2), 0, 'b','LineWidth',2) + else + quiver3(tmp1(1),tmp1(2),(f(tmp1) + 0.5)*1.1, tmp2(1)-tmp1(1),tmp2(2)-tmp1(2), (f(tmp2) - f(tmp1))*1.1 , 0, 'y','LineWidth',3) + end +end \ No newline at end of file diff --git a/BFGS/VL-BFGS/vlbfgs.m b/BFGS/VL-BFGS/vlbfgs.m new file mode 100644 index 0000000..813aa19 --- /dev/null +++ b/BFGS/VL-BFGS/vlbfgs.m @@ -0,0 +1,87 @@ +function [xp, v, h_x] = vlbfgs(f, x0, m) + xk = x0; + Im = eye(2); + Hk = Im; + s = []; + y = []; + g_fk = gradient_of_function(f, xk); + pk = -Hk * g_fk; + alpha = wolfe(f, xk, pk); + k = 0; + h_x = xk; + s(:,k+1) = alpha * pk; + xk = xk + s(:,k+1); + y(:,k+1) = gradient_of_function(f, xk) - g_fk; + g_fk = y(:,k+1) + g_fk; + e = 1e-4; + sh = size(h_x); + h_x(:, sh(2)+1) = xk; + k = k + 1; + while norm(g_fk) > e + pk = vlbfgs_loop(s, y, k, m, g_fk); + %alpha = strongwolfe(f, xk, pk,2); + alpha = wolfe(f, xk, pk); + s(:,k+1) = alpha * pk; + xk = xk + s(:,k+1); + y(:,k+1) = gradient_of_function(f, xk) - g_fk; + g_fk = y(:,k+1) + g_fk; + sh = size(h_x); + h_x(:, sh(2)+1) = xk; + k = k + 1; + end + xp = xk; + v = f(xp); + +end + + +function [r] = vlbfgs_loop(s, y, k, m, g_fk) + + %initial b[] + b = []; + if k >= m + tail = k - m; + else + tail = 0; + end + + b(:,2*m + 1) = g_fk; + j = 0; + for i = k-1:-1:tail + b(:,m - j) = s(:,i+1); + b(:,2*m - j) = y(:,i+1); + j = j + 1; + end + + bb = zeros(2*m+1, 2*m+1); + for i = 1:2*m+1 + for j = 1:2*m+1 + bb(i,j) = b(:,i)' * b(:,j); + end + end + + % vlbfgs + kesi = zeros(1,2*m+1); + kesi(2*m+1) = -1; + size_of_f = size(g_fk); + d= size_of_f(1); + a = zeros(1, m); + + for i = k-1:-1:tail + j = i - (k - m) + 1; + a(i+1) = kesi * bb(:,j) / bb(j,j+m); + kesi(m+j) = kesi(m+j) - a(i+1); + end + + for i = 1:2*m+1 + kesi(i) = bb(m,2*m) / bb(2*m,2*m) * kesi(i); + end + + for i = tail:k-1 + j = i - (k - m) + 1; + beta = kesi * bb(:,m+j) / bb(j,j+m); + kesi(j) = kesi(j) + a(i+1) - beta; + end + + r = sum(repmat(kesi, d, 1) .* b, 2); +end \ No newline at end of file diff --git a/BFGS/VL-BFGS/wolfe.m b/BFGS/VL-BFGS/wolfe.m new file mode 100644 index 0000000..658b52b --- /dev/null +++ b/BFGS/VL-BFGS/wolfe.m @@ -0,0 +1,12 @@ +function alpha = wolfe(f, xk, pk) +% f target +% xk the point +% pk descent direction + alpha = 1; + ro = 0.2; + c = 1e-4; + g = gradient_of_function(f, xk); + while (f(xk + alpha*pk) > (f(xk) + c * alpha * g' * pk)) + alpha = ro * alpha; + end +end \ No newline at end of file