Skip to content

Commit

Permalink
VL-BFGS
Browse files Browse the repository at this point in the history
  • Loading branch information
SwordYork committed Jan 3, 2015
1 parent a663d70 commit e574447
Show file tree
Hide file tree
Showing 7 changed files with 317 additions and 0 deletions.
24 changes: 24 additions & 0 deletions BFGS/VL-BFGS/bfgs.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
function [xp, v, h_x] = bfgs(f,x0)
xk = x0;
Im = eye(2);
Hk = Im;
df = gradient_of_function(f, xk);
e = 1e-4;
h_x = xk;
while norm(df) > e
pk = -Hk * df;
alpha = wolfe(f, xk, pk);
%alpha = 0.2
sk = alpha * pk;
xk = xk + sk; %x_k+1
yk = gradient_of_function(f, xk) - df; %df_k+1 - df_k
df = yk + df; %df_k+1
rok = 1 / (yk' * sk);
Hk = (Im - rok*sk*yk') * Hk * (Im - rok * yk * sk') + rok * sk * sk';
s = size(h_x);
h_x(:, s(2)+1) = xk;
end
xp = xk;
v = f(xp);
h_x;
end
11 changes: 11 additions & 0 deletions BFGS/VL-BFGS/gradient_of_function.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
function g = gradient_of_function(f, xk)
len = length(xk);
g = zeros(len, 1);
delta = 1e-5;
for i=1:len
t = zeros(len, 1);
t(i) = delta;
df = f(xk+t) - f(xk);
g(i) = df / delta;
end
end
56 changes: 56 additions & 0 deletions BFGS/VL-BFGS/lbfgs.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
function [xp, v, h_x] = lbfgs(f, x0, m)
xk = x0;
Im = eye(2);
Hk = Im;
s = [];
y = [];
g_fk = gradient_of_function(f, xk);
pk = -Hk * g_fk;
alpha = wolfe(f, xk, pk);
k = 0;
h_x = xk;
s(:,k+1) = alpha * pk;
xk = xk + s(:,k+1);
y(:,k+1) = gradient_of_function(f, xk) - g_fk;
g_fk = y(:,k+1) + g_fk;
e = 1e-4;
sh = size(h_x);
h_x(:, sh(2)+1) = xk;
k = k + 1;
while norm(g_fk) > e
pk = - lbfgs_loop(s, y, k, m, g_fk);
%alpha = strongwolfe(f, xk, pk,2);
alpha = wolfe(f, xk, pk);
s(:,k+1) = alpha * pk;
xk = xk + s(:,k+1);
y(:,k+1) = gradient_of_function(f, xk) - g_fk;
g_fk = y(:,k+1) + g_fk;
sh = size(h_x);
h_x(:, sh(2)+1) = xk;
k = k + 1;
end
xp = xk;
v = f(xp);

end

function [r] = lbfgs_loop(s, y, k, m, g_fk)
q = g_fk;
Im = eye(2);
Hk = (s(:,k)' * y(:,k)) / (y(:,k)' * y(:,k)) * Im;
if k >= m
tail = k - m;
else
tail = 0;
end
a = zeros(1, m);
for i = k-1:-1:tail
a(i+1) = (1/(y(:,i+1)' * s(:,i+1))) * s(:,i+1)' * q;
q = q - a(i+1) * y(:,i+1);
end
r = Hk * q;
for i = tail:k-1
b = (1/(y(:,i+1)' * s(:,i+1))) * y(:,i+1)'*r;
r = r + s(:,i+1) * (a(i+1) - b);
end
end
65 changes: 65 additions & 0 deletions BFGS/VL-BFGS/strongwolfe.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
function alphas = strongwolfe(f, x0, pk, alpham)
alphap = 0;
c1 = 1e-4;
c2 = 0.9;
alphax = alpham*rand(1);
phi = @(a)f(x0 + a * pk);
fx0 = phi(0);
g_fx0 = gradient_of_function(phi, 0);
fxp = fx0;
i = 1;
while 1
fxx = phi(alphax);
if (fxx > fx0 + c1 * alphax * g_fx0) || ((i > 1) && (fxx > fxp))
alphas = zoom(f,x0,pk,alphap,alphax);
return;
end
g_fxx = gradient_of_function(phi, alphax);

if abs(g_fxx) <= -c2 * g_fx0
alphas = alphax;
return;
end

if g_fxx >= 0
alphas = zoom(f,x0,pk,alphax,alphap);
return;
end
alphap = alphax;
fxp = fxx;
alphax = alphax + (alpham-alphax)*rand(1);
i = i + 1;
end
end


function alphas = zoom(f,x0,d,alphal,alphah)

c1 = 1e-4;
c2 = 0.9;

phi = @(a)f(x0 + a * d);
fx0 = phi(0);
g_fx0 = gradient_of_function(phi, 0);
while 1
alphax = 1/2*(alphal+alphah);
xx = x0 + alphax*d;
fxx = f(xx);
g_fxx = gradient_of_function(phi, alphax);

xl = x0 + alphal*d;
fxl = f(xl);
if ((fxx > fx0 + c1*alphax*g_fx0) || (fxx >= fxl)),
alphah = alphax;
else
if abs(g_fxx) <= -c2*g_fx0
alphas = alphax;
return;
end
if g_fxx*(alphah-alphal) >= 0,
alphah = alphal;
end
alphal = alphax;
end
end
end
62 changes: 62 additions & 0 deletions BFGS/VL-BFGS/test.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
f = (@(X) (exp(X(1,:)-1) + exp(1-X(2,:)) + (X(1,:) - X(2,:)).^2));

%f = (@(X) (sin(0.5*X(1,:).^2 - 0.25 * X(2,:).^2 + 3) .* cos(2*X(1,:) + 1 - exp(X(2,:))) ))
%f = (@(X) (1-X(1,:)).^2 + 100 * (X(2,:) - X(1,:).^2).^2);

plot_type = 1;
[X, Y] = meshgrid([-2:0.1:2]);
XX = [reshape(X, 1, numel(X)); reshape(Y, 1, numel(Y))];

if plot_type == 0
contour(X, Y, reshape(f(XX), length(X), length(X)), 50)
else
surf(X, Y, reshape(f(XX), length(X), length(X)))
end

hold on;


x0 = [-1; -1];
[x, v, h] = lbfgs(f, x0, 2);

% plot step
for i=2:length(h)
tmp1 = h(:,i-1);
tmp2 = h(:,i);
if plot_type == 0
quiver(tmp1(1),tmp1(2),tmp2(1)-tmp1(1),tmp2(2)-tmp1(2), 0, 'r','LineWidth',3)
else
quiver3(tmp1(1),tmp1(2),(f(tmp1) + 0.5)*1.1, tmp2(1)-tmp1(1),tmp2(2)-tmp1(2), (f(tmp2) - f(tmp1))*1.1 , 0, 'r','LineWidth',4)
end
end


[x, v, h] = bfgs(f, x0);

% plot step
for i=2:length(h)
tmp1 = h(:,i-1);
tmp2 = h(:,i);
if plot_type == 0
quiver(tmp1(1),tmp1(2),tmp2(1)-tmp1(1),tmp2(2)-tmp1(2), 0, 'g','LineWidth',2)
else
quiver3(tmp1(1),tmp1(2),(f(tmp1) + 0.5)*1.1, tmp2(1)-tmp1(1),tmp2(2)-tmp1(2), (f(tmp2) - f(tmp1))*1.1 , 0, 'g','LineWidth',3)
end
end


disp 'vlbfgs'

x0 = [-1; -1];
[x, v, h] = vlbfgs(f, x0, 2);

% plot step
for i=2:length(h)
tmp1 = h(:,i-1);
tmp2 = h(:,i);
if plot_type == 0
quiver(tmp1(1),tmp1(2),tmp2(1)-tmp1(1),tmp2(2)-tmp1(2), 0, 'b','LineWidth',2)
else
quiver3(tmp1(1),tmp1(2),(f(tmp1) + 0.5)*1.1, tmp2(1)-tmp1(1),tmp2(2)-tmp1(2), (f(tmp2) - f(tmp1))*1.1 , 0, 'y','LineWidth',3)
end
end
87 changes: 87 additions & 0 deletions BFGS/VL-BFGS/vlbfgs.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
function [xp, v, h_x] = vlbfgs(f, x0, m)
xk = x0;
Im = eye(2);
Hk = Im;
s = [];
y = [];
g_fk = gradient_of_function(f, xk);
pk = -Hk * g_fk;
alpha = wolfe(f, xk, pk);
k = 0;
h_x = xk;
s(:,k+1) = alpha * pk;
xk = xk + s(:,k+1);
y(:,k+1) = gradient_of_function(f, xk) - g_fk;
g_fk = y(:,k+1) + g_fk;
e = 1e-4;
sh = size(h_x);
h_x(:, sh(2)+1) = xk;
k = k + 1;
while norm(g_fk) > e
pk = vlbfgs_loop(s, y, k, m, g_fk);
%alpha = strongwolfe(f, xk, pk,2);
alpha = wolfe(f, xk, pk);
s(:,k+1) = alpha * pk;
xk = xk + s(:,k+1);
y(:,k+1) = gradient_of_function(f, xk) - g_fk;
g_fk = y(:,k+1) + g_fk;
sh = size(h_x);
h_x(:, sh(2)+1) = xk;
k = k + 1;
end
xp = xk;
v = f(xp);

end


function [r] = vlbfgs_loop(s, y, k, m, g_fk)

%initial b[]
b = [];
if k >= m
tail = k - m;
else
tail = 0;
end

b(:,2*m + 1) = g_fk;
j = 0;
for i = k-1:-1:tail
b(:,m - j) = s(:,i+1);
b(:,2*m - j) = y(:,i+1);
j = j + 1;
end

bb = zeros(2*m+1, 2*m+1);
for i = 1:2*m+1
for j = 1:2*m+1
bb(i,j) = b(:,i)' * b(:,j);
end
end

% vlbfgs
kesi = zeros(1,2*m+1);
kesi(2*m+1) = -1;
size_of_f = size(g_fk);
d= size_of_f(1);
a = zeros(1, m);

for i = k-1:-1:tail
j = i - (k - m) + 1;
a(i+1) = kesi * bb(:,j) / bb(j,j+m);
kesi(m+j) = kesi(m+j) - a(i+1);
end

for i = 1:2*m+1
kesi(i) = bb(m,2*m) / bb(2*m,2*m) * kesi(i);
end

for i = tail:k-1
j = i - (k - m) + 1;
beta = kesi * bb(:,m+j) / bb(j,j+m);
kesi(j) = kesi(j) + a(i+1) - beta;
end

r = sum(repmat(kesi, d, 1) .* b, 2);
end
12 changes: 12 additions & 0 deletions BFGS/VL-BFGS/wolfe.m
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
function alpha = wolfe(f, xk, pk)
% f target
% xk the point
% pk descent direction
alpha = 1;
ro = 0.2;
c = 1e-4;
g = gradient_of_function(f, xk);
while (f(xk + alpha*pk) > (f(xk) + c * alpha * g' * pk))
alpha = ro * alpha;
end
end

0 comments on commit e574447

Please sign in to comment.