Skip to content

Commit f81ae04

Browse files
committed
Separate diagonal quasi-Newton types
Closes #290
1 parent 9cb279b commit f81ae04

File tree

3 files changed

+116
-75
lines changed

3 files changed

+116
-75
lines changed

src/DiagonalHessianApproximation.jl

Lines changed: 90 additions & 49 deletions
Original file line numberDiff line numberDiff line change
@@ -1,20 +1,24 @@
1-
export DiagonalQN, SpectralGradient
1+
export DiagonalPSB, DiagonalAndrei, SpectralGradient
22

33
"""
4-
Implementation of the diagonal quasi-Newton approximations described in
4+
DiagonalPSB(d)
5+
6+
Construct a linear operator that represents a diagonal PSB quasi-Newton approximation
7+
as described in
58
69
M. Zhu, J. L. Nazareth and H. Wolkowicz
710
The Quasi-Cauchy Relation and Diagonal Updating.
811
SIAM Journal on Optimization, vol. 9, number 4, pp. 1192-1204, 1999.
912
https://doi.org/10.1137/S1052623498331793.
1013
11-
and
14+
The approximation satisfies the weak secant equation and is not guaranteed to be
15+
positive definite.
1216
13-
Andrei, N.
14-
A diagonal quasi-Newton updating method for unconstrained optimization.
15-
https://doi.org/10.1007/s11075-018-0562-7
17+
# Arguments
18+
19+
- `d::AbstractVector`: initial diagonal approximation.
1620
"""
17-
mutable struct DiagonalQN{T <: Real, I <: Integer, V <: AbstractVector{T}, F} <:
21+
mutable struct DiagonalPSB{T <: Real, I <: Integer, V <: AbstractVector{T}, F} <:
1822
AbstractDiagonalQuasiNewtonOperator{T}
1923
d::V # Diagonal of the operator
2024
nrow::I
@@ -30,32 +34,19 @@ mutable struct DiagonalQN{T <: Real, I <: Integer, V <: AbstractVector{T}, F} <:
3034
args5::Bool
3135
use_prod5!::Bool # true for 5-args mul! and for composite operators created with operators that use the 3-args mul!
3236
allocated5::Bool # true for 5-args mul!, false for 3-args mul! until the vectors are allocated
33-
psb::Bool
3437
end
3538

36-
"""
37-
DiagonalQN(d)
38-
39-
Construct a linear operator that represents a diagonal quasi-Newton approximation.
40-
The approximation satisfies the weak secant equation and is not guaranteed to be
41-
positive definite.
42-
43-
# Arguments
44-
45-
- `d::AbstractVector`: initial diagonal approximation;
46-
- `psb::Bool`: whether to use the diagonal PSB update or the Andrei update.
47-
"""
48-
function DiagonalQN(d::AbstractVector{T}, psb::Bool = false) where {T <: Real}
39+
@doc (@doc DiagonalPSB) function DiagonalPSB(d::AbstractVector{T}) where {T <: Real}
4940
prod = (res, v, α, β) -> mulSquareOpDiagonal!(res, d, v, α, β)
5041
n = length(d)
51-
DiagonalQN(d, n, n, true, true, prod, prod, prod, 0, 0, 0, true, true, true, psb)
42+
DiagonalPSB(d, n, n, true, true, prod, prod, prod, 0, 0, 0, true, true, true)
5243
end
5344

5445
# update function
5546
# s = x_{k+1} - x_k
5647
# y = ∇f(x_{k+1}) - ∇f(x_k)
5748
function push!(
58-
B::DiagonalQN{T, I, V, F},
49+
B::DiagonalPSB{T, I, V, F},
5950
s0::V,
6051
y0::V,
6152
) where {T <: Real, I <: Integer, V <: AbstractVector{T}, F}
@@ -71,35 +62,97 @@ function push!(
7162
sT_y = dot(s, y)
7263
sT_B_s = dot(s2, B.d)
7364
q = sT_y - sT_B_s
74-
if B.psb
75-
q /= trA2
76-
B.d .+= q .* s .^ 2
77-
else
78-
sT_s = dot(s, s)
79-
q += sT_s
80-
q /= trA2
81-
B.d .+= q .* s .^ 2 .- 1
82-
end
65+
q /= trA2
66+
B.d .+= q .* s .^ 2
8367
return B
8468
end
8569

8670
"""
87-
reset!(op::DiagonalQN)
88-
Resets the DiagonalQN data of the given operator.
71+
reset!(op::AbstractDiagonalQuasiNewtonOperator)
72+
73+
Reset the diagonal data of the given operator.
8974
"""
90-
function reset!(op::DiagonalQN{T}) where {T <: Real}
75+
function reset!(op::AbstractDiagonalQuasiNewtonOperator{T}) where {T <: Real}
9176
op.d .= one(T)
9277
op.nprod = 0
9378
op.ntprod = 0
9479
op.nctprod = 0
9580
return op
9681
end
9782

83+
"""
84+
DiagonalAndrei(d)
85+
86+
Construct a linear operator that represents a diagonal quasi-Newton approximation
87+
as described in
88+
89+
Andrei, N.
90+
A diagonal quasi-Newton updating method for unconstrained optimization.
91+
https://doi.org/10.1007/s11075-018-0562-7
92+
93+
The approximation satisfies the weak secant equation and is not guaranteed to be
94+
positive definite.
95+
96+
# Arguments
97+
98+
- `d::AbstractVector`: initial diagonal approximation.
99+
"""
100+
mutable struct DiagonalAndrei{T <: Real, I <: Integer, V <: AbstractVector{T}, F} <:
101+
AbstractDiagonalQuasiNewtonOperator{T}
102+
d::V # Diagonal of the operator
103+
nrow::I
104+
ncol::I
105+
symmetric::Bool
106+
hermitian::Bool
107+
prod!::F
108+
tprod!::F
109+
ctprod!::F
110+
nprod::I
111+
ntprod::I
112+
nctprod::I
113+
args5::Bool
114+
use_prod5!::Bool # true for 5-args mul! and for composite operators created with operators that use the 3-args mul!
115+
allocated5::Bool # true for 5-args mul!, false for 3-args mul! until the vectors are allocated
116+
end
117+
118+
@doc (@doc DiagonalAndrei) function DiagonalAndrei(d::AbstractVector{T}) where {T <: Real}
119+
prod = (res, v, α, β) -> mulSquareOpDiagonal!(res, d, v, α, β)
120+
n = length(d)
121+
DiagonalAndrei(d, n, n, true, true, prod, prod, prod, 0, 0, 0, true, true, true)
122+
end
123+
124+
# update function
125+
# s = x_{k+1} - x_k
126+
# y = ∇f(x_{k+1}) - ∇f(x_k)
127+
function push!(
128+
B::DiagonalAndrei{T, I, V, F},
129+
s0::V,
130+
y0::V,
131+
) where {T <: Real, I <: Integer, V <: AbstractVector{T}, F}
132+
s0Norm = norm(s0, 2)
133+
if s0Norm == 0
134+
error("Cannot update DiagonalQN operator with s=0")
135+
end
136+
# sᵀBs = sᵀy can be scaled by ||s||² without changing the update
137+
s = (si / s0Norm for si s0)
138+
s2 = (si^2 for si s)
139+
y = (yi / s0Norm for yi y0)
140+
trA2 = dot(s2, s2)
141+
sT_y = dot(s, y)
142+
sT_B_s = dot(s2, B.d)
143+
q = sT_y - sT_B_s
144+
sT_s = dot(s, s)
145+
q += sT_s
146+
q /= trA2
147+
B.d .+= q .* s .^ 2 .- 1
148+
return B
149+
end
150+
98151
"""
99152
Implementation of a spectral gradient quasi-Newton approximation described in
100153
101-
Birgin, E. G., Martínez, J. M., & Raydan, M.
102-
Spectral Projected Gradient Methods: Review and Perspectives.
154+
Birgin, E. G., Martínez, J. M., & Raydan, M.
155+
Spectral Projected Gradient Methods: Review and Perspectives.
103156
https://doi.org/10.18637/jss.v060.i03
104157
"""
105158
mutable struct SpectralGradient{T <: Real, I <: Integer, F} <:
@@ -152,15 +205,3 @@ function push!(
152205
B.d[1] = dot(s, y) / dot(s, s)
153206
return B
154207
end
155-
156-
"""
157-
reset!(op::SpectralGradient)
158-
Resets the SpectralGradient data of the given operator.
159-
"""
160-
function reset!(op::SpectralGradient{T}) where {T <: Real}
161-
op.d[1] = one(T)
162-
op.nprod = 0
163-
op.ntprod = 0
164-
op.nctprod = 0
165-
return op
166-
end

src/LinearOperators.jl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ include("kron.jl")
1616
# quasi-Newton operators
1717
include("qn.jl")
1818

19-
# Hessian diagonal approximations
19+
# diagonal Hessian approximations
2020
include("DiagonalHessianApproximation.jl")
2121

2222
# Special operators

test/test_diag.jl

Lines changed: 25 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ x1 = x0 + [1.0, 0.0, 1.0]
1717
grad = eval(grad_fun)
1818
s = x1 - x0
1919
y = grad(x1) - grad(x0)
20-
B = DiagonalQN([1.0, -1.0, 1.0])
20+
B = DiagonalAndrei([1.0, -1.0, 1.0])
2121
push!(B, s, y)
2222
@test abs(dot(s, B * s) - dot(s, y)) <= 1e-10
2323
end
@@ -28,53 +28,53 @@ end
2828
grad = eval(grad_fun)
2929
s = x1 - x0
3030
y = grad(x1) - grad(x0)
31-
B = DiagonalQN([1.0, -1.0, 1.0], true)
31+
B = DiagonalPSB([1.0, -1.0, 1.0])
3232
push!(B, s, y)
3333
@test abs(dot(s, B * s) - dot(s, y)) <= 1e-10
3434
end
3535
end
3636

3737
@testset "Hard coded test" begin
38+
Bref = Dict{Symbol, Dict{Any, Any}}()
39+
Bref[:∇f] = Dict{Any, Any}()
40+
Bref[:∇g] = Dict{Any, Any}()
41+
Bref[:∇h] = Dict{Any, Any}()
42+
Bref[:∇f][DiagonalPSB] = [2, -1, 2]
43+
Bref[:∇f][DiagonalAndrei] = [2, -2, 2]
44+
Bref[:∇g][DiagonalPSB] = [1 + (sin(-1) - exp(-1) - 1) / 2, -1, 1 + (sin(-1) - exp(-1) - 1) / 2]
45+
Bref[:∇g][DiagonalAndrei] = [(1 + sin(-1) - exp(-1)) / 2, -2, (1 + sin(-1) - exp(-1)) / 2]
46+
Bref[:∇h][DiagonalPSB] = [-5 / 2, -1, -5 / 2]
47+
Bref[:∇h][DiagonalAndrei] = [-5 / 2, -2, -5 / 2]
48+
49+
Bref_spg = Dict{Any, Any}()
50+
Bref_spg[:∇f] = 2
51+
Bref_spg[:∇g] = (1 - exp(-1) + sin(-1)) / 2
52+
Bref_spg[:∇h] = -5 / 2
53+
3854
for grad_fun in (:∇f, :∇g, :∇h)
3955
grad = eval(grad_fun)
4056
s = x1 - x0
4157
y = grad(x1) - grad(x0)
42-
for psb (false, true)
43-
B = DiagonalQN([1.0, -1.0, 1.0], psb)
44-
if grad_fun == :∇f
45-
Bref = psb ? [2, -1, 2] : [2, -2, 2]
46-
elseif grad_fun == :∇g
47-
Bref =
48-
psb ? [1 + (sin(-1) - exp(-1) - 1) / 2, -1, 1 + (sin(-1) - exp(-1) - 1) / 2] :
49-
[(1 + sin(-1) - exp(-1)) / 2, -2, (1 + sin(-1) - exp(-1)) / 2]
50-
else
51-
Bref = psb ? [-5 / 2, -1, -5 / 2] : [-5 / 2, -2, -5 / 2]
52-
end
58+
for DQN (DiagonalPSB, DiagonalAndrei)
59+
B = DQN([1.0, -1.0, 1.0])
5360
push!(B, s, y)
54-
@test norm(B.d - Bref) <= 1e-10
61+
@test norm(B.d - Bref[grad_fun][DQN]) <= 1e-10
5562
end
5663

5764
B = SpectralGradient(1.0, 3)
58-
if grad_fun == :∇f
59-
Bref = 2
60-
elseif grad_fun == :∇g
61-
Bref = (1 - exp(-1) + sin(-1)) / 2
62-
else
63-
Bref = -5 / 2
64-
end
6565
push!(B, s, y)
66-
@test abs(B.d[1] - Bref) <= 1e-10
66+
@test abs(B.d[1] - Bref_spg[grad_fun]) <= 1e-10
6767
end
6868
end
6969

7070
@testset "Allocations test" begin
7171
d = rand(5)
72-
A = DiagonalQN(d)
72+
A = DiagonalAndrei(d)
7373
v = rand(5)
7474
u = similar(v)
7575
mul!(u, A, v)
7676
@test (@allocated mul!(u, A, v)) == 0
77-
B = DiagonalQN(d, true)
77+
B = DiagonalPSB(d)
7878
mul!(u, B, v)
7979
@test (@allocated mul!(u, B, v)) == 0
8080
C = SpectralGradient(rand(), 5)
@@ -83,7 +83,7 @@ end
8383
end
8484

8585
@testset "reset" begin
86-
B = DiagonalQN([1.0, -1.0, 1.0], false)
86+
B = DiagonalAndrei([1.0, -1.0, 1.0])
8787
s = x1 - x0
8888
y = ∇f(x1) - ∇f(x0)
8989
push!(B, s, y)

0 commit comments

Comments
 (0)