Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
139 changes: 90 additions & 49 deletions src/DiagonalHessianApproximation.jl
Original file line number Diff line number Diff line change
@@ -1,20 +1,24 @@
export DiagonalQN, SpectralGradient
export DiagonalPSB, DiagonalAndrei, SpectralGradient

"""
Implementation of the diagonal quasi-Newton approximations described in
DiagonalPSB(d)

Construct a linear operator that represents a diagonal PSB quasi-Newton approximation
as described in

M. Zhu, J. L. Nazareth and H. Wolkowicz
The Quasi-Cauchy Relation and Diagonal Updating.
SIAM Journal on Optimization, vol. 9, number 4, pp. 1192-1204, 1999.
https://doi.org/10.1137/S1052623498331793.

and
The approximation satisfies the weak secant equation and is not guaranteed to be
positive definite.

Andrei, N.
A diagonal quasi-Newton updating method for unconstrained optimization.
https://doi.org/10.1007/s11075-018-0562-7
# Arguments

- `d::AbstractVector`: initial diagonal approximation.
"""
mutable struct DiagonalQN{T <: Real, I <: Integer, V <: AbstractVector{T}, F} <:
mutable struct DiagonalPSB{T <: Real, I <: Integer, V <: AbstractVector{T}, F} <:
AbstractDiagonalQuasiNewtonOperator{T}
d::V # Diagonal of the operator
nrow::I
Expand All @@ -30,32 +34,19 @@ mutable struct DiagonalQN{T <: Real, I <: Integer, V <: AbstractVector{T}, F} <:
args5::Bool
use_prod5!::Bool # true for 5-args mul! and for composite operators created with operators that use the 3-args mul!
allocated5::Bool # true for 5-args mul!, false for 3-args mul! until the vectors are allocated
psb::Bool
end

"""
DiagonalQN(d)

Construct a linear operator that represents a diagonal quasi-Newton approximation.
The approximation satisfies the weak secant equation and is not guaranteed to be
positive definite.

# Arguments

- `d::AbstractVector`: initial diagonal approximation;
- `psb::Bool`: whether to use the diagonal PSB update or the Andrei update.
"""
function DiagonalQN(d::AbstractVector{T}, psb::Bool = false) where {T <: Real}
@doc (@doc DiagonalPSB) function DiagonalPSB(d::AbstractVector{T}) where {T <: Real}
prod = (res, v, α, β) -> mulSquareOpDiagonal!(res, d, v, α, β)
n = length(d)
DiagonalQN(d, n, n, true, true, prod, prod, prod, 0, 0, 0, true, true, true, psb)
DiagonalPSB(d, n, n, true, true, prod, prod, prod, 0, 0, 0, true, true, true)
end

# update function
# s = x_{k+1} - x_k
# y = ∇f(x_{k+1}) - ∇f(x_k)
function push!(
B::DiagonalQN{T, I, V, F},
B::DiagonalPSB{T, I, V, F},
s0::V,
y0::V,
) where {T <: Real, I <: Integer, V <: AbstractVector{T}, F}
Expand All @@ -71,35 +62,97 @@ function push!(
sT_y = dot(s, y)
sT_B_s = dot(s2, B.d)
q = sT_y - sT_B_s
if B.psb
q /= trA2
B.d .+= q .* s .^ 2
else
sT_s = dot(s, s)
q += sT_s
q /= trA2
B.d .+= q .* s .^ 2 .- 1
end
q /= trA2
B.d .+= q .* s .^ 2
return B
end

"""
reset!(op::DiagonalQN)
Resets the DiagonalQN data of the given operator.
reset!(op::AbstractDiagonalQuasiNewtonOperator)

Reset the diagonal data of the given operator.
"""
function reset!(op::DiagonalQN{T}) where {T <: Real}
function reset!(op::AbstractDiagonalQuasiNewtonOperator{T}) where {T <: Real}
op.d .= one(T)
op.nprod = 0
op.ntprod = 0
op.nctprod = 0
return op
end

"""
DiagonalAndrei(d)

Construct a linear operator that represents a diagonal quasi-Newton approximation
as described in

Andrei, N.
A diagonal quasi-Newton updating method for unconstrained optimization.
https://doi.org/10.1007/s11075-018-0562-7

The approximation satisfies the weak secant equation and is not guaranteed to be
positive definite.

# Arguments

- `d::AbstractVector`: initial diagonal approximation.
"""
mutable struct DiagonalAndrei{T <: Real, I <: Integer, V <: AbstractVector{T}, F} <:
AbstractDiagonalQuasiNewtonOperator{T}
d::V # Diagonal of the operator
nrow::I
ncol::I
symmetric::Bool
hermitian::Bool
prod!::F
tprod!::F
ctprod!::F
nprod::I
ntprod::I
nctprod::I
args5::Bool
use_prod5!::Bool # true for 5-args mul! and for composite operators created with operators that use the 3-args mul!
allocated5::Bool # true for 5-args mul!, false for 3-args mul! until the vectors are allocated
end

@doc (@doc DiagonalAndrei) function DiagonalAndrei(d::AbstractVector{T}) where {T <: Real}
prod = (res, v, α, β) -> mulSquareOpDiagonal!(res, d, v, α, β)
n = length(d)
DiagonalAndrei(d, n, n, true, true, prod, prod, prod, 0, 0, 0, true, true, true)
end

# update function
# s = x_{k+1} - x_k
# y = ∇f(x_{k+1}) - ∇f(x_k)
function push!(
B::DiagonalAndrei{T, I, V, F},
s0::V,
y0::V,
) where {T <: Real, I <: Integer, V <: AbstractVector{T}, F}
s0Norm = norm(s0, 2)
if s0Norm == 0
error("Cannot update DiagonalQN operator with s=0")
end
# sᵀBs = sᵀy can be scaled by ||s||² without changing the update
s = (si / s0Norm for si ∈ s0)
s2 = (si^2 for si ∈ s)
y = (yi / s0Norm for yi ∈ y0)
trA2 = dot(s2, s2)
sT_y = dot(s, y)
sT_B_s = dot(s2, B.d)
q = sT_y - sT_B_s
sT_s = dot(s, s)
q += sT_s
q /= trA2
B.d .+= q .* s .^ 2 .- 1
return B
end

"""
Implementation of a spectral gradient quasi-Newton approximation described in

Birgin, E. G., Martínez, J. M., & Raydan, M.
Spectral Projected Gradient Methods: Review and Perspectives.
Birgin, E. G., Martínez, J. M., & Raydan, M.
Spectral Projected Gradient Methods: Review and Perspectives.
https://doi.org/10.18637/jss.v060.i03
"""
mutable struct SpectralGradient{T <: Real, I <: Integer, F} <:
Expand Down Expand Up @@ -152,15 +205,3 @@ function push!(
B.d[1] = dot(s, y) / dot(s, s)
return B
end

"""
reset!(op::SpectralGradient)
Resets the SpectralGradient data of the given operator.
"""
function reset!(op::SpectralGradient{T}) where {T <: Real}
op.d[1] = one(T)
op.nprod = 0
op.ntprod = 0
op.nctprod = 0
return op
end
2 changes: 1 addition & 1 deletion src/LinearOperators.jl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ include("kron.jl")
# quasi-Newton operators
include("qn.jl")

# Hessian diagonal approximations
# diagonal Hessian approximations
include("DiagonalHessianApproximation.jl")

# Special operators
Expand Down
50 changes: 25 additions & 25 deletions test/test_diag.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ x1 = x0 + [1.0, 0.0, 1.0]
grad = eval(grad_fun)
s = x1 - x0
y = grad(x1) - grad(x0)
B = DiagonalQN([1.0, -1.0, 1.0])
B = DiagonalAndrei([1.0, -1.0, 1.0])
push!(B, s, y)
@test abs(dot(s, B * s) - dot(s, y)) <= 1e-10
end
Expand All @@ -28,53 +28,53 @@ end
grad = eval(grad_fun)
s = x1 - x0
y = grad(x1) - grad(x0)
B = DiagonalQN([1.0, -1.0, 1.0], true)
B = DiagonalPSB([1.0, -1.0, 1.0])
push!(B, s, y)
@test abs(dot(s, B * s) - dot(s, y)) <= 1e-10
end
end

@testset "Hard coded test" begin
Bref = Dict{Symbol, Dict{Any, Any}}()
Bref[:∇f] = Dict{Any, Any}()
Bref[:∇g] = Dict{Any, Any}()
Bref[:∇h] = Dict{Any, Any}()
Bref[:∇f][DiagonalPSB] = [2, -1, 2]
Bref[:∇f][DiagonalAndrei] = [2, -2, 2]
Bref[:∇g][DiagonalPSB] = [1 + (sin(-1) - exp(-1) - 1) / 2, -1, 1 + (sin(-1) - exp(-1) - 1) / 2]
Bref[:∇g][DiagonalAndrei] = [(1 + sin(-1) - exp(-1)) / 2, -2, (1 + sin(-1) - exp(-1)) / 2]
Bref[:∇h][DiagonalPSB] = [-5 / 2, -1, -5 / 2]
Bref[:∇h][DiagonalAndrei] = [-5 / 2, -2, -5 / 2]

Bref_spg = Dict{Any, Any}()
Bref_spg[:∇f] = 2
Bref_spg[:∇g] = (1 - exp(-1) + sin(-1)) / 2
Bref_spg[:∇h] = -5 / 2

for grad_fun in (:∇f, :∇g, :∇h)
grad = eval(grad_fun)
s = x1 - x0
y = grad(x1) - grad(x0)
for psb ∈ (false, true)
B = DiagonalQN([1.0, -1.0, 1.0], psb)
if grad_fun == :∇f
Bref = psb ? [2, -1, 2] : [2, -2, 2]
elseif grad_fun == :∇g
Bref =
psb ? [1 + (sin(-1) - exp(-1) - 1) / 2, -1, 1 + (sin(-1) - exp(-1) - 1) / 2] :
[(1 + sin(-1) - exp(-1)) / 2, -2, (1 + sin(-1) - exp(-1)) / 2]
else
Bref = psb ? [-5 / 2, -1, -5 / 2] : [-5 / 2, -2, -5 / 2]
end
for DQN ∈ (DiagonalPSB, DiagonalAndrei)
B = DQN([1.0, -1.0, 1.0])
push!(B, s, y)
@test norm(B.d - Bref) <= 1e-10
@test norm(B.d - Bref[grad_fun][DQN]) <= 1e-10
end

B = SpectralGradient(1.0, 3)
if grad_fun == :∇f
Bref = 2
elseif grad_fun == :∇g
Bref = (1 - exp(-1) + sin(-1)) / 2
else
Bref = -5 / 2
end
push!(B, s, y)
@test abs(B.d[1] - Bref) <= 1e-10
@test abs(B.d[1] - Bref_spg[grad_fun]) <= 1e-10
end
end

@testset "Allocations test" begin
d = rand(5)
A = DiagonalQN(d)
A = DiagonalAndrei(d)
v = rand(5)
u = similar(v)
mul!(u, A, v)
@test (@allocated mul!(u, A, v)) == 0
B = DiagonalQN(d, true)
B = DiagonalPSB(d)
mul!(u, B, v)
@test (@allocated mul!(u, B, v)) == 0
C = SpectralGradient(rand(), 5)
Expand All @@ -83,7 +83,7 @@ end
end

@testset "reset" begin
B = DiagonalQN([1.0, -1.0, 1.0], false)
B = DiagonalAndrei([1.0, -1.0, 1.0])
s = x1 - x0
y = ∇f(x1) - ∇f(x0)
push!(B, s, y)
Expand Down