Skip to content

Commit

Permalink
Remove gradient storage (#48)
Browse files Browse the repository at this point in the history
* Remove gradient storage parameter
* Don't need Hessian
* Deprecate g-storage
* Update NEWS
* Deprecate alphatry g-storage
  • Loading branch information
anriseth authored and pkofod committed May 21, 2017
1 parent 5eebe50 commit 35d523d
Show file tree
Hide file tree
Showing 14 changed files with 132 additions and 71 deletions.
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# LineSearches v2.1.0 release notes
- Algorithms no longer take a gradient storage parameter. See
[#48](https://github.com/anriseth/LineSearches.jl/pull/48)
- Linesearch algorithms are now callable types. See
[#43](https://github.com/anriseth/LineSearches.jl/pull/43)

# LineSearches v2.0.0 release notes
- Switch storage and evaluation point position
# LineSearches v1.0.0 release notes
Expand Down
14 changes: 7 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@ The code was originally written as part of [Optim](https://github.com/JuliaNLSol
but has now been separated out to its own package.

### Available line search algorithms
* `hagerzhang!` (Taken from the Conjugate Gradient implementation
* `HagerZhang` (Taken from the Conjugate Gradient implementation
by Hager and Zhang, 2006)
* `morethuente!` (From the algorithm in More and Thuente, 1994)
* `backtracking!` (Described in Nocedal and Wright, 2006)
* `strongwolfe!` (Nocedal and Wright)
* `basic!` (Simply takes a given step length, default 1.0)
* `MoreThuente` (From the algorithm in More and Thuente, 1994)
* `BackTracking` (Described in Nocedal and Wright, 2006)
* `StrongWolfe` (Nocedal and Wright)
* `Static` (Simply takes a given step length, default 1.0)

## Example
This example shows how to use `LineSearches` with `Optim`.
Expand All @@ -25,7 +25,7 @@ First, run `Newton` with the default line search algorithm:
using Optim, LineSearches
prob = Optim.UnconstrainedProblems.examples["Rosenbrock"]

algo_hz = Newton(linesearch = hagerzhang!)
algo_hz = Newton(linesearch = HagerZhang())
res_hz = Optim.optimize(prob.f, prob.g!, prob.h!, prob.initial_x, method=algo_hz)
```

Expand All @@ -50,7 +50,7 @@ Results of Optimization Algorithm

Now we can try `Newton` with the cubic backtracking line search:
``` julia
algo_bt3 = Newton(linesearch = bt3!)
algo_bt3 = Newton(linesearch = BackTracking(order=3))
res_bt3 = Optim.optimize(prob.f, prob.g!, prob.h!, prob.initial_x, method=algo_bt3)
```

Expand Down
1 change: 0 additions & 1 deletion src/LineSearches.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ export LineSearchResults, LineSearchException
export clear!, alphatry, alphainit

export BackTracking, HagerZhang, Static, MoreThuente, StrongWolfe


include("types.jl")
include("api.jl")
Expand Down
1 change: 0 additions & 1 deletion src/api.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ function alphatry{T}(alpha::T,
x::Array,
s::Array,
xtmp::Array,
gtmp::Array,
lsr::LineSearchResults,
psi1::Real = convert(T,0.2),
psi2::Real = convert(T,2),
Expand Down
5 changes: 2 additions & 3 deletions src/backtracking.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,15 @@ This is a modification of the algorithm described in Nocedal Wright (2nd ed), Se
maxstep::TF = Inf
end

(ls::BackTracking)(df, x, s, x_scratch, gr_scratch, lsr, alpha, mayterminate) =
_backtracking!(df, x, s, x_scratch, gr_scratch, lsr, alpha, mayterminate,
(ls::BackTracking)(df, x, s, x_scratch, lsr, alpha, mayterminate) =
_backtracking!(df, x, s, x_scratch, lsr, alpha, mayterminate,
ls.c1, ls.rhohi, ls.rholo, ls.iterations, ls.order, ls.maxstep)


function _backtracking!{T}(df,
x::Vector{T},
s::Vector,
x_scratch::Vector,
gr_scratch::Vector,
lsr::LineSearchResults,
alpha::Real = 1.0,
mayterminate::Bool = false,
Expand Down
5 changes: 2 additions & 3 deletions src/basic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,13 @@ with `Static(alpha = 0.3141)` for fixed step-size 0.3141. Default is 1.0.
alpha::T = 1.0
end

(ls::Static)(df, x, s, x_scratch, gr_scratch, lsr, alpha, mayterminate) =
_static!(df, x, s, x_scratch, gr_scratch, lsr, ls.alpha, mayterminate)
(ls::Static)(df, x, s, x_scratch, lsr, alpha, mayterminate) =
_static!(df, x, s, x_scratch, lsr, ls.alpha, mayterminate)

function _static!{T}(df,
x::Vector{T},
s::Vector,
x_scratch::Vector,
gr_scratch::Vector,
lsr::LineSearchResults,
alpha::Real = 1.0,
mayterminate::Bool = false)
Expand Down
88 changes: 85 additions & 3 deletions src/deprecate.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,13 @@ function _deprecate(msg, depvar, lsfun, args...)
return lsfun(args...)
end


_bt3!(args...) = _backtracking!(args...)

_bt2!(df, x, s, x_scratch, gr_scratch, lsr, alpha, mayterminate,
_bt2!(df, x, s, x_scratch, lsr, alpha, mayterminate::Bool,
c1::Real = 1e-4, rhohi::Real = 0.5, rholo::Real = 0.1, iterations::Integer = 1_000) =
_backtracking!(df, x, s, x_scratch, gr_scratch, lsr, alpha, mayterminate,
c1, rhohi, rholo, iterations, 2)
_backtracking!(df, x, s, x_scratch, lsr, alpha, mayterminate,
c1, rhohi, rholo, iterations, 2)


bt3!(args...) = _deprecate(
Expand Down Expand Up @@ -58,3 +59,84 @@ basic!(args...) = _deprecate(
dep_basic, _static!, args...)

# <<<<<<<<<<<< end deprecation warnings for linesearch functions


# <<<< Start deprecation of gradient storage removal

# One-shot warning flags for the gradient-storage deprecation: each `Ref{Bool}`
# records whether the warning has already been emitted for the corresponding
# deprecated entry point, so `_warn_g` warns at most once per function per session.
const dep_g_bt2 = Ref(false)
const dep_g_bt3 = Ref(false)
const dep_g_backtracking = Ref(false)
const dep_g_static = Ref(false)
const dep_g_strongwolfe = Ref(false)
const dep_g_morethuente = Ref(false)
const dep_g_hagerzhang = Ref(false)
const dep_g_alphatry = Ref(false)

# Emit the gradient-storage deprecation warning at most once.
# `depvar` is the per-function `Ref{Bool}` flag (one of the `dep_g_*` constants);
# it is flipped to `true` after the first warning so repeated calls stay silent.
function _warn_g(depvar)
    if !depvar[]  # idiomatic boolean test instead of `== false`
        warn("You no longer have to provide a 'g' (gradient storage) input")
        depvar[] = true
    end
end

# Deprecated method: accepts the removed gradient-storage argument `g`,
# warns once, forwards to the g-free implementation, then copies the
# objective's cached gradient back into `g` for callers that still read it.
function _hagerzhang!(df, x, s, xtmp, g, lsr, c, mayterminate, args...)
    _warn_g(dep_g_hagerzhang)
    stepsize = _hagerzhang!(df, x, s, xtmp, lsr, c, mayterminate, args...)
    copy!(g, df.g)
    return stepsize
end

# Deprecated method: accepts the removed gradient-storage argument `g`,
# warns once, forwards to the g-free implementation, then copies the
# objective's cached gradient back into `g` for callers that still read it.
function _backtracking!(df, x, s, xtmp, g, lsr, c, mayterminate, args...)
    _warn_g(dep_g_backtracking)
    stepsize = _backtracking!(df, x, s, xtmp, lsr, c, mayterminate, args...)
    copy!(g, df.g)
    return stepsize
end

# Deprecated method: accepts the removed gradient-storage argument `g`,
# warns once, forwards to the g-free `_bt2!`, then copies the objective's
# cached gradient back into `g` for callers that still read it.
# NOTE(review): the exact-arity g-method defined below appears to be more
# specific than this varargs method for 8-argument calls — confirm which
# one dispatch actually selects.
function _bt2!(df, x, s, xtmp, g, lsr, c, mayterminate, args...)
    _warn_g(dep_g_bt2)
    stepsize = _bt2!(df, x, s, xtmp, lsr, c, mayterminate, args...)
    copy!(g, df.g)
    return stepsize
end

# Deprecated: old-signature quadratic backtracking entry point that still
# accepts gradient storage `g`; forwards everything (g included) to the
# deprecated `_backtracking!` shim with the order fixed at 2.
function _bt2!(df, x, s, x_scratch, g, lsr, alpha, mayterminate,
               c1::Real = 1e-4, rhohi::Real = 0.5, rholo::Real = 0.1,
               iterations::Integer = 1_000)
    return _backtracking!(df, x, s, x_scratch, g, lsr, alpha, mayterminate,
                          c1, rhohi, rholo, iterations, 2)
end

# Deprecated method: accepts the removed gradient-storage argument `g`,
# warns once, forwards to the g-free `_bt3!`, then copies the objective's
# cached gradient back into `g` for callers that still read it.
function _bt3!(df, x, s, xtmp, g, lsr, c, mayterminate, args...)
    _warn_g(dep_g_bt3)
    stepsize = _bt3!(df, x, s, xtmp, lsr, c, mayterminate, args...)
    copy!(g, df.g)
    return stepsize
end

# Deprecated method: accepts the removed gradient-storage argument `g`,
# warns once, forwards to the g-free implementation, then copies the
# objective's cached gradient back into `g` for callers that still read it.
function _static!(df, x, s, xtmp, g, lsr, c, mayterminate, args...)
    _warn_g(dep_g_static)
    stepsize = _static!(df, x, s, xtmp, lsr, c, mayterminate, args...)
    copy!(g, df.g)
    return stepsize
end

# Deprecated method: accepts the removed gradient-storage argument `g`,
# warns once, forwards to the g-free implementation, then copies the
# objective's cached gradient back into `g` for callers that still read it.
function _strongwolfe!(df, x, s, xtmp, g, lsr, c, mayterminate, args...)
    _warn_g(dep_g_strongwolfe)
    stepsize = _strongwolfe!(df, x, s, xtmp, lsr, c, mayterminate, args...)
    copy!(g, df.g)
    return stepsize
end

# Deprecated method: accepts the removed gradient-storage argument `g`,
# warns once, forwards to the g-free implementation, then copies the
# objective's cached gradient back into `g` for callers that still read it.
function _morethuente!(df, x, s, xtmp, g, lsr, c, mayterminate, args...)
    _warn_g(dep_g_morethuente)
    stepsize = _morethuente!(df, x, s, xtmp, lsr, c, mayterminate, args...)
    copy!(g, df.g)
    return stepsize
end

# Deprecated method: accepts the removed gradient-storage argument `g` and
# forwards to the g-free `alphatry`. The storage is not written back here —
# the deprecation warning is the only extra effect.
# Fix: removed the stray `warn("Alphatry")` debug statement, which printed
# an uninformative warning on every call in addition to the one-shot
# deprecation message from `_warn_g`.
function alphatry{T}(alpha::T, df, x::Array, s::Array, xtmp::Array, g::Array,
                     lsr::LineSearchResults, args...)
    _warn_g(dep_g_alphatry)
    return alphatry(alpha, df, x, s, xtmp, lsr, args...)
end

# >>>> End deprecation of gradient storage removal
41 changes: 15 additions & 26 deletions src/hagerzhang.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
# phi = (galpha, alpha) -> linefunc(galpha, alpha, func, x, d, xtmp, g)

# Display flags are represented as a bitfield
# (not exported, but can use via OptimizeMod.ITER, for example)
const one64 = convert(UInt64, 1)
Expand Down Expand Up @@ -84,7 +82,6 @@ function _hagerzhang!{T}(df,
x::Array{T},
s::Array,
xtmp::Array,
g::Array,
lsr::LineSearchResults{T},
c::Real,
mayterminate::Bool,
Expand All @@ -98,9 +95,6 @@ function _hagerzhang!{T}(df,
psi3::Real = convert(T,0.1),
display::Integer = 0,
iterfinitemax::Integer = ceil(Integer, -log2(eps(T))) )
# TODO: do we need g anymore?
# Any call to gradient! or value_gradient! would update df.g anyway

if display & LINESEARCH > 0
println("New linesearch")
end
Expand All @@ -111,14 +105,14 @@ function _hagerzhang!{T}(df,
philim = phi0 + epsilon * abs(phi0)
@assert c > 0
@assert isfinite(c) && c <= alphamax
phic, dphic = linefunc!(df, x, s, c, xtmp, g, true)
phic, dphic = linefunc!(df, x, s, c, xtmp, true)
iterfinite = 1
while !(isfinite(phic) && isfinite(dphic)) && iterfinite < iterfinitemax
mayterminate = false
lsr.nfailures += 1
iterfinite += 1
c *= psi3
phic, dphic = linefunc!(df, x, s, c, xtmp, g, true)
phic, dphic = linefunc!(df, x, s, c, xtmp, true)
end
if !(isfinite(phic) && isfinite(dphic))
println("Warning: failed to achieve finite new evaluation point, using alpha=0")
Expand Down Expand Up @@ -185,7 +179,7 @@ function _hagerzhang!{T}(df,
return cold
end
end
phic, dphic = linefunc!(df, x, s, c, xtmp, g, true)
phic, dphic = linefunc!(df, x, s, c, xtmp, true)
iterfinite = 1
while !(isfinite(phic) && isfinite(dphic)) && c > nextfloat(cold) && iterfinite < iterfinitemax
alphamax = c
Expand All @@ -195,7 +189,7 @@ function _hagerzhang!{T}(df,
println("bracket: non-finite value, bisection")
end
c = (cold + c) / 2
phic, dphic = linefunc!(df, x, s, c, xtmp, g, true)
phic, dphic = linefunc!(df, x, s, c, xtmp, true)
end
if !(isfinite(phic) && isfinite(dphic))
return cold
Expand Down Expand Up @@ -234,7 +228,7 @@ function _hagerzhang!{T}(df,
if b - a <= eps(b)
return a # lsr.value[ia]
end
iswolfe, iA, iB = secant2!(df, x, s, xtmp, g, lsr, ia, ib, philim, delta, sigma, display)
iswolfe, iA, iB = secant2!(df, x, s, xtmp, lsr, ia, ib, philim, delta, sigma, display)
if iswolfe
return lsr.alpha[iA] # lsr.value[iA]
end
Expand All @@ -261,11 +255,11 @@ function _hagerzhang!{T}(df,
end
c = (A + B) / convert(T, 2)
# phic = phi(gphi, c) # TODO: Replace
phic, dphic = linefunc!(df, x, s, c, xtmp, g, true)
phic, dphic = linefunc!(df, x, s, c, xtmp, true)
@assert isfinite(phic) && isfinite(dphic)
push!(lsr, c, phic, dphic)
# ia, ib = update(phi, lsr, iA, iB, length(lsr), philim) # TODO: Pass options
ia, ib = update!(df, x, s, xtmp, g, lsr, iA, iB, length(lsr), philim, display)
ia, ib = update!(df, x, s, xtmp, lsr, iA, iB, length(lsr), philim, display)
end
iter += 1
end
Expand Down Expand Up @@ -304,7 +298,6 @@ function secant2!{T}(df,
x::Array,
s::Array,
xtmp::Array,
g::Array,
lsr::LineSearchResults{T},
ia::Integer,
ib::Integer,
Expand All @@ -329,7 +322,7 @@ function secant2!{T}(df,
end
@assert isfinite(c)
# phic = phi(tmpc, c) # Replace
phic, dphic = linefunc!(df, x, s, c, xtmp, g, true)
phic, dphic = linefunc!(df, x, s, c, xtmp, true)
@assert isfinite(phic) && isfinite(dphic)
push!(lsr, c, phic, dphic)
ic = length(lsr)
Expand All @@ -340,7 +333,7 @@ function secant2!{T}(df,
return true, ic, ic
end
# iA, iB = update(phi, lsr, ia, ib, ic, philim)
iA, iB = update!(df, x, s, xtmp, g, lsr, ia, ib, ic, philim, display)
iA, iB = update!(df, x, s, xtmp, lsr, ia, ib, ic, philim, display)
if display & SECANT2 > 0
println("secant2: iA = ", iA, ", iB = ", iB, ", ic = ", ic)
end
Expand All @@ -359,7 +352,7 @@ function secant2!{T}(df,
println("secant2: second c = ", c)
end
# phic = phi(tmpc, c) # TODO: Replace
phic, dphic = linefunc!(df, x, s, c, xtmp, g, true)
phic, dphic = linefunc!(df, x, s, c, xtmp, true)
@assert isfinite(phic) && isfinite(dphic)
push!(lsr, c, phic, dphic)
ic = length(lsr)
Expand All @@ -370,7 +363,7 @@ function secant2!{T}(df,
end
return true, ic, ic
end
iA, iB = update!(df, x, s, xtmp, g, lsr, iA, iB, ic, philim, display)
iA, iB = update!(df, x, s, xtmp, lsr, iA, iB, ic, philim, display)
end
if display & SECANT2 > 0
println("secant2 output: a = ", lsr.alpha[iA], ", b = ", lsr.alpha[iB])
Expand All @@ -386,7 +379,6 @@ function update!(df,
x::Array,
s::Array,
xtmp::Array,
g::Array,
lsr::LineSearchResults,
ia::Integer,
ib::Integer,
Expand Down Expand Up @@ -428,15 +420,14 @@ function update!(df,
end
# phic is bigger than phi0, which implies that the minimum
# lies between a and c. Find it via bisection.
return bisect!(df, x, s, xtmp, g, lsr, ia, ic, philim, display)
return bisect!(df, x, s, xtmp, lsr, ia, ic, philim, display)
end

# HZ, stage U3 (with theta=0.5)
function bisect!{T}(df,
x::Array,
s::Array,
xtmp::Array,
g::Array,
lsr::LineSearchResults{T},
ia::Integer,
ib::Integer,
Expand All @@ -456,7 +447,7 @@ function bisect!{T}(df,
println("bisect: a = ", a, ", b = ", b, ", b - a = ", b - a)
end
d = (a + b) / convert(T, 2)
phid, gphi = linefunc!(df, x, s, d, xtmp, g, true)
phid, gphi = linefunc!(df, x, s, d, xtmp, true)
@assert isfinite(phid) && isfinite(gphi)
push!(lsr, d, phid, gphi)
id = length(lsr)
Expand All @@ -480,17 +471,15 @@ function linefunc!(df,
s::Array,
alpha::Real,
xtmp::Array,
g::Array,
calc_grad::Bool)
for i = 1:length(x)
xtmp[i] = x[i] + alpha * s[i]
end
gphi = convert(eltype(g), NaN)
gphi = convert(eltype(s), NaN)
if calc_grad
val = NLSolversBase.value_gradient!(df,xtmp)
g[:] = gradient(df)
if isfinite(val)
gphi = vecdot(g, s)
gphi = vecdot(NLSolversBase.gradient(df), s)
end
else
val = value!(df,xtmp)
Expand Down
Loading

0 comments on commit 35d523d

Please sign in to comment.