Skip to content

Commit

Permalink
Remove gradient storage (#48)
Browse files Browse the repository at this point in the history
* Remove gradient storage parameter
* Don't need Hessian
* Deprecate g-storage
* Update NEWS
* Deprecate alphatry g-storage
  • Loading branch information
anriseth authored and pkofod committed May 21, 2017
1 parent 5eebe50 commit 35d523d
Show file tree
Hide file tree
Showing 14 changed files with 132 additions and 71 deletions.
6 changes: 6 additions & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# LineSearches v2.1.0 release notes
- Algorithms no longer take a gradient storage parameter. See
[#48](https://github.com/anriseth/LineSearches.jl/pull/48)
- Linesearch algorithms are now callable types. See
[#43](https://github.com/anriseth/LineSearches.jl/pull/43)

# LineSearches v2.0.0 release notes
- Switch storage and evaluation point position
# LineSearches v1.0.0 release notes
Expand Down
14 changes: 7 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@ The code was originally written as part of [Optim](https://github.com/JuliaNLSol
but has now been separated out to its own package.

### Available line search algorithms
* `hagerzhang!` (Taken from the Conjugate Gradient implementation
* `HagerZhang` (Taken from the Conjugate Gradient implementation
by Hager and Zhang, 2006)
* `morethuente!` (From the algorithm in More and Thuente, 1994)
* `backtracking!` (Described in Nocedal and Wright, 2006)
* `strongwolfe!` (Nocedal and Wright)
* `basic!` (Simply takes a given step length, default 1.0)
* `MoreThuente` (From the algorithm in More and Thuente, 1994)
* `BackTracking` (Described in Nocedal and Wright, 2006)
* `StrongWolfe` (Nocedal and Wright)
* `Static` (Simply takes a given step length, default 1.0)

## Example
This example shows how to use `LineSearches` with `Optim`.
Expand All @@ -25,7 +25,7 @@ First, run `Newton` with the default line search algorithm:
using Optim, LineSearches
prob = Optim.UnconstrainedProblems.examples["Rosenbrock"]

algo_hz = Newton(linesearch = hagerzhang!)
algo_hz = Newton(linesearch = HagerZhang())
res_hz = Optim.optimize(prob.f, prob.g!, prob.h!, prob.initial_x, method=algo_hz)
```

Expand All @@ -50,7 +50,7 @@ Results of Optimization Algorithm

Now we can try `Newton` with the cubic backtracking line search:
``` julia
algo_bt3 = Newton(linesearch = bt3!)
algo_bt3 = Newton(linesearch = BackTracking(order=3))
res_bt3 = Optim.optimize(prob.f, prob.g!, prob.h!, prob.initial_x, method=algo_bt3)
```

Expand Down
1 change: 0 additions & 1 deletion src/LineSearches.jl
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@ export LineSearchResults, LineSearchException
export clear!, alphatry, alphainit

export BackTracking, HagerZhang, Static, MoreThuente, StrongWolfe


include("types.jl")
include("api.jl")
Expand Down
1 change: 0 additions & 1 deletion src/api.jl
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@ function alphatry{T}(alpha::T,
x::Array,
s::Array,
xtmp::Array,
gtmp::Array,
lsr::LineSearchResults,
psi1::Real = convert(T,0.2),
psi2::Real = convert(T,2),
Expand Down
5 changes: 2 additions & 3 deletions src/backtracking.jl
Original file line number Diff line number Diff line change
Expand Up @@ -17,16 +17,15 @@ This is a modification of the algorithm described in Nocedal Wright (2nd ed), Se
maxstep::TF = Inf
end

(ls::BackTracking)(df, x, s, x_scratch, gr_scratch, lsr, alpha, mayterminate) =
_backtracking!(df, x, s, x_scratch, gr_scratch, lsr, alpha, mayterminate,
(ls::BackTracking)(df, x, s, x_scratch, lsr, alpha, mayterminate) =
_backtracking!(df, x, s, x_scratch, lsr, alpha, mayterminate,
ls.c1, ls.rhohi, ls.rholo, ls.iterations, ls.order, ls.maxstep)


function _backtracking!{T}(df,
x::Vector{T},
s::Vector,
x_scratch::Vector,
gr_scratch::Vector,
lsr::LineSearchResults,
alpha::Real = 1.0,
mayterminate::Bool = false,
Expand Down
5 changes: 2 additions & 3 deletions src/basic.jl
Original file line number Diff line number Diff line change
Expand Up @@ -10,14 +10,13 @@ with `Static(alpha = 0.3141)` for fixed step-size 0.3141. Default is 1.0.
alpha::T = 1.0
end

(ls::Static)(df, x, s, x_scratch, gr_scratch, lsr, alpha, mayterminate) =
_static!(df, x, s, x_scratch, gr_scratch, lsr, ls.alpha, mayterminate)
(ls::Static)(df, x, s, x_scratch, lsr, alpha, mayterminate) =
_static!(df, x, s, x_scratch, lsr, ls.alpha, mayterminate)

function _static!{T}(df,
x::Vector{T},
s::Vector,
x_scratch::Vector,
gr_scratch::Vector,
lsr::LineSearchResults,
alpha::Real = 1.0,
mayterminate::Bool = false)
Expand Down
88 changes: 85 additions & 3 deletions src/deprecate.jl
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,13 @@ function _deprecate(msg, depvar, lsfun, args...)
return lsfun(args...)
end


_bt3!(args...) = _backtracking!(args...)

_bt2!(df, x, s, x_scratch, gr_scratch, lsr, alpha, mayterminate,
_bt2!(df, x, s, x_scratch, lsr, alpha, mayterminate::Bool,
c1::Real = 1e-4, rhohi::Real = 0.5, rholo::Real = 0.1, iterations::Integer = 1_000) =
_backtracking!(df, x, s, x_scratch, gr_scratch, lsr, alpha, mayterminate,
c1, rhohi, rholo, iterations, 2)
_backtracking!(df, x, s, x_scratch, lsr, alpha, mayterminate,
c1, rhohi, rholo, iterations, 2)


bt3!(args...) = _deprecate(
Expand Down Expand Up @@ -58,3 +59,84 @@ basic!(args...) = _deprecate(
dep_basic, _static!, args...)

# <<<<<<<<<<<< end deprecation warnings for linesearch functions


# <<<< Start deprecation of gradient storage removal

# One-shot warning flags for the gradient-storage deprecation: each `Ref{Bool}`
# records whether the warning has already been emitted for the corresponding
# deprecated entry point, so `_warn_g` warns at most once per function per session.
const dep_g_bt2 = Ref(false)
const dep_g_bt3 = Ref(false)
const dep_g_backtracking = Ref(false)
const dep_g_static = Ref(false)
const dep_g_strongwolfe = Ref(false)
const dep_g_morethuente = Ref(false)
const dep_g_hagerzhang = Ref(false)
const dep_g_alphatry = Ref(false)

# Emit the gradient-storage deprecation warning at most once.
# `depvar` is the per-function `Ref{Bool}` flag (one of the `dep_g_*` constants);
# it is flipped to `true` after the first warning so repeated calls stay silent.
function _warn_g(depvar)
    if !depvar[]  # idiomatic boolean test instead of `== false`
        warn("You no longer have to provide a 'g' (gradient storage) input")
        depvar[] = true
    end
end

# Deprecated method: accepts the removed gradient-storage argument `g`,
# warns once, forwards to the g-free implementation, then copies the
# objective's cached gradient back into `g` for callers that still read it.
function _hagerzhang!(df, x, s, xtmp, g, lsr, c, mayterminate, args...)
    _warn_g(dep_g_hagerzhang)
    stepsize = _hagerzhang!(df, x, s, xtmp, lsr, c, mayterminate, args...)
    copy!(g, df.g)
    return stepsize
end

# Deprecated method: accepts the removed gradient-storage argument `g`,
# warns once, forwards to the g-free implementation, then copies the
# objective's cached gradient back into `g` for callers that still read it.
function _backtracking!(df, x, s, xtmp, g, lsr, c, mayterminate, args...)
    _warn_g(dep_g_backtracking)
    stepsize = _backtracking!(df, x, s, xtmp, lsr, c, mayterminate, args...)
    copy!(g, df.g)
    return stepsize
end

# Deprecated method: accepts the removed gradient-storage argument `g`,
# warns once, forwards to the g-free `_bt2!`, then copies the objective's
# cached gradient back into `g` for callers that still read it.
# NOTE(review): the exact-arity g-method defined below appears to be more
# specific than this varargs method for 8-argument calls — confirm which
# one dispatch actually selects.
function _bt2!(df, x, s, xtmp, g, lsr, c, mayterminate, args...)
    _warn_g(dep_g_bt2)
    stepsize = _bt2!(df, x, s, xtmp, lsr, c, mayterminate, args...)
    copy!(g, df.g)
    return stepsize
end

# Deprecated: old-signature quadratic backtracking entry point that still
# accepts gradient storage `g`; forwards everything (g included) to the
# deprecated `_backtracking!` shim with the order fixed at 2.
function _bt2!(df, x, s, x_scratch, g, lsr, alpha, mayterminate,
               c1::Real = 1e-4, rhohi::Real = 0.5, rholo::Real = 0.1,
               iterations::Integer = 1_000)
    return _backtracking!(df, x, s, x_scratch, g, lsr, alpha, mayterminate,
                          c1, rhohi, rholo, iterations, 2)
end

# Deprecated method: accepts the removed gradient-storage argument `g`,
# warns once, forwards to the g-free `_bt3!`, then copies the objective's
# cached gradient back into `g` for callers that still read it.
function _bt3!(df, x, s, xtmp, g, lsr, c, mayterminate, args...)
    _warn_g(dep_g_bt3)
    stepsize = _bt3!(df, x, s, xtmp, lsr, c, mayterminate, args...)
    copy!(g, df.g)
    return stepsize
end

# Deprecated method: accepts the removed gradient-storage argument `g`,
# warns once, forwards to the g-free implementation, then copies the
# objective's cached gradient back into `g` for callers that still read it.
function _static!(df, x, s, xtmp, g, lsr, c, mayterminate, args...)
    _warn_g(dep_g_static)
    stepsize = _static!(df, x, s, xtmp, lsr, c, mayterminate, args...)
    copy!(g, df.g)
    return stepsize
end

# Deprecated method: accepts the removed gradient-storage argument `g`,
# warns once, forwards to the g-free implementation, then copies the
# objective's cached gradient back into `g` for callers that still read it.
function _strongwolfe!(df, x, s, xtmp, g, lsr, c, mayterminate, args...)
    _warn_g(dep_g_strongwolfe)
    stepsize = _strongwolfe!(df, x, s, xtmp, lsr, c, mayterminate, args...)
    copy!(g, df.g)
    return stepsize
end

# Deprecated method: accepts the removed gradient-storage argument `g`,
# warns once, forwards to the g-free implementation, then copies the
# objective's cached gradient back into `g` for callers that still read it.
function _morethuente!(df, x, s, xtmp, g, lsr, c, mayterminate, args...)
    _warn_g(dep_g_morethuente)
    stepsize = _morethuente!(df, x, s, xtmp, lsr, c, mayterminate, args...)
    copy!(g, df.g)
    return stepsize
end

# Deprecated method: accepts the removed gradient-storage argument `g` and
# forwards to the g-free `alphatry`. The storage is not written back here —
# the deprecation warning is the only extra effect.
# Fix: removed the stray `warn("Alphatry")` debug statement, which printed
# an uninformative warning on every call in addition to the one-shot
# deprecation message from `_warn_g`.
function alphatry{T}(alpha::T, df, x::Array, s::Array, xtmp::Array, g::Array,
                     lsr::LineSearchResults, args...)
    _warn_g(dep_g_alphatry)
    return alphatry(alpha, df, x, s, xtmp, lsr, args...)
end

# >>>> End deprecation of gradient storage removal
41 changes: 15 additions & 26 deletions src/hagerzhang.jl
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
# phi = (galpha, alpha) -> linefunc(galpha, alpha, func, x, d, xtmp, g)

# Display flags are represented as a bitfield
# (not exported, but can use via OptimizeMod.ITER, for example)
const one64 = convert(UInt64, 1)
Expand Down Expand Up @@ -84,7 +82,6 @@ function _hagerzhang!{T}(df,
x::Array{T},
s::Array,
xtmp::Array,
g::Array,
lsr::LineSearchResults{T},
c::Real,
mayterminate::Bool,
Expand All @@ -98,9 +95,6 @@ function _hagerzhang!{T}(df,
psi3::Real = convert(T,0.1),
display::Integer = 0,
iterfinitemax::Integer = ceil(Integer, -log2(eps(T))) )
# TODO: do we need g anymore?
# Any call to gradient! or value_gradient! would update df.g anyway

if display & LINESEARCH > 0
println("New linesearch")
end
Expand All @@ -111,14 +105,14 @@ function _hagerzhang!{T}(df,
philim = phi0 + epsilon * abs(phi0)
@assert c > 0
@assert isfinite(c) && c <= alphamax
phic, dphic = linefunc!(df, x, s, c, xtmp, g, true)
phic, dphic = linefunc!(df, x, s, c, xtmp, true)
iterfinite = 1
while !(isfinite(phic) && isfinite(dphic)) && iterfinite < iterfinitemax
mayterminate = false
lsr.nfailures += 1
iterfinite += 1
c *= psi3
phic, dphic = linefunc!(df, x, s, c, xtmp, g, true)
phic, dphic = linefunc!(df, x, s, c, xtmp, true)
end
if !(isfinite(phic) && isfinite(dphic))
println("Warning: failed to achieve finite new evaluation point, using alpha=0")
Expand Down Expand Up @@ -185,7 +179,7 @@ function _hagerzhang!{T}(df,
return cold
end
end
phic, dphic = linefunc!(df, x, s, c, xtmp, g, true)
phic, dphic = linefunc!(df, x, s, c, xtmp, true)
iterfinite = 1
while !(isfinite(phic) && isfinite(dphic)) && c > nextfloat(cold) && iterfinite < iterfinitemax
alphamax = c
Expand All @@ -195,7 +189,7 @@ function _hagerzhang!{T}(df,
println("bracket: non-finite value, bisection")
end
c = (cold + c) / 2
phic, dphic = linefunc!(df, x, s, c, xtmp, g, true)
phic, dphic = linefunc!(df, x, s, c, xtmp, true)
end
if !(isfinite(phic) && isfinite(dphic))
return cold
Expand Down Expand Up @@ -234,7 +228,7 @@ function _hagerzhang!{T}(df,
if b - a <= eps(b)
return a # lsr.value[ia]
end
iswolfe, iA, iB = secant2!(df, x, s, xtmp, g, lsr, ia, ib, philim, delta, sigma, display)
iswolfe, iA, iB = secant2!(df, x, s, xtmp, lsr, ia, ib, philim, delta, sigma, display)
if iswolfe
return lsr.alpha[iA] # lsr.value[iA]
end
Expand All @@ -261,11 +255,11 @@ function _hagerzhang!{T}(df,
end
c = (A + B) / convert(T, 2)
# phic = phi(gphi, c) # TODO: Replace
phic, dphic = linefunc!(df, x, s, c, xtmp, g, true)
phic, dphic = linefunc!(df, x, s, c, xtmp, true)
@assert isfinite(phic) && isfinite(dphic)
push!(lsr, c, phic, dphic)
# ia, ib = update(phi, lsr, iA, iB, length(lsr), philim) # TODO: Pass options
ia, ib = update!(df, x, s, xtmp, g, lsr, iA, iB, length(lsr), philim, display)
ia, ib = update!(df, x, s, xtmp, lsr, iA, iB, length(lsr), philim, display)
end
iter += 1
end
Expand Down Expand Up @@ -304,7 +298,6 @@ function secant2!{T}(df,
x::Array,
s::Array,
xtmp::Array,
g::Array,
lsr::LineSearchResults{T},
ia::Integer,
ib::Integer,
Expand All @@ -329,7 +322,7 @@ function secant2!{T}(df,
end
@assert isfinite(c)
# phic = phi(tmpc, c) # Replace
phic, dphic = linefunc!(df, x, s, c, xtmp, g, true)
phic, dphic = linefunc!(df, x, s, c, xtmp, true)
@assert isfinite(phic) && isfinite(dphic)
push!(lsr, c, phic, dphic)
ic = length(lsr)
Expand All @@ -340,7 +333,7 @@ function secant2!{T}(df,
return true, ic, ic
end
# iA, iB = update(phi, lsr, ia, ib, ic, philim)
iA, iB = update!(df, x, s, xtmp, g, lsr, ia, ib, ic, philim, display)
iA, iB = update!(df, x, s, xtmp, lsr, ia, ib, ic, philim, display)
if display & SECANT2 > 0
println("secant2: iA = ", iA, ", iB = ", iB, ", ic = ", ic)
end
Expand All @@ -359,7 +352,7 @@ function secant2!{T}(df,
println("secant2: second c = ", c)
end
# phic = phi(tmpc, c) # TODO: Replace
phic, dphic = linefunc!(df, x, s, c, xtmp, g, true)
phic, dphic = linefunc!(df, x, s, c, xtmp, true)
@assert isfinite(phic) && isfinite(dphic)
push!(lsr, c, phic, dphic)
ic = length(lsr)
Expand All @@ -370,7 +363,7 @@ function secant2!{T}(df,
end
return true, ic, ic
end
iA, iB = update!(df, x, s, xtmp, g, lsr, iA, iB, ic, philim, display)
iA, iB = update!(df, x, s, xtmp, lsr, iA, iB, ic, philim, display)
end
if display & SECANT2 > 0
println("secant2 output: a = ", lsr.alpha[iA], ", b = ", lsr.alpha[iB])
Expand All @@ -386,7 +379,6 @@ function update!(df,
x::Array,
s::Array,
xtmp::Array,
g::Array,
lsr::LineSearchResults,
ia::Integer,
ib::Integer,
Expand Down Expand Up @@ -428,15 +420,14 @@ function update!(df,
end
# phic is bigger than phi0, which implies that the minimum
# lies between a and c. Find it via bisection.
return bisect!(df, x, s, xtmp, g, lsr, ia, ic, philim, display)
return bisect!(df, x, s, xtmp, lsr, ia, ic, philim, display)
end

# HZ, stage U3 (with theta=0.5)
function bisect!{T}(df,
x::Array,
s::Array,
xtmp::Array,
g::Array,
lsr::LineSearchResults{T},
ia::Integer,
ib::Integer,
Expand All @@ -456,7 +447,7 @@ function bisect!{T}(df,
println("bisect: a = ", a, ", b = ", b, ", b - a = ", b - a)
end
d = (a + b) / convert(T, 2)
phid, gphi = linefunc!(df, x, s, d, xtmp, g, true)
phid, gphi = linefunc!(df, x, s, d, xtmp, true)
@assert isfinite(phid) && isfinite(gphi)
push!(lsr, d, phid, gphi)
id = length(lsr)
Expand All @@ -480,17 +471,15 @@ function linefunc!(df,
s::Array,
alpha::Real,
xtmp::Array,
g::Array,
calc_grad::Bool)
for i = 1:length(x)
xtmp[i] = x[i] + alpha * s[i]
end
gphi = convert(eltype(g), NaN)
gphi = convert(eltype(s), NaN)
if calc_grad
val = NLSolversBase.value_gradient!(df,xtmp)
g[:] = gradient(df)
if isfinite(val)
gphi = vecdot(g, s)
gphi = vecdot(NLSolversBase.gradient(df), s)
end
else
val = value!(df,xtmp)
Expand Down
Loading

0 comments on commit 35d523d

Please sign in to comment.