Halve step in MoreThuente if initial step generates nonfinite values (#…

…73) * Test for finite values in MoreThuente
JuliaNLSolvers · Nov 22, 2017 · 5cd79f4 · 5cd79f4
1 parent 56155bd
commit 5cd79f4
Show file tree

Hide file tree

Showing 2 changed files with 29 additions and 19 deletions.
diff --git a/src/backtracking.jl b/src/backtracking.jl
@@ -53,8 +53,8 @@ function _backtracking!(df,
     n = length(x)
 
     # read f_x and slope from LineSearchResults
-    f_x = lsr.value[end]
-    gxp = lsr.slope[end]
+    @inbounds f_x = lsr.value[end]
+    @inbounds gxp = lsr.slope[end]
 
     # Tentatively move a distance of alpha in the direction of s
     x_scratch .= x .+ alpha.*s
@@ -99,19 +99,19 @@ function _backtracking!(df,
             alphatmp = - (gxp * alpha^2) / ( 2.0 * (f_x_scratch - f_x - gxp*alpha) )
         else
             # Backtracking via cubic interpolation
-            alpha0 = lsr.alpha[end-1]
-            alpha1 = lsr.alpha[end]
-            phi0 = lsr.value[end-1]
-            phi1 = lsr.value[end]
+            @inbounds alpha0 = lsr.alpha[end-1]
+            @inbounds alpha1 = lsr.alpha[end]
+            @inbounds phi0 = lsr.value[end-1]
+            @inbounds phi1 = lsr.value[end]
 
-            div = 1.0/(alpha0^2*alpha1^2*(alpha1-alpha0))
+            div = one(alpha) / (alpha0^2 * alpha1^2 * (alpha1 - alpha0))
             a = (alpha0^2*(phi1-f_x-gxp*alpha1)-alpha1^2*(phi0-f_x-gxp*alpha0))*div
             b = (-alpha0^3*(phi1-f_x-gxp*alpha1)+alpha1^3*(phi0-f_x-gxp*alpha0))*div
 
-            if isapprox(a,0)
+            if isapprox(a, zero(a))
                 alphatmp = gxp / (2.0*b)
             else
-                discr = max(b^2-3*a*gxp, 0.)
+                discr = max(b^2-3*a*gxp, zero(alpha))
                 alphatmp = (-b + sqrt(discr)) / (3.0*a)
             end
         end

diff --git a/src/morethuente.jl b/src/morethuente.jl
@@ -134,8 +134,8 @@
 
 
 @with_kw struct MoreThuente{T}
-    f_tol::T = 1e-4
-    gtol::T = 0.9
+    f_tol::T = 1e-4 # c_1 Wolfe sufficient decrease condition
+    gtol::T = 0.9   # c_2 Wolfe curvature condition
     x_tol::T = 1e-8
     stpmin::T = 1e-16
     stpmax::T = 65536.0
@@ -160,10 +160,11 @@ function _morethuente!(df,
                       x_tol::Real = 1e-8,
                       stpmin::Real = 1e-16,
                       stpmax::Real = 65536.0,
-                      maxfev::Integer = 100) where T
+                       maxfev::Integer = 100) where T
     if norm(s) == 0
         Base.error("Step direction is zero.")
     end
+    iterfinitemax = -log2(eps(T))
     info = 0
     info_cstep = 1 # Info from step
 
@@ -251,18 +252,27 @@ function _morethuente!(df,
         # Evaluate the function and gradient at stp
         # and compute the directional derivative.
         #
+        @. x_new = x + stp*s
 
-        for i in 1:n
-            x_new[i] = x[i] + stp * s[i]
+        f = NLSolversBase.value_gradient!(df, x_new)
+        gdf = NLSolversBase.gradient(df)
+
+        # Ensure that the step provides finite function values
+        # This is not part of the original FORTRAN code
+        iterfinite = 0
+        while (!isfinite(f) || any(.!isfinite.(gdf))) && iterfinite < iterfinitemax
+            stp = 0.5*stp
+            @. x_new = x + stp*s
+            f = NLSolversBase.value_gradient!(df, x_new)
+            gdf = NLSolversBase.gradient(df)
         end
 
-        f = NLSolversBase.value_gradient!(df, x_new)
-        if isapprox(norm(NLSolversBase.gradient(df)), 0) # TODO: this should be tested vs Optim's gtol
+        if isapprox(norm(gdf), 0.0) # TODO: this should be tested vs Optim's g_tol
             return stp
         end
 
         nfev += 1 # This includes calls to f() and g!()
-        dg = vecdot(NLSolversBase.gradient(df), s)
+        dg = vecdot(gdf, s)
         push!(lsr, stp, f, dg)
         ftest1 = finit + stp * dgtest
 
@@ -392,7 +402,7 @@ end # function
 #
 # subroutine cstep(stx, fx, dgx,
 #                  sty, fy, dgy,
-#                  stp, f, dp,
+#                  stp, f, dg,
 #                  bracketed, stpmin, stpmax, info)
 #
 # where
@@ -408,7 +418,7 @@ end # function
 #   the interval of uncertainty. On output these parameters are
 #   updated appropriately
 #
-# stp, f, and dp are variables which specify the step,
+# stp, f, and dg are variables which specify the step,
 #   the function, and the derivative at the current step.
 #   If bracketed is set true then on input stp must be
 #   between stx and sty. On output stp is set to the new step