Skip to content

Commit

Permalink
Halve step in MoreThuente if initial step generates nonfinite values (#…
Browse files Browse the repository at this point in the history
…73)

* Test for finite values in MoreThuente
  • Loading branch information
anriseth authored Nov 22, 2017
1 parent 56155bd commit 5cd79f4
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 19 deletions.
18 changes: 9 additions & 9 deletions src/backtracking.jl
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,8 @@ function _backtracking!(df,
n = length(x)

# read f_x and slope from LineSearchResults
f_x = lsr.value[end]
gxp = lsr.slope[end]
@inbounds f_x = lsr.value[end]
@inbounds gxp = lsr.slope[end]

# Tentatively move a distance of alpha in the direction of s
x_scratch .= x .+ alpha.*s
Expand Down Expand Up @@ -99,19 +99,19 @@ function _backtracking!(df,
alphatmp = - (gxp * alpha^2) / ( 2.0 * (f_x_scratch - f_x - gxp*alpha) )
else
# Backtracking via cubic interpolation
alpha0 = lsr.alpha[end-1]
alpha1 = lsr.alpha[end]
phi0 = lsr.value[end-1]
phi1 = lsr.value[end]
@inbounds alpha0 = lsr.alpha[end-1]
@inbounds alpha1 = lsr.alpha[end]
@inbounds phi0 = lsr.value[end-1]
@inbounds phi1 = lsr.value[end]

div = 1.0/(alpha0^2*alpha1^2*(alpha1-alpha0))
div = one(alpha) / (alpha0^2 * alpha1^2 * (alpha1 - alpha0))
a = (alpha0^2*(phi1-f_x-gxp*alpha1)-alpha1^2*(phi0-f_x-gxp*alpha0))*div
b = (-alpha0^3*(phi1-f_x-gxp*alpha1)+alpha1^3*(phi0-f_x-gxp*alpha0))*div

if isapprox(a,0)
if isapprox(a, zero(a))
alphatmp = gxp / (2.0*b)
else
discr = max(b^2-3*a*gxp, 0.)
discr = max(b^2-3*a*gxp, zero(alpha))
alphatmp = (-b + sqrt(discr)) / (3.0*a)
end
end
Expand Down
30 changes: 20 additions & 10 deletions src/morethuente.jl
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,8 @@


@with_kw struct MoreThuente{T}
f_tol::T = 1e-4
gtol::T = 0.9
f_tol::T = 1e-4 # c_1 Wolfe sufficient decrease condition
gtol::T = 0.9 # c_2 Wolfe curvature condition
x_tol::T = 1e-8
stpmin::T = 1e-16
stpmax::T = 65536.0
Expand All @@ -160,10 +160,11 @@ function _morethuente!(df,
x_tol::Real = 1e-8,
stpmin::Real = 1e-16,
stpmax::Real = 65536.0,
maxfev::Integer = 100) where T
maxfev::Integer = 100) where T
if norm(s) == 0
Base.error("Step direction is zero.")
end
iterfinitemax = -log2(eps(T))
info = 0
info_cstep = 1 # Info from step

Expand Down Expand Up @@ -251,18 +252,27 @@ function _morethuente!(df,
# Evaluate the function and gradient at stp
# and compute the directional derivative.
#
@. x_new = x + stp*s

for i in 1:n
x_new[i] = x[i] + stp * s[i]
f = NLSolversBase.value_gradient!(df, x_new)
gdf = NLSolversBase.gradient(df)

# Ensure that the step provides finite function values
# This is not part of the original FORTRAN code
iterfinite = 0
while (!isfinite(f) || any(.!isfinite.(gdf))) && iterfinite < iterfinitemax
stp = 0.5*stp
@. x_new = x + stp*s
f = NLSolversBase.value_gradient!(df, x_new)
gdf = NLSolversBase.gradient(df)
end

f = NLSolversBase.value_gradient!(df, x_new)
if isapprox(norm(NLSolversBase.gradient(df)), 0) # TODO: this should be tested vs Optim's gtol
if isapprox(norm(gdf), 0.0) # TODO: this should be tested vs Optim's g_tol
return stp
end

nfev += 1 # This includes calls to f() and g!()
dg = vecdot(NLSolversBase.gradient(df), s)
dg = vecdot(gdf, s)
push!(lsr, stp, f, dg)
ftest1 = finit + stp * dgtest

Expand Down Expand Up @@ -392,7 +402,7 @@ end # function
#
# subroutine cstep(stx, fx, dgx,
# sty, fy, dgy,
# stp, f, dp,
# stp, f, dg,
# bracketed, stpmin, stpmax, info)
#
# where
Expand All @@ -408,7 +418,7 @@ end # function
# the interval of uncertainty. On output these parameters are
# updated appropriately
#
# stp, f, and dp are variables which specify the step,
# stp, f, and dg are variables which specify the step,
# the function, and the derivative at the current step.
# If bracketed is set true then on input stp must be
# between stx and sty. On output stp is set to the new step
Expand Down

0 comments on commit 5cd79f4

Please sign in to comment.