# More'-Thuente Line Search
#
# Combination of the cvsrch and cstep matlab files.
#
# Line Search Factory Function
#
# Returns a line search function using a variant of the More-Thuente
# line search originally implemented in
# \href{http://www.netlib.org/minpack/}{MINPACK}.
#
# @param c1 Constant used in sufficient decrease condition. Should take a value
# between 0 and 1.
# @param c2 Constant used in curvature condition. Should take a value between
# \code{c1} and 1.
# @param max_fn Maximum number of function evaluations allowed.
# @return Line search function.
# @references
# More, J. J., & Thuente, D. J. (1994).
# Line search algorithms with guaranteed sufficient decrease.
# \emph{ACM Transactions on Mathematical Software (TOMS)}, \emph{20}(3),
# 286-307.
# @seealso This code is based on a translation of the original MINPACK code
# for Matlab by
# \href{http://www.cs.umd.edu/users/oleary/software/}{Dianne O'Leary}.
more_thuente <- function(c1 = 1e-4, c2 = 0.1, max_fn = Inf, eps = 1e-6,
alpha_max = Inf,
approx_armijo = FALSE,
strong_curvature = TRUE,
safeguard_cubic = FALSE,
verbose = FALSE) {
if (approx_armijo) {
armijo_check_fn <- make_approx_armijo_ok_step(eps)
}
else {
armijo_check_fn <- armijo_ok_step
}
wolfe_ok_step_fn <- make_wolfe_ok_step_fn(
strong_curvature = strong_curvature,
approx_armijo = approx_armijo,
eps = eps
)
function(phi, step0, alpha,
total_max_fn = Inf, total_max_gr = Inf, total_max_fg = Inf,
pm = NULL) {
maxfev <- min(max_fn, total_max_fn, total_max_gr, floor(total_max_fg / 2))
if (maxfev <= 0) {
return(list(step = step0, nfn = 0, ngr = 0))
}
res <- cvsrch(phi, step0,
alpha = alpha, c1 = c1, c2 = c2,
maxfev = maxfev, alpha_max = alpha_max,
armijo_check_fn = armijo_check_fn,
wolfe_ok_step_fn = wolfe_ok_step_fn,
safeguard_cubic = safeguard_cubic,
verbose = verbose
)
list(step = res$step, nfn = res$nfn, ngr = res$nfn)
}
}
# More'-Thuente Line Search
#
# This routine is a translation of Dianne O'Leary's Matlab code, which was
# itself a translation of the MINPACK original. Original comments to the Matlab
# code are at the end.
# @param phi Line function.
# @param step0 Line search values at starting point of line search.
# @param alpha Initial guess for step size.
# @param c1 Constant used in sufficient decrease condition. Should take a value
# between 0 and 1.
# @param c2 Constant used in curvature condition. Should take a value between
# c1 and 1.
# @param xtol Relative width tolerance: convergence is reached if width falls
# below xtol * maximum step size.
# @param alpha_min Smallest acceptable value of the step size.
# @param alpha_max Largest acceptable value of the step size.
# @param maxfev Maximum number of function evaluations allowed.
# @param delta Value to force sufficient decrease of interval size on
# successive iterations. Should be a positive value less than 1.
# @return List containing:
# \itemize{
# \item \code{step} Best step found and associated line search info.
# \item \code{info} Return code from convergence check.
# \item \code{nfn} Number of function evaluations.
# }
# Translation of minpack subroutine cvsrch
# Dianne O'Leary July 1991
# **********
#
# Subroutine cvsrch
#
# The purpose of cvsrch is to find a step which satisfies
# a sufficient decrease condition and a curvature condition.
# The user must provide a subroutine which calculates the
# function and the gradient.
#
# At each stage the subroutine updates an interval of
# uncertainty with endpoints stx and sty. The interval of
# uncertainty is initially chosen so that it contains a
# minimizer of the modified function
#
# f(x+alpha*pv) - f(x) - c1*alpha*(gradf(x)'pv).
#
# If a step is obtained for which the modified function
# has a nonpositive function value and nonnegative derivative,
# then the interval of uncertainty is chosen so that it
# contains a minimizer of f(x+alpha*pv).
#
# The algorithm is designed to find a step which satisfies
# the sufficient decrease condition
#
# f(x+alpha*pv) <= f(x) + c1*alpha*(gradf(x)'pv),
#
# and the curvature condition
#
# abs(gradf(x+alpha*pv)'pv)) <= c2*abs(gradf(x)'pv).
#
# If c1 is less than c2 and if, for example, the function
# is bounded below, then there is always a step which satisfies
# both conditions. If no step can be found which satisfies both
# conditions, then the algorithm usually stops when rounding
# errors prevent further progress. In this case alpha only
# satisfies the sufficient decrease condition.
#
# The subroutine statement is
#
# subroutine cvsrch(fcn,n,x,f,g,pv,alpha,c1,c2,xtol,
# alpha_min,alpha_max,maxfev,info,nfev)
# where
#
# fcn is the name of the user-supplied subroutine which
# calculates the function and the gradient. fcn must
# be declared in an external statement in the user
# calling program, and should be written as follows.
#
# function [f,g] = fcn(n,x) (Matlab) (10/2010 change in documentation)
# (derived from Fortran subroutine fcn(n,x,f,g) )
# integer n
# f
# x(n),g(n)
# ---
# Calculate the function at x and
# return this value in the variable f.
# Calculate the gradient at x and
# return this vector in g.
# ---
# return
# end
#
# n is a positive integer input variable set to the number
# of variables.
#
# x is an array of length n. On input it must contain the
# base point for the line search. On output it contains
# x + alpha*pv.
#
# f is a variable. On input it must contain the value of f
# at x. On output it contains the value of f at x + alpha*pv.
#
# g is an array of length n. On input it must contain the
# gradient of f at x. On output it contains the gradient
# of f at x + alpha*pv.
#
# pv is an input array of length n which specifies the
# search direction.
#
# alpha is a nonnegative variable. On input alpha contains an
# initial estimate of a satisfactory step. On output
# alpha contains the final estimate.
#
# c1 and c2 are nonnegative input variables. Termination
# occurs when the sufficient decrease condition and the
# directional derivative condition are satisfied.
#
# xtol is a nonnegative input variable. Termination occurs
# when the relative width of the interval of uncertainty
# is at most xtol.
#
# alpha_min and alpha_max are nonnegative input variables which
# specify lower and upper bounds for the step.
#
# maxfev is a positive integer input variable. Termination
# occurs when the number of calls to fcn is at least
# maxfev by the end of an iteration.
#
# info is an integer output variable set as follows:
#
# info = 0 Improper input parameters.
#
# info = 1 The sufficient decrease condition and the
# directional derivative condition hold.
#
# info = 2 Relative width of the interval of uncertainty
# is at most xtol.
#
# info = 3 Number of calls to fcn has reached maxfev.
#
# info = 4 The step is at the lower bound alpha_min.
#
# info = 5 The step is at the upper bound alpha_max.
#
# info = 6 Rounding errors prevent further progress.
# There may not be a step which satisfies the
# sufficient decrease and curvature conditions.
# Tolerances may be too small.
#
# nfev is an integer output variable set to the number of
# calls to fcn.
#
#
# Subprograms called
#
# user-supplied......fcn
#
# MINPACK-supplied...cstep
#
# FORTRAN-supplied...abs,max,min
#
# Argonne National Laboratory. MINPACK Project. June 1983
# Jorge J. More', David J. Thuente
#
# **********
cvsrch <- function(phi, step0, alpha = 1,
c1 = 1e-4, c2 = 0.1, xtol = .Machine$double.eps,
alpha_min = 0, alpha_max = Inf,
maxfev = Inf, delta = 0.66,
armijo_check_fn = armijo_ok_step,
wolfe_ok_step_fn = strong_wolfe_ok_step,
safeguard_cubic = FALSE,
verbose = FALSE) {
# increase width by this amount during zoom phase
xtrapf <- 4
infoc <- 1
# Check the input parameters for errors.
params_ok <- TRUE
problems <- c()
if (alpha <= 0.0) {
params_ok <- FALSE
problems <- c(problems, paste0("alpha <= 0.0: ", formatC(alpha)))
}
if (c1 < 0.0) {
params_ok <- FALSE
problems <- c(problems, paste0("c1 < 0.0: ", formatC(c1)))
}
if (c2 < 0.0) {
params_ok <- FALSE
problems <- c(problems, paste0("c2 < 0.0: ", formatC(c2)))
}
if (xtol < 0.0) {
params_ok <- FALSE
problems <- c(problems, paste0("xtol < 0.0: ", formatC(xtol)))
}
if (alpha_min < 0.0) {
params_ok <- FALSE
problems <- c(problems, paste0("alpha_min < 0.0: ", formatC(alpha_min)))
}
if (alpha_max < alpha_min) {
params_ok <- FALSE
problems <- c(problems, paste0(
"alpha_max ", formatC(alpha_max),
" < alpha_min ", formatC(alpha_min)
))
}
if (maxfev < 0) {
params_ok <- FALSE
problems <- c(problems, paste0("maxfev < 0: ", formatC(maxfev)))
}
if (!params_ok) {
problems <- paste(problems, collapse = "; ")
stop("Parameter errors detected: ", problems)
}
if (maxfev == 0) {
return(list(step = step0, nfn = 0, info = 3))
}
# Check that pv is a descent direction: if not, return a zero step.
if (step0$d >= 0.0) {
return(list(step = step0, info = 6, nfn = 0))
}
dgtest <- c1 * step0$d
# Initialize local variables.
bracketed <- FALSE
brackt <- FALSE
stage1 <- TRUE
nfev <- 0
width <- alpha_max - alpha_min
width_old <- 2 * width
# The variables stx, fx, dgx contain the values of the step,
# function, and directional derivative at the best step.
# The variables sty, fy, dgy contain the value of the step,
# function, and derivative at the other endpoint of
# the interval of uncertainty.
# The variables alpha, f, dg contain the values of the step,
# function, and derivative at the current step.
stepx <- step0
stepy <- step0
step <- list(alpha = alpha)
# Start of iteration.
iter <- 0
while (1) {
iter <- iter + 1
# Set the minimum and maximum steps to correspond
# to the present interval of uncertainty.
if (brackt) {
stmin <- min(stepx$alpha, stepy$alpha)
stmax <- max(stepx$alpha, stepy$alpha)
} else {
stmin <- stepx$alpha
stmax <- step$alpha + xtrapf * (step$alpha - stepx$alpha)
}
# Force the step to be within the bounds alpha_max and alpha_min.
step$alpha <- max(step$alpha, alpha_min)
step$alpha <- min(step$alpha, alpha_max)
if (verbose) {
message(
"Bracket: [", formatC(stmin), ", ", formatC(stmax),
"] alpha = ", formatC(step$alpha)
)
}
# Evaluate the function and gradient at alpha
# and compute the directional derivative.
# Additional check: bisect (if needed) until a finite value is found
# (most important for first iteration)
ffres <- find_finite(phi, step$alpha, maxfev - nfev, min_alpha = stmin)
nfev <- nfev + ffres$nfn
if (!ffres$ok) {
if (verbose) {
message("Unable to create finite alpha")
}
return(list(step = step0, nfn = nfev, info = 7))
}
step <- ffres$step
# Test for convergence.
info <- check_convergence(step0, step, brackt, infoc, stmin, stmax,
alpha_min, alpha_max, c1, c2, nfev,
maxfev, xtol,
armijo_check_fn = armijo_check_fn,
wolfe_ok_step_fn = wolfe_ok_step_fn,
verbose = verbose
)
# Check for termination.
if (info != 0) {
# If an unusual termination is to occur, then set step to the best step
# found
if (info == 2 || info == 3 || info == 6) {
step <- stepx
}
if (verbose) {
message("alpha = ", formatC(step$alpha))
}
return(list(step = step, info = info, nfn = nfev))
}
# In the first stage we seek a step for which the modified
# function has a nonpositive value and nonnegative derivative.
# In the original MINPACK the following test is:
# if (stage1 .and. f .le. ftest1 .and.
# * dg .ge. min(ftol,gtol)*dginit) stage1
# which translates to: step$f <= f0 + alpha * c1 * d0 &&
# step$df >= min(c1, c2) * alpha * d0
# The second test is the armijo condition and the third is the
# curvature condition but using the smaller of c1 and
# c2. This is nearly the standard Wolfe conditions, but because c1 is
# always <= c2 for a convergent line search, this means
# we would always use c1 for the curvature condition.
# I have translated this faithfully, but it seems odd. Using c2 has no
# effect on the test function from the More'-Thuente paper
if (stage1 && wolfe_ok_step(step0, step, c1, min(c1, c2))) {
stage1 <- FALSE
}
# A modified function is used to predict the step only if
# we have not obtained a step for which the modified
# function has a nonpositive function value and nonnegative
# derivative, and if a lower function value has been
# obtained but the decrease is not sufficient.
if (stage1 && step$f <= stepx$f && !armijo_check_fn(step0, step, c1)) {
# Define the modified function and derivative values.
stepxm <- modify_step(stepx, dgtest)
stepym <- modify_step(stepy, dgtest)
stepm <- modify_step(step, dgtest)
step_result <- cstep(stepxm, stepym, stepm, brackt, stmin, stmax,
safeguard_cubic = safeguard_cubic,
verbose = verbose
)
brackt <- step_result$brackt
infoc <- step_result$info
stepxm <- step_result$stepx
stepym <- step_result$stepy
stepm <- step_result$step
# Reset the function and gradient values for f.
stepx <- unmodify_step(stepxm, dgtest)
stepy <- unmodify_step(stepym, dgtest)
step$alpha <- stepm$alpha
} else {
# Call cstep to update the interval of uncertainty
# and to compute the new step.
step_result <- cstep(stepx, stepy, step, brackt, stmin, stmax,
safeguard_cubic = safeguard_cubic,
verbose = verbose
)
brackt <- step_result$brackt
infoc <- step_result$info
stepx <- step_result$stepx
stepy <- step_result$stepy
step <- step_result$step
}
if (!bracketed && brackt) {
bracketed <- TRUE
if (verbose) {
message("Bracketed")
}
}
# Force a sufficient decrease in the size of the interval of uncertainty.
if (brackt) {
# if the length of I does not decrease by a factor of delta < 1
# then use a bisection step for the next trial alpha
width_new <- abs(stepy$alpha - stepx$alpha)
if (width_new >= delta * width_old) {
if (verbose) {
message("Interval did not decrease sufficiently: bisecting")
}
step$alpha <- stepx$alpha + 0.5 * (stepy$alpha - stepx$alpha)
}
width_old <- width
width <- width_new
}
}
}
# Modify Line Search Values
#
# Modifies a line search function and directional derivative value.
# Used by MINPACK version of More'-Thuente line search algorithm.
#
# @param step Line search information.
# @param dgtest Product of the initial line search directional derivative and
# the sufficent decrease condition constant.
# @return Modified step size.
modify_step <- function(step, dgtest) {
stepm <- step
stepm$f <- step$f - step$alpha * dgtest
stepm$d <- step$d - dgtest
stepm
}
# Un-modify Line Search Values
#
# Un-modifies a line search function and directional derivative value that was
# modified by the modify_step function. Used by MINPACK version of More'-Thuente
# line search algorithm.
#
# @param stepm Modified line search information.
# @param dgtest Product of the initial line search directional derivative and
# the sufficent decrease condition constant.
# @return Unmodified step size.
unmodify_step <- function(stepm, dgtest) {
stepm$f <- stepm$f + stepm$alpha * dgtest
stepm$d <- stepm$d + dgtest
stepm
}
# Check Convergence of More'-Thuente Line Search
#
# @param step0 Line search values at starting point.
# @param step Line search value at a step along the line.
# @param brackt TRUE if the step has been bracketed.
# @param infoc Return code of the last step size update.
# @param stmin Smallest value of the step size interval.
# @param stmax Largest value of the step size interval.
# @param alpha_min Smallest acceptable value of the step size.
# @param alpha_max Largest acceptable value of the step size.
# @param c1 Constant used in sufficient decrease condition. Should take a value
# between 0 and 1.
# @param c2 Constant used in curvature condition. Should take a value between
# c1 and 1.
# @param dgtest Product of the initial line search directional derivative and
# the sufficent decrease condition constant.
# @param nfev Current number of function evaluations.
# @param maxfev Maximum number of function evaluations allowed.
# @param xtol Relative width tolerance: convergence is reached if width falls
# below xtol * stmax.
# @return Integer code indicating convergence state:
# \itemize{
# \item \code{0} No convergence.
# \item \code{1} The sufficient decrease condition and the directional
# derivative condition hold.
# \item \code{2} Relative width of the interval of uncertainty
# is at most xtol.
# \item \code{3} Number of calls to fcn has reached maxfev.
# \item \code{4} The step is at the lower bound alpha_min.
# \item \code{5} The step is at the upper bound alpha_max.
# \item \code{6} Rounding errors prevent further progress.
# }
# NB dgtest was originally used in testing for min/max alpha test (code 4 and 5)
# but has been replaced with a call to the curvature test using c1 instead of c2
# so dgtest is no longer used in the body of the function.
check_convergence <- function(step0, step, brackt, infoc, stmin, stmax,
alpha_min, alpha_max, c1, c2, nfev,
maxfev, xtol, armijo_check_fn = armijo_ok_step,
wolfe_ok_step_fn = strong_wolfe_ok_step,
verbose = FALSE) {
info <- 0
if ((brackt && (step$alpha <= stmin || step$alpha >= stmax)) || infoc == 0) {
if (verbose) {
message(
"MT: Rounding errors prevent further progress: stmin = ",
formatC(stmin), " stmax = ", formatC(stmax)
)
}
# rounding errors prevent further progress
info <- 6
}
# use of c1 in curvature check is on purpose (it's in the MINPACK code)
if (step$alpha == alpha_max && armijo_check_fn(step0, step, c1) &&
!curvature_ok_step(step0, step, c1)) {
# reached alpha_max
info <- 5
if (verbose) {
message("MT: Reached alpha max")
}
}
# use of c1 in curvature check here is also in MINPACK code
if (step$alpha == alpha_min && (!armijo_check_fn(step0, step, c1) ||
curvature_ok_step(step0, step, c1))) {
# reached alpha_min
info <- 4
if (verbose) {
message("MT: Reached alpha min")
}
}
if (nfev >= maxfev) {
# maximum number of function evaluations reached
info <- 3
if (verbose) {
message("MT: exceeded fev")
}
}
if (brackt && stmax - stmin <= xtol * stmax) {
# interval width is below xtol
info <- 2
if (verbose) {
message("MT: interval width is <= xtol: ", formatC(xtol * stmax))
}
}
if (wolfe_ok_step_fn(step0, step, c1, c2)) {
# success!
info <- 1
if (verbose) {
message("Success!")
}
}
info
}
# Part of the More'-Thuente line search.
#
# Updates the interval of uncertainty of the current step size and updates the
# current best step size.
#
# This routine is a translation of Dianne O'Leary's Matlab code, which was
# itself a translation of the MINPACK original. Original comments to the Matlab
# code are at the end.
#
# @param stepx One side of the updated step interval, and the associated
# line search values.
# @param stepy Other side of the updated step interval, and the
# associated line search values.
# @param step Optimal step size and associated line search
# value.
# @param brackt TRUE if the interval has been bracketed.
# @param stpmin Minimum allowed interval length.
# @param stpmax Maximum allowed interval length.
# @return List containing:
# \itemize{
# \item \code{stepx} One side of the updated step interval, and the associated
# line search values.
# \item \code{stepy} Other side of the updated step interval, and the
# associated line search values.
# \item \code{step} Updated optimal step size and associated line search
# value.
# \item \code{brackt} TRUE if the interval has been bracketed.
# \item \code{info} Integer return code.
# }
# The possible integer return codes refer to the cases 1-4 enumerated in the
# original More'-Thuente paper that correspond to different line search values
# at the ends of the interval and the current best step size (and therefore
# the type of cubic or quadratic interpolation). An integer value of 0 indicates
# that the input parameters are invalid.
#
#
# Translation of minpack subroutine cstep
# Dianne O'Leary July 1991
# **********
#
# Subroutine cstep
#
# The purpose of cstep is to compute a safeguarded step for
# a linesearch and to update an interval of uncertainty for
# a minimizer of the function.
#
# The parameter stx contains the step with the least function
# value. The parameter stp contains the current step. It is
# assumed that the derivative at stx is negative in the
# direction of the step. If brackt is set true then a
# minimizer has been bracketed in an interval of uncertainty
# with end points stx and sty.
#
# The subroutine statement is
#
# subroutine cstep(stx,fx,dx,sty,fy,dy,stp,fp,dp,brackt,
# stpmin,stpmax,info)
#
# where
#
# stx, fx, and dx are variables which specify the step,
# the function, and the derivative at the best step obtained
# so far. The derivative must be negative in the direction
# of the step, that is, dx and stp-stx must have opposite
# signs. On output these parameters are updated appropriately.
#
# sty, fy, and dy are variables which specify the step,
# the function, and the derivative at the other endpoint of
# the interval of uncertainty. On output these parameters are
# updated appropriately.
#
# stp, fp, and dp are variables which specify the step,
# the function, and the derivative at the current step.
# If brackt is set true then on input stp must be
# between stx and sty. On output stp is set to the new step.
#
# brackt is a logical variable which specifies if a minimizer
# has been bracketed. If the minimizer has not been bracketed
# then on input brackt must be set false. If the minimizer
# is bracketed then on output brackt is set true.
#
# stpmin and stpmax are input variables which specify lower
# and upper bounds for the step.
#
# info is an integer output variable set as follows:
# If info <- 1,2,3,4,5, then the step has been computed
# according to one of the five cases below. Otherwise
# info <- 0, and this indicates improper input parameters.
#
# Subprograms called
#
# FORTRAN-supplied ... abs,max,min,sqrt
# ... dble
#
# Argonne National Laboratory. MINPACK Project. June 1983
# Jorge J. More', David J. Thuente
#
# **********
cstep <- function(stepx, stepy, step, brackt, stpmin, stpmax,
safeguard_cubic = FALSE,
verbose = FALSE) {
stx <- stepx$alpha
fx <- stepx$f
dx <- stepx$d
dfx <- stepx$df
sty <- stepy$alpha
fy <- stepy$f
dy <- stepy$d
dfy <- stepy$df
stp <- step$alpha
fp <- step$f
dp <- step$d
dfp <- step$df
delta <- 0.66
info <- 0
# Check the input parameters for errors.
if ((brackt && (stp <= min(stx, sty) ||
stp >= max(stx, sty))) ||
dx * (stp - stx) >= 0.0 || stpmax < stpmin) {
list(
stepx = stepx,
stepy = stepy,
step = step,
brackt = brackt, info = info
)
}
# Determine if the derivatives have opposite sign.
sgnd <- dp * (dx / abs(dx))
# First case. Trial function value is larger, so choose a step which is
# closer to stx.
# The minimum is bracketed.
# If the cubic step is closer to stx than the quadratic step, the cubic step
# is taken else the average of the cubic and quadratic steps is taken.
if (fp > fx) {
info <- 1
bound <- TRUE
stpc <- cubic_interpolate(stx, fx, dx, stp, fp, dp, ignoreWarnings = TRUE)
stpq <- quadratic_interpolate(stx, fx, dx, stp, fp)
if (is.nan(stpc)) {
stpf <- stpq
}
else {
if (abs(stpc - stx) < abs(stpq - stx)) {
stpf <- ifelse(safeguard_cubic, ensure_min_alpha(stpc, stx, sty), stpc)
} else {
stpf <- stpc + (stpq - stpc) / 2
}
}
brackt <- TRUE
# Second case. A lower function value and derivatives of
# opposite sign. The minimum is bracketed. If the cubic
# step is closer to stx than the quadratic (secant) step,
# the cubic step is taken, else the quadratic step is taken.
} else if (sgnd < 0.0) {
info <- 2
bound <- FALSE
stpc <- cubic_interpolate(stx, fx, dx, stp, fp, dp, ignoreWarnings = TRUE)
stpq <- quadratic_interpolateg(stp, dp, stx, dx)
if (is.nan(stpc)) {
stpf <- stpq
}
else {
if (abs(stpc - stp) > abs(stpq - stp)) {
stpf <- ifelse(safeguard_cubic, ensure_min_alpha(stpc, stx, sty), stpc)
} else {
stpf <- stpq
}
}
brackt <- TRUE
# Third case. A lower function value, derivatives of the
# same sign, and the magnitude of the derivative decreases.
# The next trial step exists outside the interval so is an extrapolation.
# The cubic may not have a minimizer. If it does, it may be in the
# wrong direction, e.g. stc < stx < stp
# The cubic step is only used if the cubic tends to infinity
# in the direction of the step and if the minimum of the cubic
# is beyond stp. Otherwise the cubic step is defined to be
# either stpmin or stpmax. The quadratic (secant) step is also
# computed and if the minimum is bracketed then the the step
# closest to stx is taken, else the step farthest away is taken.
} else if (abs(dp) < abs(dx)) {
info <- 3
bound <- TRUE
theta <- 3 * (fx - fp) / (stp - stx) + dx + dp
s <- norm(rbind(theta, dx, dp), "i")
# The case gamma = 0 only arises if the cubic does not tend
# to infinity in the direction of the step.
gamma <- s * sqrt(max(0., (theta / s)^2 - (dx / s) * (dp / s)))
if (stp > stx) {
gamma <- -gamma
}
p <- (gamma - dp) + theta
q <- (gamma + (dx - dp)) + gamma
r <- p / q
if (r < 0.0 && gamma != 0.0) {
stpc <- stp + r * (stx - stp)
} else if (stp > stx) {
stpc <- stpmax
} else {
stpc <- stpmin
}
stpq <- quadratic_interpolateg(stp, dp, stx, dx)
if (brackt) {
if (abs(stp - stpc) < abs(stp - stpq)) {
stpf <- stpc
} else {
stpf <- stpq
}
} else {
if (abs(stp - stpc) > abs(stp - stpq)) {
stpf <- stpc
} else {
stpf <- stpq
}
}
# Fourth case. A lower function value, derivatives of the
# same sign, and the magnitude of the derivative does
# not decrease. If the minimum is not bracketed, the step
# is either stpmin or stpmax, else the cubic step is taken.
} else {
info <- 4
bound <- FALSE
if (brackt) {
stpc <- cubic_interpolate(sty, fy, dy, stp, fp, dp, ignoreWarnings = TRUE)
if (is.nan(stpc)) {
stpc <- (sty + stp) / 2
}
stpf <- ifelse(safeguard_cubic, ensure_min_alpha(stpc, stx, sty), stpc)
} else if (stp > stx) {
stpf <- stpmax
} else {
stpf <- stpmin
}
}
# Update the interval of uncertainty. This update does not
# depend on the new step or the case analysis above.
if (fp > fx) {
sty <- stp
fy <- fp
dy <- dp
dfy <- dfp
} else {
if (sgnd < 0.0) {
sty <- stx
fy <- fx
dy <- dx
dfy <- dfx
}
stx <- stp
fx <- fp
dx <- dp
dfx <- dfp
}
# Compute the new step and safeguard it.
stpf <- min(stpmax, stpf)
stpf <- max(stpmin, stpf)
stp <- stpf
if (brackt && bound) {
# if the new step is too close to an end point
# replace with a (weighted) bisection (delta = 0.66 in the paper)
if (verbose) {
message("Step too close to end point, weighted bisection")
}
stb <- stx + delta * (sty - stx)
if (sty > stx) {
stp <- min(stb, stp)
} else {
stp <- max(stb, stp)
}
}
list(
stepx = list(alpha = stx, f = fx, d = dx, df = dfx),
stepy = list(alpha = sty, f = fy, d = dy, df = dfy),
step = list(alpha = stp, f = fp, d = dp, df = dfp),
brackt = brackt, info = info
)
}
# Line search modification to prevent cubic interpolant being too close to the
# lower bound, suggested in:
#
# Xie, D., & Schlick, T. (2002).
# A more lenient stopping rule for line search algorithms.
# Optimization Methods and Software, 17(4), 683-700.
#
# (where it is credited to a personal communication from More') and also in:
#
# Jiang, L., Byrd, R. H., Eskow, E., & Schnabel, R. B. (2004).
# A preconditioned L-BFGS algorithm with application to molecular energy
# minimization
# (No. CU-CS-982-04).
# Colorado Univ At Boulder Dept Of Computer Science.
#
# alpha - suggested step size
# x - one end of the interval (can be upper or lower)
# y - other end of the interval
# minshrink - minumum fraction of the current interval range that alpha can
# increase by.
# Return modified alpha if it's too close to x or y
ensure_min_alpha <- function(alpha, x, y, minshrink = 0.001) {
l <- min(x, y)
len <- abs(x - y)
max(l + minshrink * len, alpha)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.