# Functions for line searches
# p62 of Nocedal & Wright defines a "loose" line search as c1 = 1.e-4, c2 = 0.9
# But note that CG and SD methods are not considered suitable for loose line
# search because of the search directions are not well-scaled. c2 = 0.1 is
# suggested for CG on p34. With the Strong Wolfe conditions, reducing c2 makes
# the line search stricter (i.e. forces it closer to a minimum).
# More-Thuente ------------------------------------------------------------
more_thuente_ls <- function(c1 = c2 / 2, c2 = 0.1,
max_alpha = Inf,
max_alpha_mult = Inf,
min_step_size = .Machine$double.eps,
initializer = "s",
initial_step_length = 1,
try_newton_step = FALSE,
stop_at_min = TRUE,
max_fn = Inf,
max_gr = Inf,
max_fg = Inf,
approx_armijo = FALSE,
strong_curvature = TRUE,
safeguard_cubic = FALSE,
debug = FALSE) {
if (!is_in_range(c1, 0, 1, lopen = FALSE, ropen = FALSE)) {
stop("c1 must be between 0 and 1")
}
if (!is.null(c2) && !is_in_range(c2, c1, 1, lopen = FALSE, ropen = FALSE)) {
stop("c2 must be between c1 and 1")
}
max_ls_fn <- min(max_fn, max_gr, floor(max_fg / 2))
line_search(more_thuente(
c1 = c1, c2 = c2,
alpha_max = max_alpha,
max_fn = max_ls_fn,
strong_curvature = strong_curvature,
approx_armijo = approx_armijo,
safeguard_cubic = safeguard_cubic
),
name = "more-thuente",
max_alpha_mult = max_alpha_mult,
min_step_size = min_step_size, stop_at_min = stop_at_min,
initializer = initializer,
initial_step_length = initial_step_length,
try_newton_step = try_newton_step,
debug = debug
)
}
# Rasmussen ---------------------------------------------------------------
rasmussen_ls <- function(c1 = c2 / 2, c2 = 0.1, int = 0.1, ext = 3.0,
max_alpha_mult = Inf,
min_step_size = .Machine$double.eps,
initializer = "s",
initial_step_length = 1,
try_newton_step = FALSE,
stop_at_min = TRUE, eps = .Machine$double.eps,
max_fn = Inf,
max_gr = Inf,
max_fg = Inf,
strong_curvature = TRUE,
approx_armijo = FALSE,
debug = FALSE) {
if (!is_in_range(c1, 0, 1, lopen = FALSE, ropen = FALSE)) {
stop("c1 must be between 0 and 1")
}
if (!is.null(c2) && !is_in_range(c2, c1, 1, lopen = FALSE, ropen = FALSE)) {
stop("c2 must be between c1 and 1")
}
max_ls_fn <- min(max_fn, max_gr, floor(max_fg / 2))
line_search(rasmussen(
c1 = c1, c2 = c2, int = int, ext = ext,
max_fn = max_ls_fn,
strong_curvature = strong_curvature,
approx_armijo = approx_armijo
),
name = "rasmussen",
max_alpha_mult = max_alpha_mult,
min_step_size = min_step_size, stop_at_min = stop_at_min,
initializer = initializer,
initial_step_length = initial_step_length,
try_newton_step = try_newton_step,
eps = eps,
debug = debug
)
}
# Schmidt (minfunc) -------------------------------------------------------
schmidt_ls <- function(c1 = c2 / 2, c2 = 0.1,
max_alpha_mult = Inf,
min_step_size = .Machine$double.eps,
initializer = "s",
initial_step_length = "schmidt",
try_newton_step = FALSE,
stop_at_min = TRUE, eps = .Machine$double.eps,
max_fn = Inf,
max_gr = Inf,
max_fg = Inf,
strong_curvature = TRUE,
approx_armijo = FALSE,
debug = FALSE) {
if (!is_in_range(c1, 0, 1, lopen = FALSE, ropen = FALSE)) {
stop("c1 must be between 0 and 1")
}
if (!is.null(c2) && !is_in_range(c2, c1, 1, lopen = FALSE, ropen = FALSE)) {
stop("c2 must be between c1 and 1")
}
max_ls_fn <- min(max_fn, max_gr, floor(max_fg / 2))
line_search(schmidt(
c1 = c1, c2 = c2, max_fn = max_ls_fn,
strong_curvature = strong_curvature,
approx_armijo = approx_armijo
),
name = "schmidt",
max_alpha_mult = max_alpha_mult,
min_step_size = min_step_size, stop_at_min = stop_at_min,
initializer = initializer,
initial_step_length = initial_step_length,
try_newton_step = try_newton_step,
eps = eps,
debug = debug
)
}
schmidt_armijo_ls <- function(c1 = 0.005,
max_alpha_mult = Inf,
min_step_size = .Machine$double.eps,
initializer = "s",
initial_step_length = "schmidt",
try_newton_step = FALSE,
step_down = NULL,
stop_at_min = TRUE, eps = .Machine$double.eps,
max_fn = Inf,
max_gr = Inf,
max_fg = Inf,
debug = FALSE) {
if (!is_in_range(c1, 0, 1, lopen = FALSE, ropen = FALSE)) {
stop("c1 must be between 0 and 1")
}
max_ls_fn <- min(max_fn, max_gr, floor(max_fg / 2))
line_search(schmidt_armijo_backtrack(
c1 = c1, max_fn = max_ls_fn,
step_down = step_down
),
name = "schmidt_armijo",
max_alpha_mult = max_alpha_mult,
min_step_size = min_step_size, stop_at_min = stop_at_min,
initializer = initializer,
initial_step_length = initial_step_length,
try_newton_step = try_newton_step,
eps = eps,
debug = debug
)
}
# Hager-Zhang -------------------------------------------------------------
hager_zhang_ls <- function(c1 = c2 / 2, c2 = 0.1,
max_alpha_mult = Inf,
min_step_size = .Machine$double.eps,
initializer = "hz",
initial_step_length = "hz",
try_newton_step = FALSE,
stop_at_min = TRUE, eps = .Machine$double.eps,
max_fn = Inf,
max_gr = Inf,
max_fg = Inf,
strong_curvature = FALSE,
approx_armijo = TRUE,
debug = FALSE) {
if (!is_in_range(c1, 0, 1, lopen = FALSE, ropen = FALSE)) {
stop("c1 must be between 0 and 1")
}
if (!is.null(c2) && !is_in_range(c2, c1, 1, lopen = FALSE, ropen = FALSE)) {
stop("c2 must be between c1 and 1")
}
max_ls_fn <- min(max_fn, max_gr, floor(max_fg / 2))
line_search(hager_zhang(
c1 = c1, c2 = c2, max_fn = max_ls_fn,
strong_curvature = strong_curvature,
approx_armijo = approx_armijo
),
name = "hager-zhang",
max_alpha_mult = max_alpha_mult,
min_step_size = min_step_size, stop_at_min = stop_at_min,
initializer = initializer,
initial_step_length = initial_step_length,
try_newton_step = try_newton_step,
eps = eps,
debug = debug
)
}
# Line Search -------------------------------------------------------------
line_search <- function(ls_fn,
name,
initializer = "slope ratio",
try_newton_step = FALSE,
initial_step_length = 1,
max_alpha_mult = Inf,
min_step_size = .Machine$double.eps,
stop_at_min = TRUE,
debug = FALSE,
eps = .Machine$double.eps) {
if (!is.numeric(initializer)) {
initializer <- match.arg(
tolower(initializer),
c("slope ratio", "quadratic", "hz", "hager-zhang")
)
if (initializer == "hager-zhang") {
initializer <- "hz"
}
}
if (!is.numeric(initial_step_length)) {
initializer0 <- match.arg(
tolower(initial_step_length),
c(
"rasmussen", "scipy", "schmidt",
"hz", "hager-zhang"
)
)
if (initializer0 == "hager-zhang") {
initializer0 <- "hz"
}
}
else {
initializer0 <- initial_step_length
}
make_step_size(list(
name = name,
eps = eps,
init = function(opt, stage, sub_stage, par, fg, iter) {
if (!is_first_stage(opt, stage)) {
# Requires knowing f at the current location
# If this step size is part of any stage other than the first
# we have to turn on eager updating
# message("Wolfe: setting stage updating to eager")
opt$eager_update <- TRUE
}
sub_stage$value <- NULL
sub_stage$alpha0 <- NULL
sub_stage$d0 <- NULL
sub_stage$f0 <- NULL
sub_stage$df <- NULL
list(opt = opt, sub_stage = sub_stage)
},
calculate = function(opt, stage, sub_stage, par, fg, iter) {
pm <- stage$direction$value
if (norm2(pm) < .Machine$double.eps) {
sub_stage$value <- 0
if (is_last_stage(opt, stage)) {
opt <- set_fn_new(opt, opt$cache$fn_curr, iter)
}
return(list(opt = opt, sub_stage = sub_stage))
}
if (is_first_stage(opt, stage) && has_fn_curr(opt, iter)) {
f0 <- opt$cache$fn_curr
}
else {
opt <- calc_fn(opt, par, fg$fn)
f0 <- opt$fn
}
step0 <- list(
alpha = 0,
f = f0,
df = opt$cache$gr_curr,
d = dot(opt$cache$gr_curr, pm)
)
alpha_prev <- sub_stage$value
phi_alpha <- make_phi_alpha(par, fg, pm,
calc_gradient_default = TRUE, debug = debug
)
alpha_next <- 0
if (is.numeric(initializer)) {
alpha_next <- initializer
}
else if (initializer == "slope ratio" && !is.null(sub_stage$d0)) {
# described on p59 of Nocedal and Wright
alpha_next <- step_next_slope_ratio(
alpha_prev, sub_stage$d0,
step0, eps
)
}
else if (initializer == "quadratic" && !is.null(sub_stage$f0)) {
# quadratic interpolation
alpha_next <- step_next_quad_interp(sub_stage$f0, step0,
try_newton_step = try_newton_step
)
}
else if (initializer == "hz" && !is.null(alpha_prev)) {
step_next_res <- step_next_hz(phi_alpha, alpha_prev, step0)
alpha_next <- step_next_res$alpha
opt$counts$fn <- opt$counts$fn + step_next_res$fn
}
# Prevent the next step initial guess being too large
if (!is.null(alpha_prev) && alpha_next / alpha_prev > max_alpha_mult) {
alpha_next <- alpha_prev * max_alpha_mult
}
sub_stage$value <- alpha_next
if (is.null(sub_stage$value) || sub_stage$value <= 0) {
sub_stage$value <- guess_alpha0(
initializer0,
par,
step0$f,
step0$df,
step0$d,
try_newton_step
)
}
sub_stage$alpha0 <- sub_stage$value
sub_stage$d0 <- step0$d
sub_stage$f0 <- step0$f
max_fn <- Inf
max_gr <- Inf
max_fg <- Inf
if (!is.null(opt$convergence$max_fn) && is.finite(opt$convergence$max_fn)) {
max_fn <- opt$convergence$max_fn - opt$counts$fn
}
if (!is.null(opt$convergence$max_gr) && is.finite(opt$convergence$max_gr)) {
max_gr <- opt$convergence$max_gr - opt$counts$gr
}
if (!is.null(opt$convergence$max_fg) && is.finite(opt$convergence$max_fg)) {
max_fg <- opt$convergence$max_fg - (opt$counts$fn + opt$counts$gr)
}
if (max_fn <= 0 || max_gr <= 0 || max_fg <= 0) {
sub_stage$value <- 0
if (is_last_stage(opt, stage)) {
opt <- set_fn_new(opt, step0$f, iter)
sub_stage$df <- step0$df
}
}
else {
ls_result <- ls_fn(phi_alpha, step0, sub_stage$value,
total_max_fn = max_fn, total_max_gr = max_gr,
total_max_fg = max_fg, pm = pm
)
sub_stage$is_gr_curr <- ls_result$is_gr_curr
sub_stage$value <- ls_result$step$alpha
opt$counts$fn <- opt$counts$fn + ls_result$nfn
opt$counts$gr <- opt$counts$gr + ls_result$ngr
if (is_last_stage(opt, stage)) {
opt <- set_fn_new(opt, ls_result$step$f, iter)
if (is.null(ls_result$step$df)) {
sub_stage$df <- rep(sub_stage$eps, length(par))
}
else {
sub_stage$df <- ls_result$step$df
}
}
}
list(opt = opt, sub_stage = sub_stage)
},
after_step = function(opt, stage, sub_stage, par, fg, iter, par0,
update) {
if (opt$ok && is_last_stage(opt, stage) && has_fn_new(opt, iter)) {
opt <- set_fn_curr(opt, opt$cache$fn_new, iter + 1)
}
# Armijo LS does not necessarily calculate gradients
if (opt$ok && is_single_stage(opt) &&
(is.null(sub_stage$is_gr_curr) || sub_stage$is_gr_curr)) {
opt <- set_gr_curr(opt, sub_stage$df, iter + 1)
}
list(opt = opt)
},
depends = c("gradient")
))
}
make_phi_alpha <- function(par, fg, pm,
calc_gradient_default = FALSE, debug = FALSE) {
# LS functions are responsible for updating fn and gr count
function(alpha, calc_gradient = calc_gradient_default) {
y_alpha <- par + (alpha * pm)
if (!is.null(fg$fg) && calc_gradient) {
fg_res <- fg$fg(y_alpha)
f <- fg_res$fn
g <- fg_res$gr
step <- list(
alpha = alpha,
f = f,
df = g,
d = dot(g, pm)
)
}
else {
f <- fg$fn(y_alpha)
step <- list(
alpha = alpha,
f = f
)
if (calc_gradient) {
step$df <- fg$gr(y_alpha)
step$d <- dot(step$df, pm)
}
}
step$par <- y_alpha
if (debug) {
message(format_list(step))
}
step
}
}
# Ensure Valid Step Size
#
# Given an initial step size, if either the function value or the directional
# derivative is non-finite (NaN or infinite), reduce the step size until
# finite values are found.
#
# @param phi Line function.
# @param alpha Initial step size.
# @param min_alpha Minimum step size.
# @param max_fn Maximum number of function evaluations allowed.
# @return List containing:
# \itemize{
# \item step Valid step size or the last step size evaluated, or NULL if
# max_fn == 0.
# \item nfn Number of function evaluations.
# \item ok If a valid step was found
# }
find_finite <- function(phi, alpha, min_alpha = 0, max_fn = 20) {
nfn <- 0
ok <- FALSE
step <- NULL
while (nfn < max_fn && alpha >= min_alpha) {
step <- phi(alpha)
nfn <- nfn + 1
if (step_is_finite(step)) {
ok <- TRUE
break
}
alpha <- (min_alpha + alpha) / 2
}
list(step = step, nfn = nfn, ok = ok)
}
# Is the function and directional derivative (if it exists) a sane value?
step_is_finite <- function(step) {
is.finite(step$f) && (is.null(step$d) || is.finite(step$d))
}
# Initial Step Length -----------------------------------------------------
# Set the initial step length. If initial_step_length is a numeric scalar,
# then use that as-is. Otherwise, use one of several variations based around
# the only thing we know (the directional derivative)
guess_alpha0 <- function(guess_type, x0, f0, gr0, d0,
try_newton_step = FALSE) {
if (is.numeric(guess_type)) {
return(guess_type)
}
s <- switch(guess_type,
rasmussen = step0_rasmussen(d0),
scipy = step0_scipy(gr0, d0),
schmidt = step0_schmidt(gr0),
hz = step0_hz(x0, f0, gr0, psi0 = 0.01)
)
if (try_newton_step) {
s <- min(1, 1.01 * s)
}
s
}
# From minimize.m
step0_rasmussen <- function(d0) {
1 / (1 - d0)
}
# found in _minimize_bfgs in optimize.py with this comment:
# # Sets the initial step guess to dx ~ 1
# actually sets f_old to f0 + 0.5 * ||g||2 then uses f_old in the quadratic
# update formula. If you do the algebra, you get -||g||2 / d
# Assuming steepest descent for step0, this can be simplified further to
# 1 / sqrt(-d0), but may as well not assume that
step0_scipy <- function(gr0, d0) {
-norm2(gr0) / d0
}
# Mark Schmidt's minFunc.m uses reciprocal of the one-norm
step0_schmidt <- function(gr0) {
1 / norm1(gr0)
}
# I0 in the 'initial' routine in CG_DESCENT paper
step0_hz <- function(x0, f0, gr0, psi0 = 0.01) {
alpha <- 1
if (is.null(x0)) {
return(alpha)
}
ginf_norm <- norm_inf(gr0)
if (ginf_norm != 0) {
xinf_norm <- norm_inf(x0)
if (xinf_norm != 0) {
alpha <- psi0 * (xinf_norm / ginf_norm)
}
else if (is_finite_numeric(f0) && f0 != 0) {
g2_norm2 <- sqnorm2(gr0)
if (is_finite_numeric(g2_norm2) && g2_norm2 != 0) {
alpha <- psi0 * (abs(f0) / ginf_norm^2)
}
}
}
alpha
}
# Next Step Length --------------------------------------------------------
# described on p59 of Nocedal and Wright
# slope ratio method
step_next_slope_ratio <- function(alpha_prev, d0_prev, step0, eps) {
# NB the p vector must be a descent direction or the directional
# derivative will be positive => a negative initial step size!
slope_ratio <- d0_prev / (step0$d + eps)
s <- alpha_prev * slope_ratio
max(s, eps)
}
# quadratic interpolation
step_next_quad_interp <- function(f0_prev, step0, try_newton_step = FALSE) {
s <- 2 * (step0$f - f0_prev) / step0$d
if (try_newton_step) {
s <- min(1, 1.01 * s)
}
s
max(.Machine$double.eps, s)
}
# steps I1-2 in the routine 'initial' of the CG_DESCENT paper
step_next_hz <- function(phi, alpha_prev, step0, psi1 = 0.1, psi2 = 2) {
if (alpha_prev < .Machine$double.eps) {
return(list(alpha = .Machine$double.eps, fn = 0))
}
# I2: use if QuadStep fails
alpha <- alpha_prev * psi2
nfn <- 0
# I1: QuadStep
# If function is reduced at the initial guess, carry out a quadratic
# interpolation. If the resulting quadratic is strongly convex, use the
# minimizer of the quadratic. Otherwise, use I2.
step_psi <- phi(alpha_prev * psi1, calc_gradient = FALSE)
nfn <- 1
if (step_psi$f <= step0$f) {
alpha_q <- quadratic_interpolate_step(step0, step_psi)
# A 1D quadratic Ax^2 + Bx + C is strongly convex if A > 0. Second clause in
# if statement is A expressed in terms of the minimizer (this is easy to
# derive by looking at Nocedal & Wright 2nd Edition, equations 3.57 and
# 3.58)
if (alpha_q > 0 && -step0$d / (2 * alpha_q) > 0) {
alpha <- alpha_q
}
}
alpha <- max(.Machine$double.eps, alpha)
list(alpha = alpha, fn = nfn)
}
# Line Search Checks -------------------------------------------------------
# Armijo Rule (or Sufficient Decrease Condition)
#
# Line search test.
#
# The sufficient decrease condition is met if the line search step length yields
# a decrease in the function value that is sufficiently large (relative to the
# size of the step).
#
# This test prevents large step sizes that, while representing a function value
# decrease, don't reduce it by very much, which could indicate that the
# function minimum has been stepped over and you're now going back up the slope.
# Also, this condition can always be met by taking a sufficiently small step,
# so line searches involving this condition can always terminate. The downside
# is that you can end up taking very small steps, so it's usual to combine this
# condition with one that encourages larger step sizes.
#
# @param f0 Value of function at starting point of line search.
# @param d0 Directional derivative at starting point of line search.
# @param alpha the step length.
# @param fa Value of function at alpha.
# @param c1 the sufficient decrease constant. Should take a value between 0 and
# 1.
# @return \code{TRUE} if the step \code{alpha} represents a sufficient decrease.
armijo_ok <- function(f0, d0, alpha, fa, c1) {
fa <= f0 + c1 * alpha * d0
}
# Armijo Rule (or Sufficient Decrease Condition)
#
# Line search test.
#
# The sufficient decrease condition is met if the line search step length yields
# a decrease in the function value that is sufficiently large (relative to the
# size of the step).
#
# This test prevents large step sizes that, while representing a function value
# decrease, don't reduce it by very much, which could indicate that the
# function minimum has been stepped over and you're now going back up the slope.
# Also, this condition can always be met by taking a sufficiently small step,
# so line searches involving this condition can always terminate. The downside
# is that you can end up taking very small steps, so it's usual to combine this
# condition with one that encourages larger step sizes.
#
# @param step0 Line search values at starting point.
# @param step Line search value at a step along the line.
# @param c1 the sufficient decrease constant. Should take a value between 0 and
# 1.
# @return \code{TRUE} if the step represents a sufficient decrease.
armijo_ok_step <- function(step0, step, c1) {
armijo_ok(step0$f, step0$d, step$alpha, step$f, c1)
}
# Curvature Condition
#
# Line search test.
#
# Ensures that the directional derivative of the line search direction at a
# candidate step size is greater than a specified fraction of the slope of the
# line at the starting point of the search. This condition is used to stop step
# sizes being too small.
#
# In combination with the sufficient decrease condition \code{\link{armijo_ok}}
# these conditions make up the Wolfe conditions.
#
# @param d0 Directional derivative at starting point.
# @param da Directional derivative at step alpha.
# @param c2 Curvature condition constant. Should take a value between \code{c1}
# (the constant used in the sufficient decrease condition check) and 1.
# @return \code{TRUE} if the curvature condition is met.
curvature_ok <- function(d0, da, c2) {
da > c2 * d0
}
# Curvature Condition
#
# Line search test.
#
# Ensures that the directional derivative of the line search direction at a
# candidate step size is greater than a specified fraction of the slope of the
# line at the starting point of the search. This condition is used to stop step
# sizes being too small.
#
# In combination with the sufficient decrease condition \code{\link{armijo_ok}}
# these conditions make up the Wolfe conditions.
#
# @param step0 Line search values at starting point.
# @param step Line search value at a step along the line.
# @param c2 Curvature condition constant. Should take a value between \code{c1}
# (the constant used in the sufficient decrease condition check) and 1.
# @return \code{TRUE} if the curvature condition is met.
curvature_ok_step <- function(step0, step, c2) {
curvature_ok(step0$d, step$d, c2)
}
# Strong Curvature Condition
#
# Line search test.
#
# Ensures that the value of the directional derivative of the line search
# direction at a candidate step size is equal to or greater than a specified
# fraction of the slope of the line at the starting point of the search, while
# having the same direction. This condition is used to make the step size lie
# close to a stationary point. Unlike the normal curvature condition, a step
# size where the sign of the gradient changed (e.g. the minimum had been
# skipped) would not be acceptable for the strong curvature condition.
#
# In combination with the sufficient decrease condition \code{\link{armijo_ok}}
# these conditions make up the Strong Wolfe conditions.
#
# @param d0 Directional derivative at starting point.
# @param da Directrional derivative at step alpha.
# @param c2 Curvature condition constant. Should take a value between \code{c1}
# (the constant used in the sufficient decrease condition check) and 1.
# @return \code{TRUE} if the strong curvature condition is met.
strong_curvature_ok <- function(d0, da, c2) {
abs(da) <= -c2 * d0
}
# Strong Curvature Condition
#
# Line search test.
#
# Ensures that the value of the directional derivative of the line search
# direction at a candidate step size is equal to or greater than a specified
# fraction of the slope of the line at the starting point of the search, while
# having the same direction. This condition is used to make the step size lie
# close to a stationary point. Unlike the normal curvature condition, a step
# size where the sign of the gradient changed (e.g. the minimum had been
# skipped) would not be acceptable for the strong curvature condition.
#
# In combination with the sufficient decrease condition \code{\link{armijo_ok}}
# these conditions make up the Strong Wolfe conditions.
#
# @param step0 Line search values at starting point.
# @param step Line search value at a step along the line.
# @param c2 Curvature condition constant. Should take a value between \code{c1}
# (the constant used in the sufficient decrease condition check) and 1.
# @return \code{TRUE} if the curvature condition is met.
strong_curvature_ok_step <- function(step0, step, c2) {
strong_curvature_ok(step0$d, step$d, c2)
}
# Are the Strong Wolfe Conditions Met?
#
# Step size check.
#
# Returns true if the Strong Wolfe conditions are met, consisting of the
# sufficient decrease conditions and the strong curvature condition.
#
# @param f0 Function value at starting point.
# @param d0 Directional derivative value at starting point.
# @param alpha Step length.
# @param fa Function value at alpha.
# @param da Directional derivative at alpha.
# @param c1 Constant used in sufficient decrease condition. Should take a value
# between 0 and 1.
# @param c2 Constant used in curvature condition. Should take a value between
# c1 and 1.
# @return TRUE if the Strong Wolfe condition is met by the step size.
strong_wolfe_ok <- function(f0, d0, alpha, fa, da, c1, c2) {
armijo_ok(f0, d0, alpha, fa, c1) &&
strong_curvature_ok(d0, da, c2)
}
# Are the Strong Wolfe Conditions Met for the Given Step?
#
# Line search test.
#
# Returns true if the candidate step size meets the Strong Wolfe conditions,
# consisting of the sufficient decrease conditions and the strong curvature
# condition.
#
# @param step0 Line search values at starting point of line search.
# @param step Line search value at candiate step size.
# @param c1 Constant used in sufficient decrease condition. Should take a value
# between 0 and 1.
# @param c2 Constant used in curvature condition. Should take a value between
# c1 and 1.
# @return TRUE if the Strong Wolfe condition is met by the step size.
strong_wolfe_ok_step <- function(step0, step, c1, c2) {
armijo_ok_step(step0, step, c1) && strong_curvature_ok_step(step0, step, c2)
}
# Are the Wolfe Conditions Met for the Given Step?
wolfe_ok_step <- function(step0, step, c1, c2) {
armijo_ok_step(step0, step, c1) && curvature_ok_step(step0, step, c2)
}
# Create a Wolfe Conditions check function allowing for either approximate or
# exact Armijo condition and weak or strong curvature condition
make_wolfe_ok_step_fn <- function(approx_armijo = FALSE,
strong_curvature = TRUE, eps = 1e-6) {
approx_armijo_ok_fn <- make_approx_armijo_ok_step(eps)
function(step0, step, c1, c2) {
if (approx_armijo) {
ok <- approx_armijo_ok_fn(step0, step, c1)
}
else {
ok <- armijo_ok_step(step0, step, c1)
}
if (ok) {
if (strong_curvature) {
ok <- strong_curvature_ok_step(step0, step, c2)
}
else {
ok <- curvature_ok_step(step0, step, c2)
}
}
ok
}
}
# Create Approximation Armijo check function:
# Checks approximate Armijo conditions only if exact Armijo check fails and
# if function value is sufficiently close to step0 value according to eps
make_approx_armijo_ok_step <- function(eps) {
function(step0, step, c1) {
eps_k <- eps * abs(step0$f)
if (armijo_ok_step(step0, step, c1)) {
return(TRUE)
}
(step$f <= step0$f + eps_k) && approx_armijo_ok_step(step0, step, c1)
}
}
# Is the approximate Armijo condition met?
#
# Suggested by Hager and Zhang (2005) as part of the Approximate Wolfe
# Conditions. The second of these conditions is identical to the (weak)
# curvature condition.
#
# The first condition applies the armijo condition to a quadratic approximation
# to the function, which allows for higher precision in finding the minimizer.
#
# It is suggested that the approximate version of the Armijo condition be used
# when fa is 'close' to f0, e.g. fa <= f0 + eps * |f0| where eps = 1e-6
#
# c1 should be < 0.5
approx_armijo_ok <- function(d0, da, c1) {
(2 * c1 - 1) * d0 >= da
}
# Is the approximate Armijo condition met for the given step?
approx_armijo_ok_step <- function(step0, step, c1) {
approx_armijo_ok(step0$d, step$d, c1)
}
# Bracket -----------------------------------------------------------------
bracket_is_finite <- function(bracket) {
all(is.finite(c(bracket_props(bracket, c("f", "d")))))
}
# extracts all the properties (e.g. 'f', 'df', 'd' or 'alpha') from all members
# of the bracket. Works if there are one or two bracket members. Can get
# multiple properties at once, by providing an array of the properties,
# e.g. bracket_props(bracket, c('f', 'd'))
bracket_props <- function(bracket, prop) {
unlist(sapply(bracket, `[`, prop))
}
bracket_width <- function(bracket) {
bracket_range <- bracket_props(bracket, "alpha")
abs(bracket_range[2] - bracket_range[1])
}
bracket_min_alpha <- function(bracket) {
min(bracket_props(bracket, "alpha"))
}
best_bracket_step <- function(bracket) {
LOpos <- which.min(bracket_props(bracket, "f"))
bracket[[LOpos]]
}
is_in_bracket <- function(bracket, alpha) {
is_in_range(alpha, bracket[[1]]$alpha, bracket[[2]]$alpha)
}
format_bracket <- function(bracket) {
paste0(
"[", formatC(bracket[[1]]$alpha), ", ", formatC(bracket[[2]]$alpha),
"]"
)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.