# R/armijo.R In rje: Miscellaneous Useful Functions for Statistics

#' Generic functions to aid finding local minima given search direction
#'
#' Allows use of an Armijo rule or coarse line search as part of minimisation
#' (or maximisation) of a differentiable function of multiple arguments (via
#' gradient descent or similar).  Repeated application of one of these rules
#' should (hopefully) lead to a local minimum.
#'
#' \code{coarseLine} performs a stepwise search and tries to find the integer
#' \eqn{k} minimising \eqn{f(x_k)} where \deqn{x_k = x + \beta^k dx.} Note
#' \eqn{k} may be negative.  This is genearlly quicker and dirtier
#' than the Armijo rule.
#'
#' \code{armijo} implements an Armijo rule for moving, which is to say that
#' \deqn{f(x_k) - f(x) < - \sigma \beta^k dx \cdot \nabla_x f.}{f(x_k) - f(x) <
#' - \sigma \beta^k dx . grad.} This has better convergence guarantees than a
#' simple line search, but may be slower in practice.  See Bertsekas (1999) for
#' theory underlying the Armijo rule.
#'
#' Each of these rules should be applied repeatedly to achieve convergence (see
#' example below).
#'
#'
#' @aliases armijo coarseLine
#' @param fun a function whose first argument is a numeric vector
#' @param x a starting value to be passed to \code{fun}
#' @param dx numeric vector containing feasible direction for search; defaults
#' @param beta numeric value (greater than 1) giving factor by which to adjust
#' step size
#' @param sigma numeric value (less than 1) giving steepness criterion for move
#' @param grad numeric gradient of \code{f} at \code{x} (will be estimated if
#' not provided)
#' @param maximise logical: if set to \code{TRUE} search is for a maximum
#' rather than a minimum.
#' @param searchup logical: if set to \code{TRUE} method will try to find
#' largest move satisfying Armijo criterion, rather than just accepting the
#' first it sees
#' @param \dots other arguments to be passed to \code{fun}
#' @return A list comprising \item{best}{the value of the function at the final
#' point of evaluation} \item{adj}{the constant in the step, i.e.
#' \eqn{\beta^n}} \item{move}{the final move; i.e. \eqn{\beta^n dx}}
#' \item{code}{an integer indicating the result of the function; 0 = returned
#' OK, 1 = very small move suggested, may be at minimum already, 2 = failed to
#' find minimum: function evaluated to \code{NA} or was always larger than
#' \eqn{f(x)} (direction might be infeasible), 3 = failed to find minimum:
#' stepsize became too small or large without satisfying rule.}
#' @author Robin Evans
#' @references Bertsekas, D.P. \emph{Nonlinear programming}, 2nd Edition.
#' Athena, 1999.
#' @keywords optimize
#' @examples
#'
#' # minimisation of simple function of three variables
#' x = c(0,-1,4)
#' f = function(x) ((x[1]-3)^2 + sin(x[2])^2 + exp(x[3]) - x[3])
#'
#' tol = .Machine$double.eps #' mv = 1 #' #' while (mv > tol) { #' # or replace with coarseLine() #' out = armijo(f, x, sigma=0.1) #' x = out$x
#'   mv = sum(out$move^2) #' } #' #' # correct solution is c(3,0,0) (or c(3,k*pi,0) for any integer k) #' x #' #' #' @export armijo armijo <- function (fun, x, dx, beta = 3, sigma = 0.5, grad, maximise = FALSE, searchup = TRUE, adj.start = 1, ...) { sign = 1 - 2 * maximise if (beta <= 1) stop("adjustment factor beta must be larger than 1") adj = adj.start tol = .Machine$double.eps
cur = sign * fun(x, ...)
nok = TRUE
neg = TRUE
for (i in seq_along(x)) {
x2 = x
x2[i] = x[i] + 1e-08
grad[i] = 1e+08 * (sign * fun(x2, ...) - cur)
}
}
if (missing(dx))
s = -sigma * sum(dx * grad)
moddx = sqrt(sum(dx^2))
if (s < 0) {
return(list(x = x, adj = 0, move = dx * 0, best = sign *
cur, code = 2))
}
while (nok) {
try = sign * fun(x + dx * adj, ...)
if (!is.na(try) && (cur - try) > adj * s) {
nok = FALSE
}
else {
searchup = FALSE
}
if (neg && !is.na(try) && (cur - try) > -adj * s)
neg = FALSE
if (adj * moddx < tol)
cur, code = 2 * neg))
}
if (searchup) {
ok = TRUE
while (ok) {
try2 = sign * fun(x + dx * adj, ...)
if (is.na(try2)) {
break
}
if (try - try2 < .Machine$double.eps) { adj = adj/beta break } else { try = try2 } if (adj > 1/tol) { if (sqrt(sum(adj * dx^2)) < 10 * tol) return(list(x = x + dx * adj, adj = adj, move = dx * adj, best = sign * try, code = 3)) else break } } } return(list(x = x + dx * adj, adj = adj, move = dx * adj, best = sign * try, code = 1)) } #' @describeIn armijo Coarse line search #' @export coarseLine coarseLine <- function (fun, x, dx, beta = 3, maximise = FALSE, ...) { sign = 1 - 2 * maximise if (missing(dx)) { dx = numeric(length(x)) for (i in seq_along(x)) { x2 = x x2[i] = x[i] + 1e-08 dx[i] = -sign * 1e+08 * (fun(x2, ...) - fun(x, ...)) } } if (beta <= 1) stop("adjustment factor beta must be larger than 1") adj = 1 tol = .Machine$double.eps
best = sign * fun(x, ...)
nok = TRUE
while (nok) {
best = sign * fun(x + adj * dx, ...)
if (!is.na(best))
nok = FALSE
stop("Failed to find valid move")
}
try = sign * fun(x + dx * adj * beta, ...)
if (is.na(try) || try >= best)
down = TRUE
else down = FALSE
if (down)
fact = 1/beta
else fact = beta
nok = TRUE
while (nok) {
try = sign * fun(x + dx * adj * fact, ...)
if (is.na(try) || try >= best) {
nok = FALSE
adj, best = sign * best, code = 2))
}
else {
best = try