R/hager_zhang.R

Defines functions bisect hzupdate secant2 secant secant0 satisfies_wolfe HagerZhangControl

Documented in HagerZhangControl

#' Control settings for Hager-Zhang line search
#'
#' TODO after finalizing line search
#'
#' @export
HagerZhangControl <- function(delta = 0.1, sigma = 0.9, alphamax = Inf, rho = 5.0, epsilon = 1e-6, gamma = 0.66, linesearchmax = 50, psi3 = 0.1, c = 1.0, verbose = FALSE)
  list(delta=delta, sigma=sigma, alphamax=alphamax, rho=rho, epsilon=epsilon, gamma=gamma, linesearchmax = linesearchmax, psi3 = psi3, c = c, verbose = verbose)


# Line search algorithm from Hager & Zhang ???
# Adapted from LineSearch.jl
setRefClass("HagerZhangStorage", fields=list(alphas="numeric", values="numeric", slopes="numeric"))

HagerZhang <- function (dphifn, phi_0, dphi_0, control = HagerZhangControl())
{
  nextfloat <- function(x)
    x + .Machine$double.eps

  for (var in names(control)) assign(var, control[[var]])

  if (!(is.finite(phi_0) && is.finite(dphi_0)))
    stop("Value and slope at step length = 0 must be finite.")
  if (dphi_0 >= .Machine$double.eps * abs(phi_0))
    stop("Search direction is not a direction of descent.")
  else if (dphi_0 >= 0)
    return(0) #return(list(0, phi_0))

  # Prevent values of x_new = x+alpha*s that are likely to make
  # phi(x_new) infinite
  iterfinitemax = ceiling(-log(.Machine$double.eps, 2))
  st <- new("HagerZhangStorage", alphas = c(0), values = c(phi_0), slopes = c(dphi_0))
  if (verbose)
    cat("New linesearch\n")

  phi_lim = phi_0 + epsilon * abs(phi_0)
  stopifnot(c >= 0)
  c <= .Machine$double.eps && return(0) #return(list(0, phi_0))
  stopifnot(is.finite(c) && c <= alphamax)
  fit = dphifn(c)
  phi_c = fit$objective
  dphi_c = fit$gradient
  iterfinite = 1
  while(!(is.finite(phi_c) && is.finite(dphi_c)) && iterfinite < iterfinitemax)
  {
    #mayterminate = FALSE #bc c is set to 1 for now
    iterfinite = iterfinite + 1
    c = c * psi3
    fit = dphifn(c)
    phi_c = fit$objective
    dphi_c = fit$gradient
  }
  if (!(is.finite(phi_c) && is.finite(dphi_c)))
  {
    warning("Failed to achieve finite new evaluation point, using alpha=0")
    return(0) #return(list(0, phi0))
  }
  st$alphas <- c(st$alphas, c)
  st$values <- c(st$values, phi_c)
  st$slopes <- c(st$slopes, dphi_c)

  #NSP: use initial c = 1 for Newton method. Uncommenting below slows things down
  # If c was generated by quadratic interpolation, check whether it
  # satisfies the Wolfe conditions
  #if (mayterminate && satisfies_wolfe(c, phi_c, dphi_c, phi_0, dphi_0, phi_lim, delta, sigma))
  #if (satisfies_wolfe(c, phi_c, dphi_c, phi_0, dphi_0, phi_lim, delta, sigma))
  #{
  #  if(verbose)
  #    cat("Wolfe condition satisfied on point alpha = ", c, "\n")
  #  return(list(c, phi_c)) # phi_c
  #}

  # Initial bracketing step (HZ, stages B0-B3)
  isbracketed = FALSE
  ia = 1
  ib = 2
  stopifnot(length(st$alphas) == 2)
  iter = 1
  cold = -1
  while(!isbracketed && iter < linesearchmax)
  {
    if(verbose)
      cat("bracketing: ia = ", ia, ", ib = ", ib, ", c = ", c, ", phi_c = ", phi_c, ", dphi_c = ", dphi_c, "\n")
    if(dphi_c >= 0)
    {
      # We've reached the upward slope, so we have b; examine
      # previous values to find a
      ib = length(st$alphas)
      for (i in (ib - 1):1)
      {
        if (st$values[i] <= phi_lim)
        {
          ia = i
          break
        }
      }
      isbracketed = TRUE
    }
    else if (st$values[length(st$values)] > phi_lim)
    {
      # The value is higher, but the slope is downward, so we must
      # have crested over the peak. Use bisection.
      ib = length(st$alphas)
      ia = 1
      if (c !=  st$alphas[ib] || st$slopes[ib] >= 0)
        stop("c = ", c)
      #bis = bisect(dphifn, alphas, values, slopes, ia, ib, phi_lim, verbose)
      bis = bisect(dphifn, st, ia, ib, phi_lim, verbose)
      ia  = bis[[1]]
      ib  = bis[[2]]
      isbracketed = TRUE
    }
    else
    {
      # We'll still going downhill, expand the interval and try again.
      # Reaching this branch means that dphi_c < 0 and phi_c <= phi_0 + e_k 
      # So cold = c has a lower objective than phi_0 up to epsilon. 
      # This makes it a viable step to return if bracketing fails.

      # Bracketing can fail if no cold < c <= alphamax can be found with finite phi_c and dphi_c. 
      # Going back to the loop with c = cold will only result in infinite cycling.
      # So returning (cold, phi_cold) and exiting the line search is the best move.
      cold = c
      phi_cold = phi_c
      if (nextfloat(cold) >= alphamax)
      {
        return(cold) #list(c(cold, phi_cold))
      }
      c = c * rho
      if (c > alphamax)
      {
        c = alphamax
        if (verbose)
          cat("bracket: exceeding alphamax, using c = alphamax = ", alphamax, ", cold = ", cold, "\n")
      }
      fit = dphifn(c)
      phi_c = fit$objective
      dphi_c = fit$gradient
      iterfinite = 1
      while (!(is.finite(phi_c) && is.finite(dphi_c)) && c > nextfloat(cold) && iterfinite < iterfinitemax)
      {
        alphamax = c # shrinks alphamax, assumes that steps >= c can never have finite phi_c and dphi_c
        iterfinite = iterfinite + 1
        if (verbose)
          cat("bracket: non-finite value, bisection\n")
        c = (cold + c) / 2
        fit = dphifn(c)
        phi_c = fit$objective
        dphi_c = fit$gradient
      }
      if(!(is.finite(phi_c) && is.finite(dphi_c)))
      {
        if (verbose)
        {
          cat("Warning: failed to expand interval to bracket with finite values. If this happens frequently, check your function and gradient.\n")
          cat("c = ", c, ", alphamax = ", alphamax, ", phi_c = ", phi_c, ", dphi_c = ", dphi_c, "\n")
        }
        return(cold) #return(list(cold, phi_cold))
      }
      st$alphas = c(st$alphas, c)
      st$values = c(st$values, phi_c)
      st$slopes = c(st$slopes, dphi_c)
    }
    iter = iter + 1
  }
  while (iter < linesearchmax)
  {
    a = st$alphas[ia]
    b = st$alphas[ib]
    stopifnot(b > a)
    if (verbose)
      cat("linesearch: ia = ", ia, ", ib = ", ib, ", a = ", a, ", b = ", b, ", phi(a) = ", st$values[ia], ", phi(b) = ", st$values[ib], "\n")
    if (b - a <= .Machine$double.eps)
    {
      return(a) #return(c(a, st$values[ia])) 
    }
    #sec = secant2(dphifn, alphas, values, slopes, ia, ib, phi_lim, delta, sigma, verbose)
    sec = secant2(dphifn, st, ia, ib, phi_lim, delta, sigma, verbose)
    iswolfe = sec[[1]]
    iA = sec[[2]]
    iB = sec[[3]]
    if(iswolfe)
    {
      return (st$alphas[iA]) #return(list(st$alphas[iA], st$values[iA]))
    }
    A = st$alphas[iA]
    B = st$alphas[iB]
    stopifnot(B > A)
    if (B - A < gamma * (b - a))
    {
      if (verbose)
        cat("Linesearch: secant succeeded\n")
      if (nextfloat(st$values[ia]) >= st$values[ib] && nextfloat(st$values[iA]) >= st$values[iB])
      {
        # It's so flat, secant didn't do anything useful, time to quit
        if (verbose)
          cat("Linesearch: secant suggests it's flat\n")
        return(A) #return(list(A, st$values[iA]))
      }
      ia = iA
      ib = iB
    }
    else
    {
      # Secant is converging too slowly, use bisection
      if(verbose)
        cat("Linesearch: secant failed, using bisection\n")
      c = (A + B) / 2

      fit = dphifn(c)
      phi_c = fit$objective
      dphi_c = fit$gradient
#      while (!(is.finite(phi_c) && is.finite(dphi_c))) #NSP: sometimes midpoint is not OK even if either end of interval is
#      {
#        c = (A + c) / 2
#        fit = dphifn(c)
#        phi_c = fit$objective
#        dphi_c = fit$gradient
#      } #end NSP
      stopifnot(is.finite(phi_c) && is.finite(dphi_c))
      st$alphas = c(st$alphas, c)
      st$values = c(st$values, phi_c)
      st$slopes = c(st$slopes, dphi_c)

      upd = hzupdate(dphifn, st, iA, iB, length(st$alphas), phi_lim, verbose) 
      ia = upd[[1]]
      ib = upd[[2]]
    }
    iter = iter + 1
  }
  stop("Linesearch failed to converge, reached maximum iterations ", st$alphas[ia])
}

# Check Wolfe & approximate Wolfe
satisfies_wolfe <- function(c, phi_c, dphi_c, phi_0, dphi_0, phi_lim, delta, sigma)
{
    wolfe1 = delta * dphi_0 >= (phi_c - phi_0) / c && dphi_c >= sigma * dphi_0
    wolfe2 = (2 * delta - 1) * dphi_0 >= dphi_c && dphi_c >= sigma * dphi_0 && phi_c <= phi_lim
    return (wolfe1 || wolfe2)
}

# HZ, stages S1-S4
secant0 <- function(a, b, dphi_a, dphi_b)
{
  return ((a * dphi_b - b * dphi_a) / (dphi_b - dphi_a))
}

secant <- function(alphas, values, slopes, ia, ib) #erg, remove this and insert indexing
{
  return (secant0(alphas[ia], alphas[ib], slopes[ia], slopes[ib]))
}

# phi
#secant2 <- function(dphifn, alphas, values, slopes, ia, ib, phi_lim, delta, sigma, verbose)
secant2 <- function(dphifn, st, ia, ib, phi_lim, delta, sigma, verbose)
{
    phi_0 = st$values[1]
    dphi_0 = st$slopes[1]
    a = st$alphas[ia]
    b = st$alphas[ib]
    dphi_a = st$slopes[ia]
    dphi_b = st$slopes[ib]
    if(!(dphi_a < 0 && dphi_b >= 0))
        stop("Search direction is not a direction of descent; this error may indicate that user-provided derivatives are inaccurate.\n(dphi_a = ", dphi_a, "dphi_b = ", dphi_b)
    c = secant0(a, b, dphi_a, dphi_b)
    if (verbose)
      cat("secant2: a = ", a, ", b = ", b, ", c = ", c, "\n")
    stopifnot(is.finite(c))
    fit = dphifn(c)
    phi_c = fit$objective
    dphi_c = fit$gradient
    stopifnot(is.finite(phi_c) && is.finite(dphi_c))

    st$alphas = c(st$alphas, c)
    st$values = c(st$values, phi_c)
    st$slopes = c(st$slopes, dphi_c)

    ic = length(st$alphas)
    if (satisfies_wolfe(c, phi_c, dphi_c, phi_0, dphi_0, phi_lim, delta, sigma))
    {
      if (verbose)
        cat("secant2: first c satisfied Wolfe conditions\n")
      return(list(TRUE, ic, ic))
    }

    upd = hzupdate(dphifn, st, ia, ib, ic, phi_lim, verbose) 
    iA = upd[[1]]
    iB = upd[[2]]
    if (verbose)
      cat("secant2: iA = ", iA, ", iB = ", iB, ", ic = ", ic, "\n")
    a = st$alphas[iA]
    b = st$alphas[iB]
    doupdate = FALSE
    if (iB == ic)
    {
      # we updated b, make sure we also update a
      c = secant(st$alphas, st$values, st$slopes, ib, iB)
    }
    else if (iA == ic)
    {
      # we updated a, do it for b too
      c = secant(st$alphas, st$values, st$slopes, ia, iA)
    }
    if ((iA == ic || iB == ic) && a <= c && c <= b)
    {
        if (verbose)
            cat("secant2: second c = ", c, "\n")
        fit = dphifn(c)
        phi_c = fit$objective
        dphi_c = fit$gradient
        stopifnot(is.finite(phi_c) && is.finite(dphi_c))

        st$alphas = c(st$alphas, c)
        st$values = c(st$values, phi_c)
        st$slopes = c(st$slopes, dphi_c)

        ic = length(st$alphas)
        # Check arguments here
        if (satisfies_wolfe(c, phi_c, dphi_c, phi_0, dphi_0, phi_lim, delta, sigma))
        {
            if (verbose)
                cat("secant2: second c satisfied Wolfe conditions\n")
            return (list(TRUE, ic, ic))
        }
        upd = hzupdate(dphifn, st, iA, iB, ic, phi_lim, verbose) 
        iA = upd[[1]]
        iB = upd[[2]]
    }
    if (verbose)
        cat("secant2 output: a = ", st$alphas[iA], ", b = ", st$alphas[iB], "\n")
    return(list(FALSE, iA, iB))
}

# HZ, stages U0-U3
# Given a third point, pick the best two that retain the bracket
# around the minimum (as defined by HZ, eq. 29)
# b will be the upper bound, and a the lower bound
#hzupdate <- function(dphifn, alphas, values, slopes, ia, ib, ic, phi_lim, verbose)
hzupdate <- function(dphifn, st, ia, ib, ic, phi_lim, verbose)
{
    a = st$alphas[ia]
    b = st$alphas[ib]
    # Debugging (HZ, eq. 4.4):
    stopifnot(st$slopes[ia] < 0)
    stopifnot(st$values[ia] <= phi_lim)
    stopifnot(st$slopes[ib] >= 0)
    stopifnot(b > a)
    c = st$alphas[ic]
    phi_c = st$values[ic]
    dphi_c = st$slopes[ic]
    if (verbose)
        cat("update: ia = ", ia, ", a = ", a, ", ib = ", ib, ", b = ", b, ", c = ", c, ", phi_c = ", phi_c, ", dphi_c = ", dphi_c)
    if (c < a || c > b)
        return (list(ia, ib)) #, 0, 0  # it's out of the bracketing interval
    if (dphi_c >= 0)
        return (list(ia, ic)) #, 0, 0  # replace b with a closer point
    # We know dphi_c < 0. However, phi may not be monotonic between a
    # and c, so check that the value is also smaller than phi_0.  (It's
    # more dangerous to replace a than b, since we're leaving the
    # secure environment of alpha=0; that's why we didn't check this
    # above.)
    if (phi_c <= phi_lim)
        return (list(ic, ib))#, 0, 0  # replace a
    # phi_c is bigger than phi_0, which implies that the minimum
    # lies between a and c. Find it via bisection.
    #return (bisect(dphifn, alphas, values, slopes, ia, ic, phi_lim, verbose))
    return (bisect(dphifn, st, ia, ic, phi_lim, verbose))
}

# HZ, stage U3 (with theta=0.5)
#bisect <- function(dphifn, alphas, values, slopes, ia, ib, phi_lim, verbose)
bisect <- function(dphifn, st, ia, ib, phi_lim, verbose)
{
    gphi = NaN
    a = st$alphas[ia]
    b = st$alphas[ib]
    # Debugging (HZ, conditions shown following U3)
    stopifnot(st$slopes[ia] < 0)
    stopifnot(st$values[ia] <= phi_lim)
    stopifnot(st$slopes[ib] < 0)       # otherwise we wouldn't be here
    stopifnot(st$values[ib] > phi_lim)
    stopifnot(b > a)
    while (b - a > .Machine$double.eps)
    {
        if (verbose)
            cat("bisect: a = ", a, ", b = ", b, ", b - a = ", b - a, "\n")
        d = (a + b) / 2.
        fit = dphifn(d)
        phi_d = fit$objective
        gphi = fit$gradient
#        if (!(is.finite(phi_d) && is.finite(gphi))) #NSP: sometimes we have that function is well-behaved at endpoints but not intermediate
#        {
#          b = d
#          next
#        } #end NSP
        stopifnot(is.finite(phi_d) && is.finite(gphi))

        st$alphas = c(st$alphas, d)
        st$values = c(st$values, phi_d)
        st$slopes = c(st$slopes, gphi)

        id = length(st$alphas)
        if (gphi >= 0)
        {
            return (list(ia, id)) # replace b, return
        }
        if (phi_d <= phi_lim)
        {
            a = d # replace a, but keep bisecting until dphi_b > 0
            ia = id
        }
        else
        {
            b = d
            ib = id
        }
    }
    return (list(ia, ib))
}
nspope/radish documentation built on July 12, 2020, 11:50 a.m.