# Solution to the (unconstrained) trust-region sub-problem
#
# Compute the step of the trust-region sub-problem for the current value of
# `lambda`.
#
# @param Q numeric vector of dot products between the eigenvectors and the
#   gradient.
# @param X numeric matrix of eigenvectors.
# @param L numeric vector of eigenvalues.
# @param lambda lambda parameter of the trust-region method.
#
# @return Numeric vector with the candidate solution of the trust-region
#   sub-problem for the given `lambda`.
#
# @references
# Jorge Nocedal and Stephen J Wright. **Numerical optimization**. Springer,
# New York, NY, USA, second edition, 2006. ISBN 978-0-387-30303-1.
ntrm_calc_p <- function(Q, X, L, lambda) {
  # Equation (4.38) on page 84 of Nocedal and Wright (2006)
-colSums((t(X) * Q) / (L + lambda))
}
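# Illustration only (not part of the original source): on a small symmetric
# matrix, `ntrm_calc_p` should match the direct solve of the shifted system
# (H + lambda * I) p = -G, since equation (4.38) is exactly that solve
# written in the eigenbasis of H. All `_demo` names are hypothetical.
H_demo <- matrix(c(4, 1, 1, 3), 2, 2)
G_demo <- c(1, -2)
E_demo <- eigen(H_demo, symmetric = TRUE)
Q_demo <- colSums(E_demo$vectors * G_demo)
ntrm_calc_p(Q_demo, E_demo$vectors, E_demo$values, 0.5)
-solve(H_demo + 0.5 * diag(2), G_demo) # same vector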
# Safeguard parameter computation
#
# Update the current value of `lambda` so that `H + lambda * I` is more
# likely to be positive definite, following the safeguarding rules of
# Moré and Sorensen (1983).
#
# @param lambda lambda parameter of the trust-region method.
# @param G gradient vector.
# @param H Hessian matrix.
# @param delta radius of the trust region.
#
# @return Updated scalar value `lambda`.
#
# @references
# Jorge J Moré and D C Sorensen. Computing a Trust Region Step.
# **SIAM Journal on Scientific and Statistical Computing**, 4(3):553-572, 1983.
# doi: 10.1137/0904038.
ntrm_safeguard <- function(lambda, G, H, delta) {
# Equations are on page 560 of Moré and Sorensen (1983)
lambda_S <- max(-diag(H))
  # Euclidean norm of the gradient, as in the bounds of Moré and Sorensen
  G_norm <- sqrt(sum(G^2))
  # matrix 1-norm (maximum absolute column sum), an upper bound of the
  # spectral norm for the symmetric matrix H
H_norm <- max(colSums(abs(H)))
lambda_L <- max(0, lambda_S, G_norm / delta - H_norm)
lambda_U <- G_norm / delta + H_norm
# page 558
lambda <- min(max(lambda, lambda_L), lambda_U)
if (lambda <= lambda_S) {
lambda <- max(lambda_U / 1000, sqrt(lambda_L * lambda_U))
}
lambda
}
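# Illustration only (not part of the original source): with an indefinite
# Hessian, the safeguard lifts a too-small `lambda` into the interval
# [lambda_L, lambda_U] of Moré and Sorensen (1983), here above
# max(-diag(H)) = 2, so that H + lambda * I becomes positive definite.
H_demo <- matrix(c(1, 0, 0, -2), 2, 2)
G_demo <- c(3, 1)
lambda_demo <- ntrm_safeguard(0, G_demo, H_demo, delta = 1)
lambda_demo # greater than 2
min(eigen(H_demo + lambda_demo * diag(2), symmetric = TRUE)$values) # positive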
# Iterative solution to the (unconstrained) trust-region sub-problem
#
# Compute the best step of the trust-region sub-problem.
#
# @param G gradient vector.
# @param H Hessian matrix.
# @param delta radius of the trust region.
#
# @return List with the best step `p` of the trust-region sub-problem and the
#   value `m` of the quadratic model at `p`.
#
# @references
# Jorge Nocedal and Stephen J Wright. **Numerical optimization**. Springer,
# New York, NY, USA, second edition, 2006. ISBN 978-0-387-30303-1.
ntrm_solve_tr_subproblem <- function(G, H, delta) {
eps <- sqrt(.Machine$double.eps)
delta_2 <- delta^2
k <- length(G)
H_eig <- eigen(H, symmetric = TRUE)
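  # `eigen` returns the eigenvalues in decreasing order, so the k-th one is
  # the smallest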
H_ev_min <- H_eig$values[k]
# dot products between eigenvectors and gradient
Q <- colSums(H_eig$vectors * G)
p <- rep(0, k)
# Unconstrained solution
if (H_ev_min > eps) {
p <- ntrm_calc_p(Q, H_eig$vectors, H_eig$values, 0)
}
if ((H_ev_min < eps) || (sum(p^2) > delta_2)) {
# either hard case or we went outside the trust region
hard_case <- FALSE
if (abs(Q[k]) < eps) {
      # this might be the hard case: the gradient is (numerically) orthogonal
      # to the eigenvector associated with the smallest eigenvalue.
#
# lambda is taken to be -H_ev_min and we only need to find a multiple of
# an orthogonal eigenvector that lands on the boundary
      # Equation (4.45) on page 88 of Nocedal and Wright (2006)
j <- 1:max(which(H_eig$values > H_ev_min))
p <- ntrm_calc_p(
Q[j], H_eig$vectors[, j], H_eig$values[j], -H_ev_min
)
p_norm_2 <- sum(p^2)
if (p_norm_2 <= delta_2) {
hard_case <- TRUE
        tau <- sqrt(delta_2 - p_norm_2)
        # equation (4.45): keep the orthogonal component computed above
        # (which already carries the minus sign of equation (4.38)) and add
        # the multiple of the eigenvector that lands on the boundary
        p <- p + tau * H_eig$vectors[, k]
}
}
if (!hard_case) {
      # not the hard case: find lambda with the root-finding technique of
      # Algorithm 4.3 on page 87 of Nocedal and Wright (2006)
B <- H
      # lambda must stay above -H_ev_min so that B = H + lambda * I can be
      # positive definite
      lambda_lb <- -H_ev_min + 1.0e-12
lambda <- ntrm_safeguard(lambda_lb, G, H, delta)
for (i in seq_len(10)) {
diag(B) <- diag(H) + lambda
R <- tryCatch(chol(B), error = function(e) NULL)
if (is.null(R)) {
lambda <- 10 * lambda
next
}
p <- -solve(R, solve(t(R), G))
q <- solve(t(R), p)
p_norm_2 <- sum(p^2)
q_norm_2 <- sum(q^2)
lambda_old <- lambda
lambda <- lambda +
(p_norm_2 / q_norm_2) *
(sqrt(p_norm_2) - delta) /
delta
if (lambda < lambda_lb) {
lambda <- (lambda_lb + lambda_old) / 2
}
if (abs(lambda - lambda_old) < eps) {
break
}
}
}
}
  # Equation (4.2) on page 68 of Nocedal and Wright (2006)
  # the constant term f is dropped because it does not depend on p and it
  # cancels when computing m(0) - m(p)
m <- sum(G * p) + 0.5 * sum(p * (H %*% p))
list(
p = p,
m = m
)
}
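# Illustration only (not part of the original source): when H is indefinite
# the quadratic model has no unconstrained minimiser, so the solution lies on
# the boundary of the trust region, with a negative model value.
H_demo <- matrix(c(2, 0, 0, -1), 2, 2)
res_demo <- ntrm_solve_tr_subproblem(c(1, 1), H_demo, delta = 1)
sqrt(sum(res_demo$p^2)) # approximately 1, i.e. on the boundary
res_demo$m # negative: the model predicts a decrease
# Hard case: this gradient is orthogonal to the eigenvector of the smallest
# eigenvalue, so the step comes from equation (4.45) instead of root finding.
res_hard <- ntrm_solve_tr_subproblem(c(1, 0), H_demo, delta = 2)
sqrt(sum(res_hard$p^2)) # exactly 2, i.e. on the boundary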
# Newton with Trust Region Method
#
# Find the minimum of a function using Newton's method combined with a
# trust-region strategy.
#
# @details
# The code is adapted from the Julia implementation of
# [Optim.jl](https://github.com/JuliaNLSolvers/Optim.jl). See
# 'src/multivariate/solvers/second_order/newton_trust_region.jl'
# file for the original source code.
#
# @param fn function handle to evaluate the function to minimize.
# @param gh function handle to evaluate both the gradient and Hessian of the
# function to minimize.
# @param init numeric vector of starting values.
# @param max_iter integer value for the maximum number of iterations.
# @param update_fn function handle to update a subset of the parameters given
#   the current optimum; it is invoked every 10 iterations.
#
# @return A list containing the value `optimum` for which `fn(optimum)` is
# minimal, the value `minimum` equal to `fn(optimum)`, a boolean value
# `converged` stating whether the algorithm converged, and the number of
# `iterations` performed.
#
# @references
# Jorge J Moré and D C Sorensen. Computing a Trust Region Step.
# **SIAM Journal on Scientific and Statistical Computing**, 4(3):553-572, 1983.
# doi: 10.1137/0904038.
#
# Jorge Nocedal and Stephen J Wright. **Numerical optimization**. Springer,
# New York, NY, USA, second edition, 2006. ISBN 978-0-387-30303-1.
#
# Patrick Kofod Mogensen and Asbjørn Nilsen Riseth.
# Optim: A mathematical optimization package for Julia.
# **Journal of Open Source Software**, 3(24):615, 2018.
# doi: 10.21105/joss.00615.
ntrm <- function(fn, gh, init, max_iter, update_fn = NULL) {
eps <- sqrt(.Machine$double.eps)
converged <- FALSE
delta <- 1
cur_optimum <- init
cur_minimum <- fn(cur_optimum)
if (max_iter == 0) {
return(
list(
optimum = cur_optimum,
minimum = cur_minimum,
converged = ntrm_max_abs(gh(cur_optimum)$G) <= eps,
iterations = 0
)
)
}
f_converged <- 0
x_converged <- FALSE
g_converged <- FALSE
for (i in seq_len(max_iter)) {
if ((i %% 10 == 0) && !is.null(update_fn)) {
cur_optimum <- update_fn(cur_optimum)
cur_minimum <- fn(cur_optimum)
}
gradient_hessian <- gh(cur_optimum)
    g_inf_norm <- ntrm_max_abs(gradient_hessian$G)
    g_converged <- g_inf_norm <= eps
if (!g_converged && any(is.infinite(gradient_hessian$H))) {
      # the Hessian overflowed: when the model is close to
      # non-identifiability the gradient might be close to zero, yet not
      # small enough for the previous check to succeed
break
}
    # we use a counter for f_converged because the objective function might
    # be very flat around the optimum: once we hit a flat region, we allow up
    # to 10 more nearly flat steps before declaring convergence
if (x_converged || g_converged || (f_converged > 10)) {
# this solution is good enough
converged <- TRUE
break
}
# perform a correction in case the Hessian is not positive definite
B <- ntrm_correct_hessian(gradient_hessian$H)
result <- ntrm_solve_tr_subproblem(gradient_hessian$G, B, delta)
candidate <- cur_optimum + result$p
cur_value <- fn(candidate)
cur_diff <- cur_minimum - cur_value
    rho <- if (is.nan(cur_value)) {
      # the candidate step leads to an undefined function value: reject it
      # and shrink the trust region
      -1
    } else if (result$m > 0) {
      # This can happen if the trust region radius is too large and the
      # Hessian is not positive definite. We should shrink the trust region.
      -1
    } else if (abs(result$m) <= 1.0e-12) {
      # This should only happen when the step is very small, in which case
      # we should accept the step and assess convergence
      1
    } else {
      # ratio between the actual and the predicted reduction:
      # m(0) = f
      # m(p) = f + G' p + 0.5 p' H p
      # m(0) - m(p) = -(G' p + 0.5 p' H p) = -result$m
      -cur_diff / result$m
    }
if (rho < 0.25) {
delta <- delta / 4
} else if ((rho > 0.75) && (ntrm_norm(result$p) >= delta)) {
delta <- min(2 * delta, 100)
}
if (rho > 0.1) {
if ((abs(cur_value - cur_minimum) / abs(cur_value)) <= eps) {
f_converged <- f_converged + 1
}
      x_converged <-
        (max(abs(candidate - cur_optimum)) / max(abs(candidate))) <= eps
cur_optimum <- candidate
cur_minimum <- cur_value
}
}
# did it converge at the very last iteration?
if (i == max_iter) {
g_converged <- ntrm_max_abs(gh(cur_optimum)$G) <= eps
if (x_converged || g_converged || (f_converged > 10)) {
converged <- TRUE
}
}
list(
optimum = cur_optimum,
minimum = cur_minimum,
converged = converged,
iterations = i
)
}
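# Illustration only (not part of the original source): minimising the
# Rosenbrock function with analytic gradient and Hessian. This assumes the
# internal helpers used above (`ntrm_max_abs`, `ntrm_norm`,
# `ntrm_correct_hessian`) are available from elsewhere in the package.
fn_demo <- function(x) (1 - x[1])^2 + 100 * (x[2] - x[1]^2)^2
gh_demo <- function(x) {
  list(
    G = c(
      -2 * (1 - x[1]) - 400 * x[1] * (x[2] - x[1]^2),
      200 * (x[2] - x[1]^2)
    ),
    H = matrix(
      c(
        2 - 400 * x[2] + 1200 * x[1]^2, -400 * x[1],
        -400 * x[1], 200
      ),
      2, 2
    )
  )
}
res_demo <- ntrm(fn_demo, gh_demo, init = c(-1.2, 1), max_iter = 1000)
res_demo$optimum # should be close to c(1, 1)
res_demo$converged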