#' Numeric Differentiation
#' @name genaDiff
#' @description Numeric estimation of the gradient and Hessian.
#' @param fn function for which the gradient or Hessian should be calculated.
#' @param gr gradient function of \code{fn}.
#' @param par point (parameters' value) at which \code{fn} should be
#' differentiated.
#' @param eps numeric vector of increments of \code{par} so that
#' \code{eps[i]} is the increment of \code{par[i]}. If \code{eps} is
#' a scalar then the same increment is used for every parameter.
#' @param method numeric differentiation method: "central-difference" or
#' "forward-difference".
#' @param fn.args list containing arguments of \code{fn} except \code{par}.
#' @param gr.args list containing arguments of \code{gr} except \code{par}.
#' @details It is possible to substantially improve numeric Hessian accuracy
#' by providing the analytical gradient \code{gr}. If both \code{fn} and
#' \code{gr} are provided then only \code{gr} will be used. If only \code{fn}
#' is provided to \code{gena.hessian} then \code{eps} is transformed to
#' \code{sqrt(eps)} for numeric stability purposes.
#'
#' @return Function \code{gena.grad} returns a vector that is the gradient of
#' \code{fn} at point \code{par}, calculated via the \code{method} numeric
#' differentiation approach using increment \code{eps}.
#'
#' Function \code{gena.hessian} returns a matrix that is the Hessian of
#' \code{fn} at point \code{par}.
#' @examples
#' # Consider the following function
#' fn <- function(par, a = 1, b = 2)
#' {
#'   val <- par[1] * par[2] - a * par[1] ^ 2 - b * par[2] ^ 2
#'   return(val)
#' }
#'
#' # Calculate the gradient at point (2, 5) with respect to 'par'
#' # when 'a = 1' and 'b = 1'
#' par <- c(2, 5)
#' fn.args <- list(a = 1, b = 1)
#' gena.grad(fn = fn, par = par, fn.args = fn.args)
#'
#' # Calculate the Hessian at the same point
#' gena.hessian(fn = fn, par = par, fn.args = fn.args)
#'
#' # Repeat the calculation of the Hessian using the analytical gradient
#' gr <- function(par, a = 1, b = 2)
#' {
#'   val <- c(par[2] - 2 * a * par[1],
#'            par[1] - 2 * b * par[2])
#'   return(val)
#' }
#' gena.hessian(gr = gr, par = par, gr.args = fn.args)
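#'
#' # Compare the numeric gradient at the same point with the
#' # analytical one (both use 'a = 1' and 'b = 1')
#' gr(par, a = 1, b = 1)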
#'
#' @rdname genaDiff
#' @export
gena.grad <- function(fn, par,
                      eps = sqrt(.Machine$double.eps) * abs(par),
                      method = "central-difference",
                      fn.args = NULL)
{
  if (!(method %in% c("central-difference", "forward-difference")))
  {
    stop("Unknown 'method': use 'central-difference' or 'forward-difference'.")
  }
  n_par <- length(par)
  if (length(eps) == 1)
  {
    eps <- rep(eps, n_par)
  }
  par_plus <- par
  par_minus <- par
  val <- rep(0, n_par)
  for (i in 1:n_par)
  {
    # Shift the i-th parameter: the forward difference evaluates fn at
    # par and (par + eps) while the central difference evaluates it
    # at (par - eps) and (par + eps)
    par_plus[i] <- par[i] + eps[i]
    if (method == "central-difference")
    {
      par_minus[i] <- par[i] - eps[i]
    }
    fn.args$par <- par_plus
    fn_plus <- do.call(fn, fn.args)
    fn.args$par <- par_minus
    fn_minus <- do.call(fn, fn.args)
    par_plus <- par
    par_minus <- par
    if (method == "central-difference")
    {
      val[i] <- (fn_plus - fn_minus) / (2 * eps[i])
    }
    else
    {
      val[i] <- (fn_plus - fn_minus) / eps[i]
    }
  }
  return(val)
}
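
# A minimal sketch (kept as a comment so it is not executed at package load)
# comparing the two differentiation methods against a known gradient; the
# function 'f' and point 'x' are arbitrary choices for this illustration.
# Central differences have O(eps^2) error, forward differences only O(eps).
#
# f <- function(par) sum(sin(par))
# x <- c(0.5, 1.5)
# g.analytic <- cos(x)
# max(abs(gena.grad(fn = f, par = x) - g.analytic))
# max(abs(gena.grad(fn = f, par = x,
#                   method = "forward-difference") - g.analytic))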
#' @rdname genaDiff
#' @export
gena.hessian <- function(fn = NULL, gr = NULL,
                         par, eps = sqrt(.Machine$double.eps) * abs(par),
                         fn.args = NULL, gr.args = NULL)
{
  n_par <- length(par)
  # Use a larger increment when second order differences
  # of fn are required (for numeric stability)
  if (is.null(gr))
  {
    eps <- sqrt(eps)
  }
  if (length(eps) == 1)
  {
    eps <- rep(eps, n_par)
  }
  par_plus <- par
  par_minus <- par
  val <- matrix(0, n_par, n_par)
  # With analytical gradient: differentiate gr numerically
  if (!is.null(gr))
  {
    for (i in 1:n_par)
    {
      par_plus[i] <- par[i] + eps[i]
      par_minus[i] <- par[i] - eps[i]
      gr.args$par <- par_plus
      gr_plus <- do.call(gr, gr.args)
      gr.args$par <- par_minus
      gr_minus <- do.call(gr, gr.args)
      par_plus <- par
      par_minus <- par
      val[i, ] <- (gr_plus - gr_minus) / (2 * eps[i])
    }
    # Symmetrize by averaging the off-diagonal elements
    for (i in 1:n_par)
    {
      for (j in 1:i)
      {
        if (i != j)
        {
          val[i, j] <- (val[i, j] + val[j, i]) / 2
          val[j, i] <- val[i, j]
        }
      }
    }
  }
  # Without analytical gradient: use second order cross differences
  # val[i, j] = (fn(x + ei + ej) - fn(x + ei) - fn(x + ej) + fn(x)) /
  #             (eps[i] * eps[j])
  else
  {
    fn.args$par <- par
    fn_0 <- do.call(fn, fn.args)
    fn_plus <- rep(NA, n_par)
    for (i in 1:n_par)
    {
      par_plus[i] <- par[i] + eps[i]
      fn.args$par <- par_plus
      fn_plus[i] <- do.call(fn, fn.args)
      par_plus[i] <- par[i]
    }
    par_ij <- par
    for (i in 1:n_par)
    {
      par_ij[i] <- par[i] + eps[i]
      for (j in 1:i)
      {
        par_ij[j] <- par_ij[j] + eps[j]
        fn.args$par <- par_ij
        fn_ij <- do.call(fn, fn.args)
        val[i, j] <- (fn_ij - fn_plus[i] - fn_plus[j] + fn_0) /
                     (eps[i] * eps[j])
        val[j, i] <- val[i, j]
        par_ij[j] <- par[j]
      }
      par_ij[i] <- par[i]
    }
  }
  return(val)
}
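
# Internal helper: backtracking line search along direction 'dir' from point
# 'par'. A learning rate 'lr' is accepted once it satisfies the Wolfe
# conditions given the precomputed value 'fn.val' and gradient 'gr.val' at
# 'par':
#   Armijo (sufficient decrease):
#     fn(par + lr * dir) <= fn.val + c1 * lr * sum(gr.val * dir)
#   strong curvature:
#     |sum(gr(par + lr * dir) * dir)| <= c2 * |sum(gr.val * dir)|
# The rate is repeatedly multiplied by 'mult' until both conditions hold; if
# they already hold at the initial rate it is instead divided by 'mult' for
# as long as they continue to hold.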
gena.lineSearch <- function(fn, gr = NULL,
                            fn.val, gr.val,
                            par,
                            dir, lr = 1,
                            c1 = 1e-4, c2 = 0.9,
                            mult = 0.7,
                            maxiter = 100,
                            fn.args = list(), gr.args = list())
{
  # Variable to control whether the learning rate is decreasing
  is_decrease <- TRUE
  # Learning rate values
  lr.vec <- rep(NA, maxiter)
  lr.vec[1] <- lr
  # Parameters values
  par.mat <- matrix(NA, ncol = length(par), nrow = maxiter)
  # Function and gradient calls
  fn.calls <- 0
  gr.calls <- 0
  # Non-monotone line search
  # M <- min(length(fn.val), 5)
  # fn.val <- mean(tail(fn.val, M))
  # Wolfe conditions
  armijo <- FALSE
  curvature <- FALSE
  for (i in 1:maxiter)
  {
    # Calculate the new point and the function
    # value at this point
    par.mat[i, ] <- par + lr.vec[i] * dir
    fn.args$par <- par.mat[i, ]
    fn.val.new <- do.call(fn, fn.args)
    fn.calls <- fn.calls + 1
    # Check the Armijo (sufficient decrease) condition
    gr.dir <- sum(gr.val * dir)
    armijo <- fn.val.new <= (fn.val + c1 * lr.vec[i] * gr.dir)
    if (armijo)
    {
      # Check the strong curvature condition
      gr.args$par <- par.mat[i, ]
      gr.val.new <- NULL
      if (!is.null(gr))
      {
        gr.val.new <- matrix(do.call(gr, gr.args), ncol = 1)
      }
      else
      {
        gr.val.new <- gena.grad(fn = fn, par = par.mat[i, ],
                                fn.args = fn.args)
      }
      gr.calls <- gr.calls + 1
      gr.dir.new <- sum(gr.val.new * dir)
      curvature <- abs(gr.dir.new) <= (c2 * abs(gr.dir))
      if (curvature)
      {
        if (is_decrease)
        {
          if (i == 1)
          {
            # The initial learning rate already satisfies both
            # conditions so try increasing it instead
            is_decrease <- FALSE
          }
          else
          {
            return.list <- list(lr = lr.vec[i],
                                par = par.mat[i, ],
                                fail = FALSE,
                                fn.calls = fn.calls,
                                gr.calls = gr.calls)
            return(return.list)
          }
        }
      }
    }
    if (!(armijo & curvature) & !is_decrease)
    {
      # The previous (smaller) learning rate was the last one
      # to satisfy both conditions
      return.list <- list(lr = lr.vec[i - 1],
                          par = par.mat[i - 1, ],
                          fail = FALSE,
                          fn.calls = fn.calls,
                          gr.calls = gr.calls)
      return(return.list)
    }
    if (is_decrease)
    {
      lr.vec[i + 1] <- lr.vec[i] * mult
    }
    else
    {
      lr.vec[i + 1] <- lr.vec[i] / mult
    }
  }
  # If the line search has not been successful
  return.list <- list(lr = 0,
                      par = par,
                      fail = TRUE,
                      fn.calls = fn.calls,
                      gr.calls = gr.calls)
  return(return.list)
}
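
# A minimal usage sketch (kept as a comment; the quadratic 'f' and the
# steepest descent direction are arbitrary choices for this illustration):
#
# f <- function(par) sum(par ^ 2)
# g <- function(par) 2 * par
# x <- c(3, 3)
# ls <- gena.lineSearch(fn = f, gr = g,
#                       fn.val = f(x), gr.val = g(x),
#                       par = x, dir = -g(x), lr = 1)
# ls$lr   # accepted learning rate
# ls$par  # new point x + ls$lr * (-g(x))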
gena.local <- function(par,            # initial point
                       fn,             # function to optimize
                       gr = NULL,      # gradient of the function
                       maxiter = 1e+6, # the maximum number of iterations
                       reltol = 1e-10, # relative tolerance
                       abstol = 1e-10, # absolute tolerance
                       lr = 1024,      # initial learning rate
                       type = "BFGS",  # optimization type
                       ...)            # additional arguments of fn and gr
{
  dots <- list(...)
  iter <- NULL
  dir <- NULL
  fn.calls <- 0
  gr.calls <- 0
  reltol.cond <- FALSE
  maxiter.cond <- FALSE
  abstol.cond <- FALSE
  # Get the number of estimated parameters
  n_x <- length(par)
  # Initialize the matrix to store the points of each iteration;
  # row (i + 1) is filled at iteration i so reserve (maxiter + 1) rows
  x <- matrix(NA, nrow = maxiter + 1, ncol = n_x)
  x[1, ] <- par
  # Initialize variables to store function values
  # and gradient information
  fn.val <- rep(NA, maxiter + 1)
  gr.val <- matrix(NA, nrow = maxiter + 1, ncol = n_x)
  # Initialize variables to store Hessians and their inverses,
  # starting from the identity matrix
  # (alternatively: solve(gena.hessian(fn = fn, gr = gr, par = par)))
  hessian <- array(dim = c(maxiter + 1, n_x, n_x))
  hessian.inv <- hessian
  hessian[1, , ] <- diag(n_x)
  hessian.inv[1, , ] <- hessian[1, , ]
  # Estimate the gradient and the function value
  # at the initial point
  dots$par <- par
  fn.val[1] <- do.call(fn, dots)
  fn.calls <- fn.calls + 1
  if (is.null(gr))
  {
    gr.val[1, ] <- gena.grad(fn, x[1, ], fn.args = dots)
  } else {
    dots$par <- x[1, ]
    gr.val[1, ] <- do.call(gr, dots)
  }
  gr.calls <- gr.calls + 1
  # Start the algorithm
  for (i in 1:maxiter)
  {
    # Determine the descent direction
    dir <- -hessian.inv[i, , ] %*% matrix(gr.val[i, ], ncol = 1)
    # Perform the line search
    ls.result <- gena.lineSearch(fn = fn, gr = gr,
                                 fn.val = fn.val[i],
                                 gr.val = gr.val[i, ],
                                 par = x[i, ],
                                 dir = dir, lr = lr,
                                 fn.args = dots,
                                 gr.args = dots)
    # if (ls.result$fail)
    # {
    #   restart with the opposite direction and the identity Hessian
    # }
    x[i + 1, ] <- ls.result$par
    lr <- ls.result$lr
    fn.calls <- fn.calls + ls.result$fn.calls
    gr.calls <- gr.calls + ls.result$gr.calls
    # Estimate the function value at the new point
    dots$par <- x[i + 1, ]
    fn.val[i + 1] <- do.call(fn, dots)
    fn.calls <- fn.calls + 1
    # Estimate the gradient at the new point
    if (is.null(gr))
    {
      gr.val[i + 1, ] <- gena.grad(fn, x[i + 1, ], fn.args = dots)
    } else {
      dots$par <- x[i + 1, ]
      gr.val[i + 1, ] <- do.call(gr, dots)
    }
    gr.calls <- gr.calls + 1
    # Update the inverse of the Hessian via the BFGS formula
    y <- matrix(gr.val[i + 1, ] - gr.val[i, ], ncol = 1)
    s <- matrix(lr * dir, ncol = 1)
    hessian.inv[i + 1, , ] <- hessian.inv[i, , ] +
      (s %*% t(s)) *
      (as.vector(t(s) %*% y +
                 t(y) %*% hessian.inv[i, , ] %*% y) /
       as.vector((t(s) %*% y) ^ 2)) -
      (hessian.inv[i, , ] %*% y %*% t(s) +
       s %*% t(y) %*% hessian.inv[i, , ]) /
      as.vector(t(s) %*% y)
    # Equivalent update:
    # I <- diag(n_x)
    # hessian.inv[i + 1, , ] <- (I - s %*% t(y) / as.vector(t(y) %*% s)) %*%
    #                           hessian.inv[i, , ] %*%
    #                           (I - y %*% t(s) / as.vector(t(y) %*% s)) +
    #                           (s %*% t(s)) / as.vector(t(y) %*% s)
    # Print the information
    cat(paste0("iter-", i, ": ", fn.val[i + 1], "\n"))
    # Check whether the termination conditions are satisfied
    abstol.cond <- abs(fn.val[i] - fn.val[i + 1]) < abstol
    if (fn.val[i] != 0)
    {
      reltol.cond <- abs((fn.val[i] - fn.val[i + 1]) / fn.val[i]) < reltol
    }
    else
    {
      reltol.cond <- FALSE
    }
    maxiter.cond <- i == maxiter
    if (abstol.cond | reltol.cond | maxiter.cond)
    {
      iter <- i
      break
    }
  }
  # Return the final point and related information
  return_list <- list(points = x[1:(iter + 1), ],
                      solution = x[iter + 1, ],
                      value = fn.val[iter + 1],
                      fn_grad = gr.val[iter + 1, ],
                      hessian = solve(hessian.inv[iter + 1, , ]),
                      hessian.inv = hessian.inv[iter + 1, , ],
                      fn.calls = fn.calls,
                      gr.calls = gr.calls,
                      dir = dir)
  return(return_list)
}
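
# A minimal usage sketch (kept as a comment; the Rosenbrock function is a
# standard test problem and not part of the package):
#
# rosenbrock <- function(par)
# {
#   return((1 - par[1]) ^ 2 + 100 * (par[2] - par[1] ^ 2) ^ 2)
# }
# opt <- gena.local(par = c(-1.2, 1), fn = rosenbrock)
# opt$solution  # should be close to c(1, 1) if the search converges
# opt$value     # should be close to 0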