#' Numerical computation of the gradient, with parallel capabilities
#'
#' This function calculates the gradient of a function, numerically, including the possibility
#' of doing it in parallel.
#'
#' @param fn The function to calculate the gradient.
#' @param x The value to compute the gradient at.
#' @param method The method used. Currently implemented: central, backward, forward and Richardson. See details.
#' @param control A list of control arguments.
#' @param parallel Boolean, should numerical derivatives be calculated in parallel?
#' @param ... Additional arguments to be passed to \code{fn}.
#'
#' @return The gradient of \code{fn} at \code{x}.
#' @export
#'
#' @examples gradient(fn=function(x) sum(x^3), x=0)
gradient = function(fn, x, method, control, parallel, ...) {
UseMethod("gradient")
}
#' @export
gradient.default = function(fn, x, method=NULL, control=list(), parallel=FALSE, ...) {
# cluster structure must be defined outside
# by default, not running in parallel
use_disk = ifelse(!is.null(attr(fn, "..i")), TRUE, FALSE)
if(is.null(method)) method = "richardson"
df = switch(method,
central = .grad_central(fn, x, control=control, parallel=parallel, use_disk=use_disk, ...),
forward = .grad_simple(fn, x, side=+1, control=control, parallel=parallel, use_disk=use_disk, ...),
backward = .grad_simple(fn, x, side=-1, control=control, parallel=parallel, use_disk=use_disk, ...),
richardson = .grad_richardson(fn, x, control=control, parallel=parallel, use_disk=use_disk, ...),
stop("Undefined method for gradient computation."))
return(df)
}
# Implementation of methods for gradient computation ----------------------
.grad_simple = function(fn, x, side=1, control=list(), parallel=FALSE, use_disk=FALSE, ...) {
# simple method uses n+1 function evaluation, n+1 can be parallelized,
# so computer time is 1 function evaluation.
n = length(x)
if(is.null(control$step_method)) control$step_method = "simple"
if(length(side)==1) side = rep(side, n)
if(any(is.na(side))) side[is.na(side)] = 1
if(length(side)!=n) stop("Argument 'side' should have the same length as x.")
if(any(abs(side)!=1)) stop("Argument 'side' should have values +1 or -1.")
h = .get_h(x, control, method=control$step_method)*side
if(!isTRUE(parallel)) {
fx = if(!use_disk) fn(x, ...) else fn(x)
df = rep(NA_real_, n)
for(i in seq_len(n)) {
dx = h*(i == seq_len(n))
df[i] = if(!use_disk) (fn(x + dx, ...) - fx)/h[i] else (fn(x + dx) - fx)/h[i]
}
} else {
DX = diag(n)
diag(DX) = h
DX = rbind(0, DX)
f = foreach(i=seq(from=1, to=n+1), .combine=c, .verbose=FALSE, .inorder=TRUE) %dopar% {
dx = DX[i, ]
fi = if(!use_disk) fn(x + dx, ...) else fn(x + dx, ..i=i)
fi
}
fx = f[1] # fx = f(i=0)
df = (f[-1] - fx)/h
}
return(df)
}
.grad_central = function(fn, x, control=list(), parallel=FALSE, use_disk=FALSE, ...) {
# central method uses 2*n function evaluation, 2*n can be parallelized,
# so computing time is 1 function evaluation.
n = length(x)
if(is.null(control$step_method)) control$step_method = "simple"
h = .get_h(x, control, method=control$step_method)
if(!isTRUE(parallel)) {
df = rep(NA_real_, n)
for(i in seq_len(n)) {
dx = h*(i == seq_len(n))
df[i] = if(!use_disk) (fn(x + dx, ...) - fn(x - dx, ...))/(2*h[i]) else (fn(x + dx) - fn(x - dx))/(2*h[i])
}
} else {
DX = diag(n)
diag(DX) = h
DX = rbind(DX, -DX)
f = foreach(i=seq(from=1, to=2*n), .combine=c, .verbose=FALSE, .inorder=TRUE) %dopar% {
dx = DX[i, ]
fi = if(!use_disk) fn(x + dx, ...) else fn(x + dx, ..i=i)
fi
}
fp = head(f, n)
fm = tail(f, n)
df = (fp - fm)/(2*h)
}
return(df)
}
.grad_richardson = function(fn, x, control=list(), parallel=FALSE, use_disk=FALSE, ...) {
# richardson method uses 2*r*n function evaluations, 2*r*n can be parallelized,
# so computing time is 1 function evaluation.
r = if(is.null(control$r)) 4 else control$r # number of iterations, by default 4.
v = if(is.null(control$v)) 2 else control$v # reduction factor of h, by default 2.
n = length(x)
if(is.null(control$step_method)) control$step_method = "start"
h = .get_h(x, control, method=control$step_method)
if(!isTRUE(parallel)) {
DF = matrix(NA_real_, nrow=r, ncol=n)
for(k in seq_len(r)) {
# gradient as usual
for(i in seq_len(n)) {
if((k > 1) && (abs(DF[(k - 1), i]) < 1e-20)) {
# set derivative to zero when below 1e-20, why hardcoded?
DF[k, i] = 0
} else {
dx = h*(i == seq_len(n))
DF[k, i] = if(!use_disk) (fn(x + dx, ...) - fn(x - dx, ...))/(2*h[i]) else (fn(x + dx, ..i=i) - fn(x - dx, ..i=i))/(2*h[i])
if(any(is.na(DF[k, i])))
stop(sprintf("function returns NA at %g distance from x.", h))
}
}
# reduce h and try again
h = h/v
}
} else {
H = matrix(NA_real_, nrow=r, ncol=n)
for(k in seq_len(r)) H[k, ] = h/(v^(k-1))
DX = NULL
for(k in seq_len(r)) {
HX = diag(n)
diag(HX) = H[k, ]
DX = rbind(DX, HX, -HX)
}
f = foreach(i=seq(from=1, to=2*r*n), .combine=c, .verbose=FALSE, .inorder=TRUE) %dopar% {
dx = DX[i, ]
fi = if(!use_disk) fn(x + dx, ...) else fn(x + dx, ..i=i)
fi
}
f = matrix(f, nrow=r, ncol=2*n, byrow = TRUE)
fp = f[, 1:n, drop=FALSE]
fm = f[, -(1:n), drop=FALSE]
DF = (fp - fm)/(2*H)
} # end of parallel conditional.
# ponderate all estimates (same for all cases)
for(m in seq_len(r - 1)) {
ind0 = 2:(r + 1 - m)
ind1 = 1:(r - m)
DF = (DF[ind0, , drop = FALSE]*(4^m) - DF[ind1, , drop = FALSE])/(4^m - 1)
}
return(as.numeric(DF))
}
# Auxiliar functions ------------------------------------------------------
.get_h = function(x, control, method) {
if(method=="simple") {
args = list(eps = 1e-08, zero.tol = sqrt(.Machine$double.eps/7e-07))
args[names(control)] = control
eps = args$eps
zero.tol = args$zero.tol # threshold for an initial value to be zero (for initial h).
isZero = 0 + (abs(x) < zero.tol)
h = eps*abs(x)*(1-isZero) + eps*isZero
return(h)
}
if(method=="optextras") {
args = list(eps = 1e-08)
args[names(control)] = control
eps = args$eps
h = eps*(abs(x) + eps)
return(h)
}
if(method=="start") {
args = list(eps = 1e-04, d = 1e-04, r = 4, v = 2, zero.tol = sqrt(.Machine$double.eps/7e-07))
args[names(control)] = control
d = args$d # fraction of x for initial h.
r = args$r # number of iterations, by default 4.
v = args$v # reduction factor of h, by default 2.
eps = args$eps # initial epsilon when x near zero.
zero.tol = args$zero.tol # threshold for an initial value to be zero (for initial h).
isZero = 0 + (abs(x) < zero.tol)
h = d*abs(x)*(1-isZero) + eps*isZero
return(h)
}
stop("Undefined method for 'h' calculation.")
}
# Notes for future self ---------------------------------------------------
# args: skeleton, active
# # here we have to transform fn so it takes ..i and change folder to run if necessary.
# skeleton = skeleton
# if(is.null(skeleton)) skeleton = as.relistable(par)
#
# npar = length(par)
#
# # check active parameters
# active = .checkActive(active=active, npar=npar)
# isActive = which(active)
# activeFlag = isTRUE(all(active))
#
# # update to active parameters only
# guess = par
# par = guess[isActive]
#
# npar = length(par)
#
# force(replicates)
#
# # closure for function evaluation
# fn = match.fun(fn)
#
# # here we modify f so:
# # 1. use 'isActive' to mask some parameters ('guess' is reference)
# # 2. is re-listed according to skeleton
# # 3. is re-scaled according to control$fnscale
# # 4. is evaluated 'replicates' times
# # 5. is evaluated in the 'control$run/..i' folder.
# if(is.null(control$master)) {
#
# fn1 = function(x, ..i=0) {
#
# parx = guess
# parx[isActive] = par
# parx = relist(flesh = parx, skeleton = skeleton)
# output = NULL
# for(i in seq_len(replicates)) {
# out = fn(parx, ...)/control$fnscale
# output = rbind(output, t(as.matrix(out)))
# }
# return(as.numeric(colMeans(output)))
#
# }
#
# } else {
#
# fn1 = function(par, ..i=0) {
#
# cwd = getwd()
# on.exit(setwd(cwd))
# .setWorkDir(control$run, i=..i)
#
# parx = guess
# parx[isActive] = par
# parx = relist(flesh = parx, skeleton = skeleton)
# output = NULL
# for(i in seq_len(replicates)) {
# out = fn(parx, ...)/control$fnscale
# output = rbind(output, t(as.matrix(out)))
# }
# return(as.numeric(colMeans(output)))
#
# }
#
# } # end fn1
#
# # here master folder is used for initialization. Maybe it's already initialized (from higher level call),
# # but it may not.
# # we return the gradient in the real dimension, setting to zero non-active components
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.