# R/optim-wrapper.R In sdetorus: Statistical Tools for Toroidal Diffusions

#### Documented in mleOptimWrapper

#' @title Optimization wrapper for likelihood-based procedures
#'
#' @description A convenient wrapper to perform local optimization of the likelihood function via \code{nlm} and \code{optim} including several practical utilities.
#'
#' @param minusLogLik function computing the minus log-likelihood function. Must have a single argument containing a vector of length \code{p}.
#' @param region function to impose a feasibility region via a penalty. See details.
#' @param penalty imposed penalty if value is not finite.
#' @param optMethod one of the following strings: "nlm", "Nelder-Mead", "BFGS", "CG", "L-BFGS-B", "SANN", "Brent".
#' @param start starting values, a matrix with \code{p} columns, with each entry representing a different starting value.
#' @param lower,upper bound for box constraints as in method "L-BFGS-B" of \code{\link[stats]{optim}}.
#' @param selectSolution which criterion is used for selecting a solution among possible ones, either \code{"lowest"}, \code{"lowestConv"} or \code{"lowestLocMin"}. \code{"lowest"} returns the solution with lowest value in the \code{minusLogLik} function. \code{"lowestConv"} restricts the search of the best solution among the ones for which the optimizer has converged. \code{"lowestLocMin"} in addition imposes that the solution is guaranteed to be a local minimum by examining the Hessian matrix.
#' @param checkCircular logical indicating whether to automatically treat the variables with \code{lower} and \code{upper} entries equal to \code{-pi} and \code{pi} as circular. See details.
#' @param maxit maximum number of iterations.
#' @param tol tolerance for convergence (passed to \code{reltol}, \code{pgtol} or \code{gradtol}).
#' @param eigTol,condTol eigenvalue and condition number tolerance for the Hessian in order to guarantee a local minimum. Used only if \code{selectSolution = "lowestLocMin"}.
#' @param verbose an integer from \code{0} to \code{2} if \code{optMethod = "Nelder-Mead"} or from \code{0} to \code{4} otherwise giving the amount of information displayed.
#' @param ... further arguments passed to the \code{optMethod} selected. See options in \code{\link[stats]{nlm}} or \code{\link[stats]{optim}}.
#' @return A list containing the following elements:
#' \itemize{
#'   \item \code{par}: estimated minimizing parameters
#'   \item \code{value}: value of \code{minusLogLik} at the minimum.
#'   \item \code{convergence}: if the optimizer has converged or not.
#'   \item \code{message}: a character string giving any additional information returned by the optimizer.
#'   \item \code{eigHessian}: eigenvalues of the Hessian at the minimum. Recall that for the same solution slightly different outputs may be obtained according to the different computations of the Hessian of \code{nlm} and \code{optim}.
#'   \item \code{localMinimumGuaranteed}: tests if the Hessian is positive definite (all eigenvalues larger than the tolerance \code{eigTol} and condition number smaller than \code{condTol}).
#'   \item \code{solutionsOutput}: a list containing the complete output of the selected method for the different starting values. It includes the extra objects \code{convergence} and \code{localMinimumGuaranteed}.
#' }
#' @details If \code{checkCircular = TRUE}, then the corresponding \code{lower} and \code{upper} entries of the circular parameters are set to \code{-Inf} and \code{Inf}, respectively, and \code{minusLogLik} is called with the \emph{principal value} of the circular argument.
#'
#' If no solution is found satisfying the criterion in \code{selectSolution}, NAs are returned in the elements of the main solution.
#'
#' The Hessian is only computed if \code{selectSolution = "lowestLocMin"}.
#'
#' Region feasibility can be imposed by a function with the same arguments as \code{minusLogLik} that resets \code{pars} in to the boundary of the feasibility region and adds a penalty proportional to the violation of the feasibility region. Note that this is \emph{not the best procedure at all} to solve the constrained optimization problem, but just a relatively flexible and quick approach (for a more advanced treatment of restrictions, see \href{https://CRAN.R-project.org/view=Optimization}{optimization-focused packages}). The value must be a list with objects \code{pars} and \code{penalty}. By default no region is imposed, i.e., \code{region = function(pars) list("pars" = pars, "penalty" = 0)}. Note that the Hessian is computed from the unconstrained problem, hence \code{localMinimumGuaranteed} might be \code{FALSE} even if a local minimum to the constrained problem was found.
#' @examples
#' # No local minimum
#' head(mleOptimWrapper(minusLogLik = function(x) -sum((x - 1:4)^2),
#'                      start = rbind(10:13, 1:2), selectSolution = "lowest"))
#' head(mleOptimWrapper(minusLogLik = function(x) -sum((x - 1:4)^2),
#'                      start = rbind(10:13, 1:2), selectSolution = "lowestConv"))
#' head(mleOptimWrapper(minusLogLik = function(x) -sum((x - 1:4)^2),
#'                      start = rbind(10:13, 1:2), selectSolution = "lowestLocMin"))
#'
#' # Local minimum
#' head(mleOptimWrapper(minusLogLik = function(x) sum((x - 1:4)^2), start = rbind(10:13),
#'                      optMethod = "BFGS"))
#' head(mleOptimWrapper(minusLogLik = function(x) sum((x - 1:4)^2), start = rbind(10:13),
#'
#' # Function with several local minimum and a 'spurious' one
#' f <- function(x)  0.75 * (x - 1)^2 -
#'                   10 / (0.1 + 0.1 * ((x - 15)^2 + (x - 2)^2)) -
#'                   9.5 / (0.1 + 0.1 * ((x - 15)^2 + (x + 2)^2))
#' plotSurface2D(x = seq(0, 20, l = 100), y = seq(-3, 3, l = 100), f = f)
#' head(mleOptimWrapper(minusLogLik = f, start = rbind(c(15, 2), c(15, -2), c(5, 0)),
#'                      selectSolution = "lowest"))
#' head(mleOptimWrapper(minusLogLik = f, start = rbind(c(15, 2), c(15, -2), c(5, 0)),
#'                      selectSolution = "lowestConv"))
#' head(mleOptimWrapper(minusLogLik = f, start = rbind(c(15, 2), c(15, -2), c(5, 0)),
#'                      selectSolution = "lowestLocMin", eigTol = 0.01))
#'
#' # With constraint region
#' head(mleOptimWrapper(minusLogLik = function(x) sum((x - 1:2)^2), start = rbind(10:11),
#'                      region = function(pars) {
#'                        x <- pars
#'                        y <- pars
#'                        if (y <= x^2) {
#'                          return(list("pars" = pars, "penalty" = 0))
#'                        } else {
#'                         return(list("pars" = c(sqrt(y), y), "penalty" = y - x^2))
#'                        }
#'                      }, lower = c(0.5, 1), upper = c(Inf, Inf),
#'                 optMethod = "Nelder-Mead", selectSolution = "lowest"))
#' head(mleOptimWrapper(minusLogLik = function(x) sum((x - 1:2)^2), start = rbind(10:11),
#'                      lower = c(0.5, 1), upper = c(Inf, Inf), optMethod = "Nelder-Mead"))
#' @export
mleOptimWrapper <- function(minusLogLik, region = function(pars)
list("pars" = pars, "penalty" = 0), penalty = 1e10, optMethod = "Nelder-Mead",
start, lower = rep(-Inf, ncol(start)), upper = rep(Inf, ncol(start)),
selectSolution = "lowestLocMin", checkCircular = TRUE, maxit = 500,
tol = 1e-5, verbose = 0, eigTol = 1e-4, condTol = 1e4, ...) {

# Identify which parameters are circular to allow them vary in R but taking
# its principal values. This avoids artificial local minimums created as a
# combination of a bad starting value (for example -pi + 0.1) and a minimum
# placed at the other side of the interval (for example at pi - 0.1)
if (checkCircular) {

circularPars <- which(lower == -pi & upper == pi)
lower[circularPars] <- -Inf
upper[circularPars] <- Inf
start[circularPars] <- toPiInt(start[circularPars])

}

# Careful with overflows and boundaries
minusLogLikFiniteBound <- function(pars) {

# Wrap to [-pi, pi) the circular parameters
if (checkCircular) {

pars[circularPars] <- toPiInt(pars[circularPars])

}

# Check if all the parameters are in range
# If not: reset them and create a continuous penalty

# Lower
belowLow <- lower > pars
if (any(belowLow)) {

penaltyLow <- sqrt(sum((pars[belowLow] - lower[belowLow])^2))
pars[belowLow] <- lower[belowLow]

} else {

penaltyLow <- 0

}

# Upper
aboveUp <- upper < pars
if (any(aboveUp)) {

penaltyUp <- sqrt(sum((pars[aboveUp] - lower[aboveUp])^2))
pars[aboveUp] <- upper[aboveUp]

} else {

penaltyUp <- 0

}

# Check if parameters are in region
# If not: reset them and create a penalty
reg <- region(pars)

# Call minusLogLik
res <- minusLogLik(reg$pars) + reg$penalty + 10 * (penaltyLow + penaltyUp)

# Check finiteness
if (!is.finite(res)) {

res <- penalty

}

# Return values
return(res)

}

# Starting values
nStart <- dim(start)
if (is.null(nStart)) {

nStart <- 1
start <- matrix(start, nrow = 1)

}

# Compute Hessian? Takes a significant ammount of time
hessian <- selectSolution == "lowestLocMin"

# List with solutions
solutions <- vector(mode = "list", length = nStart)

# Loop on different starting values
for (i in 1:nStart) {

if (optMethod == "nlm") {

# Optimization
solutions[[i]] <- tryCatch(nlm(f = minusLogLikFiniteBound, p = start[i, ],
hessian = hessian, print.level = verbose,
iterlim = maxit, gradtol = tol, ...),
error = function(e) {
show(e)
list(minimum = NA, estimate = NA, gradient = NA, hessian = NA, code = 6,
iterations = NA, message = paste(paste(e$call, collapse = " "), e$message, sep = ": "))
})

# Add Hessian slot if hessian = FALSE
if (!hessian) {

solutions[[i]]$hessian <- NA } # Convergence message solutions[[i]]$message <-
switch(solutions[[i]]$code, paste("Relative gradient is close to zero,", "current iterate is probably solution"), paste("Successive iterates within tolerance,", "current iterate is probably solution"), paste("Last global step failed to locate a point", "lower than estimate. Either estimate is an", "approximate local minimum of the function or", "steptol is too small"), "Iteration limit exceeded", paste("Maximum step size stepmax exceeded five consecutive", "times. Either the function is unbounded below, becomes", "asymptotic to a finite value from above in some", "direction or stepmax is too small"), paste("Error in", solutions[[i]]$message))

# Check if convergence happened
solutions[[i]]$convergence <- solutions[[i]]$code %in% 1:3

# Wrap circular parameters
if (checkCircular) {

solutions[[i]]$estimate[circularPars] <- toPiInt(solutions[[i]]$estimate[circularPars])

}

} else {

# Optimization
control <- list(maxit = maxit, trace = verbose, ...)
if (optMethod == "L-BFGS-B") {

control$pgtol <- tol solutions[[i]] <- tryCatch(optim(par = start[i, ], fn = minusLogLikFiniteBound, method = optMethod, lower = lower, upper = upper, hessian = hessian, control = control), error = function(e) { show(e) list(par = NA, value = NA, counts = NA, convergence = 6, message = paste( paste(e$call, collapse = " "),
e$message, sep = ": "), hessian = NA) }) } else { control$reltol <- tol
solutions[[i]] <- tryCatch(optim(par = start[i, ],
fn = minusLogLikFiniteBound,
method = optMethod, hessian = hessian,
control = control),
error = function(e) {
show(e)
list(par = NA, value = NA, counts = NA,
convergence = 6,
message = paste(
paste(e$call, collapse = " "), e$message, sep = ": "),
hessian = NA)
})

}

# Add Hessian slot if hessian = FALSE
if (!hessian) {

solutions[[i]]$hessian <- NA } # Convergence message solutions[[i]]$message <-
paste(switch(as.character(solutions[[i]]$convergence), "0" = "Successful completion", "1" = "Iteration limit maxit had been reached", "10" = "Degeneracy of the Nelder-Mead simplex", "51" = "Warning from L-BFGS-B method", "52" = "Error from L-BFGS-B method", "6" = paste("Error in", solutions[[i]]$message)),
ifelse(is.null(solutions[[i]]$message) | solutions[[i]]$convergence == 6, "",
paste(".", solutions[[i]]$message)), sep = "") # Check if convergence happened solutions[[i]]$convergence <- solutions[[i]]$convergence == 0 # Wrap circular parameters if (checkCircular) { solutions[[i]]$par[circularPars] <-
toPiInt(solutions[[i]]$par[circularPars]) } } # Add eigendecomposition of Hessian and flag indicating if the solution # is a local minimum if (anyNA(solutions[[i]]$hessian)) {

solutions[[i]]$eigHessian <- NA solutions[[i]]$localMinimumGuaranteed <- FALSE

} else {

solutions[[i]]$eigHessian <- eigen(solutions[[i]]$hessian,
symmetric = TRUE,
only.values = TRUE)$values solutions[[i]]$localMinimumGuaranteed <-
all(solutions[[i]]$eigHessian > eigTol) & (max(abs(solutions[[i]]$eigHessian)) <
condTol * min(abs(solutions[[i]]$eigHessian))) } } # Which is the best found solution? if (selectSolution == "lowestLocMin") { indSelect <- sapply(1:nStart, function(i) solutions[[i]]$convergence & solutions[[i]]$localMinimumGuaranteed) } else if (selectSolution == "lowestConv") { indSelect <- sapply(1:nStart, function(i) solutions[[i]]$convergence)

} else if (selectSolution == "lowest") {

indSelect <- rep(TRUE, nStart)

} else {

stop("selectSolution must be 'lowestLocMin', 'lowestConv' or 'lowest'")

}
if (optMethod == "nlm") {

ind <- which.min(sapply(1:nStart, function(i)
solutions[[i]]$minimum)[indSelect]) } else { ind <- which.min(sapply(1:nStart, function(i) solutions[[i]]$value)[indSelect])

}
ind <- (1:nStart)[which(indSelect)[ind]]

# Add complete output of the solutions
msgHessian <- ifelse(hessian, paste0(", eigTol = ", eigTol,
" and condTol = ", condTol), "")
msg <- paste0(paste0("No solution was found with selectSolution = '",
selectSolution, "'"), msgHessian)
bestSolution <- list("par" = NA, "value" = NA, "convergence" = FALSE,
"message" = msg, "eigHessian" = NA,
"localMinimumGuaranteed" = FALSE,
"optMethod" = optMethod, "solutionsOutput" = solutions)

# If found a solution
if (length(ind) > 0) {

if (optMethod == "nlm") {

bestSolution$par <- solutions[[ind]]$estimate
bestSolution$value <- solutions[[ind]]$minimum

} else {

bestSolution$par <- solutions[[ind]]$par
bestSolution$value <- solutions[[ind]]$value

}

bestSolution$convergence <- solutions[[ind]]$convergence
bestSolution$message <- solutions[[ind]]$message
bestSolution$eigHessian <- solutions[[ind]]$eigHessian
bestSolution$localMinimumGuaranteed <- solutions[[ind]]$localMinimumGuaranteed

}

return(bestSolution)

}


## Try the sdetorus package in your browser

Any scripts or data that you put into this service are public.

sdetorus documentation built on Aug. 19, 2021, 9:06 a.m.