nlminb2 <- function(start,objective,gradient,hessian,...) {
nlminbcontrols <- c("eval.max","iter.max","trace","abs.tol","rel.tol","x.tol","step.min")
dots <- list(...)
control <- dots$control
control <- control[names(control)%in%nlminbcontrols]
dots$control <- control
if (length(dots$trace)>0 && dots$trace>0) cat("\n")
mypar <- c(list(start=start,objective=objective,gradient=gradient,hessian=hessian),dots)
mypar["debug"] <- NULL
do.call("nlminb", mypar)
}
nlminb1 <- function(start,objective,gradient,hessian,...) {
nlminb2(start,objective,gradient=gradient,hessian=NULL,...)
}
nlminb0 <- function(start,objective,gradient,hessian,...) {
nlminb2(start,objective,gradient=NULL,hessian=NULL,...)
}
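## nlminb2/nlminb1/nlminb0 are thin wrappers around stats::nlminb that differ
## only in how much derivative information is forwarded: both gradient and
## Hessian, gradient only, or neither. Unrecognized entries in 'control' are
## dropped before the call. Illustrative sketch (hypothetical f/g/h, commented
## out so the file sources cleanly; not part of the package):
##   f <- function(p) sum((p - 1)^2)          # objective
##   g <- function(p) 2 * (p - 1)             # gradient
##   h <- function(p) diag(2, length(p))      # Hessian
##   nlminb2(c(0, 0), f, g, h, control = list(trace = 0))$par  # approx. c(1, 1)
##   nlminb0(c(0, 0), f, g, h)$par                             # derivatives ignored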
################################################################################
estimatingfunction <- function(start,objective,gradient,hessian,NR=FALSE,...) {
myobj <- function(x,...) {
S <- gradient(x,...)
crossprod(S)[1]
}
if (!missing(hessian) && !is.null(hessian)) {
mygrad <- function(x) {
H <- hessian(x)
S <- gradient(x)
2*S%*%H
}
} else {
hessian <- function(x) numDeriv::jacobian(gradient,x,method=lava.options()$Dmethod)
mygrad <- function(x) {
H <- hessian(x)
S <- gradient(x)
2*S%*%H
}
}
if (NR) {
op <- lava::NR(start,gradient=gradient,hessian=hessian,...)
} else {
op <- nlminb2(start,myobj,mygrad,hessian=NULL,...)
}
return(op)
}
estimatingfunction0 <- function(...,hessian=NULL) estimatingfunction(...,hessian=hessian)
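## estimatingfunction solves the estimating equation U(theta) = 0, where U is
## supplied via 'gradient': either directly by Newton-Raphson (NR=TRUE), or by
## minimizing the squared norm crossprod(U(theta)) with nlminb, whose gradient
## is 2*U(theta)%*%H(theta) with H the Jacobian of U (obtained numerically via
## numDeriv::jacobian when 'hessian' is NULL). Illustrative sketch (hypothetical
## estimating function, commented out; not part of the package):
##   U <- function(p) c(p[1] - 2, p[2] + 1)   # root at p = c(2, -1)
##   estimatingfunction(c(0, 0), gradient = U)$par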
################################################################################
## Newton-Raphson/Scoring
################################################################################
##' @title Newton-Raphson method
##'
##' @param start Starting value
##' @param objective Optional objective function (used for selecting step length)
##' @param gradient Gradient function
##' @param hessian Hessian function (if NULL a numerical approximation is used)
##' @param control optimization arguments (see details)
##' @param args Optional list of arguments passed to objective, gradient and hessian
##' @param ... additional arguments passed to lower-level functions
##' @details
##' \code{control} should be a list with one or more of the following components:
##' \itemize{
##' \item{trace}: Integer; if positive, print progress every 'trace'th iteration
##' \item{iter.max}: Maximum number of iterations
##' \item{stepsize}: Step size (default 1)
##' \item{nstepsize}: Increase stepsize every 'nstepsize' iterations (from stepsize towards 1)
##' \item{tol}: Convergence criterion (root-mean-square of the gradient)
##' \item{epsilon}: Threshold used in the pseudo-inverse
##' \item{backtrack}: In each iteration, reduce the stepsize unless the solution is improved according to the chosen criterion (gradient, "armijo", "curvature", "wolfe")
##' }
##' @export
##' @examples
##' # Objective function with gradient and hessian as attributes
##' f <- function(z) {
##' x <- z[1]; y <- z[2]
##' val <- x^2 + x*y^2 + x + y
##' structure(val, gradient=c(2*x+y^2+1, 2*y*x+1),
##' hessian=rbind(c(2,2*y),c(2*y,2*x)))
##' }
##' NR(c(0,0),f)
##'
##' # Passing additional arguments to the gradient function via 'args'
##' g <- function(x,y) (x*y+1)^2
##' NR(0, gradient=g, args=list(y=2), control=list(trace=1,tol=1e-20))
##'
##'
NR <- function(start,objective=NULL,gradient=NULL,hessian=NULL,control,args=NULL,...) {
control0 <- list(trace=0,
stepsize=1,
lambda=0,
ngamma=0,
gamma2=0,
backtrack=TRUE,
iter.max=200,
tol=1e-6,
stabil=FALSE,
epsilon=1e-9)
if (!missing(control)) {
control0[names(control)] <- control
# Backward compatibility:
if (!is.null(control0$gamma)) control0$stepsize <- control0$gamma
}
## conditions to select the step length
if(control0$backtrack[1] == "armijo"){
control0$backtrack <- c(1e-4,0) # page 33
}
if(control0$backtrack[1] == "curvature"){
control0$backtrack <- c(0,0.9) # page 34
}
if(control0$backtrack[1] == "wolfe"){
control0$backtrack <- c(1e-4,0.9)
}
if(!is.logical(control0$backtrack) || length(control0$backtrack)!=1){
if(length(control0$backtrack) != 2){
stop("control$backtrack must have length two if not TRUE or FALSE \n")
}
if(any(!is.numeric(control0$backtrack)) || any(abs(control0$backtrack)>1)){
stop("elements in control$backtrack must be in [0,1] \n")
}
if(control0$backtrack[2]==0){
control0$backtrack[2] <- +Inf # no Wolfe condition
}
}
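## Wrap the user-supplied objective/gradient/hessian so that the fixed
## arguments in 'args' are appended to every call.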
obj <- objective
grad <- gradient
hess <- hessian
if (!is.null(args)) {
if (!is.list(args)) args <- list(args)
if (!is.null(objective))
obj <- function(p) do.call(objective, c(list(p),args))
if (!is.null(gradient))
grad <- function(p) do.call(gradient, c(list(p),args))
if (!is.null(hessian))
hess <- function(p) do.call(hessian, c(list(p),args))
}
if (control0$trace>0) {
cat("\nIter=0\t")
if (!is.null(obj))
cat("Objective=",obj(as.double(start)))
cat(";\t\n \tp=", paste0(formatC(start), collapse=" "),"\n")
}
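## If no gradient function is supplied, derivatives are extracted from the
## 'gradient'/'hessian' attributes of the objective (as in the example above),
## with a numerical gradient via numDeriv::jacobian as fallback.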
gradFun <- !is.null(grad)
if (!gradFun && is.null(hess)) {
hess <- function(p) {
ff <- obj(p)
res <- attributes(ff)$hess
if (is.function(res)) {
res <- res(p)
attributes(res)$grad <- as.vector(attributes(ff)$grad(p))
} else {
attributes(res)$grad <- as.vector(attributes(ff)$grad)
}
return(res)
}
grad <- function(p) {
if (control0$trace>0) print("Numerical gradient")
numDeriv::jacobian(obj,p)
}
}
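## One Newton/scoring update: evaluate the gradient D and the negative Hessian
## I at p.orig, solve I %*% Delta = D (scaled by 'stepsize', with a
## pseudo-inverse fallback), and step to p.orig + Lambda*Delta, possibly
## shortening Lambda by backtracking.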
oneiter <- function(p.orig,Dprev,return.mat=FALSE,iter=1) {
D <- I <- NULL # Place-holders for gradient and negative hessian
if (!is.logical(control0$backtrack)) { # Back-tracking based on objective function evaluations
objective.origin <- obj(p.orig)
D <- attributes(objective.origin)$grad
I <- attributes(objective.origin)$hess
if (!is.null(I)) I <- -I
}
if (is.null(D) || is.null(I)) {
if (!is.null(hess)) {
H <- hess(p.orig)
}
if (is.null(hess) || is.null(H)) {
if (control0$trace>0) print("Numerical Hessian")
I <- -numDeriv::jacobian(grad,p.orig,method=lava.options()$Dmethod)
} else {
I <- -H
}
D <- attributes(I)$grad
if (is.null(D)) {
D <- grad(p.orig)
}
}
if (return.mat) return(list(D=D,I=I))
if (control0$stabil) {
if (control0$lambda!=0) {
if (control0$lambda<0) {
sigma <- (t(D)%*%(D))[1]
} else {
sigma <- control0$lambda
}
sigma <- min(sigma,10)
I <- I+control0$gamma2*sigma*diag(nrow=nrow(I))
} else {
sigma <- ((D)%*%t(D))
I <- I+control0$gamma2*(sigma)
}
}
iI <- Inverse(I, symmetric=TRUE, tol=control0$epsilon)
Delta <- control0$stepsize*tryCatch(solve(I, cbind(as.vector(D))),
error=function(...) { ## Fall back to Pseudo-Inverse using SVD:
iI%*%cbind(as.vector(D))})
Lambda <- 1 ## Initial step-size
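## Step-length selection: backtrack=TRUE halves Lambda until the mean squared
## gradient falls below its previous value; backtrack=FALSE takes the full
## Newton step; a numeric pair (c1, c2) enforces the Armijo and (strong) Wolfe
## curvature conditions instead.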
if (identical(control0$backtrack, TRUE)) {
mD0 <- mean(Dprev^2)
mD <- mean(D^2)
p <- p.orig + as.vector(Lambda*Delta)
while (mD>=mD0) {
if (gradFun) {
D = grad(p)
} else {
DI <- oneiter(p,return.mat=TRUE)
D = DI$D
}
mD = mean(D^2)
if (is.nan(mD)) mD=mD0
Lambda <- Lambda/2
if (Lambda<1e-4) break;
p <- p.orig + as.vector(Lambda*Delta)
}
} else if(identical(control0$backtrack, FALSE)) {
p <- p.orig + Lambda*Delta
} else { # Armijo condition: obj(p) <= obj(p.orig) + c1*Lambda*sum(grad(p.orig)*Delta)
## Curvature condition: |sum(grad(p)*Delta)| <= c2*|sum(grad(p.orig)*Delta)|
c_D.origin_Delta <- control0$backtrack * c(rbind(D) %*% Delta)
p <- p.orig + as.vector(Lambda*Delta)
mD0 <- c(objective.origin + Lambda * c_D.origin_Delta[1], abs(c_D.origin_Delta[2]))
objective.new <- obj(p)
grad.new <- attributes(objective.new)$grad
if (is.null(grad.new)) {
grad.new <- grad(p)
}
mD <- c(objective.new, abs(grad.new %*% Delta))
count <- 0
while (any(mD>mD0) || any(is.nan(mD))) {
count <- count+1
Lambda <- Lambda/2
if (Lambda<1e-4) break;
p <- p.orig + Lambda*Delta
objective.new <- obj(p)
grad.new <- attributes(objective.new)$grad
if(!is.infinite(mD0[1])){
mD0[1] <- objective.origin + Lambda * c_D.origin_Delta[1]
mD[1] <- objective.new
}
if(!is.infinite(mD0[2])){
if (is.null(grad.new)) {
grad.new <- grad(p)
}
mD[2] <- abs(grad.new %*% Delta)
}
}
}
return(list(p=p,D=D,iI=iI))
}
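## Main loop: repeat oneiter() until the root-mean-square gradient drops below
## 'tol' or 'iter.max' is reached. When 'nstepsize' is positive, 'stepsize' is
## pushed towards 1 every 'nstepsize' iterations via repeated square roots.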
count <- count2 <- 0
thetacur <- start
stepsizecount <- 0
Dprev <- rep(Inf,length(start))
for (jj in seq_len(control0$iter.max)) {
stepsizecount <- stepsizecount+1
count <- count+1
count2 <- count2+1
newpar <- oneiter(thetacur,Dprev,iter=jj)
Dprev <- newpar$D
thetacur <- newpar$p
if (!is.null(control0$nstepsize) && control0$nstepsize>0) {
if (control0$nstepsize<=stepsizecount) {
control0$stepsize <- sqrt(control0$stepsize)
stepsizecount <- 0
}
}
if (count2==control0$trace) {
cat("Iter=", count,"\t",sep="")
if (!is.null(obj))
cat("Objective=",obj(as.double(newpar$p)))
cat(";\n\tD=", paste0(formatC(newpar$D),
collapse = " "), "\n")
cat("\tp=", paste0(formatC(thetacur), collapse = " "),
"\n")
count2 <- 0
}
if (mean(newpar$D^2)^.5<control0$tol) break;
}
res <- list(par=as.vector(thetacur), iterations=count, method="NR",
gradient=newpar$D, iH=newpar$iI)
return(res)
}
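## Illustrative sketch (commented out; not part of the package): calling NR
## with explicit gradient and Hessian functions instead of attaching them as
## attributes to the objective. The gradient below has its root at c(1, -2).
##   gr <- function(p) c(2 * (p[1] - 1), 2 * (p[2] + 2))
##   he <- function(p) diag(2, 2)
##   NR(c(0, 0), gradient = gr, hessian = he, control = list(trace = 1))$par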