###################################
# make a wrapper that applies optim and then numDeriv, possibly on a boundary with one-sided derivatives if necessary
optimizer <- function(par,fn,...,method="pNewton",lower=-Inf,upper=Inf,period=FALSE,reset=identity,control=list())
{
DEBUG <- FALSE
method <- match.arg(method,c("pNewton","Nelder-Mead","BFGS","CG","L-BFGS-B","SANN","Brent"))
precision <- maxit <- NULL
default <- list(precision=1/2,maxit=.Machine$integer.max,parscale=pmin(abs(par),abs(par-lower),abs(upper-par)))
control <- replace(default,names(control),control)
# R check does not like attach
NAMES <- names(control) ; for(i in 1:length(control)) { assign(NAMES[i],control[[i]]) }
if(any(parscale<=.Machine$double.eps)) { parscale[parscale<=.Machine$double.eps] <- 1 }
if(method=="pNewton") # use mc.optim
{
if(!DEBUG)
{ RESULT <- mc.optim(par=par,fn=fn,lower=lower,upper=upper,period=period,reset=reset,control=control) }
else
{
if(!exists(".Random.seed")) { set.seed(NULL) }
SEED <- .Random.seed
RESULT <- try(mc.optim(par=par,fn=fn,lower=lower,upper=upper,period=period,reset=reset,control=control))
while(inherits(RESULT,"try-error"))
{
debug(mc.optim)
assign(".Random.seed",SEED,envir=globalenv())
RESULT <- try(mc.optim(par=par,fn=fn,lower=lower,upper=upper,period=period,reset=reset,control=control))
}
if(isdebugged(mc.optim)) { undebug(mc.optim) }
} # end DEBUG
}
else
{
# wrap objective function in try()
func <- function(par,...)
{
FN <- try(fn(par,...))
if(!is.numeric(FN) || is.nan(FN))
{
warning("Objective function failure at c(",paste0(names(par),"=",par,collapse=', '),")")
FN <- Inf
}
return(FN)
}
if(length(par)==1)
{
# try optimize (can fail with high skew & kurtosis)
tol <- 3*sqrt(2*.Machine$double.eps^precision) # digit precision
lower <- ifelse(lower>-Inf,lower,-10*abs(par))
upper <- ifelse(upper<Inf,upper,10*abs(par))
ATTEMPT <- stats::optimize(f=func,...,lower=lower,upper=upper,tol=tol)
RESULT <- rbind(c(ATTEMPT$minimum,ATTEMPT$objective))
# log scale backup that can't capture zero boundary
ndigit <- -log((.Machine$double.eps)^precision,10)
steptol <- sqrt(2*.Machine$double.eps^precision)
ATTEMPT <- stats::nlm(function(p){func(par*exp(p))},p=0,...,ndigit=ndigit,gradtol=0,steptol=steptol,stepmax=log(10),iterlim=maxit)
RESULT <- rbind(RESULT,c(par*exp(ATTEMPT$estimate),ATTEMPT$minimum))
# choose the better estimate
MIN <- which.min(RESULT[,2])
RESULT <- list(par=RESULT[MIN,1],value=RESULT[MIN,2])
}
else # use optim
{
control$zero <- NULL
control$precision <- NULL
if(!is.null(control$covariance)) { control$parscale <- sqrt(abs(diag(control$covariance))) }
control$covariance <- NULL
if(method=="Nelder-Mead")
{
lower <- -Inf
upper <- Inf
control$reltol <- .Machine$double.eps^precision
}
else
{
control$factr <- .Machine$double.eps^precision
}
RESULT <- stats::optim(par=par,fn=func,method=method,lower=lower,upper=upper,control=control,...)
}
}
return(RESULT)
}
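# Example (illustrative sketch; the quadratic objective and starting point are made up):
# FIT <- optimizer(par=c(1,1), fn=function(p) sum((p-3)^2), method="Nelder-Mead")
# FIT$par   # approximately c(3,3)
# FIT$value # approximately 0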
## search up to and then along a boundary/period (recursively)
# this is used by NR step only, not by line-search algorithm
box.search <- function(p0,grad,hess,cov=PDsolve(hess),lower=-Inf,upper=Inf,period=F,period.max=1/2)
{
# how far we can go before boundary
n <- length(p0)
lambda <- rep(1,n)
# limit uncertainty to std.dev
# cov <- PDclamp(cov,lower=0,upper=std.max^2)
# naive target
dp <- -c(cov %*% grad)
p1 <- p0 + dp
# passes through lower boundary ?
dlo <- lower-p1
LO <- p1==lower | (dlo>=0) # Inf fix
# passes through upper boundary ?
dhi <- p1-upper
HI <- p1==upper | (dhi>=0) # Inf fix
if(any(LO) || any(HI))
{
if(any(LO)) { lambda[LO] <- ifelse( dp[LO]<0 , 1 - abs( dlo[LO]/dp[LO] ) , 1 ) }
if(any(HI)) { lambda[HI] <- ifelse( dp[HI]>0 , 1 - abs( dhi[HI]/dp[HI] ) , 1 ) }
PER <- as.logical(period)
if(any(PER)) { lambda[PER] <- pmin(1, (period[PER]*period.max)/abs(dp[PER]) ) }
lambda <- nant(lambda,1) # ignore Inf @ Inf
MIN <- which.min(lambda)
M <- sqrt(sum(dp^2)) # step length @ lambda=1
if(lambda[MIN]*M>=1) # stop at first boundary or unit step (suggested step size ridiculous)
{
dp <- dp/M # prevent numerical errors from extreme M
p1 <- line.boxer(dp,p0,lower=lower,upper=upper,period=period,period.max=period.max)
}
else if(lambda[MIN]<1) # recursive search along boundaries
{
p2 <- p1 # target beyond boundary
p1 <- p0 + lambda[MIN]*dp # go to boundary and stop
# correct small numerical errors
LO <- (p1<lower)
if(any(LO)) { p1[LO] <- lower[LO] }
HI <- (p1>upper)
if(any(HI)) { p1[HI] <- upper[HI] }
# remaining dimensions
if(length(lambda)>1)
{
# gradient estimated on boundary
grad <- hess %*% (p1-p2) # should I use this?
# search along remaining dimensions
p1[-MIN] <- box.search(p1[-MIN],grad[-MIN],hess[-MIN,-MIN],lower=lower[-MIN],upper=upper[-MIN],period=period[-MIN],period.max=period.max)
}
} # end recurse
} # end boundary consideration
return(p1)
}
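# Example (comment sketch; box.search() relies on the package-internal PDsolve() and nant() helpers):
# for a quadratic objective with unconstrained minimum at c(2,2), boxed to the unit square,
# box.search(p0=c(0.5,0.5), grad=c(-1.5,-1.5), hess=diag(1,2), lower=c(0,0), upper=c(1,1))
# returns c(1,1): the Newton step stops at the first boundary, then recurses along it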
######################
# apply box constraints to travel in a straight line from p0 by dp
######################
line.boxer <- function(dp,p0=dp[,1],lower=-Inf,upper=Inf,period=F,period.max=1/2,tol=.Machine$double.eps*length(p0))
{
DIM <- dim(dp)
if(is.null(dim(dp))) { dp <- cbind(dp) }
single.boxer <- function(dp)
{
# where would we go without a boundary
p <- p0 + dp
# did we hit a boundary?
LO <- p==lower | (p-lower) <= .Machine$double.eps # Inf fix
UP <- p==upper | (upper-p) <= .Machine$double.eps # Inf fix
# are we trying to push through that boundary?
if(any(LO)) { LO <- LO & (dp<0) }
if(any(UP)) { UP <- UP & (dp>0) }
# stop when we hit the boundary
# time until we hit the first new boundary
t <- c( ((lower-p0)/dp)[LO] , ((upper-p0)/dp)[UP] )
t <- nant(t,Inf) # 0/0 from infinite boundaries
t <- t[t>tol] # avoid boundary trap from numerical error
t <- min(t,1,na.rm=TRUE) # first non-trapping boundary, at most the full step
# circular parameters prevented from going more than half a period
PERIOD <- as.logical(period)
if(any(PERIOD)) { t <- min(t,abs(period/dp)[PERIOD]*period.max) }
# stop at first boundary
p <- p0 + t*dp
# fix to machine precision
p <- clamp(p,lower,upper)
return(p)
}
p <- apply(dp,2,function(d){single.boxer(d)})
dim(p) <- DIM
return(p)
}
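# Example (comment sketch; uses the package-internal nant() and clamp() helpers):
# from the origin, a straight step of c(2,2) is truncated at the first face of the unit box:
# line.boxer(c(2,2), p0=c(0,0), lower=c(-1,-1), upper=c(1,1)) # returns c(1,1)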
###########################################
# calculate local derivatives (along directions DIR) from 3 points, with (par,fn.par) the best (global variables)
###########################################
QuadSolve <- function(P0,P1,P2,DIR,F0,F1,F2)
{
# convert back to displacements
P1 <- P1 - c(P0)
P2 <- P2 - c(P0)
# signed magnitudes of displacement vectors
# P1, P2, DIR columns will always be parallel
DIR <- cbind(DIR)
M1 <- colSums(DIR * P1)
M2 <- colSums(DIR * P2)
F1 <- F1-F0
F2 <- F2-F0
G1 <- F1/M1
G2 <- F2/M2
# Hessian estimates, not necessarily assuming that par is between P1 & P2
hessian <- (G2-G1)/(M2-M1)*2
hessian <- nant(hessian,0)
INF <- abs(hessian)==Inf
if(any(INF)) { hessian[INF] <- sign(hessian[INF])/.Machine$double.eps }
# gradient estimates, also not assuming that par is between P1 & P2
gradient <- (G2/M2-G1/M1)/(1/M2-1/M1)
gradient <- nant(gradient,0)
INF <- abs(gradient)==Inf
if(any(INF)) { gradient[INF] <- sign(gradient[INF])/.Machine$double.eps }
return(list(gradient=gradient,hessian=hessian))
}
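# Example (comment sketch): for f(x)=x^2 about x=1 with displacements of -/+0.1,
# QuadSolve(P0=1, P1=0.9, P2=1.1, DIR=1, F0=1, F1=0.81, F2=1.21)
# recovers gradient=2 and hessian=2, the exact f'(1) and f''(1)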
# does this data look roughly quadratic near the minimum?
QuadTest <- function(x,y,MIN=which.min(y),thresh=0.5)
{
DIFF <- QuadSolve(x[MIN],x[MIN-1],x[MIN+1],1,y[MIN],y[MIN-1],y[MIN+1])
# predicted minimum
x0 <- x[MIN] - DIFF$gradient/DIFF$hessian
y0 <- y[MIN] - DIFF$hessian/2*(x[MIN]-x0)^2
# quadratic estimate
y.func <- function(x) { y0 + DIFF$hessian/2*(x-x0)^2 }
r <- y.func(x[MIN+c(-2,2)])
# relative residuals
r <- (y[MIN+c(-2,2)]-r)/(r-y0)
return(all(abs(r)<thresh))
}
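# Example (comment sketch): an exact parabola passes, as its relative residuals vanish
# x <- seq(-2,2,0.5) ; y <- x^2
# QuadTest(x,y) # TRUE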
########################
# does the line search satisfy some Wolfe-like conditions?
WolfeTest <- function(x,y,grad.i,i=1,MIN=which.min(y),const=c(1,1),thresh=0.1)
{
# test for any Inf near MIN
if(any(y[MIN + -1:1]==Inf)) { return(FALSE) }
# gradient information at minimum (approximate)
DIFF <- QuadSolve(x[MIN],x[MIN-1],x[MIN+1],1,y[MIN],y[MIN-1],y[MIN+1])
# strong curvature rule
TEST <- abs(DIFF$gradient)/abs(grad.i)
if(is.nan(DIFF$gradient)) { return(TRUE) } # numerical error - too tight
else { return(TEST<=thresh) }
}
######################
# minimally rotate orthonormal basis DIR to include vec
######################
Gram.Schmidt <- function(DIR,vec)
{
# normalize par.diff
vec <- vec / sqrt(sum(vec^2))
# calculate overlaps
OVER <- colSums(vec*DIR)
# replace dimension of largest overlap with par.diff
MAX <- which.max(abs(OVER))
DIR[,MAX] <- vec
OVER[MAX] <- 1 # not necessary, but true
# subtract projection from all but vec
DIR[,-MAX] <- DIR[,-MAX] - (vec %o% OVER[-MAX])
# re-normalization (may be necessary?)
MAG <- sqrt(colSums(DIR^2))
DIR <- t(t(DIR)/MAG)
return(DIR)
}
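# Example (comment sketch): rotate the 2D identity basis to include (1,1)/sqrt(2)
# DIR <- Gram.Schmidt(diag(1,2), c(1,1))
# DIR[,1]        # c(1,1)/sqrt(2) replaces the axis of largest overlap
# t(DIR) %*% DIR # approximately the identity (orthonormal columns)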
# correlation-preserving rank-1 update
rank1update <- function(H.LINE,LINE,hessian,covariance)
{
if(!is.na(H.LINE) && H.LINE>=.Machine$double.eps)
{
LINE <- LINE/sqrt(sum(LINE^2))
# Hessian diagonal element correction factor
H0 <- c(LINE %*% hessian %*% LINE)
FACT <- abs(H.LINE / H0)
FACT <- sqrt(FACT)
# rank-1 Hessian update - no matrix-matrix multiplication
A <- FACT-1
B <- c(hessian %*% LINE)
O <- outer(LINE,LINE)
hessian <- hessian + A^2*H0*O
B <- outer(LINE,B)
B <- B + t(B)
hessian <- hessian + A*B
# rank-1 covariance update - no matrix-matrix multiplication
A <- 1/FACT-1
B <- c(covariance %*% LINE)
covariance <- covariance + A^2*c(LINE%*%B)*O
B <- outer(LINE,B)
B <- B + t(B)
covariance <- covariance + A*B
}
else
{ FACT <- 0 }
return(list(hessian=hessian,covariance=covariance,condition=FACT))
}
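# Example (comment sketch): the paired rank-1 updates keep covariance the inverse of hessian
# UP <- rank1update(H.LINE=4, LINE=c(1,0), hessian=diag(2,2), covariance=diag(1/2,2))
# UP$hessian[1,1]              # 4: curvature along LINE updated to H.LINE
# UP$hessian %*% UP$covariance # approximately the identity matrix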
##################
# smallest number of evaluations that is a multiple of cores and at least min
mc.min <- function(min,cores=detectCores())
{
x <- ceiling(min/cores)
x <- x * cores
return(x)
}
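# Example: mc.min(5,cores=4) returns 8, i.e. 2 evaluations on each of 4 cores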
#################################
# Parallelized optimizers
# 0 - (TODO) Pattern Search
# 1 - partial-Newton-Raphson - custom method with efficient Hessian update
# - TODO: full Hessian option when DIM small
# - TODO: filling up mc queue if p>2n+1
# 2 - Preconditioned Non-linear Conjugate Gradient - Polak-Ribiere with automatic restarts
# 3 - Preconditioned Gradient Descent
##################################
mc.optim <- function(par,fn,...,lower=-Inf,upper=Inf,period=FALSE,reset=identity,control=list())
{
DEBUG <- FALSE # can be overridden in control
PMAP <- TRUE # periodic parameters are mapped locally: (-period/2,+period/2) -> (-Inf,Inf) during search steps
# check complains about visible bindings
fnscale <- parscale <- rescale <- maxit <- precision <- trace <- cores <- hessian <- covariance <- NULL
# fix default control arguments
default <- list(fnscale=1,parscale=pmin(abs(par),abs(par-lower),abs(upper-par)),maxit=100,trace=FALSE,precision=NULL,cores=1,hessian=NULL,covariance=NULL,stages=NULL,rescale=FALSE)
control <- replace(default,names(control),control)
# check does not like attach
NAMES <- names(control)
for(i in 1:length(control)) { assign(NAMES[i],control[[i]]) }
# something is stripping par names
NAMES <- names(par)
cores <- resolveCores(cores)
if(any(parscale==0)) { parscale[parscale==0] <- 1 }
# does fn take a zeroing argument?
if("zero" %in% names(formals(fn))) { ZERO <- TRUE } else { ZERO <- FALSE }
zero <- 0 # current value of the minimum
TOL.ZERO <- 0 # error from zeroing
if(is.null(precision) && ZERO) { precision <- 1/4 } # don't need as much precision
else if(is.null(precision) && !ZERO) { precision <- 1/2 }
if(is.null(stages))
{
if(precision<1/2) { stages <- 1 } # Newton-Raphson is good enough, hessian accurate here
else { stages <- 1:2 } # need conjugate gradient for higher precision
}
# differentiation step size: 2nd, 1st, 1st, 0th
STEP <- .Machine$double.eps^c(0.25,0.5,0.5,1)
# machine tolerances for various stage calculations
TOL <- .Machine$double.eps^c(0.5,1,1,1)
# goal errors
TOL.GOAL <- .Machine$double.eps^precision
# tolerance for BFGS update (it's not very good/reliable)
TOL.BFGS <- sqrt(TOL[1])
STEP.BFGS <- sqrt(STEP[1])
DIM <- length(par)
period <- array(period,DIM)
par <- par/parscale
lower <- array(lower,DIM)/parscale
upper <- array(upper,DIM)/parscale
period <- period/parscale
# start with the canonical directions
# DIR <- diag(1,DIM) # rows of column vectors
# sometimes initial covariance/hessian is garbage because previous data/model were garbage
if(!is.null(covariance))
{
covariance <- t(t(covariance/parscale)/parscale)
TEST <- eigen(covariance)$values
TEST <- any(TEST<=TOL[1]) || any(TEST>=1/TOL[1]) || min(TEST)/max(TEST)<=TOL[1]
if(TEST) { covariance <- NULL }
}
# fall back --- unlikely
if(!is.null(hessian))
{
hessian <- t(t(hessian*parscale)*parscale)
TEST <- 1/eigen(hessian)$values
TEST <- any(TEST<=TOL[1]) || any(TEST>=1/TOL[1]) || min(TEST)/max(TEST)<=TOL[1]
if(TEST) { hessian <- NULL }
}
# preconditioning is safe
if(!is.null(covariance))
{ hessian <- PDsolve(covariance) }
else if(!is.null(hessian))
{ covariance <- PDsolve(hessian) }
else # use parscale to start
{
covariance <- diag(1,DIM) # inverse hessian
hessian <- diag(1,DIM) # hessian
# LINE.DO <- TRUE
}
# what we will actually be evaluating
PERIOD <- as.logical(period)
if(PMAP && any(PERIOD))
{
PSCALE <- period[PERIOD]/pi
# locally-tangent transform that maps periods to infinity
pmap <- function(p,theta,inverse=FALSE)
{
if(inverse) { p[PERIOD] <- atan(p[PERIOD]/PSCALE)*PSCALE + theta }
else { p[PERIOD] <- tan((p[PERIOD]-theta)/PSCALE)*PSCALE }
return(p)
}
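# example (comment sketch): for a single periodic parameter with period=2*pi (PSCALE=2)
# and tangent origin theta=0, pmap(pi/2,0) = tan(pi/4)*2 = 2,
# and pmap(2,0,inverse=TRUE) = atan(1)*2 = pi/2 recovers the original angle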
# extract better theta from better par (under tangent transformation)
get.theta <- function(p,theta) # theta here is old theta
{
p <- pmap(p,theta,inverse=TRUE)
return(p[PERIOD])
}
# update old theta to new theta (under tangent transformation)
put.theta <- function(p,theta.old,theta.new)
{
p <- pmap(p,theta.old,inverse=TRUE)
p <- pmap(p,theta.new)
return(p)
}
period <- rep(FALSE,length(period)) # don't treat parameters as periodic from now on
# all coordinates are now transformed coordinates
theta <- par[PERIOD]
par <- pmap(par,theta)
}
else
{ PMAP <- FALSE }
# wrap objective function
func <- function(par,...)
{
if(PMAP) { par <- pmap(par,theta,inverse=TRUE) }
# this objective function has the ability to approximately zero its objective value
if(ZERO) { FN <- try(fn(par*parscale,zero=zero*fnscale,...),silent=!DEBUG) }
else { FN <- try(fn(par*parscale,...),silent=!DEBUG) }
# ordinary objective function
if(is.numeric(FN) && !is.nan(FN)) { FN <- FN/fnscale }
else
{
# store to environmental variable so that I can debug?
warning("Objective function failure at c(",paste0(NAMES,"=",par*parscale,collapse=', '),')')
if(FALSE && DEBUG)
{
debug(ctmm.loglike)
# try again with debugging
if(ZERO) { FN <- try(fn(par*parscale,zero=zero*fnscale,...),silent=!DEBUG) }
else { FN <- try(fn(par*parscale,...),silent=!DEBUG) }
undebug(ctmm.loglike)
}
FN <- Inf
}
return(FN)
}
##################
# am I on a boundary and if so, align DIR
######################
is.boxed <- function(par,fix.dir=FALSE)
{
# what points are on the boundary, so that we have to do one-sided derivatives
LO <- par==lower | (par-lower) <= .Machine$double.eps
UP <- par==upper | (upper-par) <= .Machine$double.eps
BOX <- (LO | UP)
# rotate coordinates so that DIR[,BOX] points strictly towards boundary
if(fix.dir && any(LO)) { diag(DIR)[LO] <<- -1 }
return(BOX)
}
is.toosmallf <- function(fns,fn0)
{
if(ZERO) { R <- abs(diff(fns))<=TOL.STAGE+TOL.ZERO }
else { R <- abs(diff(fns))/fn0<=TOL.STAGE }
return(!length(R) || is.na(R) || R) # avoid 0/0 error
}
is.toosmallp <- function(ps,p0,tol=STEP[STAGE])
{
SCL <- pmax(abs(p0),1)
# relative step sizes
dp <- sqrt(abs(c((ps[,2]-ps[,1])^2 %*% (1/SCL^2)))) # formula explained in numderiv.diff()
R <- dp <= tol
return(!length(R) || is.na(R) || R) # avoid 0/0 error
}
numderiv.diff <- function(p0,DIR)
{
# different calculations for the numerical step length
# standard deviation for each axis
# STD <- sqrt(abs(diag(covariance)))
# parameter scale not along current axes
SCL <- pmax(abs(p0),1)
# squared parameter scale along axes - analog to STD
# SCL <- t(DIR) %*% diag(SCL^2,nrow=DIM) %*% DIR
# single parameter scale for each axis
# SCL <- sqrt(abs(diag(SCL)))
# equivalent calculation to the above, but avoids matrix-matrix multiplication
# SCL <- sqrt(abs(c(DIR^2 %*% SCL^2))) # can overflow
SCL <- diag(SCL,nrow=length(SCL))
SCL <- diag( t(DIR) %*% SCL %*% DIR )
# sample initial points around the center for numerical differentiation
STEP <- SCL*STEP[STAGE]
par.step <- t(STEP*t(DIR))
P1 <- -par.step # away from boundaries
P2 <- +par.step # towards boundaries
# don't sample points across boundary, fold back instead - correct one-sided derivatives implemented
BOX <- is.boxed(p0)
if(any(BOX)) { P2[,BOX] <- 2*P1[,BOX] }
# apply these displacements within box constraints
P1 <- line.boxer(P1,p0=p0,lower=lower,upper=upper,period=period)
P2 <- line.boxer(P2,p0=p0,lower=lower,upper=upper,period=period)
# columns are now boxed coordinates
return(list(P1=P1,P2=P2))
}
# distance to next boundary
m.box <- function(M=1)
{
# STUFF <<- list(M=M,lower=lower,upper=upper,par=par,DIR.STEP=DIR.STEP)
MB <- c(((upper-par)/(M*DIR.STEP))[M*DIR.STEP>0],((lower-par)/(M*DIR.STEP))[M*DIR.STEP<0])
MB <- nant(MB,0)
if(!length(MB)) { MB <- 0 }
MB <- min(MB,na.rm=TRUE)
MB <- max(0,MB)
return(MB)
}
# initializing stuff checked in loop
STAGE <- stages[1] # 1-Newton, 2-Conjugate Gradient, 3-Gradient Descent, 4-nothing yet
ERROR <- Inf
counts <- 0
par.target <- par # where to evaluate around next
par.old <- par.start <- par
fn.par <- Inf # current best objective value fn(par)
# condition <- Inf
gradient.old <- rep(0,DIM)
# hessian.LINE <- NULL # line-search hessian
CG.RESET <- TRUE # reset conjugate gradient to gradient descent
REVERSE <- FALSE
cg.dir <- rep(0,DIM) # conjugate gradient direction
par.diff <- rep(0,DIM)
NR <- FALSE # line search hessian/gradient are good
######################
# MAIN LOOP
######################
if(trace==1) { pb <- utils::txtProgressBar(style=3) }
while(STAGE<=last(stages) && counts<=maxit)
{
# update zero shift
if(ZERO && fn.par<Inf) { zero <- zero + fn.par }
## update local tangent frame for periodic variables
if(PMAP) # back transform
{
# update tangent origins
theta.old <- theta
# transform back
par <- pmap(par,theta,inverse=TRUE)
par.target <- pmap(par.target,theta,inverse=TRUE)
par.old <- pmap(par.old,theta,inverse=TRUE)
}
## shift back near origin to prevent overflow
RESCALE <- reset(par*parscale)/parscale
if(sqrt(sum((RESCALE-par)^2))>DIM*.Machine$double.eps)
{
# rescale parameters
TEMP <- RESCALE
RESCALE <- nant(TEMP/par,1)
par <- par.target <- par.old <- TEMP
# reset gradients
CG.RESET <- TRUE
gradient.old <- rep(0,DIM)
covariance <- hessian <- diag(1,DIM)
}
## finish local tangent update
if(PMAP) # forward transform
{
# new tangent origin (theta)
theta <- par[PERIOD]
# transform forward
par <- pmap(par,theta)
par.target <- pmap(par.target,theta)
par.old <- pmap(par.old,theta)
# change of variables 3x
gradient.old[PERIOD] <- gradient.old[PERIOD] / ( 1 + (par.old[PERIOD]/PSCALE)^2 )
}
DIR <- diag(1,DIM)
BOX <- is.boxed(par.target,fix.dir=TRUE)
# revert stage on big changes
for(i in stages) { if(STAGE>i && ERROR>=TOL[i]+TOL.ZERO && TOL[i]>=TOL.GOAL) { STAGE <- i ; break } }
# advance through requested stages only
STAGE <- (stages[stages>=STAGE])[1]
if(is.na(STAGE)) { break }
# set stage information
TOL.STAGE <- max(TOL.GOAL,TOL[STAGE])
if(STAGE<2) { CG.RESET <- TRUE } # reset conjugate gradient direction for next time STAGE==2
# now line searching to tolerance, so no point in this...
## update hessian from line-search result (Rank-1 update)
# if(STAGE==1 && NR)
# {
# DIFF <- rank1update(hessian.LINE,DIR.STEP,hessian,covariance)
# hessian <- DIFF$hessian
# covariance <- DIFF$covariance
# }
## lots of choices here for the basis
# we could stick with the canonical basis
if(STAGE==1 && sum(!BOX)>1)
{
## random basis
n <- sum(!BOX)
dir <- matrix(0,n,n)
# random rotation angles
dir[upper.tri(dir)] <- stats::runif((n^2-n)/2,min=-pi,max=pi)
# anti-symmetric matrix generator
dir <- dir - t(dir)
# orthogonal matrix
DIR[!BOX,!BOX] <- expm::expm(dir,trySym=FALSE)
################################
# transform Hessian to current coordinates
hessian <- t(DIR) %*% hessian %*% DIR
covariance <- t(DIR) %*% covariance %*% DIR
}
# sample for numeric differentiation
PS <- numderiv.diff(par.target,DIR)
P1 <- PS$P1
P2 <- PS$P2
# mc evaluate all points
# par must be re-evaluated to prevent zero shift roundoff error if(ZERO)
P <- cbind(par.target,P1,P2)
counts.diff <- ceiling(ncol(P)/cores)
counts <- counts + counts.diff
fn.queue <- unlist(plapply(split(P,col(P)),func,cores=cores))
# separate back into parts
par <- par.target
fn.par <- fn.target <- fn.queue[1]
F1 <- fn.queue[1+1:DIM]
F2 <- fn.queue[1+DIM+1:DIM]
# update estimate of round-off error in zeroing
if(counts>counts.diff && ZERO) { TOL.ZERO <- max(TOL.ZERO,abs(fn.par)) }
# is the minimum encapsulated?
# encapsulated <- (fn.target<F1) & (fn.target<F2)
# calculate axial derivatives to second order
DIFF <- QuadSolve(par.target,P1,P2,DIR,fn.target,F1,F2)
gradient <- DIFF$gradient
M <- sqrt(sum(gradient^2))
if(M<=.Machine$double.eps)
{
# need to move on to next stage regardless of objective
ERROR <- TOL[STAGE] * 0.99 # proceed to next stage (fake number)
STAGE <- STAGE + 1
}
# reset Hessian, if previous line search went backwards
if( REVERSE ) { covariance <- hessian <- diag(1,DIM) }
# Newton-Raphson (pNR diagonal update)
if(STAGE==1)
{
hessian.diag <- DIFF$hessian
### ERROR CHECKING ###
# will need line search if not normal looking
TEST <- is.na(hessian.diag)
if(any(TEST)) { hessian.diag[TEST] <- 0 }
# fix after condition report
# condition <- hessian.diag/diag(hessian)
# MIN <- which.min(abs(condition))
# MAX <- which.max(abs(condition))
# condition <- condition[c(MIN,MAX)]
# condition <- sign(condition) * sqrt(abs(condition))
# if the gradient is null in one direction, then we've likely profiled that axis to tolerance
# fix profiled dimensions to change nothing, else will get 0/0
TEST <- TEST | abs(F1-fn.target)<=TOL[STAGE] | abs(F2-fn.target)<=TOL[STAGE]
if(any(TEST))
{
gradient[TEST] <- 0
hessian.diag[TEST] <- diag(hessian)[TEST]
}
# stick with old estimate if new estimate is bad
TEST <- hessian.diag<=TOL[1] | hessian.diag>=1/TOL[1]
if(any(TEST)) { hessian.diag[TEST] <- diag(hessian)[TEST] }
# transform gradient back to canonical coordinates
gradient <- c(DIR %*% gradient)
} # end Newton Raphson diagonal update
# BFGS update (rank-1)
# we do this before the better local derivative update
# also this provides an objective check on Newton-Raphson step quality
########################################
if(STAGE==1 && counts>counts.diff)
{
# search direction
DIR.STEP <- par-par.old
M <- sqrt(sum(DIR.STEP^2))
DIR.STEP <- nant(DIR.STEP/M,sign(DIR.STEP))
# hessian along line between centers where derivatives were calculated
gradient.LINE <- gradient-gradient.old # canonical coordinates
hessian.LINE <- c( DIR.STEP%*%gradient.LINE )/M
hessian.LINE <- nant(hessian.LINE,0)
# does this info look good? Was the step big enough to justify the inaccuracy of this method? Then update along search direction
if(sum(gradient^2)<sum(gradient.old^2) && M>STEP.BFGS && hessian.LINE>TOL.BFGS && hessian.LINE<1/TOL.BFGS)
{
# transform DIFF to same frame as hessian/covariance
DIFF <- rank1update(hessian.LINE,c(t(DIR)%*%DIR.STEP),hessian,covariance)
hessian <- DIFF$hessian
covariance <- DIFF$covariance
}
}
# new best par
par <- par.target
fn.par <- fn.target
if(STAGE==1) # Newton-Raphson
{
####################################
# DIAGONAL UPDATE (Rank-DIM)
# curvature correction factor: new curvature / old curvature (current coordinates)
FACT <- sqrt(hessian.diag/diag(hessian)) # correction factor diagonal
# update curvatures while preserving correlations
hessian <- t(t(hessian*FACT)*FACT)
# update covariances the same way as Hessian (prevents requirement of matrix inversion)
covariance <- t(t(covariance/FACT)/FACT)
# transform Hessian back to canonical coordinates
hessian <- DIR %*% hessian %*% t(DIR)
covariance <- DIR %*% covariance %*% t(DIR)
}
####################################
# sanity check on hessian/covariance
TEST <- max(-Inf,conditionNumber(covariance),conditionNumber(hessian),na.rm=TRUE)
TEST <- TEST<=TOL.STAGE[1] || TEST>=1/TOL.STAGE[1]
# reset hessian/covariance if bad
if(TEST) { covariance <- hessian <- diag(1,DIM) }
## orthogonality test
# COS <- covariance %*% gradient
# COS <- c(gradient %*% COS)/sqrt(sum(gradient^2) * sum(COS^2))
# par may have been updated above, so recompute which pars are boxed
BOX <- is.boxed(par,fix.dir=TRUE)
# what dimensions are pushing through the boundaries
BOXED <- BOX & c(gradient%*%DIR)<0
par.old <- par.target # need for BFGS
if(STAGE==1 || STAGE==3)
{
LINE.TYPE <- "Newton-Raphson"
# Newton-Raphson search step from par.target (can optimize along boundary)
# if(DEBUG) { DEBUG.STEP <<- list(LINE.TYPE=LINE.TYPE,par=par,gradient=gradient,hessian=hessian,covariance=covariance,lower=lower,upper=upper,period=period) }
par.target <- box.search(par,gradient,hessian,covariance,lower=lower,upper=upper,period=period)
}
else if(STAGE==2) # conjugate gradient (preconditioned)
{
LINE.TYPE <- "Conjugate gradient"
# if(DEBUG) { DEBUG.STEP <<- list(LINE.TYPE=LINE.TYPE,par=par,gradient=gradient,hessian=hessian,covariance=covariance,lower=lower,upper=upper,period=period,BOXED=BOXED,DIR=DIR) }
if(any(!BOXED))
{
FREE <- !BOXED # now the free dimensions
if(any(BOXED) && any(FREE)) { COV <- PDsolve(hessian[FREE,FREE]) }
else { COV <- covariance } # avoid costly matrix inversion if possible
if(!CG.RESET) # continue with preconditioned conjugate gradient
{
# Polak-Ribiere formula (preconditioned)
beta <- c(gradient[FREE] %*% COV %*% (gradient - gradient.old)[FREE]) / c(gradient.old[FREE] %*% COV %*% gradient.old[FREE])
# direction reset
if(is.na(beta) || beta<0 || beta>=1/STEP[STAGE]) { CG.RESET <- TRUE }
}
# initialize with preconditioned gradient descent
if(CG.RESET)
{
LINE.TYPE <- "Gradient descent"
beta <- 0
}
# update search direction # works even if beta==0
cg.dir[FREE] <- -c(COV %*% gradient[FREE]) + (beta * cg.dir[FREE])
cg.dir[BOXED] <- 0
CG.RESET <- FALSE
}
# where we aim to evaluate next
par.target <- line.boxer(cg.dir,p0=par,lower=lower,upper=upper,period=period)
} # end conjugate-gradient code
gradient.old <- gradient # need for BFGS & CG
par.diff <- par.target-par
if(any(BOXED)) { par.diff[BOXED] <- 0 }
# never quit before line search, unless you really must
if(is.toosmallp(cbind(par.target,par),par,tol=.Machine$double.eps))
{
# need to move on to next stage regardless of objective
ERROR <- TOL[STAGE] * 0.99 # proceed to next stage (fake number)
STAGE <- STAGE + 1
next
}
# predicted search step size
M <- sqrt(sum(par.diff^2))
# new search direction
DIR.STEP <- nant(par.diff/M,sign(par.diff))
# distance to boundary
M.BOX <- m.box()
# not too small, not too big
M <- clamp(M,.Machine$double.eps,M.BOX)
###################
# LINE SEARCH START
###################
par.start <- par
fn.start <- fn.par
# initialize (all) storage results
par.all <- cbind(par)
fn.all <- fn.par
hessian.LINE <- c(DIR.STEP %*% hessian %*% DIR.STEP)
gradient.LINE <- c(DIR.STEP %*% gradient)
# generate a linear sequence of points from par to the other side of par.target
P2 <- line.boxer(1.5*par.diff,p0=par,lower=lower,upper=upper,period=period)
P1 <- line.boxer(1.0*par.diff,p0=par,lower=lower,upper=upper,period=period)
M <- sqrt(sum((P1-par)^2)) # M is unsigned!
M1 <- min(1,M/2) # 1 in case hessian is bad
M2 <- sqrt(sum((P2-par)^2))
M <- sort(c(M1,M,M2)) # just in case
M1 <- M[1]; M2 <- M[3]; M <- M[2]
# sample to target or boundary
if(abs(M)<=M.BOX) # sample P1/2 to P1 to P2
{
LINE.TYPE <- paste(LINE.TYPE,"prospection")
n <- mc.min(3,cores)
SEQ <- M # target
n <- n-1 # remaining points
# under-estimates -- proportional-ish to M-M1
n1 <- nant((M-M1)/(M2-M1),1)*(n+2) - 1
n1 <- clamp(n1,0+(M-M1>=STEP[STAGE]),n-(M2-M>=STEP[STAGE]))
n1 <- round(n1)
# over-estimates -- proportional-ish to M2-M
n2 <- n-n1
n1 <- max(0,n1+1)
n2 <- max(0,n2+1)
SEQ <- c( seq(M1,M,length.out=n1), seq(M,M2,length.out=n2)[-1] )
}
else # sample only P2/2 to P2 to fit search within boundary
{
LINE.TYPE <- paste(LINE.TYPE,"boundary")
n <- mc.min(2,cores)
SEQ <- seq(0,M2,length.out=n+1)[-1]
}
# new points to evaluate
P <- line.boxer((DIR.STEP %o% SEQ),p0=par,lower=lower,upper=upper,period=period)
##################
# LINE SEARCH LOOP
##################
while(counts < maxit)
{
counts <- counts + ceiling(ncol(P)/cores)
# most expensive part
# evaluate objective function at new P and store to fn.queue
fn.queue <- unlist(plapply(split(P,col(P)),func,cores=cores))
PROGRESS <- any(fn.queue<fn.par) # did this iteration improve things?
PROGRESS <- is.good(PROGRESS) # possible 0/0 error somewhere
# combine with older results
par.all <- cbind(par.all,P)
fn.all <- c(fn.all,fn.queue)
# sort along DIR.STEP
SORT <- c(DIR.STEP %*% (par.all-par))
SORT <- sort(SORT,method="quick",index.return=TRUE)$ix
par.all <- par.all[,SORT,drop=FALSE] # ARGH!
fn.all <- fn.all[SORT]
# new best estimate
MIN <- which.min(fn.all)
par <- par.all[,MIN]
fn.par <- fn.all[MIN]
END <- (MIN==1 || MIN==length(fn.all))
if(trace==2) { message(sprintf("%s %s search",format(zero+fn.par,digits=16),LINE.TYPE)) }
if(trace==3) { message("\tc(",paste0(NAMES,"=",par,collapse=', '),")") }
# if(DEBUG>1)
# {
# graphics::plot((DIR.STEP %*% (par.all-par)),(fn.all-fn.par),xlab="Distance from MIN",ylab="Value over MIN")
# graphics::abline(h=c(fn.start-fn.par,0),col=scales::alpha(c('black','blue'),0.5))
# graphics::abline(v=c(DIR.STEP %*% (par.start-par),0),col=scales::alpha(c('black','blue'),0.5))
# graphics::points(c(DIR.STEP %*% (P-par)),fn.queue-fn.par,pch=16,col="red")
# graphics::points(DIR.STEP %*% (par.start-par),fn.start-fn.par,pch=16)
# graphics::title(sprintf("%s search",LINE.TYPE))
# }
## calculated steps were too small on one side to proceed
# nowhere to go
TEST <- FALSE
if(MIN>1) { TEST <- TEST || is.toosmallp(par.all[,MIN-1:0,drop=FALSE],par,tol=.Machine$double.eps) }
if(MIN<length(fn.all)) { TEST <- TEST || is.toosmallp(par.all[,MIN+0:1,drop=FALSE],par,tol=.Machine$double.eps) }
if(TEST) { break }
# we met our objective
TEST <- FALSE
if(MIN>1) { TEST <- TEST || is.toosmallf(fn.all[MIN-1:0],fn.par) }
if(MIN<length(fn.all)) { TEST <- TEST || is.toosmallf(fn.all[MIN+0:1],fn.par) }
if(TEST && !PROGRESS) { break }
# we didn't meet our objective and probably won't make further progress
TEST <- FALSE
if(MIN>1) { TEST <- TEST || is.toosmallp(par.all[,MIN-1:0,drop=FALSE],par) }
if(MIN<length(fn.all)) { TEST <- TEST || is.toosmallp(par.all[,MIN+0:1,drop=FALSE],par) }
if(TEST && !PROGRESS) { break }
################################
# 1D Newton-Raphson info
################################
if(MIN==1)
{ i <- MIN+1; j <- MIN+2 }
else if(MIN==length(fn.all))
{ i <- MIN-1; j <- MIN-2 }
else
{ i <- MIN-1; j <- MIN+1 }
DIFF <- QuadSolve(par,par.all[,i],par.all[,j],DIR.STEP,fn.par,fn.all[i],fn.all[j])
# did this improve as it should?
if(DIFF$gradient^2<gradient.LINE^2 && DIFF$hessian>=TOL[STAGE] && DIFF$hessian<1/TOL[STAGE])
{
NR <- TRUE # will try NR step
M <- -DIFF$gradient/DIFF$hessian # M is signed!
# distance to next boundary in number of Ms
M.BOX <- m.box(M)
# Is estimated NR step plausible for a convex objective?
if(M.BOX<=.Machine$double.eps || abs(M)<=.Machine$double.eps) # past boundary, or no change
{ NR <- FALSE }
else if(MIN>1 && M<=(par.all[,MIN-1]-par)%*%DIR.STEP ) # M left of MIN-1
{ NR <- FALSE }
else if(MIN<length(fn.all) && M>=(par.all[,MIN+1]-par)%*%DIR.STEP) # M right of M+1
{ NR <- FALSE }
}
else
{
NR <- FALSE
M <- 0
}
# good to update local information
if(NR)
{
gradient.LINE <- DIFF$gradient
hessian.LINE <- DIFF$hessian
# predicted step is too small to calculate anything
if(abs(M)<=.Machine$double.eps)
{
if(MIN==1 || MIN==length(fn.all)) # but we should keep going
{ NR <- FALSE; M <- 0 } # try heuristic search instead of NR
else # and we can stop
{ break }
}
}
# TEST <- (MIN<=2) || (MIN>=length(fn.all)-1)
# if(!TEST) { TEST <- !WolfeTest(x=c(DIR.STEP %*% par.all),y=fn.all,grad.i=c(DIR.STEP%*%gradient),i=which.min(c(DIR.STEP%*%(par.all-par.start))^2),MIN=MIN) }
# distance to first boundary
M.BOX <- m.box()
# terminated on boundary---do not refine
if(M.BOX<=.Machine$double.eps && END) { break }
#########################
# setup next search steps
#########################
# if(DEBUG) { DEBUG.LINE <<- list(NR=NR,M=M,MIN=MIN,fn.all=fn.all,par.all=par.all,par=par,fn.par=fn.par,M.BOX=M.BOX,cores=cores,DIR.STEP=DIR.STEP,lower=lower,upper=upper,period=period,BOX=BOX,par.start=par.start,STEP=STEP,STAGE=STAGE) }
# setup searches
if(NR && M) # extrapolation search
{
if((MIN==1 && M<=0) || (MIN==length(fn.all) && M>=0))
{
if(abs(M)<abs(M.BOX))
{
LINE.TYPE <- "1D Newton-Raphson expansion"
n <- cores
if(abs(M)>1) # in case hessian is bad
{
n <- mc.min(2,cores)
M1 <- sign(M)
}
else
{ M1 <- M/2 }
M2 <- sign(M) * min(1.5*abs(M),M.BOX)
n <- n-1
SEQ <- M
n1 <- nant((M-M1)/(M2-M1),1)*(n+2) - 1
n1 <- clamp(n1,0,n)
n1 <- round(n1)
n2 <- n-n1
n1 <- max(0,n1+1)
n2 <- max(0,n2+1)
SEQ <- c( seq(M1,M,length.out=n1)[-n1] , seq(M,M2,length.out=n2) )
}
else
{
LINE.TYPE <- "1D Newton-Raphson boundary"
M <- M.BOX
n <- cores
if(abs(M)>1) # in case hessian is bad
{
n <- mc.min(2,cores)
M1 <- sign(M)
}
else
{ M1 <- M/n }
SEQ <- seq(M1,M,length.out=n)
}
}
else # interpolation search
{
LINE.TYPE <- "1D Newton-Raphson refinement"
n <- cores
M1 <- M
# step to next point after M
M2 <- sign(M)*sqrt(sum((par.all[,MIN+ifelse(M>=0,1,-1)]-par)^2))
SEQ <- M
n <- n-1
if(n)
{
n1 <- nant(M1/M2,1)*(n+2) - 1
n1 <- clamp(n1,0,n)
n1 <- round(n1)
n2 <- n-n1
SEQ <- c( seq(0,M1,length.out=n1+2)[-1], seq(M1,M2,length.out=n2+2)[-c(1,n2+2)] )
}
}
} # end targeted search
else # untargeted searches: heuristic -- not Newton-Raphson
{
# terminated on (or starting from) boundary # convex solution is adjacent
if(M.BOX<=.Machine$double.eps || (END && sqrt(sum((par.start-par)^2))<=.Machine$double.eps))
{
n <- cores
# step inward to previous point (too far)
M <- c( (par.all[,MIN+ifelse(MIN==1,1,-1)] - par.all[,MIN]) %*% DIR.STEP )
if(abs(M)/(n+1)<=STEP[STAGE]) # not enough room for geometric sequence
{
LINE.TYPE <- "Linear refinement"
SEQ <- seq(0,M,length.out=n+2)[-c(1,n+2)]
}
else if(abs(M)/2^n>=STEP[STAGE])
{
LINE.TYPE <- "Binary refinement"
SEQ <- (1/2)^(1:n) * M
}
else # that became too small
{
LINE.TYPE <- "Geometric refinement"
SEQ <- sign(M) * STEP[STAGE]^((1:n)/n)
}
}
else if(END) # expansion searches
{
# previous step
M <- c( (par.all[,MIN] - par.all[,MIN+ifelse(MIN==1,1,-1)]) %*% DIR.STEP )
n <- cores
if(2^n*abs(M)<=M.BOX) # boundary is far away
{
LINE.TYPE <- "Binary expansion"
SEQ <- 2^(1:n) * M
}
else if(n==1 || M.BOX/n<=abs(M)) # boundary is close
{
LINE.TYPE <- "Linear boundary"
SEQ <- seq(0,sign(M)*M.BOX,length.out=n+1)[-1]
}
else # intermediate
{
LINE.TYPE <- "Geometric boundary"
SEQ <- n^((1:n - n)/(n-1)) * sign(M) * M.BOX
}
}
else # would like golden search here, but that requires consistent spacing...
{
LINE.TYPE <- "Ternary refinement"
n <- mc.min(2,cores)
M1 <- sqrt(sum((par-par.all[,MIN-1])^2))
M2 <- sqrt(sum((par-par.all[,MIN+1])^2))
n1 <- nant(M1/(M1+M2),1/2)*(n+2) - 1
n1 <- clamp(n1,1,n-1)
n1 <- round(n1)
n2 <- n-n1
SEQ <- c( -seq(0,M1,length.out=n1+2)[-c(1,n1+2)], seq(0,M2,length.out=n2+2)[-c(1,n2+2)] )
}
} # end search setups
P <- line.boxer((DIR.STEP %o% SEQ),p0=par,lower=lower,upper=upper,period=period)
# goto search evaluation
} # end line search iteration loop via break
# did the search finish backwards?
REVERSE <- c( DIR.STEP %*% (par-par.start) ) < 0
par.target <- par # for outer loop
if(ZERO) { ERROR <- abs(fn.start-fn.par) }
else { ERROR <- abs(fn.start-fn.par)/max(1,abs(fn.par)) } # fnscale=1 effectively; avoid underflow from non-zeroed objective functions
# error-check wrapup
if(ERROR <= TOL.STAGE + TOL.ZERO)
{ STAGE <- STAGE + 1 }
else # didn't go anywhere
{
TEST <- is.toosmallp(cbind(par,par.start),par,tol=.Machine$double.eps)
if(TEST)
{
ERROR <- TOL[STAGE] * 0.99 # proceed to next stage (fake number)
STAGE <- STAGE + 1
}
}
if(trace==1) { utils::setTxtProgressBar(pb,clamp(nant(log(ERROR)/log(TOL.GOAL),0))) }
} # end main loop
if(trace==1) { close(pb) }
if(counts<maxit) { convergence <- 0} else { convergence <- 1 }
if(trace) { message(sprintf("%s in %d parallel function evaluations.",ifelse(convergence,"No convergence","Convergence"),counts)) }
if(trace==3) { message("\tc(",paste0(NAMES,"=",par,collapse=', '),")") }
if(PMAP) { par <- pmap(par,theta,inverse=TRUE) }
# return stuff in similar format to optim
RETURN <- list()
RETURN$par <- par*parscale
RETURN$value <- (fn.par+zero)*fnscale
RETURN$counts <- counts
RETURN$convergence <- convergence
RETURN$hessian <- t(t(hessian/parscale)/parscale)
RETURN$covariance <- t(t(covariance*parscale)*parscale)
RETURN$lower <- lower*parscale
RETURN$upper <- upper*parscale
# initial parscale was bad [UNFINISHED]
if(rescale && ERROR>TOL.GOAL && max(abs(log2(par)))>1)
{
control$parscale <- RETURN$par
control$hessian <- RETURN$hessian
control$covariance <- RETURN$covariance
RETURN <- mc.optim(RETURN$par,fn=fn,lower=RETURN$lower,upper=RETURN$upper,period=period,reset=reset,control=control,...)
}
return(RETURN)
}
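# Example (illustrative sketch; the objective is made up, and cores=1 for portability;
# mc.optim() itself relies on package-internal helpers like plapply(), PDsolve(), nant(), clamp()):
# FIT <- mc.optim(par=c(1,1), fn=function(p) sum((p-3)^2), lower=c(0,0), upper=c(10,10),
#                 control=list(cores=1))
# FIT$par # approximately c(3,3)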