###################################
# make a wrapper that applies optim and then numDeriv, possibly on a boundary with one-sided derivatives if necessary
optimizer <- function(par,fn,...,method="pNewton",lower=-Inf,upper=Inf,period=FALSE,reset=identity,control=list())
{
DEBUG <- FALSE
method <- match.arg(method,c("pNewton","Nelder-Mead","BFGS","CG","L-BFGS-B","SANN","Brent"))
precision <- maxit <- NULL
default <- list(precision=1/2,maxit=.Machine$integer.max,parscale=pmin(abs(par),abs(par-lower),abs(upper-par)))
control <- replace(default,names(control),control)
# R check does not like attach
NAMES <- names(control) ; for(i in 1:length(control)) { assign(NAMES[i],control[[i]]) }
if(any(parscale<=.Machine$double.eps)) { parscale[parscale<=.Machine$double.eps] <- 1 }
if(method=="pNewton") # use mc.optim
{
if(!DEBUG)
{ RESULT <- mc.optim(par=par,fn=fn,lower=lower,upper=upper,period=period,reset=reset,control=control) }
else
{
if(!exists(".Random.seed")) { set.seed(NULL) }
SEED <- .Random.seed
RESULT <- try(mc.optim(par=par,fn=fn,lower=lower,upper=upper,period=period,reset=reset,control=control))
while(inherits(RESULT,"try-error"))
{
debug(mc.optim)
assign(".Random.seed",SEED,envir=globalenv())
RESULT <- try(mc.optim(par=par,fn=fn,lower=lower,upper=upper,period=period,reset=reset,control=control))
}
if(isdebugged(mc.optim)) { undebug(mc.optim) }
} # end DEBUG
}
else
{
# wrap objective function in try()
func <- function(par,...)
{
FN <- try(fn(par,...))
if(!is.numeric(FN) || is.nan(FN))
{
warning("Objective function failure at c(",paste0(names(par),"=",par,collapse=', '),")")
FN <- Inf
}
return(FN)
}
if(length(par)==1)
{
# try optimize (can fail with high skew & kurtosis)
tol <- 3*sqrt(2*.Machine$double.eps^precision) # digit precision
lower <- ifelse(lower>-Inf,lower,-10*abs(par))
upper <- ifelse(upper<Inf,upper,10*abs(par))
ATTEMPT <- stats::optimize(f=func,...,lower=lower,upper=upper,tol=tol)
RESULT <- rbind(c(ATTEMPT$minimum,ATTEMPT$objective))
# log scale backup that can't capture zero boundary
ndigit <- -log((.Machine$double.eps)^precision,10)
steptol <- sqrt(2*.Machine$double.eps^precision)
ATTEMPT <- stats::nlm(function(p){func(par*exp(p))},p=0,...,ndigit=ndigit,gradtol=0,steptol=steptol,stepmax=log(10),iterlim=maxit)
RESULT <- rbind(RESULT,c(par*exp(ATTEMPT$estimate),ATTEMPT$minimum))
# choose the better estimate
MIN <- which.min(RESULT[,2])
RESULT <- list(par=RESULT[MIN,1],value=RESULT[MIN,2])
}
else # use optim
{
control$zero <- NULL
control$precision <- NULL
if(!is.null(control$covariance)) { control$parscale <- sqrt(abs(diag(control$covariance))) }
control$covariance <- NULL
if(method=="Nelder-Mead")
{
lower <- -Inf
upper <- Inf
control$reltol <- .Machine$double.eps^precision
}
else
{
control$factr <- .Machine$double.eps^precision
}
RESULT <- stats::optim(par=par,fn=func,method=method,lower=lower,upper=upper,control=control,...)
}
}
return(RESULT)
}
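# Example (illustrative sketch; the quadratic objective and starting point are made up):
# FIT <- optimizer(par=c(1,1), fn=function(p) sum((p-3)^2), method="Nelder-Mead")
# FIT$par   # approximately c(3,3)
# FIT$value # approximately 0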
## search up to and then along a boundary/period (recursively)
# this is used by NR step only, not by line-search algorithm
box.search <- function(p0,grad,hess,cov=PDsolve(hess),lower=-Inf,upper=Inf,period=F,period.max=1/2)
{
# how far we can go before boundary
n <- length(p0)
lambda <- rep(1,n)
# limit uncertainty to std.dev
# cov <- PDclamp(cov,lower=0,upper=std.max^2)
# naive target
dp <- -c(cov %*% grad)
p1 <- p0 + dp
# passes through lower boundary ?
dlo <- lower-p1
LO <- p1==lower | (dlo>=0) # Inf fix
# passes through upper boundary ?
dhi <- p1-upper
HI <- p1==upper | (dhi>=0) # Inf fix
if(any(LO) || any(HI))
{
if(any(LO)) { lambda[LO] <- ifelse( dp[LO]<0 , 1 - abs( dlo[LO]/dp[LO] ) , 1 ) }
if(any(HI)) { lambda[HI] <- ifelse( dp[HI]>0 , 1 - abs( dhi[HI]/dp[HI] ) , 1 ) }
PER <- as.logical(period)
if(any(PER)) { lambda[PER] <- pmin(1, (period[PER]*period.max)/abs(dp[PER]) ) }
lambda <- nant(lambda,1) # ignore Inf @ Inf
MIN <- which.min(lambda)
M <- sqrt(sum(dp^2)) # step length @ lambda=1
if(lambda[MIN]*M>=1) # stop at first boundary or unit step (suggested step size ridiculous)
{
dp <- dp/M # prevent numerical errors from extreme M
p1 <- line.boxer(dp,p0,lower=lower,upper=upper,period=period,period.max=period.max)
}
else if(lambda[MIN]<1) # recursive search along boundaries
{
p2 <- p1 # target beyond boundary
p1 <- p0 + lambda[MIN]*dp # go to boundary and stop
# correct small numerical errors
LO <- (p1<lower)
if(any(LO)) { p1[LO] <- lower[LO] }
HI <- (p1>upper)
if(any(HI)) { p1[HI] <- upper[HI] }
# remaining dimensions
if(length(lambda)>1)
{
# gradient estimated on boundary
grad <- hess %*% (p1-p2) # should I use this?
# search along remaining dimensions
p1[-MIN] <- box.search(p1[-MIN],grad[-MIN],hess[-MIN,-MIN],lower=lower[-MIN],upper=upper[-MIN],period=period[-MIN],period.max=period.max)
}
} # end recurse
} # end boundary consideration
return(p1)
}
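# Example (comment sketch; box.search() relies on the package-internal PDsolve() and nant() helpers):
# for a quadratic objective with unconstrained minimum at c(2,2), boxed to the unit square,
# box.search(p0=c(0.5,0.5), grad=c(-1.5,-1.5), hess=diag(1,2), lower=c(0,0), upper=c(1,1))
# returns c(1,1): the Newton step stops at the first boundary, then recurses along it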
######################
# apply box constraints to travel in a straight line from p0 by dp
######################
line.boxer <- function(dp,p0=dp[,1],lower=-Inf,upper=Inf,period=F,period.max=1/2,tol=.Machine$double.eps*length(p0))
{
DIM <- dim(dp)
if(is.null(dim(dp))) { dp <- cbind(dp) }
single.boxer <- function(dp)
{
# where would we go without a boundary
p <- p0 + dp
# did we hit a boundary?
LO <- p==lower | (p-lower) <= .Machine$double.eps # Inf fix
UP <- p==upper | (upper-p) <= .Machine$double.eps # Inf fix
# are we trying to push through that boundary?
if(any(LO)) { LO <- LO & (dp<0) }
if(any(UP)) { UP <- UP & (dp>0) }
# stop when we hit the boundary
# time until we hit the first new boundary
t <- c( ((lower-p0)/dp)[LO] , ((upper-p0)/dp)[UP] )
t <- nant(t,Inf) # 0/0 from infinite boundaries
t <- t[t>tol] # avoid boundary trap from numerical error
t <- min(t,1,na.rm=TRUE) # first non-trapping boundary, at most the full step
# circular parameters prevented from going more than half a period
PERIOD <- as.logical(period)
if(any(PERIOD)) { t <- min(t,abs(period/dp)[PERIOD]*period.max) }
# stop at first boundary
p <- p0 + t*dp
# fix to machine precision
p <- clamp(p,lower,upper)
return(p)
}
p <- apply(dp,2,function(d){single.boxer(d)})
dim(p) <- DIM
return(p)
}
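# Example (comment sketch; uses the package-internal nant() and clamp() helpers):
# from the origin, a straight step of c(2,2) is truncated at the first face of the unit box:
# line.boxer(c(2,2), p0=c(0,0), lower=c(-1,-1), upper=c(1,1)) # returns c(1,1)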
###########################################
# calculate local derivatives (along directions DIR) from 3 points, with (par,fn.par) the best (global variables)
###########################################
QuadSolve <- function(P0,P1,P2,DIR,F0,F1,F2)
{
# convert back to displacements
P1 <- P1 - c(P0)
P2 <- P2 - c(P0)
# signed magnitudes of displacement vectors
# P1, P2, DIR columns will always be parallel
DIR <- cbind(DIR)
M1 <- colSums(DIR * P1)
M2 <- colSums(DIR * P2)
F1 <- F1-F0
F2 <- F2-F0
G1 <- F1/M1
G2 <- F2/M2
# Hessian estimates, not necessarily assuming that par is between P1 & P2
hessian <- (G2-G1)/(M2-M1)*2
hessian <- nant(hessian,0)
INF <- abs(hessian)==Inf
if(any(INF)) { hessian[INF] <- sign(hessian[INF])/.Machine$double.eps }
# gradient estimates, also not assuming that par is between P1 & P2
gradient <- (G2/M2-G1/M1)/(1/M2-1/M1)
gradient <- nant(gradient,0)
INF <- abs(gradient)==Inf
if(any(INF)) { gradient[INF] <- sign(gradient[INF])/.Machine$double.eps }
return(list(gradient=gradient,hessian=hessian))
}
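# Example (comment sketch): for f(x)=x^2 about x=1 with displacements of -/+0.1,
# QuadSolve(P0=1, P1=0.9, P2=1.1, DIR=1, F0=1, F1=0.81, F2=1.21)
# recovers gradient=2 and hessian=2, the exact f'(1) and f''(1)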
# does this data look roughly quadratic near the minimum?
QuadTest <- function(x,y,MIN=which.min(y),thresh=0.5)
{
DIFF <- QuadSolve(x[MIN],x[MIN-1],x[MIN+1],1,y[MIN],y[MIN-1],y[MIN+1])
# predicted minimum
x0 <- x[MIN] - DIFF$gradient/DIFF$hessian
y0 <- y[MIN] - DIFF$hessian/2*(x[MIN]-x0)^2
# quadratic estimate
y.func <- function(x) { y0 + DIFF$hessian/2*(x-x0)^2 }
r <- y.func(x[MIN+c(-2,2)])
# relative residuals
r <- (y[MIN+c(-2,2)]-r)/(r-y0)
return(all(abs(r)<thresh))
}
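# Example (comment sketch): an exact parabola passes, as its relative residuals vanish
# x <- seq(-2,2,0.5) ; y <- x^2
# QuadTest(x,y) # TRUE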
########################
# does the line search satisfy some Wolfe-like conditions?
WolfeTest <- function(x,y,grad.i,i=1,MIN=which.min(y),const=c(1,1),thresh=0.1)
{
# test for any Inf near MIN
if(any(y[MIN + -1:1]==Inf)) { return(FALSE) }
# gradient information at minimum (approximate)
DIFF <- QuadSolve(x[MIN],x[MIN-1],x[MIN+1],1,y[MIN],y[MIN-1],y[MIN+1])
# strong curvature rule
TEST <- abs(DIFF$gradient)/abs(grad.i)
if(is.nan(DIFF$gradient)) { return(TRUE) } # numerical error - too tight
else { return(TEST<=thresh) }
}
######################
# minimally rotate orthonormal basis DIR to include vec
######################
Gram.Schmidt <- function(DIR,vec)
{
# normalize par.diff
vec <- vec / sqrt(sum(vec^2))
# calculate overlaps
OVER <- colSums(vec*DIR)
# replace dimension of largest overlap with par.diff
MAX <- which.max(abs(OVER))
DIR[,MAX] <- vec
OVER[MAX] <- 1 # not necessary, but true
# subtract projection from all but vec
DIR[,-MAX] <- DIR[,-MAX] - (vec %o% OVER[-MAX])
# re-normalization (may be necessary?)
MAG <- sqrt(colSums(DIR^2))
DIR <- t(t(DIR)/MAG)
return(DIR)
}
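# Example (comment sketch): rotate the 2D identity basis to include (1,1)/sqrt(2)
# DIR <- Gram.Schmidt(diag(1,2), c(1,1))
# DIR[,1]        # c(1,1)/sqrt(2) replaces the axis of largest overlap
# t(DIR) %*% DIR # approximately the identity (orthonormal columns)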
# correlation-preserving rank-1 update
rank1update <- function(H.LINE,LINE,hessian,covariance)
{
if(!is.na(H.LINE) && H.LINE>=.Machine$double.eps)
{
LINE <- LINE/sqrt(sum(LINE^2))
# Hessian diagonal element correction factor
H0 <- c(LINE %*% hessian %*% LINE)
FACT <- abs(H.LINE / H0)
FACT <- sqrt(FACT)
# rank-1 Hessian update - no matrix-matrix multiplication
A <- FACT-1
B <- c(hessian %*% LINE)
O <- outer(LINE,LINE)
hessian <- hessian + A^2*H0*O
B <- outer(LINE,B)
B <- B + t(B)
hessian <- hessian + A*B
# rank-1 covariance update - no matrix-matrix multiplication
A <- 1/FACT-1
B <- c(covariance %*% LINE)
covariance <- covariance + A^2*c(LINE%*%B)*O
B <- outer(LINE,B)
B <- B + t(B)
covariance <- covariance + A*B
}
else
{ FACT <- 0 }
return(list(hessian=hessian,covariance=covariance,condition=FACT))
}
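# Example (comment sketch): the paired rank-1 updates keep covariance the inverse of hessian
# UP <- rank1update(H.LINE=4, LINE=c(1,0), hessian=diag(2,2), covariance=diag(1/2,2))
# UP$hessian[1,1]              # 4: curvature along LINE updated to H.LINE
# UP$hessian %*% UP$covariance # approximately the identity matrix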
##################
# smallest number of evaluations that is a multiple of cores and at least min
mc.min <- function(min,cores=detectCores())
{
x <- ceiling(min/cores)
x <- x * cores
return(x)
}
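# Example: mc.min(5,cores=4) returns 8, i.e. 2 evaluations on each of 4 cores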
#################################
# Parallelized optimizers
# 0 - (TODO) Pattern Search
# 1 - partial-Newton-Raphson - custom method with efficient Hessian update
# - TODO: full Hessian option when DIM small
# - TODO: filling up mc queue if p>2n+1
# 2 - Preconditioned Non-linear Conjugate Gradient - Polak-Ribiere with automatic restarts
# 3 - Preconditioned Gradient Descent
##################################
mc.optim <- function(par,fn,...,lower=-Inf,upper=Inf,period=FALSE,reset=identity,control=list())
{
DEBUG <- FALSE # can be overridden in control
PMAP <- TRUE # periodic parameters are mapped locally: (-period/2,+period/2) -> (-Inf,Inf) during search steps
# check complains about visible bindings
fnscale <- parscale <- rescale <- maxit <- precision <- trace <- cores <- hessian <- covariance <- NULL
# fix default control arguments
default <- list(fnscale=1,parscale=pmin(abs(par),abs(par-lower),abs(upper-par)),maxit=100,trace=FALSE,precision=NULL,cores=1,hessian=NULL,covariance=NULL,stages=NULL,rescale=FALSE)
control <- replace(default,names(control),control)
# check does not like attach
NAMES <- names(control)
for(i in 1:length(control)) { assign(NAMES[i],control[[i]]) }
# something is stripping par names
NAMES <- names(par)
cores <- resolveCores(cores)
if(any(parscale==0)) { parscale[parscale==0] <- 1 }
# does fn take a zeroing argument?
if("zero" %in% names(formals(fn))) { ZERO <- TRUE } else { ZERO <- FALSE }
zero <- 0 # current value of the minimum
TOL.ZERO <- 0 # error from zeroing
if(is.null(precision) && ZERO) { precision <- 1/4 } # don't need as much precision
else if(is.null(precision) && !ZERO) { precision <- 1/2 }
if(is.null(stages))
{
if(precision<1/2) { stages <- 1 } # Newton-Raphson is good enough, hessian accurate here
else { stages <- 1:2 } # need conjugate gradient for higher precision
}
# differentiation step size: 2nd, 1st, 1st, 0th
STEP <- .Machine$double.eps^c(0.25,0.5,0.5,1)
# machine tolerances for various stage calculations
TOL <- .Machine$double.eps^c(0.5,1,1,1)
# goal errors
TOL.GOAL <- .Machine$double.eps^precision
# tolerance for BFGS update (it's not very good/reliable)
TOL.BFGS <- sqrt(TOL[1])
STEP.BFGS <- sqrt(STEP[1])
DIM <- length(par)
period <- array(period,DIM)
par <- par/parscale
lower <- array(lower,DIM)/parscale
upper <- array(upper,DIM)/parscale
period <- period/parscale
# start with the canonical directions
# DIR <- diag(1,DIM) # rows of column vectors
# sometimes initial covariance/hessian is garbage because previous data/model were garbage
if(!is.null(covariance))
{
covariance <- t(t(covariance/parscale)/parscale)
TEST <- eigen(covariance)$values
TEST <- any(TEST<=TOL[1]) || any(TEST>=1/TOL[1]) || min(TEST)/max(TEST)<=TOL[1]
if(TEST) { covariance <- NULL }
}
# fall back --- unlikely
if(!is.null(hessian))
{
hessian <- t(t(hessian*parscale)*parscale)
TEST <- 1/eigen(hessian)$values
TEST <- any(TEST<=TOL[1]) || any(TEST>=1/TOL[1]) || min(TEST)/max(TEST)<=TOL[1]
if(TEST) { hessian <- NULL }
}
# preconditioning is safe
if(!is.null(covariance))
{ hessian <- PDsolve(covariance) }
else if(!is.null(hessian))
{ covariance <- PDsolve(hessian) }
else # use parscale to start
{
covariance <- diag(1,DIM) # inverse hessian
hessian <- diag(1,DIM) # hessian
# LINE.DO <- TRUE
}
# what we will actually be evaluating
PERIOD <- as.logical(period)
if(PMAP && any(PERIOD))
{
PSCALE <- period[PERIOD]/pi
# locally-tangent transform that maps periods to infinity
pmap <- function(p,theta,inverse=FALSE)
{
if(inverse) { p[PERIOD] <- atan(p[PERIOD]/PSCALE)*PSCALE + theta }
else { p[PERIOD] <- tan((p[PERIOD]-theta)/PSCALE)*PSCALE }
return(p)
}
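# example (comment sketch): for a single periodic parameter with period=2*pi (PSCALE=2)
# and tangent origin theta=0, pmap(pi/2,0) = tan(pi/4)*2 = 2,
# and pmap(2,0,inverse=TRUE) = atan(1)*2 = pi/2 recovers the original angle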
# extract better theta from better par (under tangent transformation)
get.theta <- function(p,theta) # theta here is old theta
{
p <- pmap(p,theta,inverse=TRUE)
return(p[PERIOD])
}
# update old theta to new theta (under tangent transformation)
put.theta <- function(p,theta.old,theta.new)
{
p <- pmap(p,theta.old,inverse=TRUE)
p <- pmap(p,theta.new)
return(p)
}
period <- rep(FALSE,length(period)) # don't treat parameters as periodic from now on
# all coordinates are now transformed coordinates
theta <- par[PERIOD]
par <- pmap(par,theta)
}
else
{ PMAP <- FALSE }
# wrap objective function
func <- function(par,...)
{
if(PMAP) { par <- pmap(par,theta,inverse=TRUE) }
# this objective function has the ability to approximately zero its objective value
if(ZERO) { FN <- try(fn(par*parscale,zero=zero*fnscale,...),silent=!DEBUG) }
else { FN <- try(fn(par*parscale,...),silent=!DEBUG) }
# ordinary objective function
if(is.numeric(FN) && !is.nan(FN)) { FN <- FN/fnscale }
else
{
# store to environmental variable so that I can debug?
warning("Objective function failure at c(",paste0(NAMES,"=",par*parscale,collapse=', '),')')
if(FALSE && DEBUG)
{
debug(ctmm.loglike)
# try again with debugging
if(ZERO) { FN <- try(fn(par*parscale,zero=zero*fnscale,...),silent=!DEBUG) }
else { FN <- try(fn(par*parscale,...),silent=!DEBUG) }
undebug(ctmm.loglike)
}
FN <- Inf
}
return(FN)
}
##################
# am I on a boundary and if so, align DIR
######################
is.boxed <- function(par,fix.dir=FALSE)
{
# what points are on the boundary, so that we have to do one-sided derivatives
LO <- par==lower | (par-lower) <= .Machine$double.eps
UP <- par==upper | (upper-par) <= .Machine$double.eps
BOX <- (LO | UP)
# rotate coordinates so that DIR[,BOX] points strictly towards boundary
if(fix.dir && any(LO)) { diag(DIR)[LO] <<- -1 }
return(BOX)
}
is.toosmallf <- function(fns,fn0)
{
if(ZERO) { R <- abs(diff(fns))<=TOL.STAGE+TOL.ZERO }
else { R <- abs(diff(fns))/fn0<=TOL.STAGE }
return(!length(R) || is.na(R) || R) # avoid 0/0 error
}
is.toosmallp <- function(ps,p0,tol=STEP[STAGE])
{
SCL <- pmax(abs(p0),1)
# relative step sizes
dp <- sqrt(abs(c((ps[,2]-ps[,1])^2 %*% (1/SCL^2)))) # formula explained in numderiv.diff()
R <- dp <= tol
return(!length(R) || is.na(R) || R) # avoid 0/0 error
}
numderiv.diff <- function(p0,DIR)
{
# different calculations for the numerical step length
# standard deviation for each axis
# STD <- sqrt(abs(diag(covariance)))
# parameter scale not along current axes
SCL <- pmax(abs(p0),1)
# squared parameter scale along axes - analog to STD
# SCL <- t(DIR) %*% diag(SCL^2,nrow=DIM) %*% DIR
# single parameter scale for each axis
# SCL <- sqrt(abs(diag(SCL)))
# equivalent calculation to the above, but avoids matrix-matrix multiplication
# SCL <- sqrt(abs(c(DIR^2 %*% SCL^2))) # can overflow
SCL <- diag(SCL,nrow=length(SCL))
SCL <- diag( t(DIR) %*% SCL %*% DIR )
# sample initial points around the center for numerical differentiation
STEP <- SCL*STEP[STAGE]
par.step <- t(STEP*t(DIR))
P1 <- -par.step # away from boundaries
P2 <- +par.step # towards boundaries
# don't sample points across boundary, fold back instead - correct one-sided derivatives implemented
BOX <- is.boxed(p0)
if(any(BOX)) { P2[,BOX] <- 2*P1[,BOX] }
# apply these displacements within box constraints
P1 <- line.boxer(P1,p0=p0,lower=lower,upper=upper,period=period)
P2 <- line.boxer(P2,p0=p0,lower=lower,upper=upper,period=period)
# columns are now boxed coordinates
return(list(P1=P1,P2=P2))
}
# distance to next boundary
m.box <- function(M=1)
{
# STUFF <<- list(M=M,lower=lower,upper=upper,par=par,DIR.STEP=DIR.STEP)
MB <- c(((upper-par)/(M*DIR.STEP))[M*DIR.STEP>0],((lower-par)/(M*DIR.STEP))[M*DIR.STEP<0])
MB <- nant(MB,0)
if(!length(MB)) { MB <- 0 }
MB <- min(MB,na.rm=TRUE)
MB <- max(0,MB)
return(MB)
}
# initializing stuff checked in loop
STAGE <- stages[1] # 1-Newton, 2-Conjugate Gradient, 3-Gradient Descent, 4-nothing yet
ERROR <- Inf
counts <- 0
par.target <- par # where to evaluate around next
par.old <- par.start <- par
fn.par <- Inf # current best objective value fn(par)
# condition <- Inf
gradient.old <- rep(0,DIM)
# hessian.LINE <- NULL # line-search hessian
CG.RESET <- TRUE # reset conjugate gradient to gradient descent
REVERSE <- FALSE
cg.dir <- rep(0,DIM) # conjugate gradient direction
par.diff <- rep(0,DIM)
NR <- FALSE # line search hessian/gradient are good
######################
# MAIN LOOP
######################
if(trace==1) { pb <- utils::txtProgressBar(style=3) }
while(STAGE<=last(stages) && counts<=maxit)
{
# update zero shift
if(ZERO && fn.par<Inf) { zero <- zero + fn.par }
## update local tangent frame for periodic variables
if(PMAP) # back transform
{
# update tangent origins
theta.old <- theta
# transform back
par <- pmap(par,theta,inverse=TRUE)
par.target <- pmap(par.target,theta,inverse=TRUE)
par.old <- pmap(par.old,theta,inverse=TRUE)
}
## shift back near origin to prevent overflow
RESCALE <- reset(par*parscale)/parscale
if(sqrt(sum((RESCALE-par)^2))>DIM*.Machine$double.eps)
{
# rescale parameters
TEMP <- RESCALE
RESCALE <- nant(TEMP/par,1)
par <- par.target <- par.old <- TEMP
# reset gradients
CG.RESET <- TRUE
gradient.old <- rep(0,DIM)
covariance <- hessian <- diag(1,DIM)
}
## finish local tangent update
if(PMAP) # forward transform
{
# new tangent origin (theta)
theta <- par[PERIOD]
# transform forward
par <- pmap(par,theta)
par.target <- pmap(par.target,theta)
par.old <- pmap(par.old,theta)
# change of variables 3x
gradient.old[PERIOD] <- gradient.old[PERIOD] / ( 1 + (par.old[PERIOD]/PSCALE)^2 )
}
DIR <- diag(1,DIM)
BOX <- is.boxed(par.target,fix.dir=TRUE)
# revert stage on big changes
for(i in stages) { if(STAGE>i && ERROR>=TOL[i]+TOL.ZERO && TOL[i]>=TOL.GOAL) { STAGE <- i ; break } }
# advance through requested stages only
STAGE <- (stages[stages>=STAGE])[1]
if(is.na(STAGE)) { break }
# set stage information
TOL.STAGE <- max(TOL.GOAL,TOL[STAGE])
if(STAGE<2) { CG.RESET <- TRUE } # reset conjugate gradient direction for next time STAGE==2
# now line searching to tolerance, so no point in this...
## update hessian from line-search result (Rank-1 update)
# if(STAGE==1 && NR)
# {
# DIFF <- rank1update(hessian.LINE,DIR.STEP,hessian,covariance)
# hessian <- DIFF$hessian
# covariance <- DIFF$covariance
# }
## lots of choices here for the basis
# we could stick with the canonical basis
if(STAGE==1 && sum(!BOX)>1)
{
## random basis
n <- sum(!BOX)
dir <- matrix(0,n,n)
# random rotation angles
dir[upper.tri(dir)] <- stats::runif((n^2-n)/2,min=-pi,max=pi)
# anti-symmetric matrix generator
dir <- dir - t(dir)
# orthogonal matrix
DIR[!BOX,!BOX] <- expm::expm(dir,trySym=FALSE)
################################
# transform Hessian to current coordinates
hessian <- t(DIR) %*% hessian %*% DIR
covariance <- t(DIR) %*% covariance %*% DIR
}
# sample for numeric differentiation
PS <- numderiv.diff(par.target,DIR)
P1 <- PS$P1
P2 <- PS$P2
# mc evaluate all points
# par must be re-evaluated to prevent zero shift roundoff error if(ZERO)
P <- cbind(par.target,P1,P2)
counts.diff <- ceiling(ncol(P)/cores)
counts <- counts + counts.diff
fn.queue <- unlist(plapply(split(P,col(P)),func,cores=cores))
# separate back into parts
par <- par.target
fn.par <- fn.target <- fn.queue[1]
F1 <- fn.queue[1+1:DIM]
F2 <- fn.queue[1+DIM+1:DIM]
# update estimate of round-off error in zeroing
if(counts>counts.diff && ZERO) { TOL.ZERO <- max(TOL.ZERO,abs(fn.par)) }
# is the minimum encapsulated?
# encapsulated <- (fn.target<F1) & (fn.target<F2)
# calculate axial derivatives to second order
DIFF <- QuadSolve(par.target,P1,P2,DIR,fn.target,F1,F2)
gradient <- DIFF$gradient
M <- sqrt(sum(gradient^2))
if(M<=.Machine$double.eps)
{
# need to move on to next stage regardless of objective
ERROR <- TOL[STAGE] * 0.99 # proceed to next stage (fake number)
STAGE <- STAGE + 1
}
# reset Hessian, if previous line search went backwards
if( REVERSE ) { covariance <- hessian <- diag(1,DIM) }
# Newton-Raphson (pNR diagonal update)
if(STAGE==1)
{
hessian.diag <- DIFF$hessian
### ERROR CHECKING ###
# will need line search if not normal looking
TEST <- is.na(hessian.diag)
if(any(TEST)) { hessian.diag[TEST] <- 0 }
# fix after condition report
# condition <- hessian.diag/diag(hessian)
# MIN <- which.min(abs(condition))
# MAX <- which.max(abs(condition))
# condition <- condition[c(MIN,MAX)]
# condition <- sign(condition) * sqrt(abs(condition))
# if the gradient is null in one direction, then we've likely profiled that axis to tolerance
# fix profiled dimensions to change nothing, else will get 0/0
TEST <- TEST | abs(F1-fn.target)<=TOL[STAGE] | abs(F2-fn.target)<=TOL[STAGE]
if(any(TEST))
{
gradient[TEST] <- 0
hessian.diag[TEST] <- diag(hessian)[TEST]
}
# stick with old estimate if new estimate is bad
TEST <- hessian.diag<=TOL[1] | hessian.diag>=1/TOL[1]
if(any(TEST)) { hessian.diag[TEST] <- diag(hessian)[TEST] }
# transform gradient back to canonical coordinates
gradient <- c(DIR %*% gradient)
} # end Newton Raphson diagonal update
# BFGS update (rank-1)
# we do this before the better local derivative update
# also this provides an objective check on Newton-Raphson step quality
########################################
if(STAGE==1 && counts>counts.diff)
{
# search direction
DIR.STEP <- par-par.old
M <- sqrt(sum(DIR.STEP^2))
DIR.STEP <- nant(DIR.STEP/M,sign(DIR.STEP))
# hessian along line between centers where derivatives were calculated
gradient.LINE <- gradient-gradient.old # canonical coordinates
hessian.LINE <- c( DIR.STEP%*%gradient.LINE )/M
hessian.LINE <- nant(hessian.LINE,0)
# does this info look good? Was the step big enough to justify the inaccuracy of this method? Then update along search direction
if(sum(gradient^2)<sum(gradient.old^2) && M>STEP.BFGS && hessian.LINE>TOL.BFGS && hessian.LINE<1/TOL.BFGS)
{
# transform DIFF to same frame as hessian/covariance
DIFF <- rank1update(hessian.LINE,c(t(DIR)%*%DIR.STEP),hessian,covariance)
hessian <- DIFF$hessian
covariance <- DIFF$covariance
}
}
# new best par
par <- par.target
fn.par <- fn.target
if(STAGE==1) # Newton-Raphson
{
####################################
# DIAGONAL UPDATE (Rank-DIM)
# curvature correction factor: new curvature / old curvature (current coordinates)
FACT <- sqrt(hessian.diag/diag(hessian)) # correction factor diagonal
# update curvatures while preserving correlations
hessian <- t(t(hessian*FACT)*FACT)
# update covariances the same way as Hessian (prevents requirement of matrix inversion)
covariance <- t(t(covariance/FACT)/FACT)
# transform Hessian back to canonical coordinates
hessian <- DIR %*% hessian %*% t(DIR)
covariance <- DIR %*% covariance %*% t(DIR)
}
####################################
# sanity check on hessian/covariance
TEST <- max(-Inf,conditionNumber(covariance),conditionNumber(hessian),na.rm=TRUE)
TEST <- TEST<=TOL.STAGE[1] || TEST>=1/TOL.STAGE[1]
# reset hessian/covariance if bad
if(TEST) { covariance <- hessian <- diag(1,DIM) }
## orthogonality test
# COS <- covariance %*% gradient
# COS <- c(gradient %*% COS)/sqrt(sum(gradient^2) * sum(COS^2))
# par may have been updated above, so recompute which pars are boxed
BOX <- is.boxed(par,fix.dir=TRUE)
# what dimensions are pushing through the boundaries
BOXED <- BOX & c(gradient%*%DIR)<0
par.old <- par.target # need for BFGS
if(STAGE==1 || STAGE==3)
{
LINE.TYPE <- "Newton-Raphson"
# Newton-Raphson search step from par.target (can optimize along boundary)
# if(DEBUG) { DEBUG.STEP <<- list(LINE.TYPE=LINE.TYPE,par=par,gradient=gradient,hessian=hessian,covariance=covariance,lower=lower,upper=upper,period=period) }
par.target <- box.search(par,gradient,hessian,covariance,lower=lower,upper=upper,period=period)
}
else if(STAGE==2) # conjugate gradient (preconditioned)
{
LINE.TYPE <- "Conjugate gradient"
# if(DEBUG) { DEBUG.STEP <<- list(LINE.TYPE=LINE.TYPE,par=par,gradient=gradient,hessian=hessian,covariance=covariance,lower=lower,upper=upper,period=period,BOXED=BOXED,DIR=DIR) }
if(any(!BOXED))
{
FREE <- !BOXED # now the free dimensions
if(any(BOXED) && any(FREE)) { COV <- PDsolve(hessian[FREE,FREE]) }
else { COV <- covariance } # avoid costly matrix inversion if possible
if(!CG.RESET) # continue with preconditioned conjugate gradient
{
# Polak-Ribiere formula (preconditioned)
beta <- c(gradient[FREE] %*% COV %*% (gradient - gradient.old)[FREE]) / c(gradient.old[FREE] %*% COV %*% gradient.old[FREE])
# direction reset
if(is.na(beta) || beta<0 || beta>=1/STEP[STAGE]) { CG.RESET <- TRUE }
}
# initialize with preconditioned gradient descent
if(CG.RESET)
{
LINE.TYPE <- "Gradient descent"
beta <- 0
}
# update search direction # works even if beta==0
cg.dir[FREE] <- -c(COV %*% gradient[FREE]) + (beta * cg.dir[FREE])
cg.dir[BOXED] <- 0
CG.RESET <- FALSE
}
# where we aim to evaluate next
par.target <- line.boxer(cg.dir,p0=par,lower=lower,upper=upper,period=period)
} # end conjugate-gradient code
gradient.old <- gradient # need for BFGS & CG
par.diff <- par.target-par
if(any(BOXED)) { par.diff[BOXED] <- 0 }
# never quit before line search, unless you really must
if(is.toosmallp(cbind(par.target,par),par,tol=.Machine$double.eps))
{
# need to move on to next stage regardless of objective
ERROR <- TOL[STAGE] * 0.99 # proceed to next stage (fake number)
STAGE <- STAGE + 1
next
}
# predicted search step size
M <- sqrt(sum(par.diff^2))
# new search direction
DIR.STEP <- nant(par.diff/M,sign(par.diff))
# distance to boundary
M.BOX <- m.box()
# not too small, not too big
M <- clamp(M,.Machine$double.eps,M.BOX)
###################
# LINE SEARCH START
###################
par.start <- par
fn.start <- fn.par
# initialize (all) storage results
par.all <- cbind(par)
fn.all <- fn.par
hessian.LINE <- c(DIR.STEP %*% hessian %*% DIR.STEP)
gradient.LINE <- c(DIR.STEP %*% gradient)
# generate a linear sequence of points from par to the other side of par.target
P2 <- line.boxer(1.5*par.diff,p0=par,lower=lower,upper=upper,period=period)
P1 <- line.boxer(1.0*par.diff,p0=par,lower=lower,upper=upper,period=period)
M <- sqrt(sum((P1-par)^2)) # M is unsigned!
M1 <- min(1,M/2) # 1 in case hessian is bad
M2 <- sqrt(sum((P2-par)^2))
M <- sort(c(M1,M,M2)) # just in case
M1 <- M[1]; M2 <- M[3]; M <- M[2]
# sample to target or boundary
if(abs(M)<=M.BOX) # sample P1/2 to P1 to P2
{
LINE.TYPE <- paste(LINE.TYPE,"prospection")
n <- mc.min(3,cores)
SEQ <- M # target
n <- n-1 # remaining points
# under-estimates -- proportional-ish to M-M1
n1 <- nant((M-M1)/(M2-M1),1)*(n+2) - 1
n1 <- clamp(n1,0+(M-M1>=STEP[STAGE]),n-(M2-M>=STEP[STAGE]))
n1 <- round(n1)
# over-estimates -- proportional-ish to M2-M
n2 <- n-n1
n1 <- max(0,n1+1)
n2 <- max(0,n2+1)
SEQ <- c( seq(M1,M,length.out=n1), seq(M,M2,length.out=n2)[-1] )
}
else # sample only P2/2 to P2 to fit search within boundary
{
LINE.TYPE <- paste(LINE.TYPE,"boundary")
n <- mc.min(2,cores)
SEQ <- seq(0,M2,length.out=n+1)[-1]
}
# new points to evaluate
P <- line.boxer((DIR.STEP %o% SEQ),p0=par,lower=lower,upper=upper,period=period)
##################
# LINE SEARCH LOOP
##################
while(counts < maxit)
{
counts <- counts + ceiling(ncol(P)/cores)
# most expensive part
# evaluate objective function at new P and store to fn.queue
fn.queue <- unlist(plapply(split(P,col(P)),func,cores=cores))
PROGRESS <- any(fn.queue<fn.par) # did this iteration improve things?
PROGRESS <- is.good(PROGRESS) # possible 0/0 error somewhere
# combine with older results
par.all <- cbind(par.all,P)
fn.all <- c(fn.all,fn.queue)
# sort along DIR.STEP
SORT <- c(DIR.STEP %*% (par.all-par))
SORT <- sort(SORT,method="quick",index.return=TRUE)$ix
par.all <- par.all[,SORT,drop=FALSE] # ARGH!
fn.all <- fn.all[SORT]
# new best estimate
MIN <- which.min(fn.all)
par <- par.all[,MIN]
fn.par <- fn.all[MIN]
END <- (MIN==1 || MIN==length(fn.all))
if(trace==2) { message(sprintf("%s %s search",format(zero+fn.par,digits=16),LINE.TYPE)) }
if(trace==3) { message("\tc(",paste0(NAMES,"=",par,collapse=', '),")") }
# if(DEBUG>1)
# {
# graphics::plot((DIR.STEP %*% (par.all-par)),(fn.all-fn.par),xlab="Distance from MIN",ylab="Value over MIN")
# graphics::abline(h=c(fn.start-fn.par,0),col=scales::alpha(c('black','blue'),0.5))
# graphics::abline(v=c(DIR.STEP %*% (par.start-par),0),col=scales::alpha(c('black','blue'),0.5))
# graphics::points(c(DIR.STEP %*% (P-par)),fn.queue-fn.par,pch=16,col="red")
# graphics::points(DIR.STEP %*% (par.start-par),fn.start-fn.par,pch=16)
# graphics::title(sprintf("%s search",LINE.TYPE))
# }
## calculated steps were too small on one side to proceed
# nowhere to go
TEST <- FALSE
if(MIN>1) { TEST <- TEST || is.toosmallp(par.all[,MIN-1:0,drop=FALSE],par,tol=.Machine$double.eps) }
if(MIN<length(fn.all)) { TEST <- TEST || is.toosmallp(par.all[,MIN+0:1,drop=FALSE],par,tol=.Machine$double.eps) }
if(TEST) { break }
# we met our objective
TEST <- FALSE
if(MIN>1) { TEST <- TEST || is.toosmallf(fn.all[MIN-1:0],fn.par) }
if(MIN<length(fn.all)) { TEST <- TEST || is.toosmallf(fn.all[MIN+0:1],fn.par) }
if(TEST && !PROGRESS) { break }
# we didn't meet our objective and probably won't make further progress
TEST <- FALSE
if(MIN>1) { TEST <- TEST || is.toosmallp(par.all[,MIN-1:0,drop=FALSE],par) }
if(MIN<length(fn.all)) { TEST <- TEST || is.toosmallp(par.all[,MIN+0:1,drop=FALSE],par) }
if(TEST && !PROGRESS) { break }
################################
# 1D Newton-Raphson info
################################
if(MIN==1)
{ i <- MIN+1; j <- MIN+2 }
else if(MIN==length(fn.all))
{ i <- MIN-1; j <- MIN-2 }
else
{ i <- MIN-1; j <- MIN+1 }
DIFF <- QuadSolve(par,par.all[,i],par.all[,j],DIR.STEP,fn.par,fn.all[i],fn.all[j])
# did this improve as it should?
if(DIFF$gradient^2<gradient.LINE^2 && DIFF$hessian>=TOL[STAGE] && DIFF$hessian<1/TOL[STAGE])
{
NR <- TRUE # will try NR step
M <- -DIFF$gradient/DIFF$hessian # M is signed!
# distance to next boundary in number of Ms
M.BOX <- m.box(M)
# Is estimated NR step plausible for a convex objective?
if(M.BOX<=.Machine$double.eps || abs(M)<=.Machine$double.eps) # past boundary, or no change
{ NR <- FALSE }
else if(MIN>1 && M<=(par.all[,MIN-1]-par)%*%DIR.STEP ) # M left of MIN-1
{ NR <- FALSE }
else if(MIN<length(fn.all) && M>=(par.all[,MIN+1]-par)%*%DIR.STEP) # M right of M+1
{ NR <- FALSE }
}
else
{
NR <- FALSE
M <- 0
}
# good to update local information
if(NR)
{
gradient.LINE <- DIFF$gradient
hessian.LINE <- DIFF$hessian
# predicted step is too small to calculate anything
if(abs(M)<=.Machine$double.eps)
{
if(MIN==1 || MIN==length(fn.all)) # but we should keep going
{ NR <- FALSE; M <- 0 } # try heuristic search instead of NR
else # and we can stop
{ break }
}
}
# TEST <- (MIN<=2) || (MIN>=length(fn.all)-1)
# if(!TEST) { TEST <- !WolfeTest(x=c(DIR.STEP %*% par.all),y=fn.all,grad.i=c(DIR.STEP%*%gradient),i=which.min(c(DIR.STEP%*%(par.all-par.start))^2),MIN=MIN) }
# distance to first boundary
M.BOX <- m.box()
# terminated on boundary---do not refine
if(M.BOX<=.Machine$double.eps && END) { break }
#########################
# setup next search steps
#########################
# if(DEBUG) { DEBUG.LINE <<- list(NR=NR,M=M,MIN=MIN,fn.all=fn.all,par.all=par.all,par=par,fn.par=fn.par,M.BOX=M.BOX,cores=cores,DIR.STEP=DIR.STEP,lower=lower,upper=upper,period=period,BOX=BOX,par.start=par.start,STEP=STEP,STAGE=STAGE) }
# setup searches
if(NR && M) # extrapolation search
{
if((MIN==1 && M<=0) || (MIN==length(fn.all) && M>=0))
{
if(abs(M)<abs(M.BOX))
{
LINE.TYPE <- "1D Newton-Raphson expansion"
n <- cores
if(abs(M)>1) # in case hessian is bad
{
n <- mc.min(2,cores)
M1 <- sign(M)
}
else
{ M1 <- M/2 }
M2 <- sign(M) * min(1.5*abs(M),M.BOX)
n <- n-1
SEQ <- M
n1 <- nant((M-M1)/(M2-M1),1)*(n+2) - 1
n1 <- clamp(n1,0,n)
n1 <- round(n1)
n2 <- n-n1
n1 <- max(0,n1+1)
n2 <- max(0,n2+1)
SEQ <- c( seq(M1,M,length.out=n1)[-n1] , seq(M,M2,length.out=n2) )
}
else
{
LINE.TYPE <- "1D Newton-Raphson boundary"
M <- M.BOX
n <- cores
if(abs(M)>1) # in case hessian is bad
{
n <- mc.min(2,cores)
M1 <- sign(M)
}
else
{ M1 <- M/n }
SEQ <- seq(M1,M,length.out=n)
}
}
else # interpolation search
{
LINE.TYPE <- "1D Newton-Raphson refinement"
n <- cores
M1 <- M
# step to next point after M
M2 <- sign(M)*sqrt(sum((par.all[,MIN+ifelse(M>=0,1,-1)]-par)^2))
SEQ <- M
n <- n-1
if(n)
{
n1 <- nant(M1/M2,1)*(n+2) - 1
n1 <- clamp(n1,0,n)
n1 <- round(n1)
n2 <- n-n1
SEQ <- c( seq(0,M1,length.out=n1+2)[-1], seq(M1,M2,length.out=n2+2)[-c(1,n2+2)] )
}
}
} # end targeted search
else # untargeted searches: heuristic -- not Newton-Raphson
{
# terminated on (or starting from) boundary # convex solution is adjacent
if(M.BOX<=.Machine$double.eps || (END && sqrt(sum((par.start-par)^2))<=.Machine$double.eps))
{
n <- cores
# step inward to previous point (too far)
M <- c( (par.all[,MIN+ifelse(MIN==1,1,-1)] - par.all[,MIN]) %*% DIR.STEP )
if(abs(M)/(n+1)<=STEP[STAGE]) # not enough room for geometric sequence
{
LINE.TYPE <- "Linear refinement"
SEQ <- seq(0,M,length.out=n+2)[-c(1,n+2)]
}
else if(abs(M)/2^n>=STEP[STAGE])
{
LINE.TYPE <- "Binary refinement"
SEQ <- (1/2)^(1:n) * M
}
else # that became too small
{
LINE.TYPE <- "Geometric refinement"
SEQ <- sign(M) * STEP[STAGE]^((1:n)/n)
}
}
else if(END) # expansion searches
{
# previous step
M <- c( (par.all[,MIN] - par.all[,MIN+ifelse(MIN==1,1,-1)]) %*% DIR.STEP )
n <- cores
if(2^n*abs(M)<=M.BOX) # boundary is far away
{
LINE.TYPE <- "Binary expansion"
SEQ <- 2^(1:n) * M
}
else if(n==1 || M.BOX/n<=abs(M)) # boundary is close
{
LINE.TYPE <- "Linear boundary"
SEQ <- seq(0,sign(M)*M.BOX,length.out=n+1)[-1]
}
else # intermediate
{
LINE.TYPE <- "Geometric boundary"
SEQ <- n^((1:n - n)/(n-1)) * sign(M) * M.BOX
}
}
else # would like golden search here, but that requires consistent spacing...
{
LINE.TYPE <- "Ternary refinement"
n <- mc.min(2,cores)
M1 <- sqrt(sum((par-par.all[,MIN-1])^2))
M2 <- sqrt(sum((par-par.all[,MIN+1])^2))
n1 <- nant(M1/(M1+M2),1/2)*(n+2) - 1
n1 <- clamp(n1,1,n-1)
n1 <- round(n1)
n2 <- n-n1
SEQ <- c( -seq(0,M1,length.out=n1+2)[-c(1,n1+2)], seq(0,M2,length.out=n2+2)[-c(1,n2+2)] )
}
} # end search setups
P <- line.boxer((DIR.STEP %o% SEQ),p0=par,lower=lower,upper=upper,period=period)
# goto search evaluation
} # end line search iteration loop via break
# did the search finish backwards?
REVERSE <- c( DIR.STEP %*% (par-par.start) ) < 0
par.target <- par # for outer loop
if(ZERO) { ERROR <- abs(fn.start-fn.par) }
else { ERROR <- abs(fn.start-fn.par)/max(1,abs(fn.par)) } # fnscale=1 effectively; avoid underflow from non-zeroed objective functions
# error-check wrapup
if(ERROR <= TOL.STAGE + TOL.ZERO)
{ STAGE <- STAGE + 1 }
else # didn't go anywhere
{
TEST <- is.toosmallp(cbind(par,par.start),par,tol=.Machine$double.eps)
if(TEST)
{
ERROR <- TOL[STAGE] * 0.99 # proceed to next stage (fake number)
STAGE <- STAGE + 1
}
}
if(trace==1) { utils::setTxtProgressBar(pb,clamp(nant(log(ERROR)/log(TOL.GOAL),0))) }
} # end main loop
if(trace==1) { close(pb) }
if(counts<maxit) { convergence <- 0} else { convergence <- 1 }
if(trace) { message(sprintf("%s in %d parallel function evaluations.",ifelse(convergence,"No convergence","Convergence"),counts)) }
if(trace==3) { message("\tc(",paste0(NAMES,"=",par,collapse=', '),")") }
if(PMAP) { par <- pmap(par,theta,inverse=TRUE) }
# return stuff in similar format to optim
RETURN <- list()
RETURN$par <- par*parscale
RETURN$value <- (fn.par+zero)*fnscale
RETURN$counts <- counts
RETURN$convergence <- convergence
RETURN$hessian <- t(t(hessian/parscale)/parscale)
RETURN$covariance <- t(t(covariance*parscale)*parscale)
RETURN$lower <- lower*parscale
RETURN$upper <- upper*parscale
# initial parscale was bad [UNFINISHED]
if(rescale && ERROR>TOL.GOAL && max(abs(log2(par)))>1)
{
control$parscale <- RETURN$par
control$hessian <- RETURN$hessian
control$covariance <- RETURN$covariance
RETURN <- mc.optim(RETURN$par,fn=fn,lower=RETURN$lower,upper=RETURN$upper,period=period,reset=reset,control=control,...)
}
return(RETURN)
}
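# Example (illustrative sketch; the objective is made up, and cores=1 for portability;
# mc.optim() itself relies on package-internal helpers like plapply(), PDsolve(), nant(), clamp()):
# FIT <- mc.optim(par=c(1,1), fn=function(p) sum((p-3)^2), lower=c(0,0), upper=c(10,10),
#                 control=list(cores=1))
# FIT$par # approximately c(3,3)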