R/optim.R
In optreplace: Trial package to replace optim() function with better codes

Documented in optim

optim <- function(par, fn, gr=NULL, lower=-Inf, upper=Inf, 
            method="Nelder-Mead", itnmax=NULL, hessian=FALSE,
            control=list(),
             ...) {
# Input:
#  par = a single vector of starting values
#  fn = objective function (assumed to be sufficeintly differentiable)
#  gr = name of a function to compute the (analytic) gradient of the objective function
#  method = a character variable denoting the algorithm to be executed
#  hessian = logical variable that, if present, is equivalent to control$kkt. If TRUE, it causes
#      optimx to try to compute an approximation to the Hessian matrix at the final set of parameters.
#  control = list of control information, in particular
#      trace = an integer controlling output (note different methods may use logicals
#         trace = 0 gives no output, positive numbers give more output for greater values
#      follow.on = TRUE or FALSE. If TRUE, and there are multiple methods, then the last set of 
#         parameters from one method is used as the starting set for the next. 
#      save.failures = TRUE if we wish to keep "answers" from runs where the method does not 
#         return conv==0. FALSE otherwise (default).
#      maximize = TRUE if we want to maximize rather than minimize a function. (Default FALSE)
#         090601: Not yet implemented for nlm, nlminb, ucminf. However, there is a check to avoid
#                 usage of these codes when maximize is TRUE.
#      all.methods = TRUE if we want to use all available (and suitable) methods
#      sort.result=TRUE, that is, we sort the results in decreasing order of the final function value
#      starttests=TRUE  By default we run tests of the function and parameters: feasibility relative to
#               bounds, analytic vs numerical gradient, scaling tests) before we try optimization methods
#      dowarn=TRUE By default we leave warnings generated by optimx.
#      badval=(0.5)*.Machine$double.xmax The value to set for the function value when try(fn()) fails.
#      scaletol=3 To check parameters or their bounds we take logs of absolute values and find the range 
#               i.e., max - min. This should not exceed scaletol. A value of 3 gives magnitudes between 
#               1 and 20 approximately.  
#
# Output:
# ans = an object containing two sets of information:
# essential output = a data frame of converged parameters, function value at convergence, 
#    name of algorithm, convergence indicator, and function evaluation and gradient evaluation counts
# detailed output = this is a list with one component for each algorithm, and contains parameters, 
#    function values, convergence code, number of function and gradient evals, numerical gradient 
#    and hessian at convergence, eigenvalues of that hessian approximation, cpu time, and other 
#    information returned by algorithms, and name of the algorithm.
# detailed output can be accessed via the attribute called `details'
#
#  Authors:  Ravi Varadhan & John Nash
#  Date:  February 17, 2008
#  Changes: Ravi Varadhan - Date: May 29, 2009, John Nash - Latest: Oct 18, 2009
#
#################################################################
scalecheck<-function(par, lower=lower, upper=upper,dowarn){
	# a function to check the initial parameters and bounds for inputs to optimization codes
	# Arguments:
	#	par -- starting parameters supplied 
	# 	lower, upper -- lower and upper bounds supplied
	#
	# Returns:
	#	list(lpratio, lbratio) -- the log of the ratio of largest to smallest parameters
	#		and bounds intervals (upper-lower) in absolute value (ignoring Inf, NULL, NA)
	######################################
	if (is.null(par)) { stop("Null parameter vector") }
	npar<-length(par)
        if (npar < 2) stop("optreplace (i.e., optim()) works only on more than 1 parameter")
	if (is.null(lower)) { 
		if (dowarn) warning("Null lower bounds vector")
		lower<-rep(-Inf,npar)
	}
	if (is.null(upper)) { 
		if (dowarn) warning("Null upper bounds vector")
		upper<-rep(Inf,npar)
	}
	newpar<-abs(par[which(is.finite(par))])
	logpar<-log10(newpar[which(newpar>0)]) # Change 20100711
	newlower<-abs(lower[which(is.finite(lower))])
	loglower<-log10(newlower[which(newlower>0)]) # Change 20100711
	newupper<-abs(upper[which(is.finite(upper))])
	logupper<-log10(newupper[which(newupper>0)]) # Change 20100711
	bddiff<-upper-lower
	bddiff<-bddiff[which(is.finite(bddiff))]
	lbd<-log10(bddiff[which(bddiff>0)]) # Change 20100711
	lpratio<-max(logpar) - min(logpar)
	if (length(lbd) > 0) {
		lbratio<-max(lbd)-min(lbd)
	} else { 
		lbratio<-NA
	}
	ratios<-list(lpratio=lpratio,lbratio=lbratio)
	# return(ratios)
}
# -------------- end scalecheck ----------------- #

# Get real name of function to be minimized
  fname<-deparse(substitute(fn))
  if (!is.null(control$trace) && control$trace>0) {
	cat("fn is ",fname,"\n")
  }

## Code more or less common to funtest, funcheck and optimx <<<
# Check parameters are in right form
  if (!is.null(dim(par))) stop("Parameter should be a vector, not a matrix!", call. = FALSE)
  if (! is.vector(par) ) {
	stop("The parameters are NOT in a vector")
  }
  npar<-length(par)
# Set control defaults
    ctrl <- list(
	trace=0,
	sort.result=TRUE,
	kkt=TRUE,
	starttests=TRUE,
	maximize=FALSE,
	dowarn=TRUE, 
	kkttol=0.001,
	kkt2tol=1.0E-6,
	badval=(0.5)*.Machine$double.xmax,
	scaletol=3
    ) # for now turn off sorting until we can work out how to do it nicely
    
# Note that we do NOT want to check on the names, because we may introduce 
#    new names in the control lists of added methods
#    if (!all(namc %in% names(ctrl))) 
#        stop("unknown names in control: ", namc[!(namc %in% names(ctrl))])
# However, we do want to substitute the appropriate information. 
# removed copy of hessian to control$kkt
    ncontrol <- names(control)
    nctrl <- names(ctrl)
    for (onename in ncontrol) {
       if (onename %in% nctrl) {
           ctrl[onename]<-control[onename]
       } else {
           ctrl[onename]<-control[onename]
       }
    }
    if (is.null(control$kkt)) { # turn off kkt for large matrices
      ctrl$kkt<-TRUE # default it to compute KKT tests
      if (is.null(gr)) { # no analytic gradient
         if (npar > 50) {
           ctrl$kkt=FALSE # too much work when large number of parameters
           if (ctrl$trace>0) cat("gr NULL, npar > 50, kkt set FALSE\n")
         }
      } else {
         if (npar > 500) {
            ctrl$kkt=FALSE # too much work when large number of parameters, even with analytic gradient
            if (ctrl$trace>0) cat("gr NULL, npar > 50, kkt set FALSE\n")
         }
      }
    } else { # kkt is set
      if (control$kkt) {
        if (is.null(gr)) {
           if (npar > 50) {
             if ((ctrl$trace>0) && ctrl$dowarn) warning("Computing hessian for gr NULL, npar > 50, can be slow\n")
           }
        } else {
           if (npar > 500) {
             if ((ctrl$trace>0) && ctrl$dowarn) warning("Computing hessian with gr code, npar > 500, can be slow\n")
           }
        }
      }
    }

# 091216 for maximization
  negfn <- function (par, ...) { # negate the function for maximizing
	val<-(-1.)*fn(par,...)
	# return(val)
  } # end of negfn
  if (! is.null(gr) ) {
    neggr <- function (par, ...) { # negate the function for maximizing
	ngr<-(-1.)*gr(par,...)
	# return(ngr)
    } # end of neggr
  } else { neggr<-NULL }
  ufn<-fn
  ugr<-gr
# reset the function if we are maximizing
  if (! is.null(control$fnscale))  
        stop("Control fnscale is not available in optreplace. Use maximize with relevant optimizers.")
  if ((! is.null(control$maximize)) && control$maximize ) { 
        cat("Maximizing -- use negfn and neggr\n")
	ufn <- negfn 
	ugr <- neggr
  }
# Check parscale not used
  if (! is.null(control$parscale)) {
        stop("Control parscale is NOT available in optreplace. Please use explicit scaling.")
  }

# Restrict list of methods if we have bounds
  if (any(is.finite(c(lower, upper)))) { have.bounds<-TRUE # set this for convenience
  } else { have.bounds <- FALSE }

  if (ctrl$starttests) {
	# Check parameters in bounds (090601: As yet not dealing with masks ??)
	#  bdmsk<-as.vector(bdmset[k, ])
	infeasible<-FALSE
	if (ctrl$trace > 0) cat("Function has ",npar," arguments\n")
	if (have.bounds) {
    	  # Expand bounds to vectors if needed
          # Note 20100610: we do not check if there is a vector of wrong length.??
    	  if (length(lower)==1 ) lower <- rep(lower, npar)
    	  if (length(upper)==1 ) upper <- rep(upper, npar)
    	  bstate<-vector(mode="character", length=npar)
    	  for (i in 1:npar) {
     	    if ( (lower[i]<=par[i]) && (par[i]<=upper[i])) {
    	      bstate[i]<-" In Bounds "
            } else { 
            #   if (bdmsk[i]!=0) {
                  infeasible<-TRUE
            #   }
                if (lower[i]>par[i]) {bstate[i]<-" Out of Bounds LOW" } else { bstate[i]<-" Out of Bounds HIGH " }
            } # end if in bounds
#           if (ctrl$trace > 0) cat("par[",i,"]: ",lower[i],"  <?",par[i],"  <?",upper[i],"  ",bdmsk[i],"   ",bstate,"\n")
            if (ctrl$trace > 0) cat("par[",i,"]: ",lower[i],"  <?",par[i],"  <?",upper[i],"  ",bstate,"\n")
          } # end of for loop over parameter vector elements
	  if (infeasible) { 
        	stop("Infeasible point, no further tests")
	  } 
          if ((method != 'L-BFGS-B') && (method != 'BFGS')) {
              wstr<- paste("With bounds change method from ",method," to BFGS")
              warning(wstr)
              method <- 'BFGS'
          }
  	} # end have.bounds
        # Check if function can be computed
        firsttry<-try(finit<-ufn(par, ...), silent=TRUE ) # 20100711
        # Note: This incurs one EXTRA function evaluation because optimx is a wrapper for other methods
        if (class(firsttry) == "try-error") {
    	   infeasible <- TRUE
           stop("Cannot evaluate function at initial parameters")
        }
        # Also check that it is returned as a scalar
       if (!(is.vector(finit) && (length(finit)==1)) || is.list(finit) || 
            is.matrix(finit) || is.array(finit) || ! is.numeric(finit) ) {
           stop("Function provided is not returning a scalar number")
       }
       if (is.infinite(finit) || is.na(finit)) {
          stop("Function returned is infinite or NA (non-computable)")
       }
  }

  # Check that we have the functions we need
   if (! require(numDeriv, quietly=TRUE) ) stop("Install package `numDeriv'", call.=FALSE)
  if (ctrl$starttests) {
     if (! is.null(gr)){ # check gradient
       gname <- deparse(substitute(gr))
       if (ctrl$trace>0) cat("Analytic gradient from function ",gname,"\n\n")
          fval <- ufn(par,...) 
          gn <- grad(func=ufn, x=par,...) # 
          ga <- ugr(par, ...)
          # Now test for equality (090612: ?? There may be better choices for the tolerances.
          teps <- (.Machine$double.eps)^(1/3)
          if (max(abs(gn-ga))/(1 + abs(fval)) >= teps) stop("Gradient function might be wrong - check it! \n", call.=FALSE)
       } else if (ctrl$trace>0) cat("Analytic gradient not made available.\n")

   }
## >>> End of code common to funtest, funcheck and optimx, with mods for local needs
  # Set up the vector to return answers
  ans.ret <- vector(mode="list")
  # mode= is not strictly required. length defaults to 0. This sets up our answer vector.

  # List of methods in base or stats, namely those in optim(), nlm(), nlminb()
  bmeth <- c("BFGS", "CG", "Nelder-Mead", "L-BFGS-B", "SANN")
# SANN has no termination for optimality, only a maxit count for
#    the maximum number of function evaluations; remove DEoptim for now -- not useful 
#    for smooth functions. Code left in for those who may need it.
  # List of methods in packages. 
  allmeth <- bmeth
# Now make sure methods loaded
   allmeth <- bmeth # start with base methods
   
   if (require(Rcgmin, quietly=TRUE) )  allmeth<-c(allmeth, "Rcgmin")
   else stop("Package `Rcgmin' Not installed", call.=FALSE)
   
   if (require(Rvmmin, quietly=TRUE) )  allmeth<-c(allmeth, "Rvmmin")
   else stop("Package `Rvmmin' Not installed", call.=FALSE)

   if (require(dfoptim, quietly=TRUE) )  allmeth<-c(allmeth, "dfoptim")
   else stop("Package `dfoptim' Not installed", call.=FALSE)
   
  # Partial matching of method string allowed
  # avoid duplicates here
  # 2011-1-17 JN: to set L-BFGS-B
# ?? 2012-10-29 -- change method to single character format

  method <- try(unique(match.arg(method, allmeth, several.ok=TRUE) ),silent=TRUE)
  if (class(method)=="try-error") {
     warning("optim: No match to available methods")
     method<-NULL
     nmeth<-0
  } else {
     nmeth <- length(method) # number of methods requested
  } # JN 2011-1-17 fix for default when there are bounds
  if ((nmeth==0) && have.bounds) {
      method="L-BFGS-B"
      warning("Default method when bounds specified is L-BFGS-B to match optim()")
      nmeth<-1
  }
  ## Check that methods are indeed available and loaded
  for (i in 1:nmeth) {
     cmeth <- method[i]
     if (ctrl$trace > 0) cat("Looking for method = ",cmeth,"\n")
     if (! (cmeth %in% allmeth) ) {
         errmsg <- paste(cmeth," not found in any list of methods available")
         stop(errmsg, call.=FALSE)
     } # otherwise the method is available, and just needs to be loaded
  } # end check methods available
  if (ctrl$trace>1) {
    cat("Methods to be used:")
    print(method)
  }

# Scaling check  091219
    if (ctrl$starttests) {
	srat<-scalecheck(par, lower, upper,ctrl$dowarn)
	sratv<-c(srat$lpratio, srat$lbratio)
	if (max(sratv,na.rm=TRUE) > ctrl$scaletol) { 
		warnstr<-"Parameters or bounds appear to have different scalings.\n  This can cause poor performance in optimization. \n  It is important for derivative free methods like BOBYQA, UOBYQA, NEWUOA."
		if (ctrl$dowarn) warning(warnstr)
	}
        if (ctrl$trace>0) {
		cat("Scale check -- log parameter ratio=",srat$lpratio,"  log bounds ratio=",srat$lbratio,"\n")
	}
    }
# end scaling check
# cat("end topstuff in optimxCRAN\n") # 120713
# Run methods
#  times <- rep(0, nmeth)  # figure out how many methods and reserve that many times to record.
#  names(times) <- method  # And label these times appropriately with the method.
#  j <- 0  # j will be the index to the answers, one per each method, starting parameter pair

   meth <- method # extract the method name
   conv <- -1 # indicate that we have not yet converged
   if (ctrl$trace>0) cat("Method: ", method, "\n") # display the method being used
      # Extract control information e.g., trace
      # 20100215: Note that maxit needs to be defined other than 0 e.g., for ucminf
      # create local control list for a single method -- this is one of the key issues for optimx
      mcontrol<-ctrl
      mcontrol$follow.on<-NULL # And make sure that controls NOT in method are deleted (nulled)
      mcontrol$save.failures<-NULL
      mcontrol$sort.result<-NULL
      mcontrol$kkt<-NULL
      mcontrol$starttests<-NULL
      mcontrol$all.methods<-NULL
      mcontrol$dowarn<-NULL
      mcontrol$kkttol<-NULL
      mcontrol$kkt2tol<-NULL
      mcontrol$maximize<-NULL # Even if method DOES have it
      mcontrol$badval<-NULL
      mcontrol$scaletol<-NULL # not used in any methods -- it is here for the scale check of parameters and bounds above

# Methods from optim()
      if (method == "SANN") {
          mcontrol$maxit<-10000 # !! arbitrary for now 
          stop("SANN is NOT supported here")
      }
## --------------------------------------------
###### progress point #########
      else if (method == "CG") { # Use Rcgmin routine (ignoring masks)
        cat("Found method as CG\n")
        # Note: no message for ingnoring type argument
	bdmsk<-rep(1,npar)
	mcontrol$trace<-NULL
	if (ctrl$trace>0) mcontrol$trace<-1
        time <- system.time(ans <- try(Rcgmin(par=par, fn=ufn, gr=ugr, lower=lower, upper=upper, bdmsk=bdmsk, control=mcontrol, ...), silent=TRUE))[1]
        if (class(ans)[1] == "try-error") {
		if (ctrl$trace>0) cat("Rcgmin failed for current problem \n")
		ans$value= ctrl$badval
		ans$par<-rep(NA,npar)
                ans$convergence<-9999 # failed in run
#                ans$counts <- c(1, 0) # ?? what to put in
                ans$message
        } else {
           if (ctrl$maximize) {
		ans$value= -ans$value
	   }
        }
        ans$MethodUsed <- "Rcgmin"
      }  ## end if using Rcgmin
## --------------------------------------------
      else if (method == "Nelder-Mead") {# Use nmkb routine from dfoptim package
         mcontrol$maxfeval<-mcontrol$maxit
         mcontrol$maxit<-NULL # and null out control that is NOT used
         if (mcontrol$trace > 0) {
            mcontrol$trace<-NULL
            mcontrol$trace<-TRUE # logical needed, not integer
         } else { mcontrol$trace<-FALSE }
#         mcontrol$usenumDeriv<-NULL
#         mcontrol$maximize<-NULL
#         mcontrol$parscale<-NULL
#         mcontrol$fnscale<-NULL
         nampar<-names(par) # save names 110508
         if (have.bounds) {
             time <- system.time(ans <- try(nmkb(par=par, fn=ufn, lower = lower, 
              upper = upper, control=mcontrol,...), silent=TRUE))[1]
         } else {
             time <- system.time(ans <- try(nmk(par=par, fn=ufn, 
                control=mcontrol,...), silent=TRUE))[1]
         }
         if (class(ans)[1] != "try-error") {
            ans$value<-as.numeric(ans$value)
            ans$counts <- c(ans$feval, 0)
            ans$feval <- NULL
            names(ans$par)<-nampar # restore names 110508
           # What about 'restarts' and 'message'??
         } else {
            if (ctrl$trace>0) cat("nmkb failed for current problem \n")
            ans<-list(fevals=NA) # ans not yet defined, so set as list
            ans$value= ctrl$badval
            ans$par<-rep(NA,npar)
            ans$convergence<-9999 # failed in run
            ans$niter<-NULL
         }
        ans$MethodUsed <- "dfoptim:nmkb"
      }  ## end if using nmkb###### progress point #########
## --------------------------------------------
###### progress point #########
      else if ( (method == "BFGS") || (method == "L-BFGS-B") ) { # Use Rvmmin routine (ignoring masks)
	bdmsk<-rep(1,npar) # to ensure this is defined
	mcontrol$trace <- ctrl$trace
        time <- system.time(ans <- try(Rvmmin(par=par, fn=ufn, gr=ugr, lower=lower, upper=upper, bdmsk=bdmsk, control=mcontrol, ...), silent=TRUE))[1]
        if ((class(ans)[1] == "try-error") && (ans$convergence==0)) {
		if (ctrl$trace>0) cat("Rvmmin failed for current problem \n")
		ans<-list(fevals=NA) # ans not yet defined, so set as list
		ans$value= ctrl$badval
#		ans$par<-rep(NA,npar)
                ans$convergence<-9999 # failed in run
        }
	if (ctrl$maximize) { # ?? may need to check this
		ans$value= -ans$value
	}
        ans$MethodUsed <- "Rvmmin"
      }  ## end if using Rvmmin
## --------------------------------------------
# ---  UNDEFINED METHOD ---
      else { errmsg<-paste("UNDEFINED METHOD: ", method, sep='')
             stop(errmsg, call.=FALSE)
      }
## --------------------------------------------
## Post-processing -- Kuhn Karush Tucker conditions
# cat("End methods in optimxCRAN\n") # 120713
#  Ref. pg 77, Gill, Murray and Wright (1981) Practical Optimization, Academic Press
      if (ctrl$trace>0) { cat("Post processing for method ",meth,"\n") }
      if (ctrl$trace && ans$conv==0) cat("Successful convergence! \n")  ## inform user we've succeeded
      # Testing final solution. Use numDeriv to get gradient and Hessian; compute Hessian eigenvalues
      hessOK<-FALSE # indicator for later
      if ((ctrl$kkt || hessian) && (ans$conv != 9999)) {
      if (ctrl$trace>0) cat("Compute Hessian approximation at finish of ",method[i],"\n")
         if (is.null(gr)) {
            nhatend<-try(hessian(ufn, ans$par, ...), silent=TRUE) # change 20100711
         } else {
            nhatend<-try(jacobian(ugr,ans$par, ...), silent=TRUE) # change 20100711
         } # numerical hessian at "solution"
         if (class(nhatend) != "try-error") {
             hessOK<-TRUE
         }
      }
      ans$kkt1<-NA
      ans$kkt2<-NA
###          if ((hessian || ctrl$kkt) && (ans$conv != 9999)) { # need to avoid test when method failed
      if (ctrl$trace>0) cat("Compute gradient approximation at finish of ",method[i],"\n")
         gradOK<-FALSE
         if (is.null(gr)) {
            ngatend<-try(grad(ufn, ans$par, ...), silent=TRUE) # change 20100711
         } else {
            ngatend<-try(ugr(ans$par, ...), silent=TRUE) # Gradient at solution # change 20100711
         }
         if (class(ngatend) != "try-error") gradOK<-TRUE # 100215 had == rather than != here
            if ( (! gradOK) && (ctrl$trace>0)) cat("Gradient computation failure!\n") 
            if (gradOK) {
               # test gradient
               ans$kkt1<-(max(abs(ngatend)) <= ctrl$kkttol*(1.0+abs(ans$value)) ) # ?? Is this sensible?
               if (hessOK) {
                   # For bounds constraints, we need to "project" the gradient and Hessian
                   bset<-sort(unique(c(which(ans$par<=lower), which(ans$par>=upper))))
                   nbds<-length(bset) # number of elements nulled by bounds
                   # Note: we assume that we are ON, not over boundary, but use <= and >=. No tolerance is used.
                   ngatend[bset]<-0 # "project" the gradient
                   nhatend[bset,] <-0
                   nhatend[, bset] <-0 # and the Hessian
                   ans$ngatend <- ngatend
                   ans$nhatend <- nhatend
                   hev<- try(eigen(nhatend)$values, silent=TRUE) # 091215 use try in case of trouble, 
		       # 20100711 silent
                   if (ctrl$kkt){
   	              if (class(hev) != "try-error") {
                         ans$evnhatend <- hev # answers are OK
                         # now look at Hessian properties
                         negeig<-(ans$evnhatend[npar] <= (-1)*ctrl$kkt2tol*(1.0+abs(ans$value))) # 20100711 kkt2tol
                         evratio<-ans$evnhatend[npar-nbds]/ans$evnhatend[1]
                         # If non-positive definite, then there will be zero eigenvalues (from the projection)
                         # in the place of the "last" eigenvalue and we'll have singularity.
                         # WARNING: Could have a weak minimum if semi-definite.
                         ans$kkt2<- (evratio > ctrl$kkt2tol) && (! negeig)
                      } else {
                         warnstr<-paste("Eigenvalue failure after method ",method[i],sep='')
                         if (ctrl$dowarn) warning(warnstr)
                         if (ctrl$trace>0) {
                            cat("Hessian eigenvalue calculation failure!\n")
                            print(nhatend)
                         }
                      }
                   } # kkt2 evaluation
                } else { # computing Hessian has failed
                   warnstr<-paste("Hessian not computable after method ",method[i],sep='')
                   if (ctrl$dowarn) warning(warnstr)
                   if (ctrl$trace>0) cat(warnstr,"\n") 
                }
             } else { # gradient failure
                warnstr<-paste("Gradient not computable after method ",method[i],sep='')
                if (ctrl$dowarn) warning(warnstr)
                if (ctrl$trace>0) cat(warnstr,"\n") 
             }
###          } # end kkt test
          ans$systime <- time
    ans # return(ans)
} ## end of optimx