R/base_optim_function.R
In FAMoS: A Flexible Algorithm for Model Selection

Documented in base.optim

#' Fit Parameters of a Model
#'
#' Given a specific model, \code{base.optim} fits the corresponding parameters and saves the output.
#' @param binary A vector containing zeroes and ones. Zero indicates that the corresponding parameter is not fitted.
#' @param parms A named vector containing the initial values for \code{\link{optim}}. Must be in the same order as \code{binary}.
#' @param fit.fn A cost function. Has to take the complete parameter vector as an input argument (needs to be named \code{parms}) and must return must return a selection criterion value (e.g. AICc or BIC). The binary vector containing the information which parameters are fitted, can also be used by taking \code{binary} as an additional function input argument.
#' @param homedir A string giving the directory in which the result folders generated by \code{\link{famos}} are found.
#' @param use.optim Logical. If true, the cost function \code{fit.fn} will be fitted via \code{\link{optim}}. If FALSE, the cost function will only be evaluated.
#' @param optim.runs The number of times that each model will be optimised. Default to 1. Numbers larger than 1 use random initial conditions (see \code{random.borders}).
#' @param default.val A named list containing the values that the non-fitted parameters should take. If NULL, all non-fitted parameters will be set to zero. Default values can be either given by a numeric value or by the name of the corresponding parameter the value should be inherited from (NOTE: In this case the corresponding parameter entry has to contain a numeric value). Default to NULL.
#' @param random.borders The ranges from which the random initial parameter conditions for all optim.runs > 1 are sampled. Can be either given as a vector containing the relative deviations for all parameters or as a matrix containing in its first column the lower and in its second column the upper border values. Parameters are uniformly sampled based on \code{\link{runif}}. Default to 1 (100\% deviation of all parameters). Alternatively, functions such as \code{\link{rnorm}}, \code{\link{rchisq}}, etc. can be used if the additional arguments are passed along as well.
#' @param con.tol The relative convergence tolerance. \code{famos} will rerun \code{\link{optim}} until the relative improvement between the current and the last fit is less than \code{con.tol}. Default is set to 0.01, meaning the fitting will terminate if the improvement is less than 1\% of the previous value.
#' @param control.optim Control parameters passed along to \code{optim}. For more details, see \code{\link{optim}}.
#' @param parscale.pars Logical. If TRUE (default), the \code{parscale} option will be used when fitting with \code{\link{optim}}. This is helpful, if the parameter values are on different scales.
#' @param scaling Numeric vector determining how newly added model parameters are scaled. Only needed if \code{parscale.pars} is TRUE.
#' @param verbose Logical. If TRUE, FAMoS will output all details about the current fitting procedure.
#' @param ... Additional parameters.
#' @details The fitting routine of \code{base.optim} is based on the function \code{\link{optim}}. The number of fitting runs can be specified by the \code{optim.runs} parameter in the \code{\link{famos}} function. Here, the first fitting run takes the parameters supplied in \code{parms} as a starting condition, while all following fitting runs sample new initial sets according to a uniform distribution based on the intervals [\code{parms} - \code{abs}(\code{parms}), \code{parms} + \code{abs}(\code{parms})].
#' Additionally, each fitting run is based on a \code{while}-loop that compares the outcome of the previous and the current fit. Each fitting run is terminated when the specified convergence tolerance \code{con.tol} is reached.
#' @export
#' @return Saves the results obtained from fitting the corresponding model parameters in the respective files, from which they can be accessed by the main function \code{\link{famos}}.
#' @examples
#' #setting data
#' true.p2 <- 3
#' true.p5 <- 2
#' sim.data <- cbind.data.frame(range = 1:10,
#'                              y = true.p2^2 * (1:10)^2 - exp(true.p5 * (1:10)))
#'
#' #define initial parameter values and corresponding test function
#' inits <- c(p1 = 3, p2 = 4, p3 = -2, p4 = 2, p5 = 0)
#'
#' cost_function <- function(parms, binary, data){
#'   if(max(abs(parms)) > 5){
#'     return(NA)
#'   }
#'   with(as.list(c(parms)), {
#'     res <- p1*4 + p2^2*data$range^2 + p3*sin(data$range) + p4*data$range - exp(p5*data$range)
#'     diff <- sum((res - data$y)^2)
#'
#'     #calculate AICC
#'     nr.par <- length(which(binary == 1))
#'     nr.data <- nrow(data)
#'     AICC <- diff + 2*nr.par + 2*nr.par*(nr.par + 1)/(nr.data - nr.par -1)
#'
#'     return(AICC)
#'   })
#' }
#'
#' #create directories if needed
#' make.directories(tempdir())
#'
#' #optimise the model parameters
#' base.optim(binary = c(0,1,1,0,1),
#'            parms = inits,
#'            fit.fn = cost_function,
#'            homedir = tempdir(),
#'            data = sim.data)
#'
#' #delete tempdir
#' unlink(paste0(tempdir(),"/FAMoS-Results"), recursive = TRUE)

base.optim <- function(binary,
                       parms,
                       fit.fn,
                       homedir,
                       use.optim = TRUE,
                       optim.runs = 1,
                       default.val = NULL,
                       random.borders = 1,
                       con.tol = 0.01,
                       control.optim = list(maxit = 1000),
                       parscale.pars = FALSE,
                       scaling = NULL,
                       verbose = FALSE,
                       ...) {

  #get the vectors of the fitted and the not-fitted parameters
  no.fit <- which(binary == 0)

  if(length(no.fit) > 0){
    #specify fit vector
    fit.vector <- parms[-no.fit]
  }else{
    fit.vector <- parms
  }

  #number of succesful runs
  k <- 1
  #number of insuccessful runs
  total.tries <- 0

  #try k = optim.runs different combinations
  while(k <= optim.runs && total.tries < (4*optim.runs)){
    #set failing status to FALSE
    abort <- F

    #print number of successful runs
    if(verbose){
      cat(paste0("\nFitting run # ", k, "\n"))
    }


    #check if initial parameters set is working
    if(k == 1){
      #take the parameter combination from the currently best model
      ran.par <- fit.vector
      #check if function works
      if(use.optim == TRUE){
        finite.val <- combine.and.fit(par = ran.par,
                                      par.names = names(parms),
                                      fit.fn = fit.fn,
                                      default.val = default.val,
                                      binary = binary,
                                      ...)
        works <- is.finite(finite.val)

        #if not, skip the current set
        if(works == FALSE){
          if(verbose){
            cat("Inherited parameters do not work and are being skipped.\n")
          }

          k <- k + 1
          total.tries <- total.tries + 1
          next
        }
      }
    }else{
      #get random initial conditions and test if they work
      works <- FALSE
      tries <- 0

      while(works == FALSE){
        if(is.vector(random.borders)){
          if(length(random.borders) == 1){
            random.min <- fit.vector - random.borders*abs(fit.vector)
            random.max <- fit.vector + random.borders*abs(fit.vector)
          }else{
            random.min <- fit.vector - random.borders[-no.fit]*abs(fit.vector)
            random.max <- fit.vector + random.borders[-no.fit]*abs(fit.vector)
          }

          ran.par <- stats::runif(n = length(fit.vector),
                                  min = random.min,
                                  max = random.max)

        }else if(is.matrix(random.borders)){
          if(nrow(random.borders) == 1){
            random.min <- rep(random.borders[1,1], length(fit.vector))
            random.max <- rep(random.borders[1,2], length(fit.vector))
          }else{
            random.min <- random.borders[-no.fit,1]
            random.max <- random.borders[-no.fit,2]
          }

          ran.par <- stats::runif(n = length(fit.vector),
                                  min = random.min,
                                  max = random.max)
        }else if(is.function(random.borders)){
          ran.par <- R.utils::doCall(random.borders,
                                     args = c(list(n = length(parms)),
                                              list(...)))
          ran.par <- ran.par[-no.fit]
        }else{
          stop("random.borders must be a number, a vector or a matrix!")
        }

        names(ran.par) <- names(fit.vector)
        if(use.optim == TRUE){
        finite.val <- combine.and.fit(par = ran.par,
                                      par.names = names(parms),
                                      fit.fn = fit.fn,
                                      default.val = default.val,
                                      binary = binary,
                                      ...)

          works <- is.finite(finite.val)
        }else{
          works <- TRUE
        }

        tries <- tries + 1
        if(tries > 100){
          stop("Tried 100 times to sample valid starting conditions for optim, but failed. Please check if 'random.borders' is correctly specified.")
        }
      }
    }
    #specify parameters (only for the first run)
    opt.run <- 10
    opt.previous <- 10^300 # the difference of opt.run and opt.previous has to be bigger than 0.1, but the values are not important
    runs = 1

    repeat{#test if the current run yielded better results than the previous. If yes keep optimising
      # get initial parameter sets for optim
      if(runs == 1){
        opt.par <- ran.par
      }
      if(runs > 1){
        opt.previous <- opt.run
      }


      if(use.optim == TRUE){

        #use scaled parameters for optim
        if(parscale.pars == TRUE){
          control.optim.x <- c(control.optim,
                               list(parscale = parscale.famos(par = opt.par,
                                                              scale = abs(opt.par),
                                                              correction = scaling)))
        }else{
          control.optim.x <- control.optim
        }

        opt <- stats::optim(par = opt.par,
                            fn = combine.and.fit,
                            par.names = names(parms),
                            fit.fn = fit.fn,
                            default.val = default.val,
                            binary = binary,
                            ...,
                            control = control.optim.x)
        opt.par <- opt$par
        opt.run <- opt$value
      }else{
        opt <- combine.and.fit(par = opt.par,
                               par.names = names(parms),
                               fit.fn = fit.fn,
                               binary = binary,
                               default.val = default.val,
                               ...)

        if(!is.list(opt)){
          stop("The output of the user-defined optimisation function must be a list including the selection criterion value and a named vector of the returned parameter values.")
        }

        opt.run <- opt[[1]]
        opt.par <- opt[[2]]
        break
      }

      if (opt.run > 1e34 || !is.finite(opt.run)) {
        abort <- T
        if(verbose){
          cat("Optimisation failed. Run skipped.\n")
        }

        total.tries <- total.tries + 1
        break
      }

      if(abs((opt.run - opt.previous)/opt.previous) < con.tol){
        break
      }
      #update run
      runs = runs + 1
      if(verbose){
        cat(paste(opt.run, "\n"))
      }

    }
    #test if current run was better
    if(k == 1 || opt.run < opt.min){
      opt.min <- opt.run

      #get corresponding parameter values
      out.par <- combine.par(fit.par = opt.par, all.names = names(parms), default.val = default.val)

      result <- c(SCV = opt.min, out.par)

      #saves progress if the recent fit is the first or better than any previously saved one
      #check if this model has already been tested
      if(file.exists(paste0(homedir,
                            "/FAMoS-Results/Fits/Model",
                            paste(binary, collapse =""),
                            ".rds"))
         == TRUE){

        result_old <- readRDS(file = paste0(homedir,
                                            "/FAMoS-Results/Fits/Model",
                                            paste(binary, collapse =""),
                                            ".rds"))
        #overwrite output file only if current fit is better than the previous one
        if(result_old[1] > opt.min){
          if(verbose){
            cat("Current fit better than previous best fit. Results overwritten.\n")
          }

          saveRDS(object = result, file = paste0(homedir,
                                                 "/FAMoS-Results/Fits/Model",
                                                 paste(binary, collapse =""),
                                                 ".rds"))
        }

      }else{
        saveRDS(object = result, file = paste0(homedir,
                                               "/FAMoS-Results/Fits/Model",
                                               paste(binary, collapse =""),
                                               ".rds"))
        if(verbose){
          cat("No results file present. Current parameters saved.\n")
        }

      }


    }


    if(!abort || k==1) {
      k <- k + 1
      total.tries <- total.tries + 1
    }


  }
  if(verbose){
    cat("\nFitting done.\n")
  }

  return("done")
}