R/statistics.R

Defines functions fitErrorModel reduceReplicates load.parlist msParframe mstrust vcov confint.parframe progressBar profile

Documented in confint.parframe fitErrorModel load.parlist msParframe mstrust profile progressBar reduceReplicates vcov

#' Profile-likelihood (PL) computation
#' 
#' @param obj Objective function \code{obj(pars, fixed, ...)} returning a list with "value",
#' "gradient" and "hessian". If attribute "valueData" and/or "valuePrior are returned they are attached to the return value.
#' @param pars Parameter vector corresponding to the log-liklihood optimum.
#' @param whichPar Numeric or character vector. The parameters for which the profile is computed.
#' @param alpha Numeric, the significance level based on the chisquare distribution with df=1
#' @param limits Numeric vector of length 2, the lower and upper deviance from the original 
#' value of \code{pars[whichPar]}
#' @param method Character, either \code{"integrate"} or \code{"optimize"}. This is a short-cut for
#' setting stepControl, algoControl and optControl by hand.
#' @param stepControl List of arguments controlling the step adaption. Defaults to integration set-up, i.e.
#' \code{list(stepsize = 1e-4, min = 1e-4, max = Inf, atol = 1e-2, rtol = 1e-2, limit = 100)}
#' @param algoControl List of arguments controlling the fast PL algorithm. defaults to
#' \code{list(gamma = 1, W = "hessian", reoptimize = FALSE, correction = 1, reg = .Machine$double.eps)}
#' @param optControl List of arguments controlling the \code{trust()} optimizer. Defaults to
#' \code{list(rinit = .1, rmax = 10, iterlim = 10, fterm = sqrt(.Machine$double.eps), mterm = sqrt(.Machine$double.eps))}.
#' See \link{trust} for more details.
#' @param verbose Logical, print verbose messages.
#' @param cores number of cores used when computing profiles for several
#' parameters.
#' @param cautiousMode Logical, write every step to disk and don't delete intermediate results
#' @param ... Arguments going to obj()
#' @details Computation of the profile likelihood is based on the method of Lagrangian multipliers
#' and Euler integration of the corresponding differential equation of the profile likelihood paths.
#' 
#' \code{algoControl}: Since the Hessian which is needed for the differential equation is frequently misspecified, 
#' the error in integration needs to be compensated by a correction factor \code{gamma}. Instead of the
#' Hessian, an identity matrix can be used. To guarantee that the profile likelihood path stays on
#' the true path, each point proposed by the differential equation can be used as starting point for
#' an optimization run when \code{reoptimize = TRUE}. The correction factor \code{gamma} is adapted
#' based on the amount of actual correction. If this exceeds the value \code{correction}, \code{gamma} is
#' reduced. In some cases, the Hessian becomes singular. This leads to problems when inverting the
#' Hessian. To avoid this problem, the pseudoinverse is computed by removing all singular values lower
#' than \code{reg}.
#' 
#' \code{stepControl}: The Euler integration starts with \code{stepsize}. In each step the predicted change
#' of the objective function is compared with the actual change. If this is larger than \code{atol}, the
#' stepsize is reduced. For small deviations, either compared the abolute tolerance \code{atol} or the
#' relative tolerance \code{rtol}, the stepsize may be increased. \code{max} and \code{min} are upper and lower
#' bounds for \code{stepsize}. \code{limit} is the maximum number of steps that are take for the profile computation.
#' \code{stop} is a character, usually "value" or "data", for which the significance level \code{alpha}
#' is evaluated.
#' 
#' @return Named list of length one. The name is the parameter name. The list enty is a
#' matrix with columns "value" (the objective value), "constraint" (deviation of the profiled paramter from
#' the original value), "stepsize" (the stepsize take for the iteration), "gamma" (the gamma value employed for the
#' iteration), "valueData" and "valuePrior" (if specified in obj), one column per parameter (the profile paths).
#' @example inst/examples/profiles.R
#' @export
profile <- function(obj, pars, whichPar, alpha = 0.05, 
                    limits = c(lower = -Inf, upper = Inf), 
                    method = c("integrate", "optimize"),
                    stepControl = NULL, 
                    algoControl = NULL,
                    optControl  = NULL,
                    verbose = FALSE,
                    cores = 1,
                    cautiousMode = FALSE,
                    ...) {
  # Ensure that obj is defined in this environment such that it is copied to the parallel workers
  force(obj)
  
  
  # Guarantee that pars is named numeric without deriv attribute
  dotArgs <- list(...)
  sanePars <- sanitizePars(pars, dotArgs$fixed)
  pars <- sanePars$pars
  fixed <- sanePars$fixed
  
  
  # Initialize control parameters depending on method
  method  <- match.arg(method)
  if (method == "integrate") {
    sControl <- list(stepsize = 1e-4, min = 1e-4, max = Inf, atol = 1e-2, rtol = 1e-2, limit = 500, stop = "value")
    aControl <- list(gamma = 1, W = "hessian", reoptimize = FALSE, correction = 1, reg = .Machine$double.eps)
    oControl <- list(rinit = .1, rmax = 10, iterlim = 10, fterm = sqrt(.Machine$double.eps), mterm = sqrt(.Machine$double.eps))
  }
  if (method == "optimize") {
    sControl <- list(stepsize = 1e-2, min = 1e-4, max = Inf, atol = 1e-1, rtol = 1e-1, limit = 100, stop = "value")
    aControl <- list(gamma = 0, W = "identity", reoptimize = TRUE, correction = 1, reg = 0)
    oControl <- list(rinit = .1, rmax = 10, iterlim = 100, fterm = sqrt(.Machine$double.eps), mterm = sqrt(.Machine$double.eps))
  }
  
  # Check if on Windows
  cores <- min(length(whichPar), cores)
  cores <- sanitizeCores(cores)
  
  # Substitute user-set control parameters
  if (!is.null(stepControl)) sControl[match(names(stepControl), names(sControl))] <- stepControl
  if (!is.null(algoControl)) aControl[match(names(algoControl), names(aControl))] <- algoControl
  if (!is.null(optControl )) oControl[match(names(optControl), names(oControl ))] <- optControl
  
  
  # Create interRes folder for cautiousMode
  if (cautiousMode){
    interResFolder <- "profiles-interRes"
    dir.create(interResFolder,showWarnings = FALSE)
  }
  
  
  # Start cluster if on windows
  if (cores > 1) {
    
    if (Sys.info()[['sysname']] == "Windows") {
      
      cluster <- parallel::makeCluster(cores)
      doParallel::registerDoParallel(cl = cluster)
      
      parallel::clusterCall(cl = cluster, function(x) .libPaths(x), .libPaths())
      
      varlist <- ls()
      # Exclude things like "missing argument"
      varlist <- c("obj", "whichPar", "alpha", "limits", "method", "verbose", "cores",
                   "pars", "fixed", "dotArgs",
                   "sControl", "aControl", "oControl")
      parallel::clusterExport(cluster, envir = environment(), varlist = varlist)
      
    } else {
      
      doParallel::registerDoParallel(cores = cores)
      
    }
    
    "%mydo%" <- foreach::"%dopar%"
    
    
  } else {
    
    "%mydo%" <- foreach::"%do%"
    
  }
  
  
  # Convert whichPar to index vector
  if (is.character(whichPar)) whichPar <- which(names(pars) %in% whichPar)
  
  loaded_packages <- .packages()  
  out <- foreach::foreach(whichIndex = whichPar, 
                          .packages = loaded_packages, 
                          .inorder = TRUE,
                          .options.multicore = list(preschedule = FALSE)) %mydo% {
                            
                            loadDLL(obj)
                            
                            
                            whichPar.name <- names(pars)[whichIndex]
                            
                            
                            
                            ## Functions needed during profile computation -----------------------
                            obj.opt <- obj
                            obj.prof <- function(p, ...) {
                              out <- obj(p, ...)
                              # If "identity", substitute hessian such that steps are in whichPar-direction.
                              Id <- diag(1/.Machine$double.eps, length(out$gradient))
                              Id[whichIndex, whichIndex] <- 1
                              colnames(Id) <- rownames(Id) <- names(out$gradient)
                              
                              W <- match.arg(aControl$W[1], c("hessian", "identity"))
                              out$hessian <- switch(W,
                                                    "hessian" = out$hessian,
                                                    "identity" = Id)
                              return(out)    
                            }
                            
                            pseudoinverse <- function(m, tol) {
                              msvd <- svd(m)
                              index <- which(abs(msvd$d) > max(dim(m))*max(msvd$d)*tol) 
                              if (length(index) == 0) {
                                out <- array(0, dim(m)[2:1])
                              }
                              else {
                                out <- msvd$u[,index] %*% (1/msvd$d[index] * t(msvd$v)[index,])
                              }
                              attr(out, "valid") <- 1:length(msvd$d) %in% index
                              return(out)
                            }
                            
                            constraint <- function(p) {
                              value <- p[whichIndex] - pars[whichIndex]
                              gradient <- rep(0, length(p))
                              gradient[whichIndex] <- 1
                              return(list(value = value, gradient = gradient))
                            }
                            lagrange <- function(y) {
                              
                              # initialize values
                              p <- y
                              lambda <- 0
                              out <- do.call(obj.prof, c(list(p = p), dotArgs))
                              g.original <- constraint(p)
                              
                              # evaluate derivatives and constraints
                              g     <- direction * g.original$value
                              gdot  <- direction * g.original$gradient
                              ldot  <- out$gradient
                              lddot <- out$hessian 
                              
                              # compute rhs of profile ODE
                              M <- rbind(cbind(lddot, gdot), 
                                         matrix(c(gdot, 0), nrow=1))
                              
                              v <- c(-rep(gamma, length(p))*(ldot + lambda*gdot), 1)
                              v0 <- c(-rep(0, length(p))*(ldot + lambda*gdot), 1)
                              
                              W <- pseudoinverse(M, tol = aControl$reg)
                              valid <- attr(W, "valid")
                              if(any(!valid)) {
                                dy <- try(as.vector(W%*%v)[1:length(p)], silent=FALSE)
                                dy0 <- try(as.vector(W%*%v0)[1:length(p)], silent=FALSE)
                                dy[!valid[1:length(p)]] <- dy0[!valid[1:length(p)]] <- 0
                                dy[whichIndex] <- dy0[whichIndex] <- direction
                                warning(paste0("Iteration ", i, ": Some singular values of the Hessian are below the threshold. Optimization will be performed."))
                              } else {
                                dy <- try(as.vector(W%*%v)[1:length(p)], silent=FALSE)
                                dy0 <- try(as.vector(W%*%v0)[1:length(p)], silent=FALSE)
                              }
                              
                              
                              if(!inherits(dy, "try-error")) {
                                names(dy) <- names(y) 
                                correction <- sqrt(sum((dy-dy0)^2))/sqrt(sum(dy^2))
                              } else {
                                dy <- NA
                                correction <- 0
                                warning(paste0("Iteration ", i, ": Impossible to invert Hessian. Trying to optimize instead."))
                              }
                              
                              # Get attributes of the returned objective value (only if numeric)
                              out.attributes <- attributes(out)[sapply(attributes(out), is.numeric)]
                              out.attributes.names <- names(out.attributes)
                              
                              
                              return(c(list(dy = dy, 
                                            value = out$value, 
                                            gradient = out$gradient, 
                                            correction = correction, 
                                            valid = valid, 
                                            attributes = out.attributes.names),
                                       out.attributes))
                              
                              
                              
                              
                            }
                            doIteration <- function() {
                              
                              optimize <- aControl$reoptimize
                              # Check for error in evaluation of lagrange()
                              if(is.na(dy[1])) {
                                #cat("Evaluation of lagrange() not successful. Will optimize instead.\n")
                                optimize <- TRUE
                                y.try <- y
                                y.try[whichIndex] <- y[whichIndex] + direction*stepsize
                                rinit <- oControl$rinit
                              } else {
                                dy.norm <- sqrt(sum(dy^2))
                                rinit <- min(c(oControl$rinit, 3*dy.norm))
                                y.try <- y + dy
                                if(any(!lagrange.out$valid)) optimize <- TRUE
                              }
                              
                              # Do reoptimization if requested or necessary
                              if(optimize) {      
                                parinit.opt <- y.try[-whichIndex]
                                fixed.opt <- c(fixed, y.try[whichIndex])
                                
                                arglist <- c(list(objfun = obj.opt, parinit = parinit.opt, fixed = fixed.opt, rinit = rinit), 
                                             oControl[names(oControl)!="rinit"],
                                             dotArgs[names(dotArgs) != "fixed"])
                                
                                
                                myfit <- try(do.call(trust, arglist), silent=FALSE)
                                if(!inherits(myfit, "try-error")) {
                                  y.try[names(myfit$argument)] <- as.vector(myfit$argument)  
                                } else {
                                  warning("Optimization not successful. Profile may be erroneous.")
                                }
                                
                              }
                              
                              return(y.try)
                              
                            }
                            doAdaption <- function() {
                              
                              lagrange.out.try <- lagrange(y.try)
                              valid <- TRUE
                              
                              # Predicted change of the objective value
                              dobj.pred <- sum(lagrange.out$gradient*(y.try - y))
                              dobj.fact <- lagrange.out.try$value - lagrange.out$value
                              correction <- lagrange.out.try$correction
                              
                              # Gamma adaption based on amount of actual correction
                              if (correction > aControl$correction) gamma <- gamma/2
                              if (correction < 0.5*aControl$correction) gamma <- min(c(aControl$gamma, gamma*2))
                              
                              # Stepsize adaption based on difference in predicted change of objective value
                              if (abs(dobj.fact - dobj.pred) > sControl$atol & stepsize > sControl$min) {
                                stepsize <- max(c(stepsize/1.5, sControl$min))
                                valid <- FALSE
                              }
                              if (abs(dobj.fact - dobj.pred) < .3*sControl$atol | abs((dobj.fact - dobj.pred)/dobj.fact) < .3*sControl$rtol) {
                                stepsize <- min(c(stepsize*2, sControl$max))
                              }
                              
                              ## Verbose
                              if (verbose) {
                                # Compute progres
                                diff.thres <- diff.steps <- diff.limit <- 0
                                if (threshold < Inf)
                                  diff.thres <- 1 - max(c(0, min(c(1, (threshold - lagrange.out.try$value)/delta))))
                                if (sControl$limit < Inf)
                                  diff.steps <- i/sControl$limit
                                diff.limit <- switch(as.character(sign(constraint.out$value)),
                                                     "1"  = 1 - (limits[2] - constraint.out$value)/limits[2],
                                                     "-1" = diff.limit <- 1 - (limits[1] - constraint.out$value)/limits[1],
                                                     "0"  = 0)
                                
                                percentage <- max(c(diff.thres, diff.steps, diff.limit), na.rm = TRUE)*100
                                progressBar(percentage)
                                
                                
                                #cat("diff.thres:", diff.thres, "diff.steps:", diff.steps, "diff.limit:", diff.limit)
                                myvalue <- format(substr(lagrange.out$value  , 0, 8), width = 8)
                                myconst <- format(substr(constraint.out$value, 0, 8), width = 8)
                                mygamma <- format(substr(gamma               , 0, 8), width = 8)
                                myvalid <- all(lagrange.out$valid)
                                cat("\tvalue:", myvalue, "constraint:", myconst, "gamma:", mygamma, "valid:", myvalid) 
                              }
                              
                              
                              
                              return(list(lagrange = lagrange.out.try, stepsize = stepsize, gamma = gamma, valid = valid))
                              
                              
                            }
                            
                            ## Compute profile -------------------------------------------------
                            
                            # Initialize profile
                            i <- 0 
                            direction <- 1
                            gamma <- aControl$gamma
                            stepsize <- sControl$stepsize
                            ini <- pars
                            
                            lagrange.out <- lagrange(ini)
                            constraint.out <- constraint(pars)
                            
                            delta <- qchisq(1-alpha, 1)
                            #delta.t <- qf(1 - alpha, 1L, nobs - npar)
                            
                            
                            
                            threshold <- lagrange.out[[sControl$stop]] + delta
                            out.attributes <- unlist(lagrange.out[lagrange.out$attributes])
                            
                            out <- c(value = lagrange.out$value, 
                                     constraint = as.vector(constraint.out$value), 
                                     stepsize = stepsize, 
                                     gamma = gamma, 
                                     whichPar = whichIndex,
                                     out.attributes, ini)
                            
                            # Compute right profile
                            if (verbose) {
                              cat("Compute right profile\n")
                            }
                            direction <- 1
                            gamma <- aControl$gamma
                            stepsize <- sControl$stepsize
                            y <- ini
                            
                            lagrange.out <- lagrange.out
                            constraint.out <- constraint.out
                            
                            while (i < sControl$limit) {
                              
                              ## Iteration step
                              sufficient <- FALSE
                              retry <- 0
                              while (!sufficient & retry < 5) {
                                dy <- stepsize*lagrange.out$dy
                                y.try <- try(doIteration(), silent = TRUE)
                                out.try <- try(doAdaption(), silent = TRUE)
                                if (inherits(y.try, "try-error") | inherits(out.try, "try-error")) {
                                  sufficient <- FALSE
                                  stepsize <- stepsize/1.5
                                  retry <- retry + 1
                                } else {
                                  sufficient <- out.try$valid
                                  stepsize <- out.try$stepsize  
                                }
                                
                              }    
                              if (inherits(y.try, "try-error") | inherits(out.try, "try-error")) break
                              
                              
                              ## Set values
                              y <- y.try
                              lagrange.out <- out.try$lagrange
                              constraint.out <- constraint(y.try)
                              stepsize <- out.try$stepsize
                              gamma <- out.try$gamma
                              out.attributes <- unlist(lagrange.out[lagrange.out$attributes])
                              
                              ## Return values 
                              out <- rbind(out, 
                                           c(value = lagrange.out$value, 
                                             constraint = as.vector(constraint.out$value), 
                                             stepsize = stepsize, 
                                             gamma = gamma, 
                                             whichPar = whichIndex,
                                             out.attributes, 
                                             y))
                              
                              if(cautiousMode) {
                                outCautious <- as.data.frame(out)
                                outCautious$whichPar <- whichPar.name
                                outCautious <- parframe(
                                  outCautious,
                                  parameters = names(pars),
                                  metanames = c("value", "constraint", "stepsize", "gamma", "whichPar"),
                                  obj.attributes = names(out.attributes)
                                )
                                dput(outCautious, file = file.path(interResFolder, paste0(whichPar.name, "-right.R")))
                              }
                              
                              value <- lagrange.out[[sControl$stop]]
                              if (value > threshold | constraint.out$value > limits[2]) break
                              
                              i <- i + 1
                              
                            }
                            
                            # Compute left profile
                            if (verbose) {
                              cat("\nCompute left profile\n")
                            }
                            i <- 0
                            direction <- -1
                            gamma <- aControl$gamma
                            stepsize <- sControl$stepsize
                            y <- ini
                            
                            lagrange.out <- lagrange(ini)
                            constraint.out <- constraint(pars)
                            
                            while (i < sControl$limit) {
                              
                              ## Iteration step
                              sufficient <- FALSE
                              retry <- 0
                              while (!sufficient & retry < 5) {
                                dy <- stepsize*lagrange.out$dy
                                y.try <- try(doIteration(), silent = TRUE)
                                out.try <- try(doAdaption(), silent = TRUE)
                                if (inherits(y.try, "try-error") | inherits(out.try, "try-error")) {
                                  sufficient <- FALSE
                                  stepsize <- stepsize/1.5
                                  retry <- retry + 1
                                } else {
                                  sufficient <- out.try$valid
                                  stepsize <- out.try$stepsize  
                                }
                                
                              }
                              if (inherits(y.try, "try-error") | inherits(out.try, "try-error")) break
                              
                              ## Set values
                              y <- y.try
                              lagrange.out <- out.try$lagrange
                              constraint.out <- constraint(y.try)
                              stepsize <- out.try$stepsize
                              gamma <- out.try$gamma
                              out.attributes <- unlist(lagrange.out[lagrange.out$attributes])
                              
                              
                              ## Return values
                              out <- rbind(c(value = lagrange.out$value, 
                                             constraint = as.vector(constraint.out$value), 
                                             stepsize = stepsize, 
                                             gamma = gamma,
                                             whichPar = whichIndex,
                                             out.attributes,
                                             y), 
                                           out)
                              
                              if(cautiousMode) {
                                outCautious <- as.data.frame(out)
                                outCautious$whichPar <- whichPar.name
                                outCautious <- parframe(
                                  outCautious,
                                  parameters = names(pars),
                                  metanames = c("value", "constraint", "stepsize", "gamma", "whichPar"),
                                  obj.attributes = names(out.attributes)
                                )
                                dput(outCautious, file = file.path(interResFolder, paste0(whichPar.name, "-left.R")))
                              }
                              
                              
                              value <- lagrange.out[[sControl$stop]]
                              if (value > threshold | constraint.out$value < limits[1]) break
                              
                              i <- i + 1
                              
                            }
                            
                            # Output
                            out <- as.data.frame(out)
                            out$whichPar <- whichPar.name
                            parframe(
                              out,
                              parameters = names(pars),
                              metanames = c("value", "constraint", "stepsize", "gamma", "whichPar"),
                              obj.attributes = names(out.attributes)
                            )
                            
                          }
  
  
  
  if (Sys.info()[['sysname']] == "Windows" & cores > 1) {
    
    parallel::stopCluster(cluster)
    doParallel::stopImplicitCluster()
    
  }
  
  # .. Prepare output -----
  outncol <- vapply(out, ncol, 1)
  failed <- outncol != max(outncol)
  if (any(failed)) message("The following profiles failed: ", paste0(whichPar[failed], collapse = ", "))
  out <- out[!failed]  
  
  do.call(rbind, out)
  
  
}

#' Progress bar
#' 
#' @param percentage Numeric between 0 and 100
#' @param size Integer, the size of the bar print-out
#' @param number Logical, Indicates whether the percentage should be printed out.
#' @export
progressBar <- function(percentage, size = 50, number = TRUE) {
  
  if(percentage < 0) percentage <- 0
  if(percentage > 100) percentage <- 100
  
  out <- paste("\r|", paste(rep("=", round(size*percentage/100)), collapse=""), paste(rep(" ", size-round(size*percentage/100)), collapse=""), "|", sep="")
  cat(out)
  if(number) cat(format(paste(" ", round(percentage), "%", sep=""), width=5))
  
}

#' Profile uncertainty extraction
#' 
#' @description extract parameter uncertainties from profiles
#' @param object object of class \code{parframe}, returned from \link{profile} function.
#' @param parm a specification of which parameters are to be given confidence intervals, 
#' either a vector of numbers or a vector of names. If missing, all parameters are considered.
#' @param level the confidence level required.
#' @param ... not used right now.
#' @param val.column the value column used in the parframe, usually 'data'.
#' @export
confint.parframe <- function(object, parm = NULL, level = 0.95, ..., val.column = "data") {
  
  profile <- object
  obj.attributes <- attr(profile, "obj.attributes")
  
  if (is.null(parm))
    parm <- unique(profile[["whichPar"]])
  
  threshold <- qchisq(level, df = 1)
  
  # Reduce to profiles for parm
  profile <- profile[profile[["whichPar"]] %in% parm,]
  
  # Evaluate confidence intervals per parameter
  CIs <- lapply(split(profile, profile[["whichPar"]]), function(d) {
    
    # Get origin of profile
    origin <- which.min(abs(d[["constraint"]]))
    whichPar <- d[["whichPar"]][origin]
    
    # Define function to return constraint value where threshold is passed
    get_xThreshold <- function(branch) {
      
      y <- branch[[val.column]] - d[[val.column]][origin]
      x <- branch[["constraint"]]
      
      # If less than 3 points, return NA
      if (length(x) < 3)
        return(NA)
      
      # If threshold exceeded, take closest points below and above threshold
      # and interpolate
      if (any(y > threshold)) {
        i.above <- utils::head(which(y > threshold), 1)
        i.below <- utils::tail(which(y < threshold), 1)
        if (i.below > i.above) {
          return(NA)
        } else {
          slope <- (y[i.above] - y[i.below])/(x[i.above] - x[i.below])
          dy <- threshold - y[i.below]
          dx <- dy/slope
          x_threshold <- x[i.below] + dx
          return(x_threshold)
        }
      }
      
      # If threshold not exceeded,
      # take the last 20% of points (at least 3) an perform linear fit
      n_last20 <- max(3, length(which(x - x[1] > 0.8 * (max(x) - x[1]))))
      x <- tail(x, n_last20)
      y <- tail(y, n_last20)
      slope <- sum((x - mean(x))*(y - mean(y)))/sum((x - mean(x))^2)
      
      # If slope < 0, return Inf
      if (slope < 0)
        return(Inf)
      
      # Extrapolate until threshold is passed
      dy <- threshold - tail(y, 1)
      dx <- dy/slope
      x_threshold <- tail(x, 1) + dx
      
      # Test if extrapolation takes the point of passage very far
      # Set to Inf in that case
      if (x_threshold > 10*(max(x) - min(x)))
        x_threshold <- Inf
      
      return(x_threshold)
      
      
    } 
    
    # Right profiles
    right <- d[d[["constraint"]] >= 0,]
    upper <- d[[whichPar]][origin] + get_xThreshold(right)
    
    # Left profile
    left <- d[d[["constraint"]] <= 0,]
    left[["constraint"]] <- - left[["constraint"]]
    left <- left[order(left[["constraint"]]), ]
    lower <- d[[whichPar]][origin] - get_xThreshold(left)
    
    
    data.frame(name = whichPar, 
               value = d[[whichPar]][origin], 
               lower = lower, 
               upper = upper)
    
    
  })
  
  do.call(rbind, CIs)
  
}


#' Get variance-covariance matrix from trust result
#' 
#' @param fit return from trust or selected fit from parlist
#' @param parupper upper limit for parameter values. Parameters
#' are considered fixed when exceeding these limits.
#' @param parlower lower limit for parameter values. Parameters
#' are considered fixed when exceeding these limits.
#' 
#' @export 
vcov <- function(fit, parupper = NULL, parlower = NULL) {
  
  hessian__ <- fit[["hessian"]]
  arg__ <- fit[["argument"]]
  
  if (is.null(arg__)) return()
  
  if (is.null(hessian)) {
    vcov__ <- matrix(NA, nrow = length(arg__), ncol = length(arg__), 
                     dimnames = list(names(arg__), names(arg__)))
    return(vcov__)
  }
  
  fixed <- NULL
  if (!is.null(parupper)) {
    myarg__ <- arg__[names(parupper)]
    fixed <- union(fixed, names(myarg__)[myarg__ >= parupper])
  }
  if (!is.null(parlower)) {
    myarg__ <- arg__[names(parlower)]
    fixed <- union(fixed, names(myarg__)[myarg__ <= parlower])
  }
  
  vcov__ <- 0*hessian__
  is_fixed__ <- colnames(hessian__) %in% fixed
  subhessian__ <- hessian__[!is_fixed__, !is_fixed__, drop = FALSE]
  subvcov__ <- try(solve(0.5*subhessian__), silent = TRUE)
  if (inherits(subvcov__, "try-error")) subvcov__ <- MASS::ginv(subhessian__)
  vcov__[!is_fixed__, !is_fixed__] <- subvcov__
  
  # This part should not be necessary due to regularization usually done
  # Perform identifiability check based on
  # subvcov__ <- vcov__[!is_fixed__, !is_fixed__, drop = FALSE]
  # eigen__ <- eigen(subhessian__)
  # tol__ <- sqrt(.Machine$double.eps)
  # V__ <- eigen__[["vectors"]][, abs(eigen__[["values"]]) < tol__, drop = FALSE]
  # identifiable__ <- apply(V__, 1, function(v) all(abs(v) < tol__))
  # diag(subvcov__)[!identifiable__] <- Inf
  # 
  # vcov__[!is_fixed__, !is_fixed__] <- subvcov__
  
  return(vcov__) 
  
}




#' Non-Linear Optimization, multi start
#' 
#' @description Wrapper around \code{\link{trust}} allowing for multiple fits 
#'   from randomly chosen initial values.
#'   
#' @param objfun Objective function, see \code{\link{trust}}.
#' @param center Parameter values around which the initial values for each fit 
#'   are randomly sampled. The initial values handed to \link{trust} are the sum
#'   of center and the output of \option{samplefun}, center + 
#'   \option{samplefun}. See \code{\link{trust}}, parinit.
#'   \code{center} Can also be a parframe, then the parameter values are taken 
#'   from the parframe. In this case, the \code{fits} argument is overwritten.
#'   To use a reproducible set of initial guesses, generate center with 
#'   \code{\link{msParframe}}
#' @param studyname The names of the study or fit. This name is used to 
#'   determine filenames for interim and final results. See Details.
#' @param rinit Starting trust region radius, see \code{\link{trust}}.
#' @param rmax Maximum allowed trust region radius, see \code{\link{trust}}.
#' @param fits Number of fits (jobs).
#' @param cores Number of cores for job parallelization.
#' @param samplefun Function to sample random initial values. It is assumed, 
#'   that \option{samplefun} has a named parameter "n" which defines how many 
#'   random numbers are to be returned, such as for \code{\link{rnorm}} or 
#'   \code{\link{runif}}. By default \code{\link{rnorm}} is used. Parameteres 
#'   for samplefun are simply appended as named parameters to the mstrust call 
#'   and automatically handed to samplefun by matching parameter names.
#' @param resultPath character indicating the folder where the results should 
#'   be stored. Defaults to ".". 
#' @param stats If true, the same summary statistic as written to the logfile is
#'   printed to command line on mstrust completion.
#' @param ... Additional parameters handed to trust(), samplefun(), or the 
#'   objective function by matching parameter names. All unmatched parameters 
#'   are handed to the objective function objfun(). The log file starts with a 
#'   table telling which parameter was assigend to which function.
#' @param output logical. If true, writes output to the disc.
#' @param cautiousMode Logical, write every fit to disk in deparsed form (avoids the RDA incompatibility trap) and don't delete intermediate results
#'   
#' @details By running multiple fits starting at randomly chosen inital 
#'   parameters, the chisquare landscape can be explored using a deterministic 
#'   optimizer. Here, \code{\link{trust}} is used for optimization. The standard
#'   procedure to obtain random initial values is to sample random variables 
#'   from a uniform distribution (\code{\link{rnorm}}) and adding these to 
#'   \option{center}. It is, however, possible, to employ any other sampling 
#'   strategy by handing the respective function to mstrust(), 
#'   \option{samplefun}.
#'   
#'   In case a special sampling is required, a customized sampling function can 
#'   be used. If, e.g., inital values leading to a non-physical systems are to 
#'   be discarded upfront, the objective function can be addapted accordingly.
#'   
#'   All started fits either lead to an error or complete converged or
#'   unconverged. A statistics about the return status of fits can be shown by
#'   setting \option{stats} to TRUE.
#'   
#'   Fit final and intermediat results are stored under \option{studyname}. For
#'   each run of mstrust for the same study name, a folder under
#'   \option{studyname} of the form "trial-x-date" is created. "x" is the number
#'   of the trial, date is the current time stamp. In this folder, the
#'   intermediate results are stored. These intermediate results can be loaded
#'   by \code{\link{load.parlist}}. These are removed on successfull completion
#'   of mstrust. In this case, the final list of fit parameters
#'   (parameterList.Rda) and the fit log (mstrust.log) are found instead.
#'   
#' @return A parlist holding errored and converged fits.
#'   
#' @seealso 1. \code{\link{trust}}, for the used optimizer,
#'   2. \code{\link{rnorm}}, \code{\link{runif}} for two common sampling functions,
#'   3. \code{\link{msParframe}} for passing a reproducible set of random initial 
#'   guesses to mstrust,
#'   4. \code{\link{as.parframe}} for formatting the output to a handy table
#'   
#' @author Wolfgang Mader, \email{Wolfgang.Mader@@fdm.uni-freiburg.de}
#'  
#' @export
#' @import parallel
mstrust <- function(objfun, center, studyname, rinit = .1, rmax = 10, fits = 20, cores = 1, optmethod = "trust",
                    samplefun = "rnorm", resultPath = ".", stats = FALSE, output = FALSE, cautiousMode = FALSE, start1stfromCenter = FALSE,
                    ...) {
  
  narrowing <- NULL
  
  
  # Check if on Windows
  cores <- sanitizeCores(cores)
  
  
  # Argument parsing, sorting, and enhancing
  # Gather all function arguments
  varargslist <- list(...)
  
  argslist <- list(
    objfun = objfun, center = center, studyname = studyname,
    rinit = rinit, rmax = rmax, fits = fits, 
    cores = cores, optmethod = optmethod, samplefun = samplefun, 
    resultPath = resultPath, stats = stats, output = output, 
    cautiousMode = cautiousMode)
  argslist <- c(argslist, varargslist)
  
  # Add extra arguments
  argslist$n <- length(center) # How many inital values do we need?
  
  # Determine target function for each function argument.
  # First, define argument names used locally in mstrust().
  # Second, check what trust() and samplefun() accept and check for name clashes.
  # Third, whatever is unused is passed to the objective function objfun().
  nameslocal <- c("studyname", "center", "fits", "cores", "optmethod", "samplefun",
                  "resultPath", "stats", "narrowing", "output")
  namestrust <- intersect(names(formals(trust)), names(argslist))
  namessample <- intersect(names(formals(samplefun)), names(argslist))
  if (length(intersect(namestrust, namessample) != 0)) {
    stop("Argument names of trust() and ", samplefun, "() clash.")
  }
  
  # Default optimizer
  namesobj <- setdiff(names(argslist), c(namestrust, namessample, nameslocal))
  
  # Assemble argument lists common to all calls in mclapply
  # Sample function
  argssample <- structure(vector("list", length = length(namessample)), names = namessample)
  for (name in namessample) {
    argssample[[name]] <- argslist[[name]]
  }
  
  # Objective function
  argsobj <- structure(vector("list", length = length(namesobj)), names = namesobj)
  for (name in namesobj) {
    argsobj[[name]] <- argslist[[name]]
  }
  
  # Trust optimizer, except for initial values
  argstrust <- structure(vector("list", length = length(namestrust)), names = namestrust)
  for (name in namestrust) {
    argstrust[[name]] <- argslist[[name]]
  }
  
  # Assemble and create output filenames, folders and files
  m_timeStamp <- paste0(format(Sys.time(), "%d-%m-%Y-%H%M%S"))
  
  # Folders
  resultFolderBase <- file.path(argslist$resultPath, argslist$studyname)
  m_trial <- paste0("trial-", length(dir(resultFolderBase, pattern = "trial*")) + 1)
  resultFolder <- file.path(resultFolderBase, paste0(m_trial, "-", m_timeStamp))
  
  interResultFolder <- file.path(resultFolder, "interRes")
  dir.create(path = interResultFolder, showWarnings = FALSE, recursive = TRUE)
  
  # Files
  fileNameLog <- paste0("mstrust.log")
  fileNameParList <- paste0("parameterList.Rda")
  fileLog <- file.path(resultFolder, fileNameLog)
  fileParList <- file.path(resultFolder, fileNameParList)
  
  
  
  # Apply trust optimizer in parallel
  # The error checking leverages that mclappy runs each job in a try().
  logfile <- file(fileLog, open = "w")
  
  # Parameter assignment information
  if (is.null(narrowing) || narrowing[1] == 1) {
    msg <- paste0("Parameter assignment information\n",
                  strpad("mstrust", 12),                        ": ", paste0(nameslocal, collapse = ", "), "\n",
                  strpad("trust", 12),                          ": ", paste0(namestrust, collapse = ", "), "\n",
                  strpad(as.character(argslist$samplefun), 12), ": ", paste0(namessample, collapse = ", "), "\n\n")
    #strpad(as.character(argslist$objfun), 12),    ": ", paste0(namesobj, collapse = ", "), "\n\n")
    writeLines(msg, logfile)
    flush(logfile)
  }
  
  # Write narrowing status information to file
  if (!is.null(narrowing)) {
    msg <- paste0("--> Narrowing, run ", narrowing[1], " of ", narrowing[2], "\n",
                  "--> " , fits, " fits to run\n")
    writeLines(msg, logfile)
    flush(logfile)
  }
  
  if(is.parframe(center)) {
    fits <- nrow(center)
  }
  
  
  cores <- min(fits, cores)
  if (cores > 1) {
    
    # Start cluster if on windows
    if (Sys.info()[['sysname']] == "Windows") {
      
      cluster <- parallel::makeCluster(cores)
      doParallel::registerDoParallel(cluster)
      parallel::clusterCall(cl = cluster, function(x) .libPaths(x), .libPaths())
      
      varlist <- ls()
      # Exclude things like "missing argument"
      varlist <- c("objfun", "center", "argstrust", 
                   "samplefun", "argssample", "argsobj", 
                   "output", "interResultFolder", "logfile")
      parallel::clusterExport(cluster, envir = environment(), varlist = varlist)
      
    } else {
      
      doParallel::registerDoParallel(cores = cores)
      
    }
    
    "%mydo%" <- foreach::"%dopar%"
    
  } else {
    
    "%mydo%" <- foreach::"%do%"
    
  }
  
  
  loaded_packages <- .packages()  
  m_parlist <- as.parlist(foreach::foreach(i = 1:fits, 
                                           .packages = loaded_packages, 
                                           .inorder = TRUE,
                                           .options.multicore = list(preschedule = FALSE)
  ) %mydo% {
    
    suppressMessages(loadDLL(objfun))
    
    if(is.parframe(center)) {
      argstrust$parinit <- as.parvec(center, i)
    } else {
      if (i == 1 & start1stfromCenter) {
        # First fit always starts from center
        argstrust$parinit <- center
      } else {
        # All other fits start from random positions
        argstrust$parinit <- center + do.call(samplefun, argssample)  
      }
    }
    
    # Check if traceFile is requested. In that case combine tracefile, with logfolder and fit number
    if (!is.null(argstrust[["traceFile"]])) {
      digits <- floor(log10(fits))
      argstrust[["traceFile"]] <- file.path(resultFolder, paste0(formatC(i, digits = digits, flag = "0"), "_", argslist[["traceFile"]]))
    }
    
    fit <- do.call(optmethod, c(argstrust, argsobj))
    
    # Keep only numeric attributes of object returned by trust()
    attr.fit <- attributes(fit)
    keep.attr <- sapply(attr.fit, is.numeric)
    fit <- fit[1:length(fit)] # deletes attributes
    if (any(keep.attr)) attributes(fit) <- c(attributes(fit), attr.fit[keep.attr]) # attach numeric attributes
    
    
    # In some crashes a try-error object is returned which is not a list. Since
    # each element in the parlist is assumed to be a list, we wrap these cases.
    if (!is.list(fit)) {
      f <- list()
      f$error <- fit
      fit <- f
    }
    
    fit$parinit <- argstrust$parinit
    
    # Write current fit to disk
    if (output) {
      saveRDS(fit, file = file.path(interResultFolder, paste0("fit-", i, ".Rda")))
      if (cautiousMode) dput(fit[c("value", "argument", "iterations", "converged")], file = file.path(interResultFolder, paste0("fit-", i, ".R")))
      # Reporting
      # With concurent jobs and everyone reporting, this is a classic race
      # condition. Assembling the message beforhand lowers the risk of interleaved
      # output to the log.
      msgSep <- "-------"
      if (any(names(fit) == "error")) {
        msg <- paste0(msgSep, "\n",
                      "Fit ", i, " failed after ", fit$iterations, " iterations with error\n",
                      "--> ", fit$error,
                      msgSep, "\n")
        
        writeLines(msg, logfile)
        flush(logfile)
      } else {
        msg <- paste0(msgSep, "\n",
                      "Fit ", i, " completed\n",
                      "--> iterations : ", fit$iterations, "\n",
                      "-->  converged : ", fit$converged, "\n",
                      "--> obj. value : ", round(fit$value, digits = 2), "\n",
                      msgSep)
        
        writeLines(msg, logfile)
        flush(logfile)
      }
    }
    return(fit)
  })
  
  close(logfile)
  
  if (Sys.info()[['sysname']] == "Windows" & cores > 1) {
    
    parallel::stopCluster(cluster)
    doParallel::stopImplicitCluster()
    
  }
  
  
  
  # Cull failed and completed fits Two kinds of errors occure. The first returns
  # an object of class "try-error". The reason for these failures are unknown to
  # me. The second returns a list of results from trust(), where one name of the
  # list is error holding an object of class "try-error". These abortions are 
  # due to errors which are captured within trust(). Completed fits return with 
  # a valid result list from trust(), with "error" not part of its names. These
  # fits, can still be unconverged, if the maximim number of iterations was the
  # reason for the return of trust(). Be also aware of fits which converge due
  # to the trust radius hitting rmin. Such fits are reported as converged but
  # are not in truth.
  m_trustFlags.converged = 0
  m_trustFlags.unconverged = 1
  m_trustFlags.error = 2
  m_trustFlags.fatal = 3
  idxStatus <- sapply(m_parlist, function(fit) {
    if (inherits(fit, "try-error") || any(names(fit) == "error")) {
      return(m_trustFlags.error)
    } else if (!any(names(fit) == "converged")) {
      return(m_trustFlags.fatal)
    } else if (fit$converged) {
      return(m_trustFlags.converged)
    } else {
      return(m_trustFlags.unconverged)
    }
  })
  
  
  # Wrap up
  # Write out results
  if (output) saveRDS(m_parlist, file = fileParList)
  
  # Remove temporary files
  if (!cautiousMode) {
    unlink(interResultFolder, recursive = TRUE)
  } else {
    for (f in list.files(interResultFolder, "Rda$")) unlink(f)
  }
  
  # Show summary
  sum.error <- sum(idxStatus == m_trustFlags.error)
  sum.fatal <- sum(idxStatus == m_trustFlags.fatal)
  sum.unconverged <- sum(idxStatus == m_trustFlags.unconverged)
  sum.converged <- sum(idxStatus == m_trustFlags.converged)
  msg <- paste0("Multi start trust summary\n",
                "Outcome     : Occurrence\n",
                "Error       : ", sum.error, "\n",
                "Fatal       : ", sum.fatal, " must be 0\n",
                "Unconverged : ", sum.unconverged, "\n",
                "Converged   : ", sum.converged, "\n",
                "           -----------\n",
                "Total       : ", sum.error + sum.fatal + sum.unconverged + sum.converged, paste0("[", fits, "]"), "\n")
  logfile <- file(fileLog, open = "a")
  writeLines(msg, logfile)
  flush(logfile)
  close(logfile)
  
  if (stats) {
    cat(msg)
  }
  
  return(m_parlist)
}

#' Reproducibly construct "random" parframes
#' 
#' The output of this function can be used for the \code{center} - argument of \code{\link{mstrust}}
#'
#' @param pars Named vector. If \code{samplefun} has a "mean"-argument, values of pars will used as mean
#' @param n Integer how many lines should the parframe have
#' @param seed Seed for the random number generator
#' @param samplefun random number generator: \code{\link{rnorm}}, \code{\link{runif}}, etc...
#' @param ... arguments going to samplefun
#'
#' @return parframe (without metanames)
#' @export
#' 
#' @seealso \code{\link{mstrust}} and \code{\link{parframe}}
#'
#' @examples
#' msParframe(c(a = 0, b = 100000), 5)
#' 
#' # Parameter specific sigma
#' msParframe(c(a = 0, b = 100000), 5, samplefun = rnorm, sd = c(100, 0.5))
msParframe <- function(pars, n = 20, seed = 12345, samplefun = stats::rnorm, ...) {
  set.seed(seed)
  
  if (n == 1)
    return(parframe(as.data.frame(t(pars))))
  
  # generate random pars
  rnd <- samplefun((n-1)*length(pars), ...)
  mypars <- matrix(rnd, nrow = (n-1), byrow = T)
  mean_pars <- 0
  if ("mean" %in% names(formals(samplefun)))
    mean_pars <- t(matrix(pars, nrow = length(pars), ncol = (n-1)))
  mypars <- mypars + mean_pars
  
  # assure that pars itself is also part
  mypars <- rbind(t(pars), mypars)
  mypars <- `names<-`(as.data.frame(mypars), names(pars))
  
  parframe(mypars)
}


#' Construct fitlist from temporary files.
#'
#' @description An aborted \code{\link{mstrust}}
#'   leaves behind results of already completed fits. This command loads these
#'   fits into a fitlist.
#'
#' @param folder Path to the folder where the fit has left its results.
#'
#' @details The command \code{\link{mstrust}} saves
#'   each completed fit along the multi-start sequence such that the results can
#'   be resurected on abortion. This command loads a fitlist from these
#'   intermediate results.
#'
#' @return An object of class parlist.
#'
#' @seealso \code{\link{mstrust}}
#'
#' @author Wolfgang Mader, \email{Wolfgang.Mader@@fdm.uni-freiburg.de}
#'
#' @export
load.parlist <- function(folder) {
  # Read in all fits
  m_fileList <- dir(folder, pattern = "*.Rda")
  m_parVec <- lapply(m_fileList, function(file) {
    return(readRDS(file.path(folder, file)))
  })
  
  return(as.parlist(m_parVec))
}




#' Reduce replicated measurements to mean and standard deviation
#'
#' @description
#' Obtain the mean and standard deviation from replicates per condition.
#'
#' @param file Data file of csv. See Format for details.
#' @param select Names of the columns in the data file used to define
#'        conditions, see Details.
#' @param datatrans Character vector describing a function to transform data.
#'        Use \kbd{x} to refere to data.
#'
#'
#' @format
#' The following columns are mandatory for the data file.
#' \describe{
#'  \item{name}{Name of the observed species.}
#'  \item{time}{Measurement time point.}
#'  \item{value}{Measurement value.}
#'  \item{condition}{The condition under which the observation was made.}
#' }
#'
#' In addition to these columns, any number of columns can follow to allow a
#' fine grained definition of conditions. The values of all columns named in
#' \option{select} are then merged to get the set of conditions.
#'
#' @details
#' Experiments are usually repeated multiple times possibly under different
#' conditions leading to replicted measurements. The column "Condition" in the
#' data allows to group the data by their condition. However, sometimes, a more
#' fine grained grouping is desirable. In this case, any number of additional
#' columns can be append to the data. These columns are referred to as
#' "condition identifier". Which of the condition identifiers are used to do the
#' grouping is user defined by anouncing the to \option{select}. The mandatory
#' column "Condition" is always used. The total set of different conditions is
#' thus defined by all combinations of values occuring in the selected condition
#' identifiers. The replicates of each condition are then reduced to mean and
#' variance.New conditions names are derived by merging all conditions which
#' were used in mean and std.
#'
#' @return
#' A data frame of the following variables
#' \describe{
#'  \item{time}{Measurement time point.}
#'  \item{name}{Name of the observed species.}
#'  \item{value}{Mean of replicates.}
#'  \item{sigma}{Standard error of the mean, NA for single measurements.}
#'  \item{n}{The number of replicates reduced.}
#'  \item{condition}{The condition for which the value and sigma were calculated. If
#'        more than one column were used to define the condition, this variable
#'        holds the effecive condition which is the combination of all applied
#'        single conditions. }
#' }
#'
#' @author Wolfgang Mader, \email{Wolfgang.Mader@@fdm.uni-freiburg.de}
#'
#' @export
reduceReplicates <- function(file, select = "condition", datatrans = NULL) {
  
  # File format definiton
  fmtnames <- c("name", "time",  "value", "condition")
  fmtnamesnumber <- length(fmtnames)
  
  # Read data and sanity checks
  data <- read.csv(file)
  if (length(intersect(names(data), fmtnames)) != fmtnamesnumber) {
    stop(paste("Mandatory column names are:", paste(fmtnames, collapse = ", ")))
  }
  
  # Transform data if requested
  if (is.character(datatrans)) {
    x <- data$value
    data$value <- eval(parse(text = datatrans))
  }
  
  # Experiments are usually repeated multiple times possibly under different
  # conditions. The column "Condition" in the data thus groups the data per
  # condition. However, sometimes, a more fine grained grouping is desirable. In
  # this case, any number of additional columns can be append to the data. These
  # columns are referred to as "condition identifier". Which of the condition
  # identifiers are used to do the grouping is user defined by giving their
  # names in <select>. The mandatory column "Condition" is always used. The
  # total set of different conditions is thus defined by all combinations of
  # values occuring in the condition identifiers named for grouping. Mean and
  # variance is computed for each condition by averaging over measurements
  # recorded at the same time point. New conditions names are derived by merging
  # all conditions which were used in mean and std.
  select <- unique(c("name", "time", "condition", select))
  condidnt <- Reduce(paste, subset(data, select = select))
  conditions <- unique(condidnt)
  reduct <- do.call(rbind, lapply(conditions, function(cond) {
    conddata <- data[condidnt == cond,]
    mergecond <- Reduce(paste, conddata[1, setdiff(select, c("name", "time"))])
    data.frame(time = conddata[1, "time"],
               value = mean(conddata[, "value"]),
               sigma = sd(conddata[, "value"])/sqrt(nrow(conddata)),
               n = nrow(conddata),
               name = conddata[1, "name"],
               condition = mergecond)
  }))
  
  return(reduct)
}



#' Fit an error model
#'
#' @description Fit an error model to reduced replicate data, see
#'   \code{\link{reduceReplicates}}.
#'
#' @param data Reduced replicate data, see \code{\link{reduceReplicates}}. Need 
#'   columns "value", "sigma", "n".
#' @param factors \option{data} is pooled with respect to the columns named
#'   here, see Details.
#' @param errorModel Character vector defining the error model in terms of the variance. 
#'   Use \kbd{x} to reference the independend variable, see Details.
#' @param par Inital values for the parameters of the error model.
#' @param plotting If TRUE, a plot of the pooled variance together with the fit
#'   of the error model is shown.
#' @param blather If TRUE, additional information is returned, such as fit parameters 
#'  and sigmaLS (original sigma given in input data).
#' @param ... Parameters handed to the optimizer \code{\link{optim}}.
#'
#' @details The variance estimator using \eqn{n-1} data points is \eqn{chi^2}
#'   distributed with \eqn{n-1} degrees of freedom. Given replicates for
#'   consecutive time points, the sample variance can be assumed a function of
#'   the sample mean. By defining an error model which must hold for all time
#'   points, a maximum likelihood estimator for the parameters of the error
#'   model can be derived. The parameter \option{errorModel} takes the error
#'   model as a character vector, where the mean (independent variable) is
#'   refered to as \kbd{x}.
#'
#'   It is desireable to estimate the variance from many replicates. The
#'   parameter \option{data} must provide one or more columns which define the
#'   pooling of data. In case more than one column is announced by
#'   \option{factors}, all combinations are constructed. If, e.g.,
#'   \option{factors = c("condition", "name")} is used, where "condition" is
#'   "a", "b", "c" and repeating and "name" is "d", "e" and repeating, the
#'   effective conditions used for pooling are "a d", "b e", "c d", "a e", "b
#'   d", and "c e".
#'
#'   By default, a plot of the pooled data, sigma and its confidence bound at
#'   68\% and 95\% is shown.
#'
#' @return Returned by default is a data frame with columns as in \option{data}, 
#'   but with the sigma values replaced by the derived values, obtained by evaluating 
#'   the error model with the fit parameters. 
#'   
#'   If the blather = TRUE option is chosen, fit values of the parameters of the error
#'   model are appended, with the column names equal to the parameter names. 
#'   The error model is appended as the attribute "errorModel".
#'   Confidence bounds for sigma at confidence level 68\% and 95\% are
#'   calculated, their values come next in the returned data frame. Finally, the
#'   effective conditions are appended to easily check how the pooling was done.
#'
#' @author Wolfgang Mader, \email{Wolfgang.Mader@@fdm.uni-freiburg.de}
#'
#' @export
#' @importFrom stats D approx optim qchisq sd time
fitErrorModel <- function(data, factors, errorModel = "exp(s0)+exp(srel)*x^2",
                          par = c(s0 = 1, srel = .1), plotting = TRUE, blather = FALSE, ...) {
  
  # Assemble conditions
  condidnt <- Reduce(paste, subset(data, select = factors))
  conditions <- unique(condidnt)
  
  
  # Fit error model
  nColData <- ncol(data)
  dataErrorModel <- cbind(data, as.list(par))
  
  for (cond in conditions) {
    subdata <- dataErrorModel[condidnt == cond,]
    x <- subdata$value
    n <- subdata$n
    y <- subdata$sigma*sqrt(n)
    
    obj <- function(par) {
      value <- with(as.list(par), {
        z <- eval(parse(text = errorModel))
        sum(log(z)-log(dchisq((n-1)*(y^2)/z, df = n-1)), na.rm = TRUE)
      })
      return(value)
    }
    
    fit <- optim(par = par, fn = obj, ...)
    sigma <- sqrt(with(as.list(fit$par), eval(parse(text = errorModel))))
    dataErrorModel[condidnt == cond, ]$sigma <- sigma 
    dataErrorModel[condidnt == cond, -(nColData:1)] <- data.frame(as.list(fit$par))
  }
  
  
  # Calculate confidence bounds about sigma
  p68 <- (1-.683)/2
  p95 <- (1-.955)/2
  dataErrorModel$cbLower68 <- dataErrorModel$sigma^2*qchisq(p = p68, df = dataErrorModel$n-1)/(dataErrorModel$n-1)
  dataErrorModel$cbUpper68 <- dataErrorModel$sigma^2*qchisq(p = p68, df = dataErrorModel$n-1, lower.tail = FALSE)/(dataErrorModel$n-1)
  dataErrorModel$cbLower95 <- dataErrorModel$sigma^2*qchisq(p = p95, df = dataErrorModel$n-1)/(dataErrorModel$n-1)
  dataErrorModel$cbUpper95 <- dataErrorModel$sigma^2*qchisq(p = p95, df = dataErrorModel$n-1, lower.tail = FALSE)/(dataErrorModel$n-1)
  
  
  # Assemble result
  dataErrorModel <- cbind(dataErrorModel, condidnt, sigmaLS = data$sigma)
  attr(dataErrorModel, "errorModel") <- errorModel
  
  
  # Plot if requested
  if (plotting) {
    print(ggplot(dataErrorModel, aes(x=value)) +
            geom_point(aes(y=sigmaLS^2*(n))) +
            geom_line(aes(y=sigma^2)) +
            geom_ribbon(aes(ymin=cbLower95, ymax=cbUpper95), alpha=.3) +
            geom_ribbon(aes(ymin=cbLower68, ymax=cbUpper68), alpha=.3) +
            ylab("variance") +
            facet_wrap(~condidnt, scales = "free") +
            scale_y_log10() +
            theme_dMod()
    )}
  
  # Return standard error of the mean
  dataErrorModel$sigma <- dataErrorModel$sigma/sqrt(dataErrorModel$n)
  data$sigma <- dataErrorModel$sigma
  if(blather)
    return(dataErrorModel)
  else 
    return(data)
}
dkaschek/dMod documentation built on April 28, 2024, 4:13 p.m.