R/statistics.R

Defines functions fitErrorModel reduceReplicates.character reduceReplicates.data.frame reduceReplicates load.parlist msParframe mstrust vcov confint.parframe progressBar profile

Documented in confint.parframe fitErrorModel load.parlist msParframe mstrust profile progressBar reduceReplicates reduceReplicates.character reduceReplicates.data.frame vcov

#' Profile-likelihood (PL) computation
#' 
#' @param obj Objective function \code{obj(pars, fixed, ...)} returning a list with "value",
#' "gradient" and "hessian". If attribute "valueData" and/or "valuePrior are returned they are attached to the return value.
#' @param pars Parameter vector corresponding to the log-liklihood optimum.
#' @param whichPar Numeric or character vector. The parameters for which the profile is computed.
#' @param alpha Numeric, the significance level based on the chisquare distribution with df=1
#' @param limits Numeric vector of length 2, the lower and upper deviance from the original 
#' value of \code{pars[whichPar]}
#' @param method Character, either \code{"integrate"} or \code{"optimize"}. This is a short-cut for
#' setting stepControl, algoControl and optControl by hand.
#' @param stepControl List of arguments controlling the step adaption. Defaults to integration set-up, i.e.
#' \code{list(stepsize = 1e-4, min = 1e-4, max = Inf, atol = 1e-2, rtol = 1e-2, limit = 100)}
#' @param algoControl List of arguments controlling the fast PL algorithm. defaults to
#' \code{list(gamma = 1, W = "hessian", reoptimize = FALSE, correction = 1, reg = .Machine$double.eps)}
#' @param optControl List of arguments controlling the \code{trust()} optimizer. Defaults to
#' \code{list(rinit = .1, rmax = 10, iterlim = 10, fterm = sqrt(.Machine$double.eps), mterm = sqrt(.Machine$double.eps))}.
#' See \link{trust} for more details.
#' @param verbose Logical, print verbose messages.
#' @param cores number of cores used when computing profiles for several
#' parameters.
#' @param cautiousMode Logical, write every step to disk and don't delete intermediate results
#' @param side either, "left", "right" or "both": determines the side of the profile which is calculated (usefeull for parallelization). default is "both"
#' @param ... Arguments going to obj()
#' @details Computation of the profile likelihood is based on the method of Lagrangian multipliers
#' and Euler integration of the corresponding differential equation of the profile likelihood paths.
#' 
#' \code{algoControl}: Since the Hessian which is needed for the differential equation is frequently misspecified, 
#' the error in integration needs to be compensated by a correction factor \code{gamma}. Instead of the
#' Hessian, an identity matrix can be used. To guarantee that the profile likelihood path stays on
#' the true path, each point proposed by the differential equation can be used as starting point for
#' an optimization run when \code{reoptimize = TRUE}. The correction factor \code{gamma} is adapted
#' based on the amount of actual correction. If this exceeds the value \code{correction}, \code{gamma} is
#' reduced. In some cases, the Hessian becomes singular. This leads to problems when inverting the
#' Hessian. To avoid this problem, the pseudoinverse is computed by removing all singular values lower
#' than \code{reg}.
#' 
#' \code{stepControl}: The Euler integration starts with \code{stepsize}. In each step the predicted change
#' of the objective function is compared with the actual change. If this is larger than \code{atol}, the
#' stepsize is reduced. For small deviations, either compared the abolute tolerance \code{atol} or the
#' relative tolerance \code{rtol}, the stepsize may be increased. \code{max} and \code{min} are upper and lower
#' bounds for \code{stepsize}. \code{limit} is the maximum number of steps that are take for the profile computation.
#' \code{stop} is a character, usually "value" or "data", for which the significance level \code{alpha}
#' is evaluated.
#' 
#' @return Named list of length one. The name is the parameter name. The list enty is a
#' matrix with columns "value" (the objective value), "constraint" (deviation of the profiled paramter from
#' the original value), "stepsize" (the stepsize take for the iteration), "gamma" (the gamma value employed for the
#' iteration), "valueData" and "valuePrior" (if specified in obj), one column per parameter (the profile paths).
#' @example inst/examples/profiles.R
#' @export
profile <- function(obj, pars, whichPar, alpha = 0.05, 
                    limits = c(lower = -Inf, upper = Inf), 
                    method = c("integrate", "optimize"),
                    stepControl = NULL, 
                    algoControl = NULL,
                    optControl  = NULL,
                    verbose = FALSE,
                    cores = 1,
                    cautiousMode = FALSE,
                    side = c("both", "left", "right")[1],
                    ...) {
  # Ensure that obj is defined in this environment such that it is copied to the parallel workers
  force(obj)
  
  # sanitize "side" argument, must be either "left", "right" or "both"
  if (!(side %in% c("left", "right", "both"))) stop("side must be either 'left', 'right' or 'both'")
  
  # Guarantee that pars is named numeric without deriv attribute
  dotArgs <- list(...)
  sanePars <- sanitizePars(pars, dotArgs$fixed)
  pars <- sanePars$pars
  fixed <- sanePars$fixed
  
  
  # Initialize control parameters depending on method
  method  <- match.arg(method)
  if (method == "integrate") {
    sControl <- list(stepsize = 1e-4, min = 1e-4, max = Inf, atol = 1e-2, rtol = 1e-2, limit = 500, stop = "value")
    aControl <- list(gamma = 1, W = "hessian", reoptimize = FALSE, correction = 1, reg = .Machine$double.eps)
    oControl <- list(rinit = .1, rmax = 10, iterlim = 10, fterm = sqrt(.Machine$double.eps), mterm = sqrt(.Machine$double.eps))
  }
  if (method == "optimize") {
    sControl <- list(stepsize = 1e-2, min = 1e-4, max = Inf, atol = 1e-1, rtol = 1e-1, limit = 100, stop = "value")
    aControl <- list(gamma = 0, W = "identity", reoptimize = TRUE, correction = 1, reg = 0)
    oControl <- list(rinit = .1, rmax = 10, iterlim = 100, fterm = sqrt(.Machine$double.eps), mterm = sqrt(.Machine$double.eps))
  }
  
  # Check if on Windows
  cores <- min(length(whichPar), cores)
  cores <- sanitizeCores(cores)
  
  # Substitute user-set control parameters
  if (!is.null(stepControl)) sControl[match(names(stepControl), names(sControl))] <- stepControl
  if (!is.null(algoControl)) aControl[match(names(algoControl), names(aControl))] <- algoControl
  if (!is.null(optControl )) oControl[match(names(optControl), names(oControl))] <- optControl
  
  
  # Create interRes folder for cautiousMode
  if (cautiousMode){
    interResFolder <- "profiles-interRes"
    dir.create(interResFolder,showWarnings = FALSE)
  }
  
  
  # Start cluster if on windows
  if (cores > 1) {
    
    if (Sys.info()[['sysname']] == "Windows") {
      
      cluster <- parallel::makeCluster(cores)
      doParallel::registerDoParallel(cl = cluster)
      
      parallel::clusterCall(cl = cluster, function(x) .libPaths(x), .libPaths())
      
      varlist <- ls()
      # Exclude things like "missing argument"
      varlist <- c("obj", "whichPar", "alpha", "limits", "method", "verbose", "cores",
                   "pars", "fixed", "dotArgs",
                   "sControl", "aControl", "oControl")
      parallel::clusterExport(cluster, envir = environment(), varlist = varlist)
      
    } else {
      
      doParallel::registerDoParallel(cores = cores)
      
    }
    
    "%mydo%" <- foreach::"%dopar%"
    
    
  } else {
    
    "%mydo%" <- foreach::"%do%"
    
  }
  
  
  # Convert whichPar to index vector
  if (is.character(whichPar)) whichPar <- which(names(pars) %in% whichPar)
  
  loaded_packages <- .packages()  
  out <- foreach::foreach(whichIndex = whichPar, 
                          .packages = loaded_packages, 
                          .inorder = TRUE,
                          .options.multicore = list(preschedule = FALSE)) %mydo% {
                            
                            loadDLL(obj)
                            
                            
                            whichPar.name <- names(pars)[whichIndex]
                            
                            
                            
                            ## Functions needed during profile computation -----------------------
                            obj.opt <- obj
                            obj.prof <- function(p, ...) {
                              out <- obj(p, ...)
                              # If "identity", substitute hessian such that steps are in whichPar-direction.
                              Id <- diag(1/.Machine$double.eps, length(out$gradient))
                              Id[whichIndex, whichIndex] <- 1
                              colnames(Id) <- rownames(Id) <- names(out$gradient)
                              
                              W <- match.arg(aControl$W[1], c("hessian", "identity"))
                              out$hessian <- switch(W,
                                                    "hessian" = out$hessian,
                                                    "identity" = Id)
                              return(out)    
                            }
                            
                            pseudoinverse <- function(m, tol) {
                              msvd <- svd(m)
                              index <- which(abs(msvd$d) > max(dim(m))*max(msvd$d)*tol) 
                              if (length(index) == 0) {
                                out <- array(0, dim(m)[2:1])
                              }
                              else {
                                out <- msvd$u[,index] %*% (1/msvd$d[index] * t(msvd$v)[index,])
                              }
                              attr(out, "valid") <- 1:length(msvd$d) %in% index
                              return(out)
                            }
                            
                            constraint <- function(p) {
                              value <- p[whichIndex] - pars[whichIndex]
                              gradient <- rep(0, length(p))
                              gradient[whichIndex] <- 1
                              return(list(value = value, gradient = gradient))
                            }
                            lagrange <- function(y) {
                              
                              # initialize values
                              p <- y
                              lambda <- 0
                              out <- do.call(obj.prof, c(list(p = p), dotArgs))
                              g.original <- constraint(p)
                              
                              # evaluate derivatives and constraints
                              g     <- direction * g.original$value
                              gdot  <- direction * g.original$gradient
                              ldot  <- out$gradient
                              lddot <- out$hessian 
                              
                              # compute rhs of profile ODE
                              M <- rbind(cbind(lddot, gdot), 
                                         matrix(c(gdot, 0), nrow=1))
                              
                              v <- c(-rep(gamma, length(p))*(ldot + lambda*gdot), 1)
                              v0 <- c(-rep(0, length(p))*(ldot + lambda*gdot), 1)
                              
                              W <- pseudoinverse(M, tol = aControl$reg)
                              valid <- attr(W, "valid")
                              if(any(!valid)) {
                                dy <- try(as.vector(W%*%v)[1:length(p)], silent=FALSE)
                                dy0 <- try(as.vector(W%*%v0)[1:length(p)], silent=FALSE)
                                dy[!valid[1:length(p)]] <- dy0[!valid[1:length(p)]] <- 0
                                dy[whichIndex] <- dy0[whichIndex] <- direction
                                warning(paste0("Iteration ", i, ": Some singular values of the Hessian are below the threshold. Optimization will be performed."))
                              } else {
                                dy <- try(as.vector(W%*%v)[1:length(p)], silent=FALSE)
                                dy0 <- try(as.vector(W%*%v0)[1:length(p)], silent=FALSE)
                              }
                              
                              
                              if(!inherits(dy, "try-error")) {
                                names(dy) <- names(y) 
                                correction <- sqrt(sum((dy-dy0)^2))/sqrt(sum(dy^2))
                              } else {
                                dy <- NA
                                correction <- 0
                                warning(paste0("Iteration ", i, ": Impossible to invert Hessian. Trying to optimize instead."))
                              }
                              
                              # Get attributes of the returned objective value (only if numeric)
                              out.attributes <- attributes(out)[sapply(attributes(out), is.numeric)]
                              out.attributes.names <- names(out.attributes)
                              
                              
                              return(c(list(dy = dy, 
                                            value = out$value, 
                                            gradient = out$gradient, 
                                            correction = correction, 
                                            valid = valid, 
                                            attributes = out.attributes.names),
                                       out.attributes))
                              
                              
                              
                              
                            }
                            doIteration <- function() {
                              
                              optimize <- aControl$reoptimize
                              # Check for error in evaluation of lagrange()
                              if(is.na(dy[1])) {
                                #cat("Evaluation of lagrange() not successful. Will optimize instead.\n")
                                optimize <- TRUE
                                y.try <- y
                                y.try[whichIndex] <- y[whichIndex] + direction*stepsize
                                rinit <- oControl$rinit
                              } else {
                                dy.norm <- sqrt(sum(dy^2))
                                rinit <- min(c(oControl$rinit, 3*dy.norm))
                                y.try <- y + dy
                                if(any(!lagrange.out$valid)) optimize <- TRUE
                              }
                              
                              # Do reoptimization if requested or necessary
                              if(optimize) {      
                                parinit.opt <- y.try[-whichIndex]
                                fixed.opt <- c(fixed, y.try[whichIndex])
                                
                                arglist <- c(list(objfun = obj.opt, parinit = parinit.opt, fixed = fixed.opt, rinit = rinit), 
                                             oControl[names(oControl)!="rinit"],
                                             dotArgs[names(dotArgs) != "fixed"])
                                
                                
                                myfit <- try(do.call(trust, arglist), silent=FALSE)
                                if(!inherits(myfit, "try-error")) {
                                  y.try[names(myfit$argument)] <- as.vector(myfit$argument)  
                                } else {
                                  warning("Optimization not successful. Profile may be erroneous.")
                                }
                                
                              }
                              
                              return(y.try)
                              
                            }
                            doAdaption <- function() {
                              
                              lagrange.out.try <- lagrange(y.try)
                              valid <- TRUE
                              
                              # Predicted change of the objective value
                              dobj.pred <- sum(lagrange.out$gradient*(y.try - y))
                              dobj.fact <- lagrange.out.try$value - lagrange.out$value
                              correction <- lagrange.out.try$correction
                              
                              # Gamma adaption based on amount of actual correction
                              if (correction > aControl$correction) gamma <- gamma/2
                              if (correction < 0.5*aControl$correction) gamma <- min(c(aControl$gamma, gamma*2))
                              
                              # Stepsize adaption based on difference in predicted change of objective value
                              if (abs(dobj.fact - dobj.pred) > sControl$atol & stepsize > sControl$min) {
                                stepsize <- max(c(stepsize/1.5, sControl$min))
                                valid <- FALSE
                              }
                              if (abs(dobj.fact - dobj.pred) < .3*sControl$atol | abs((dobj.fact - dobj.pred)/dobj.fact) < .3*sControl$rtol) {
                                stepsize <- min(c(stepsize*2, sControl$max))
                              }
                              
                              ## Verbose
                              if (verbose) {
                                # Compute progres
                                diff.thres <- diff.steps <- diff.limit <- 0
                                if (threshold < Inf)
                                  diff.thres <- 1 - max(c(0, min(c(1, (threshold - lagrange.out.try$value)/delta))))
                                if (sControl$limit < Inf)
                                  diff.steps <- i/sControl$limit
                                diff.limit <- switch(as.character(sign(constraint.out$value)),
                                                     "1"  = 1 - (limits[2] - constraint.out$value)/limits[2],
                                                     "-1" = diff.limit <- 1 - (limits[1] - constraint.out$value)/limits[1],
                                                     "0"  = 0)
                                
                                percentage <- max(c(diff.thres, diff.steps, diff.limit), na.rm = TRUE)*100
                                progressBar(percentage)
                                
                                
                                #cat("diff.thres:", diff.thres, "diff.steps:", diff.steps, "diff.limit:", diff.limit)
                                myvalue <- format(substr(lagrange.out$value  , 0, 8), width = 8)
                                myconst <- format(substr(constraint.out$value, 0, 8), width = 8)
                                mygamma <- format(substr(gamma               , 0, 8), width = 8)
                                myvalid <- all(lagrange.out$valid)
                                cat("\tvalue:", myvalue, "constraint:", myconst, "gamma:", mygamma, "valid:", myvalid) 
                              }
                              
                              
                              
                              return(list(lagrange = lagrange.out.try, stepsize = stepsize, gamma = gamma, valid = valid))
                              
                              
                            }
                            
                            ## Compute profile -------------------------------------------------
                            
                            # Initialize profile
                            i <- 0 
                            direction <- 1
                            gamma <- aControl$gamma
                            stepsize <- sControl$stepsize
                            ini <- pars
                            
                            lagrange.out <- lagrange(ini)
                            constraint.out <- constraint(pars)
                            
                            delta <- qchisq(1-alpha, 1)
                            #delta.t <- qf(1 - alpha, 1L, nobs - npar)
                            
                            
                            
                            threshold <- lagrange.out[[sControl$stop]] + delta
                            out.attributes <- unlist(lagrange.out[lagrange.out$attributes])
                            
                            out <- c(value = lagrange.out$value, 
                                     constraint = as.vector(constraint.out$value), 
                                     stepsize = stepsize, 
                                     gamma = gamma, 
                                     whichPar = whichIndex,
                                     out.attributes, ini)
                            if (side %in% c("right", "both")) {
                              # Compute right profile
                              if (verbose) {
                                cat("Compute right profile\n")
                              }
                              direction <- 1
                              gamma <- aControl$gamma
                              stepsize <- sControl$stepsize
                              y <- ini
                              
                              lagrange.out <- lagrange.out
                              constraint.out <- constraint.out
                              
                              while (i < sControl$limit) {
                                
                                ## Iteration step
                                sufficient <- FALSE
                                retry <- 0
                                while (!sufficient & retry < 5) {
                                  dy <- stepsize*lagrange.out$dy
                                  y.try <- try(doIteration(), silent = TRUE)
                                  out.try <- try(doAdaption(), silent = TRUE)
                                  if (inherits(y.try, "try-error") | inherits(out.try, "try-error")) {
                                    sufficient <- FALSE
                                    stepsize <- stepsize/1.5
                                    retry <- retry + 1
                                  } else {
                                    sufficient <- out.try$valid
                                    stepsize <- out.try$stepsize  
                                  }
                                  
                                }    
                                if (inherits(y.try, "try-error") | inherits(out.try, "try-error")) break
                                
                                
                                ## Set values
                                y <- y.try
                                lagrange.out <- out.try$lagrange
                                constraint.out <- constraint(y.try)
                                stepsize <- out.try$stepsize
                                gamma <- out.try$gamma
                                out.attributes <- unlist(lagrange.out[lagrange.out$attributes])
                                
                                ## Return values 
                                out <- rbind(out, 
                                             c(value = lagrange.out$value, 
                                               constraint = as.vector(constraint.out$value), 
                                               stepsize = stepsize, 
                                               gamma = gamma, 
                                               whichPar = whichIndex,
                                               out.attributes, 
                                               y))
                                
                                if(cautiousMode) {
                                  outCautious <- as.data.frame(out)
                                  outCautious$whichPar <- whichPar.name
                                  outCautious <- parframe(
                                    outCautious,
                                    parameters = names(pars),
                                    metanames = c("value", "constraint", "stepsize", "gamma", "whichPar"),
                                    obj.attributes = names(out.attributes)
                                  )
                                  dput(outCautious, file = file.path(interResFolder, paste0(whichPar.name, "-right.R")))
                                }
                                
                                value <- lagrange.out[[sControl$stop]]
                                if (value > threshold | constraint.out$value > limits[2]) break
                                
                                i <- i + 1
                                
                              }
                            }
                            
                            if (side %in% c("left", "both")) {
                              # Compute left profile
                              if (verbose) {
                                cat("\nCompute left profile\n")
                              }
                              i <- 0
                              direction <- -1
                              gamma <- aControl$gamma
                              stepsize <- sControl$stepsize
                              y <- ini
                              
                              lagrange.out <- lagrange(ini)
                              constraint.out <- constraint(pars)
                              
                              while (i < sControl$limit) {
                                
                                ## Iteration step
                                sufficient <- FALSE
                                retry <- 0
                                while (!sufficient & retry < 5) {
                                  dy <- stepsize*lagrange.out$dy
                                  y.try <- try(doIteration(), silent = TRUE)
                                  out.try <- try(doAdaption(), silent = TRUE)
                                  if (inherits(y.try, "try-error") | inherits(out.try, "try-error")) {
                                    sufficient <- FALSE
                                    stepsize <- stepsize/1.5
                                    retry <- retry + 1
                                  } else {
                                    sufficient <- out.try$valid
                                    stepsize <- out.try$stepsize  
                                  }
                                  
                                }
                                if (inherits(y.try, "try-error") | inherits(out.try, "try-error")) break
                                
                                ## Set values
                                y <- y.try
                                lagrange.out <- out.try$lagrange
                                constraint.out <- constraint(y.try)
                                stepsize <- out.try$stepsize
                                gamma <- out.try$gamma
                                out.attributes <- unlist(lagrange.out[lagrange.out$attributes])
                                
                                
                                ## Return values
                                out <- rbind(c(value = lagrange.out$value, 
                                               constraint = as.vector(constraint.out$value), 
                                               stepsize = stepsize, 
                                               gamma = gamma,
                                               whichPar = whichIndex,
                                               out.attributes,
                                               y), 
                                             out)
                                
                                if(cautiousMode) {
                                  outCautious <- as.data.frame(out)
                                  outCautious$whichPar <- whichPar.name
                                  outCautious <- parframe(
                                    outCautious,
                                    parameters = names(pars),
                                    metanames = c("value", "constraint", "stepsize", "gamma", "whichPar"),
                                    obj.attributes = names(out.attributes)
                                  )
                                  dput(outCautious, file = file.path(interResFolder, paste0(whichPar.name, "-left.R")))
                                }
                                
                                
                                value <- lagrange.out[[sControl$stop]]
                                if (value > threshold | constraint.out$value < limits[1]) break
                                
                                i <- i + 1
                                
                              }
                            }
                            # Output
                            out <- as.data.frame(out)
                            out$whichPar <- whichPar.name
                            parframe(
                              out,
                              parameters = names(pars),
                              metanames = c("value", "constraint", "stepsize", "gamma", "whichPar"),
                              obj.attributes = names(out.attributes)
                            )
                            
                          }
  
  
  
  if (Sys.info()[['sysname']] == "Windows" & cores > 1) {
    
    parallel::stopCluster(cluster)
    doParallel::stopImplicitCluster()
    
  }
  
  # .. Prepare output -----
  outncol <- vapply(out, ncol, 1)
  failed <- outncol != max(outncol)
  if (any(failed)) message("The following profiles failed: ", paste0(whichPar[failed], collapse = ", "))
  out <- out[!failed]  
  
  do.call(rbind, out)
  
  
}

#' Progress bar
#' 
#' @param percentage Numeric between 0 and 100
#' @param size Integer, the size of the bar print-out
#' @param number Logical, Indicates whether the percentage should be printed out.
#' @export
progressBar <- function(percentage, size = 50, number = TRUE) {
  
  if(percentage < 0) percentage <- 0
  if(percentage > 100) percentage <- 100
  
  out <- paste("\r|", paste(rep("=", round(size*percentage/100)), collapse=""), paste(rep(" ", size-round(size*percentage/100)), collapse=""), "|", sep="")
  cat(out)
  if(number) cat(format(paste(" ", round(percentage), "%", sep=""), width=5))
  
}

#' Profile uncertainty extraction
#' 
#' @description extract parameter uncertainties from profiles
#' @param object object of class \code{parframe}, returned from \link{profile} function.
#' @param parm a specification of which parameters are to be given confidence intervals, 
#' either a vector of numbers or a vector of names. If missing, all parameters are considered.
#' @param level the confidence level required.
#' @param ... not used right now.
#' @param val.column the value column used in the parframe, usually 'data'.
#' @export
confint.parframe <- function(object, parm = NULL, level = 0.95, ..., val.column = "data") {
  
  profile <- object
  obj.attributes <- attr(profile, "obj.attributes")
  
  if (is.null(parm))
    parm <- unique(profile[["whichPar"]])
  
  threshold <- qchisq(level, df = 1)
  
  # Reduce to profiles for parm
  profile <- profile[profile[["whichPar"]] %in% parm,]
  
  # Evaluate confidence intervals per parameter
  CIs <- lapply(split(profile, profile[["whichPar"]]), function(d) {
    
    # Get origin of profile
    origin <- which.min(abs(d[["constraint"]]))
    whichPar <- d[["whichPar"]][origin]
    
    # Define function to return constraint value where threshold is passed
    get_xThreshold <- function(branch) {
      
      y <- branch[[val.column]] - d[[val.column]][origin]
      x <- branch[["constraint"]]
      
      # If less than 3 points, return NA
      if (length(x) < 3)
        return(NA)
      
      # If threshold exceeded, take closest points below and above threshold
      # and interpolate
      if (any(y > threshold)) {
        i.above <- utils::head(which(y > threshold), 1)
        i.below <- utils::tail(which(y < threshold), 1)
        if (i.below > i.above) {
          return(NA)
        } else {
          slope <- (y[i.above] - y[i.below])/(x[i.above] - x[i.below])
          dy <- threshold - y[i.below]
          dx <- dy/slope
          x_threshold <- x[i.below] + dx
          return(x_threshold)
        }
      }
      
      # If threshold not exceeded,
      # take the last 20% of points (at least 3) an perform linear fit
      n_last20 <- max(3, length(which(x - x[1] > 0.8 * (max(x) - x[1]))))
      x <- tail(x, n_last20)
      y <- tail(y, n_last20)
      slope <- sum((x - mean(x))*(y - mean(y)))/sum((x - mean(x))^2)
      
      # If slope < 0, return Inf
      if (slope < 0)
        return(Inf)
      
      # Extrapolate until threshold is passed
      dy <- threshold - tail(y, 1)
      dx <- dy/slope
      x_threshold <- tail(x, 1) + dx
      
      # Test if extrapolation takes the point of passage very far
      # Set to Inf in that case
      if (x_threshold > 10*(max(x) - min(x)))
        x_threshold <- Inf
      
      return(x_threshold)
      
      
    } 
    
    # Right profiles
    right <- d[d[["constraint"]] >= 0,]
    upper <- d[[whichPar]][origin] + get_xThreshold(right)
    
    # Left profile
    left <- d[d[["constraint"]] <= 0,]
    left[["constraint"]] <- - left[["constraint"]]
    left <- left[order(left[["constraint"]]), ]
    lower <- d[[whichPar]][origin] - get_xThreshold(left)
    
    
    data.frame(name = whichPar, 
               value = d[[whichPar]][origin], 
               lower = lower, 
               upper = upper)
    
    
  })
  
  do.call(rbind, CIs)
  
}


#' Get variance-covariance matrix from trust result
#' 
#' @param fit return from trust or selected fit from parlist
#' @param parupper upper limit for parameter values. Parameters
#' are considered fixed when exceeding these limits.
#' @param parlower lower limit for parameter values. Parameters
#' are considered fixed when exceeding these limits.
#' 
#' @export 
vcov <- function(fit, parupper = NULL, parlower = NULL) {
  
  hessian__ <- fit[["hessian"]]
  arg__ <- fit[["argument"]]
  
  if (is.null(arg__)) return()
  
  if (is.null(hessian)) {
    vcov__ <- matrix(NA, nrow = length(arg__), ncol = length(arg__), 
                     dimnames = list(names(arg__), names(arg__)))
    return(vcov__)
  }
  
  fixed <- NULL
  if (!is.null(parupper)) {
    myarg__ <- arg__[names(parupper)]
    fixed <- union(fixed, names(myarg__)[myarg__ >= parupper])
  }
  if (!is.null(parlower)) {
    myarg__ <- arg__[names(parlower)]
    fixed <- union(fixed, names(myarg__)[myarg__ <= parlower])
  }
  
  vcov__ <- 0*hessian__
  is_fixed__ <- colnames(hessian__) %in% fixed
  subhessian__ <- hessian__[!is_fixed__, !is_fixed__, drop = FALSE]
  subvcov__ <- try(solve(0.5*subhessian__), silent = TRUE)
  if (inherits(subvcov__, "try-error")) subvcov__ <- MASS::ginv(subhessian__)
  vcov__[!is_fixed__, !is_fixed__] <- subvcov__
  
  # This part should not be necessary due to regularization usually done
  # Perform identifiability check based on
  # subvcov__ <- vcov__[!is_fixed__, !is_fixed__, drop = FALSE]
  # eigen__ <- eigen(subhessian__)
  # tol__ <- sqrt(.Machine$double.eps)
  # V__ <- eigen__[["vectors"]][, abs(eigen__[["values"]]) < tol__, drop = FALSE]
  # identifiable__ <- apply(V__, 1, function(v) all(abs(v) < tol__))
  # diag(subvcov__)[!identifiable__] <- Inf
  # 
  # vcov__[!is_fixed__, !is_fixed__] <- subvcov__
  
  return(vcov__) 
  
}




#' Non-Linear Optimization, multi start
#' 
#' @description Wrapper around \code{\link{trust}} allowing for multiple fits 
#'   from randomly chosen initial values.
#'   
#' @param objfun Objective function, see \code{\link{trust}}.
#' @param center Parameter values around which the initial values for each fit 
#'   are randomly sampled. The initial values handed to \link{trust} are the sum
#'   of center and the output of \option{samplefun}, center + 
#'   \option{samplefun}. See \code{\link{trust}}, parinit.
#'   \code{center} Can also be a parframe, then the parameter values are taken 
#'   from the parframe. In this case, the \code{fits} argument is overwritten.
#'   To use a reproducible set of initial guesses, generate center with 
#'   \code{\link{msParframe}}
#' @param studyname The names of the study or fit. This name is used to 
#'   determine filenames for interim and final results. See Details.
#' @param rinit Starting trust region radius, see \code{\link{trust}}.
#' @param rmax Maximum allowed trust region radius, see \code{\link{trust}}.
#' @param fits Number of fits (jobs).
#' @param cores Number of cores for job parallelization.
#' @param samplefun Function to sample random initial values. It is assumed, 
#'   that \option{samplefun} has a named parameter "n" which defines how many 
#'   random numbers are to be returned, such as for \code{\link{rnorm}} or 
#'   \code{\link{runif}}. By default \code{\link{rnorm}} is used. Parameteres 
#'   for samplefun are simply appended as named parameters to the mstrust call 
#'   and automatically handed to samplefun by matching parameter names.
#' @param resultPath character indicating the folder where the results should 
#'   be stored. Defaults to ".". 
#' @param stats If true, the same summary statistic as written to the logfile is
#'   printed to command line on mstrust completion.
#' @param ... Additional parameters handed to trust(), samplefun(), or the 
#'   objective function by matching parameter names. All unmatched parameters 
#'   are handed to the objective function objfun(). The log file starts with a 
#'   table telling which parameter was assigend to which function.
#' @param output logical. If true, writes output to the disc.
#' @param cautiousMode Logical, write every fit to disk in deparsed form (avoids the RDA incompatibility trap) and don't delete intermediate results
#'   
#' @details By running multiple fits starting at randomly chosen inital 
#'   parameters, the chisquare landscape can be explored using a deterministic 
#'   optimizer. Here, \code{\link{trust}} is used for optimization. The standard
#'   procedure to obtain random initial values is to sample random variables 
#'   from a uniform distribution (\code{\link{rnorm}}) and adding these to 
#'   \option{center}. It is, however, possible, to employ any other sampling 
#'   strategy by handing the respective function to mstrust(), 
#'   \option{samplefun}.
#'   
#'   In case a special sampling is required, a customized sampling function can 
#'   be used. If, e.g., inital values leading to a non-physical systems are to 
#'   be discarded upfront, the objective function can be addapted accordingly.
#'   
#'   All started fits either lead to an error or complete converged or
#'   unconverged. A statistics about the return status of fits can be shown by
#'   setting \option{stats} to TRUE.
#'   
#'   Fit final and intermediat results are stored under \option{studyname}. For
#'   each run of mstrust for the same study name, a folder under
#'   \option{studyname} of the form "trial-x-date" is created. "x" is the number
#'   of the trial, date is the current time stamp. In this folder, the
#'   intermediate results are stored. These intermediate results can be loaded
#'   by \code{\link{load.parlist}}. These are removed on successfull completion
#'   of mstrust. In this case, the final list of fit parameters
#'   (parameterList.Rda) and the fit log (mstrust.log) are found instead.
#'   
#' @return A parlist holding errored and converged fits.
#'   
#' @seealso 1. \code{\link{trust}}, for the used optimizer,
#'   2. \code{\link{rnorm}}, \code{\link{runif}} for two common sampling functions,
#'   3. \code{\link{msParframe}} for passing a reproducible set of random initial 
#'   guesses to mstrust,
#'   4. \code{\link{as.parframe}} for formatting the output to a handy table
#'   
#' @author Wolfgang Mader, \email{Wolfgang.Mader@@fdm.uni-freiburg.de}
#'  
#' @export
#' @import parallel
mstrust <- function(objfun, center, studyname, rinit = .1, rmax = 10, fits = 20, cores = 1, optmethod = "trust",
                    samplefun = "rnorm", resultPath = ".", stats = FALSE, output = FALSE, cautiousMode = FALSE, start1stfromCenter = FALSE,
                    ...) {
  
  narrowing <- NULL
  
  
  # Check if on Windows
  cores <- sanitizeCores(cores)
  
  
  # Argument parsing, sorting, and enhancing
  # Gather all function arguments
  varargslist <- list(...)
  
  argslist <- list(
    objfun = objfun, center = center, studyname = studyname,
    rinit = rinit, rmax = rmax, fits = fits, 
    cores = cores, optmethod = optmethod, samplefun = samplefun, 
    resultPath = resultPath, stats = stats, output = output, 
    cautiousMode = cautiousMode)
  argslist <- c(argslist, varargslist)
  
  # Add extra arguments
  argslist$n <- length(center) # How many inital values do we need?
  
  # Determine target function for each function argument.
  # First, define argument names used locally in mstrust().
  # Second, check what trust() and samplefun() accept and check for name clashes.
  # Third, whatever is unused is passed to the objective function objfun().
  nameslocal <- c("studyname", "center", "fits", "cores", "optmethod", "samplefun",
                  "resultPath", "stats", "narrowing", "output")
  namestrust <- intersect(names(formals(trust)), names(argslist))
  namessample <- intersect(names(formals(samplefun)), names(argslist))
  if (length(intersect(namestrust, namessample) != 0)) {
    stop("Argument names of trust() and ", samplefun, "() clash.")
  }
  
  # Default optimizer
  namesobj <- setdiff(names(argslist), c(namestrust, namessample, nameslocal))
  
  # Assemble argument lists common to all calls in mclapply
  # Sample function
  argssample <- structure(vector("list", length = length(namessample)), names = namessample)
  for (name in namessample) {
    argssample[[name]] <- argslist[[name]]
  }
  
  # Objective function
  argsobj <- structure(vector("list", length = length(namesobj)), names = namesobj)
  for (name in namesobj) {
    argsobj[[name]] <- argslist[[name]]
  }
  
  # Trust optimizer, except for initial values
  argstrust <- structure(vector("list", length = length(namestrust)), names = namestrust)
  for (name in namestrust) {
    argstrust[[name]] <- argslist[[name]]
  }
  
  # Assemble and create output filenames, folders and files
  m_timeStamp <- paste0(format(Sys.time(), "%d-%m-%Y-%H%M%S"))
  
  # Folders
  resultFolderBase <- file.path(argslist$resultPath, argslist$studyname)
  m_trial <- paste0("trial-", length(dir(resultFolderBase, pattern = "trial*")) + 1)
  resultFolder <- file.path(resultFolderBase, paste0(m_trial, "-", m_timeStamp))
  
  interResultFolder <- file.path(resultFolder, "interRes")
  dir.create(path = interResultFolder, showWarnings = FALSE, recursive = TRUE)
  
  # Files
  fileNameLog <- paste0("mstrust.log")
  fileNameParList <- paste0("parameterList.Rda")
  fileLog <- file.path(resultFolder, fileNameLog)
  fileParList <- file.path(resultFolder, fileNameParList)
  
  
  
  # Apply trust optimizer in parallel
  # The error checking leverages that mclappy runs each job in a try().
  logfile <- file(fileLog, open = "w")
  
  # Parameter assignment information
  if (is.null(narrowing) || narrowing[1] == 1) {
    msg <- paste0("Parameter assignment information\n",
                  strpad("mstrust", 12),                        ": ", paste0(nameslocal, collapse = ", "), "\n",
                  strpad("trust", 12),                          ": ", paste0(namestrust, collapse = ", "), "\n",
                  strpad(as.character(argslist$samplefun), 12), ": ", paste0(namessample, collapse = ", "), "\n\n")
    #strpad(as.character(argslist$objfun), 12),    ": ", paste0(namesobj, collapse = ", "), "\n\n")
    writeLines(msg, logfile)
    flush(logfile)
  }
  
  # Write narrowing status information to file
  if (!is.null(narrowing)) {
    msg <- paste0("--> Narrowing, run ", narrowing[1], " of ", narrowing[2], "\n",
                  "--> " , fits, " fits to run\n")
    writeLines(msg, logfile)
    flush(logfile)
  }
  
  if(is.parframe(center)) {
    fits <- nrow(center)
  }
  
  
  cores <- min(fits, cores)
  if (cores > 1) {
    
    # Start cluster if on windows
    if (Sys.info()[['sysname']] == "Windows") {
      
      cluster <- parallel::makeCluster(cores)
      doParallel::registerDoParallel(cluster)
      parallel::clusterCall(cl = cluster, function(x) .libPaths(x), .libPaths())
      
      varlist <- ls()
      # Exclude things like "missing argument"
      varlist <- c("objfun", "center", "argstrust", 
                   "samplefun", "argssample", "argsobj", 
                   "output", "interResultFolder", "logfile")
      parallel::clusterExport(cluster, envir = environment(), varlist = varlist)
      
    } else {
      
      doParallel::registerDoParallel(cores = cores)
      
    }
    
    "%mydo%" <- foreach::"%dopar%"
    
  } else {
    
    "%mydo%" <- foreach::"%do%"
    
  }
  
  
  loaded_packages <- .packages()  
  m_parlist <- as.parlist(foreach::foreach(i = 1:fits, 
                                           .packages = loaded_packages, 
                                           .inorder = TRUE,
                                           .options.multicore = list(preschedule = FALSE)
  ) %mydo% {
    
    suppressMessages(loadDLL(objfun))
    
    if(is.parframe(center)) {
      argstrust$parinit <- as.parvec(center, i)
    } else {
      if (i == 1 & start1stfromCenter) {
        # First fit always starts from center
        argstrust$parinit <- center
      } else {
        # All other fits start from random positions
        argstrust$parinit <- center + do.call(samplefun, argssample)  
      }
    }
    
    # Check if traceFile is requested. In that case combine tracefile, with logfolder and fit number
    if (!is.null(argstrust[["traceFile"]])) {
      digits <- floor(log10(fits))
      argstrust[["traceFile"]] <- file.path(resultFolder, paste0(formatC(i, digits = digits, flag = "0"), "_", argslist[["traceFile"]]))
    }
    
    fit <- do.call(optmethod, c(argstrust, argsobj))
    
    # Keep only numeric attributes of object returned by trust()
    attr.fit <- attributes(fit)
    keep.attr <- sapply(attr.fit, is.numeric)
    fit <- fit[1:length(fit)] # deletes attributes
    if (any(keep.attr)) attributes(fit) <- c(attributes(fit), attr.fit[keep.attr]) # attach numeric attributes
    
    
    # In some crashes a try-error object is returned which is not a list. Since
    # each element in the parlist is assumed to be a list, we wrap these cases.
    if (!is.list(fit)) {
      f <- list()
      f$error <- fit
      fit <- f
    }
    
    fit$parinit <- argstrust$parinit
    
    # Write current fit to disk
    if (output) {
      saveRDS(fit, file = file.path(interResultFolder, paste0("fit-", i, ".Rda")))
      if (cautiousMode) dput(fit[c("value", "argument", "iterations", "converged")], file = file.path(interResultFolder, paste0("fit-", i, ".R")))
      # Reporting
      # With concurent jobs and everyone reporting, this is a classic race
      # condition. Assembling the message beforhand lowers the risk of interleaved
      # output to the log.
      msgSep <- "-------"
      if (any(names(fit) == "error")) {
        msg <- paste0(msgSep, "\n",
                      "Fit ", i, " failed after ", fit$iterations, " iterations with error\n",
                      "--> ", fit$error,
                      msgSep, "\n")
        
        writeLines(msg, logfile)
        flush(logfile)
      } else {
        msg <- paste0(msgSep, "\n",
                      "Fit ", i, " completed\n",
                      "--> iterations : ", fit$iterations, "\n",
                      "-->  converged : ", fit$converged, "\n",
                      "--> obj. value : ", round(fit$value, digits = 2), "\n",
                      msgSep)
        
        writeLines(msg, logfile)
        flush(logfile)
      }
    }
    return(fit)
  })
  
  close(logfile)
  
  if (Sys.info()[['sysname']] == "Windows" & cores > 1) {
    
    parallel::stopCluster(cluster)
    doParallel::stopImplicitCluster()
    
  }
  
  
  
  # Cull failed and completed fits Two kinds of errors occure. The first returns
  # an object of class "try-error". The reason for these failures are unknown to
  # me. The second returns a list of results from trust(), where one name of the
  # list is error holding an object of class "try-error". These abortions are 
  # due to errors which are captured within trust(). Completed fits return with 
  # a valid result list from trust(), with "error" not part of its names. These
  # fits, can still be unconverged, if the maximim number of iterations was the
  # reason for the return of trust(). Be also aware of fits which converge due
  # to the trust radius hitting rmin. Such fits are reported as converged but
  # are not in truth.
  m_trustFlags.converged = 0
  m_trustFlags.unconverged = 1
  m_trustFlags.error = 2
  m_trustFlags.fatal = 3
  idxStatus <- sapply(m_parlist, function(fit) {
    if (inherits(fit, "try-error") || any(names(fit) == "error")) {
      return(m_trustFlags.error)
    } else if (!any(names(fit) == "converged")) {
      return(m_trustFlags.fatal)
    } else if (fit$converged) {
      return(m_trustFlags.converged)
    } else {
      return(m_trustFlags.unconverged)
    }
  })
  
  
  # Wrap up
  # Write out results
  if (output) saveRDS(m_parlist, file = fileParList)
  
  # Remove temporary files
  if (!cautiousMode) {
    unlink(interResultFolder, recursive = TRUE)
  } else {
    for (f in list.files(interResultFolder, "Rda$")) unlink(f)
  }
  
  # Show summary
  sum.error <- sum(idxStatus == m_trustFlags.error)
  sum.fatal <- sum(idxStatus == m_trustFlags.fatal)
  sum.unconverged <- sum(idxStatus == m_trustFlags.unconverged)
  sum.converged <- sum(idxStatus == m_trustFlags.converged)
  msg <- paste0("Multi start trust summary\n",
                "Outcome     : Occurrence\n",
                "Error       : ", sum.error, "\n",
                "Fatal       : ", sum.fatal, " must be 0\n",
                "Unconverged : ", sum.unconverged, "\n",
                "Converged   : ", sum.converged, "\n",
                "           -----------\n",
                "Total       : ", sum.error + sum.fatal + sum.unconverged + sum.converged, paste0("[", fits, "]"), "\n")
  logfile <- file(fileLog, open = "a")
  writeLines(msg, logfile)
  flush(logfile)
  close(logfile)
  
  if (stats) {
    cat(msg)
  }
  
  return(m_parlist)
}

#' Reproducibly construct "random" parframes
#' 
#' The output of this function can be used for the \code{center} - argument of \code{\link{mstrust}}
#'
#' @param pars Named vector. If \code{samplefun} has a "mean"-argument, values of pars will used as mean
#' @param n Integer how many lines should the parframe have
#' @param seed Seed for the random number generator
#' @param samplefun random number generator: \code{\link{rnorm}}, \code{\link{runif}}, etc...
#' @param keepfirst boolean, if set to \code{TRUE} the first row of the parframe will be the pars
#' @param ... arguments going to samplefun
#'
#' @return parframe (without metanames)
#' @export
#' 
#' @seealso \code{\link{mstrust}} and \code{\link{parframe}}
#'
#' @examples
#' msParframe(c(a = 0, b = 100000), 5)
#' 
#' # Parameter specific sigma
#' msParframe(c(a = 0, b = 100000), 5, samplefun = rnorm, sd = c(100, 0.5))
msParframe <- function(pars, n = 20, seed = 12345, samplefun = stats::rnorm, keepfirst = TRUE, ...) {
  set.seed(seed)
  
  if (keepfirst == TRUE) {
    if (n == 1)
      return(parframe(as.data.frame(t(pars))))
    
    # generate random pars
    rnd <- samplefun((n-1)*length(pars), ...)
    mypars <- matrix(rnd, nrow = (n-1), byrow = T)
    mean_pars <- 0
    if ("mean" %in% names(formals(samplefun)))
      mean_pars <- t(matrix(pars, nrow = length(pars), ncol = (n-1)))
    mypars <- mypars + mean_pars
    
    # assure that pars itself is also part
    mypars <- rbind(t(pars), mypars)
  } else {
    # generate random pars
    rnd <- samplefun((n)*length(pars), ...)
    mypars <- matrix(rnd, nrow = (n), byrow = T)
    mean_pars <- 0
    if ("mean" %in% names(formals(samplefun)))
      mean_pars <- t(matrix(pars, nrow = length(pars), ncol = (n)))
    mypars <- mypars + mean_pars
  }
  

  mypars <- `names<-`(as.data.frame(mypars), names(pars))
  
  parframe(mypars)
}


#' Construct fitlist from temporary files.
#'
#' @description An aborted \code{\link{mstrust}}
#'   leaves behind results of already completed fits. This command loads these
#'   fits into a fitlist.
#'
#' @param folder Path to the folder where the fit has left its results.
#'
#' @details The command \code{\link{mstrust}} saves
#'   each completed fit along the multi-start sequence such that the results can
#'   be resurected on abortion. This command loads a fitlist from these
#'   intermediate results.
#'
#' @return An object of class parlist.
#'
#' @seealso \code{\link{mstrust}}
#'
#' @author Wolfgang Mader, \email{Wolfgang.Mader@@fdm.uni-freiburg.de}
#'
#' @export
load.parlist <- function(folder) {
  # Read in all fits
  m_fileList <- dir(folder, pattern = "*.Rda")
  m_parVec <- lapply(m_fileList, function(file) {
    return(readRDS(file.path(folder, file)))
  })
  
  return(as.parlist(m_parVec))
}


#' Reduce replicated measurements to mean and standard deviation
#'
#' @description
#' Obtain the mean and standard deviation from replicates per condition.
#'
#' @param data A data frame containing the measurements. See Format for details.
#' @param select Names of the columns in the data frame used to define
#'        conditions, see Details.
#' @param datatrans Character vector describing a function to transform data.
#'        Use \kbd{x} to refer to data.
#'
#' @format
#' The following columns are mandatory for the data frame:
#' \describe{
#'  \item{name}{Name of the observed species.}
#'  \item{time}{Measurement time point.}
#'  \item{value}{Measurement value.}
#'  \item{condition}{The condition under which the observation was made.}
#' }
#'
#' In addition to these columns, any number of columns can follow to allow a
#' fine-grained definition of conditions. The values of all columns named in
#' \code{select} are then merged to get the set of conditions.
#'
#' @details
#' Experiments are usually repeated multiple times possibly under different
#' conditions leading to replicated measurements. The column "condition" in the
#' data allows grouping the data by their condition. However, sometimes, a more
#' fine-grained grouping is desirable. In this case, any number of additional
#' columns can be appended to the data. These columns are referred to as
#' "condition identifiers". Which of the condition identifiers are used for
#' grouping is user-defined by specifying their names in \code{select}. The mandatory
#' column "condition" is always used. The total set of different conditions is
#' thus defined by all combinations of values occurring in the selected condition
#' identifiers. The replicates of each condition are then reduced to mean and 
#' standard deviation. New condition names are derived by merging all conditions 
#' which were used in mean and standard deviation. Columns that are not listed in
#' \code{select} but have different values within grouped data are dropped. Columns
#' that remain stable across all replicates are retained and horizontally attached
#' to the resulting data frame.
#'
#' @return
#' A data frame of the following variables:
#' \describe{
#'  \item{time}{Measurement time point.}
#'  \item{name}{Name of the observed species.}
#'  \item{value}{Mean of replicates.}
#'  \item{sigma}{Standard error of the mean, NA for single measurements.}
#'  \item{n}{The number of replicates reduced.}
#'  \item{condition}{The condition for which the value and sigma were calculated. If
#'        more than one column was used to define the condition, this variable
#'        holds the effective condition which is the combination of all applied
#'        single conditions.}
#'  \item{other columns}{Columns that were stable across replicates are retained and horizontally attached to the resulting data frame.}
#' }
#'
#' @author Wolfgang Mader, \email{Wolfgang.Mader@@fdm.uni-freiburg.de}
#' @author Simon Beyer, \email{simon.beyer@@fdm.uni-freiburg.de}
#'
#' @export
reduceReplicates <- function(data, select = "condition", datatrans = NULL) {
  UseMethod("reduceReplicates")
}

#' Method for data frames
#' @export
reduceReplicates.data.frame <- function(data, select = "condition", datatrans = NULL) {
  # File format definition
  fmtnames <- c("name", "time", "value", "condition")
  if (length(intersect(names(data), fmtnames)) != length(fmtnames)) {
    stop(paste("Mandatory column names are:", paste(fmtnames, collapse = ", ")))
  }
  
  # Transform data if requested
  if (!is.null(datatrans) && is.character(datatrans)) {
    x <- data$value
    data$value <- eval(parse(text = datatrans))
  }
  
  # Define grouping conditions
  select <- unique(c("name", "time", "condition", select))
  condidnt <- apply(data[select], 1, paste, collapse = "_")
  conditions <- unique(condidnt)
  
  # Identify columns that are consistent across replicates
  potential_cols <- setdiff(names(data), c("value", "sigma", "n", select))
  stable_cols <- potential_cols[sapply(potential_cols, function(col) {
    all(tapply(data[[col]], condidnt, function(x) length(unique(x)) == 1))
  })]
  dropped_cols <- setdiff(names(data), c("time", "value", "sigma", "n", stable_cols))
  
  # Reduce data
  reduct <- do.call(rbind, lapply(conditions, function(cond) {
    conddata <- data[condidnt == cond, ]
    mergecond <- paste(unique(conddata[setdiff(select, c("name", "time"))]), collapse = "_")
    
    data.frame(
      time = conddata[1, "time"],
      value = mean(conddata$value),
      sigma = if (nrow(conddata) > 1) {
        sd(conddata$value) / sqrt(nrow(conddata)) # Standard Error of the Mean (SEM)
      } else {
        NA
      },
      n = nrow(conddata),
      name = conddata[1, "name"],
      condition = mergecond,
      conddata[1, stable_cols, drop = FALSE] # Retain only stable columns
    )
  }))
  
  message("Dropped columns: ", paste(setdiff(dropped_cols, names(reduct)), collapse = ", "))
  return(reduct)
}


#' Method for files (character)
#' @export
reduceReplicates.character <- function(data, select = "condition", datatrans = NULL) {
  # Ensure the file exists
  if (!file.exists(data)) {
    stop("The specified file does not exist.")
  }
  
  # Determine the file type based on extension
  ext <- tools::file_ext(data)
  if (tolower(ext) == "csv") {
    data <- read.csv(data, stringsAsFactors = FALSE)
  } else if (tolower(ext) %in% c("xls", "xlsx")) {
    if (!requireNamespace("openxlsx", quietly = TRUE)) {
      stop("The 'openxlsx' package is required to read Excel files. Please install it.")
    }
    data <- openxlsx::read.xlsx(data)
  } else {
    stop("Unsupported file format. Only .csv, .xls, and .xlsx are supported.")
  }
  
  # Call the data.frame method
  reduceReplicates(as.data.frame(data), select = select, datatrans = datatrans)
}


#' Fit an error model using maximum likelihood estimation
#'
#' @description Fit an error model to reduced replicate data using maximum 
#' likelihood estimation (MLE). The model estimates the variance of replicate 
#' measurements as a function of the mean, based on a chi-square distribution.
#'
#' @param data A data frame containing reduced replicate data. Must include 
#'   columns "value" (mean of replicates), "sigma" (sample standard deviation), 
#'   and "n" (number of replicates per condition).
#' @param factors Character vector specifying the columns in \option{data} 
#'   that define pooling conditions. The model is fit separately for each unique 
#'   combination of these factors.
#' @param errorModel A character string defining the error model in terms of 
#'   variance. The mean is referenced as \kbd{x}, e.g., "exp(s0) + exp(srel) * x^2".
#' @param par Named numeric vector of initial values for the parameters in 
#'   \option{errorModel}.
#' @param lower Optional named numeric vector specifying lower bounds for 
#'   parameters. Defaults to \code{NULL} (no bounds).
#' @param upper Optional named numeric vector specifying upper bounds for 
#'   parameters. Defaults to \code{NULL} (no bounds).
#' @param plotting Logical. If \code{TRUE}, a plot of the pooled variance and 
#'   the fitted error model is displayed.
#' @param blather Logical. If \code{TRUE}, additional information is returned, 
#'   including fitted parameter values, original \code{sigma} values, and confidence intervals.
#' @param ... Additional arguments passed to the optimizer \code{\link[optimx]{optimr}}.
#'
#' @details The model assumes that the sample variance of replicate measurements 
#'   follows a chi-square distribution with \eqn{n-1} degrees of freedom. The 
#'   variance is estimated by maximizing the log-likelihood function derived 
#'   from this distribution. Given multiple replicates, the variance can be 
#'   modeled as a function of the mean.
#'
#'   The \option{errorModel} parameter defines this functional relationship. 
#'   It should be expressed as a character string, using \kbd{x} to represent 
#'   the mean.
#'
#'   The optimization is performed using \code{\link[optimx]{optimr}} with the 
#'   \code{"L-BFGS-B"} method, which supports bound constraints. If \option{lower} 
#'   and \option{upper} are not specified, the parameters are assumed to be 
#'   unconstrained.
#'
#'   If \option{plotting = TRUE}, the function produces a log-scale variance 
#'   plot for each condition, showing the pooled variance, the fitted model, 
#'   and 68\% and 95\% confidence bounds.
#'
#' @return By default, a data frame is returned, containing the original data 
#'   with updated \code{sigma} values estimated from the error model.
#'
#'   If \option{blather = TRUE}, additional information is returned, including:
#'   - The fitted parameter values.
#'   - The error model used.
#'   - Confidence intervals for \code{sigma} at 68\% and 95\% levels.
#'   - Effective pooling conditions.
#'
#' @author Wolfgang Mader, \email{Wolfgang.Mader@@fdm.uni-freiburg.de}
#' @author Simon Beyer, \email{simon.beyer@@fdm.uni-freiburg.de}
#'
#' @export
#' @importFrom stats qchisq
#' @importFrom ggplot2 ggplot aes geom_point geom_line geom_ribbon ylab facet_wrap scale_y_log10 theme
#' @importFrom optimx optimr
fitErrorModel <- function(data, factors, errorModel = "exp(s0)+exp(srel)*x^2",
                          par = c(s0 = 1, srel = .1), 
                          lower = NULL, upper = NULL,  # Optional: Parametergrenzen
                          plotting = TRUE, blather = FALSE, ...) {
  
  # Assemble conditions
  condidnt <- Reduce(paste, subset(data, select = factors))
  conditions <- unique(condidnt)
  
  # Fit error model
  nColData <- ncol(data)
  dataErrorModel <- cbind(data, as.list(par))
  
  for (cond in conditions) {
    subdata <- dataErrorModel[condidnt == cond,]
    x <- subdata$value
    n <- subdata$n
    y <- subdata$sigma * sqrt(n)
    
    # Zielfunktion mit der analytischen Maximum-Likelihood
    obj <- function(par) {
      with(as.list(par), {
        sigma2 <- eval(parse(text = errorModel)) 
        negLogLik <- sum((n - 1) * (log(sigma2) + (y^2 / sigma2)), na.rm = TRUE)
        return(negLogLik)
      })
    }
    
    # Falls keine lower/upper-Bounds definiert sind, Standardwerte setzen
    if (is.null(lower)) lower <- rep(-Inf, length(par))
    if (is.null(upper)) upper <- rep(Inf, length(par))
    
    # Optimierung mit L-BFGS-B
    fit <- optimr(par, obj, method = "L-BFGS-B", lower = lower, upper = upper, ...)
    
    sigma <- sqrt(with(as.list(fit$par), eval(parse(text = errorModel))))
    dataErrorModel[condidnt == cond, ]$sigma <- sigma 
    dataErrorModel[condidnt == cond, -(nColData:1)] <- data.frame(as.list(fit$par))
  }
  
  # Calculate confidence bounds for sigma
  p68 <- (1 - .683) / 2
  p95 <- (1 - .955) / 2
  dataErrorModel$cbLower68 <- dataErrorModel$sigma^2 * qchisq(p = p68, df = dataErrorModel$n - 1) / (dataErrorModel$n - 1)
  dataErrorModel$cbUpper68 <- dataErrorModel$sigma^2 * qchisq(p = p68, df = dataErrorModel$n - 1, lower.tail = FALSE) / (dataErrorModel$n - 1)
  dataErrorModel$cbLower95 <- dataErrorModel$sigma^2 * qchisq(p = p95, df = dataErrorModel$n - 1) / (dataErrorModel$n - 1)
  dataErrorModel$cbUpper95 <- dataErrorModel$sigma^2 * qchisq(p = p95, df = dataErrorModel$n - 1, lower.tail = FALSE) / (dataErrorModel$n - 1)
  
  # Assemble result
  dataErrorModel <- cbind(dataErrorModel, condidnt, sigmaLS = data$sigma)
  attr(dataErrorModel, "errorModel") <- errorModel
  
  # Plot if requested
  if (plotting) {
    print(ggplot(dataErrorModel, aes(x = value)) +
            geom_point(aes(y = sigmaLS^2 * (n))) +
            geom_line(aes(y = sigma^2)) +
            geom_ribbon(aes(ymin = cbLower95, ymax = cbUpper95), alpha = .3) +
            geom_ribbon(aes(ymin = cbLower68, ymax = cbUpper68), alpha = .3) +
            ylab("variance") +
            facet_wrap(~condidnt, scales = "free") +
            scale_y_log10() +
            theme_dMod()
    )}
  
  # Return standard error of the mean
  dataErrorModel$sigma <- dataErrorModel$sigma / sqrt(dataErrorModel$n)
  data$sigma <- dataErrorModel$sigma
  if (blather)
    return(dataErrorModel)
  else 
    return(data)
}
dkaschek/dMod documentation built on June 12, 2025, 2:50 a.m.