R/toolsWolfgang.R

#' Pad string to desired width
#'
#' @param string String to pad
#' @param width Desired width of padded string
#' @param where Padding can be inserted to the right or left of <string>.
#'        Default to 'right'.
#' @param padding A single character with with the padding space is filled.
#'        Defaults to blank ' ' yielding invisible padding.
#' @param autoelide If TRUE, <string> is elided if it is wider than <width>. The
#'        position of eliding follows <where>. Defaults to FALSE.
#'
#' @return Padded string of length <width>.
#'
#' @author Wolfgang Mader, \email{Wolfgang.Mader@@fdm.uni-freiburg.de}
strpad <- function(string, width, where = "right", padding = " ", autoelide = FALSE) {

  # Check function arguments
  # <padding> is assumed to have exactry one character
  if (nchar(padding) != 1) {
    stop("<padding> must be of width 1")
  }

  # We only work on a character vector
  if (!inherits(string, "character")) {
    warning("Argument <string> does not inherit class 'character'")
    return(NULL)
  }


  # Assemble parameters
  strWidth <- nchar(string)


  # Check if eliding is needed
  if (strWidth > width) {
    if (!autoelide) {
      stop("<string> too wide. Consider autoelide = TRUE")

    } else {
      return(strelide(string, width, where))
    }
  }


  # Do the padding
  m_padding <- paste0(rep(padding, width - strWidth), collapse = "")
  if (where == "left") {
    return(paste0(m_padding, string))

  } else if (where == "right") {
    return(paste0(string, m_padding))

  } else {
    stop("<where> must be 'left' or 'right'")
  }
}



#' Elide character vector
#'
#' @param string String subject to eliding
#' @param width Width including eliding ... of return string
#' @param where Eliding can happen at 'left', 'middel', or 'right'. Defaults to
#'        'right'.
#' @param force Elide, even is <string> is shorter than <width>. Default to
#'        'FALSE'.
#'
#' @return Elided string of length <width>.
#'
#' @details
#' Elide a string to <width>. Eliding can happen at 'left', 'middle', or
#' 'right'. #' If forcing = FALSE, which is the default, strings shorten than
#' <width> are returend unaltered; forcing = TRUE inserts eliding symbols (...)
#' in any case.
#'
#' @author Wolfgang Mader, \email{Wolfgang.Mader@@fdm.uni-freiburg.de}
strelide <- function(string, width, where = "right", force = FALSE) {

  # Functions for eliding
    # Eliding in the middle
  elideMiddle <- function(string, width) {
    if (width == 1) {
      return('.')
    }
    strWidth <- nchar(string)
    frontWidth <- max(ceiling((width - 3) / 2), 1)
    endWitdh <- floor((width - 3) / 2)
    string <- paste0(substr(string, 1, frontWidth),  "...",
                     substr(string, strWidth - endWitdh + 1, strWidth))
    return(strtrim(string, width))
  }

  # Elide on the right
  elideRight <- function(string, width) {
    if (width <= 3) {
      return(elideMiddle(string, width))
    } else {

      substr(string, width - 2, width) <- "..."
      return(strtrim(string, width))
    }
  }

  # Eliding on the left
  elideLeft <- function(string, width) {
    string <- paste(rev(substring(string, 1:nchar(string), 1:nchar(string))), collapse="")
    string <- elideRight(string, width)
    return(paste(rev(substring(string, 1:nchar(string), 1:nchar(string))), collapse=""))
  }

  # Check function argumets
  # We only work on a character vector
  if (!inherits(string, "character")) {
    warning("Argument does not inherit class 'character'")
    return(NULL)
  }


  # Assemble parameters
  strWidth <- nchar(string)


  # Eliding
  # Do we need to elide?
  if (strWidth <= width && !force) {
    return(string)
  }


  # Are we forced to elide?
  if (width > strWidth && force) {
    width <- strWidth
  }


  # Eliding the normal case
  if (where == "left") {
    return(elideLeft(string, width))

  } else if (where == "middle") {
    return(elideMiddle(string, width))

  } else if (where == "right") {
    return(elideRight(string, width))

  } else {
    stop("<where> must be 'left', 'middle', or 'right'")
  }
}



#' Select parameter values with lowest Chi^2 among profiles.
#'
#' @param prf A profile as returned from \link[dMod]{profile}.
#' @param context If TRUE, the chi^2 and other context of the profile is
#'                returned. This parameter defaults to FALSE in which case the
#'                output can be used as outer parameters directly.
#'
#' @return Parameter values with lowest chi^2 w/ or w/o profile context. If all
#'         profiles are invalid, NULL is returned.
#'
#' @details
#' On profiling a model, parameter values yielding a lower chi^2 than the one of
#' the fit providing the optimal values for the profiles might be encountered.
#' This function extracts the set of parameter values possessing the lowest
#' chi^2 among all profiles. Profiles which are invalid due to e.g., integration
#' problem on calculating them, are ignored. If all profiles are invalid, NULL
#' is returned.
#'
#' @author Wolfgang Mader, \email{Wolfgang.Mader@@fdm.uni-freiburg.de}
#'
#' @export

plSelectMin <- function(prf, context = FALSE) {

  # Remove invalid profiles.
  apt <- sapply(prf, class) == "matrix"
  if (!any(apt)) {
    return(NULL)
  } else {
    prf <- prf[apt]
  }

  # Minium chi^2 parameter values sets per profile.
  chi2MinAllProfiles <- sapply(prf, function(species) {
    
    col.select <- c("value", attr(species, "parameters"))
    row.select <- which.min(species[, "value"])
    
    return(species[row.select, col.select])
  })

  # Minimum chi^2 parameter values set accross all profiles.
  chi2MinBest <- chi2MinAllProfiles[, which.min(chi2MinAllProfiles[1, ])]

  if (context) {
    return(chi2MinBest)
  } else {
    return(chi2MinBest[-1])
  }

}


#' Non-Linear Optimization, multi start
#' 
#' @description Wrapper around \code{\link{trust}} allowing for multiple fits 
#'   from randomly chosen initial values.
#'   
#' @param objfun Objective function, see \code{\link{trust}}.
#' @param center Parameter values around which the initial values for each fit 
#'   are randomly sampled. The initial values handed to \link{trust} are the sum
#'   of center and the output of \option{samplefun}, center + 
#'   \option{samplefun}. See \code{\link{trust}}, parinit.
#' @param studyname The names of the study or fit. This name is used to 
#'   determine filenames for interim and final results. See Details.
#' @param rinit Starting trust region radius, see \code{\link{trust}}.
#' @param rmax Maximum allowed trust region radius, see \code{\link{trust}}.
#' @param fits Number of fits (jobs).
#' @param cores Number of cores for job parallelization.
#' @param samplefun Function to sample random initial values. It is assumed, 
#'   that \option{samplefun} has a named parameter "n" which defines how many 
#'   random numbers are to be returned, such as for \code{\link{rnorm}} or 
#'   \code{\link{runif}}. By default \code{\link{rnorm}} is used. Parameteres 
#'   for samplefun are simply appended as named parameters to the mstrust call 
#'   and automatically handed to samplefun by matching parameter names.
#' @param stats If true, the same summary statistic as written to the logfile is
#'   printed to command line on mstrust completion.
#' @param narrowing Don't touch this. Status flag used when mstrust is called 
#'   via \code{\link{msnarrow}}. In narrowing mode, this parameter indicates the
#'   narrowing status, see \code{\link{msnarrow}}.
#' @param ... Additional parameters handed to trust(), samplefun(), or the 
#'   objective function by matching parameter names. All unmatched parameters 
#'   are handed to the objective function objfun(). The log file starts with a 
#'   table telling which parameter was assigend to which function.
#'   
#' @details By running multiple fits starting at randomly chosen inital 
#'   parameters, the chisquare landscape can be explored using a deterministic 
#'   optimizer. Here, \code{\link{trust}} is used for optimization. The standard
#'   procedure to obtain random initial values is to sample random variables 
#'   from a uniform distribution (\code{\link{rnorm}}) and adding these to 
#'   \option{center}. It is, however, possible, to employ any other sampling 
#'   strategy by handing the respective function to mstrust(), 
#'   \option{samplefun}.
#'   
#'   In case a special sampling is required, a customized sampling function can 
#'   be used. If, e.g., inital values leading to a non-physical systems are to 
#'   be discarded upfront, the objective function can be addapted accordingly.
#'   
#'   All started fits either lead to an error or complete converged or
#'   unconverged. A statistics about the return status of fits can be shown by
#'   setting \option{stats} to TRUE.
#'   
#'   Fit final and intermediat results are stored under \option{studyname}. For
#'   each run of mstrust for the same study name, a folder under
#'   \option{studyname} of the form "trial-x-date" is created. "x" is the number
#'   of the trial, date is the current time stamp. In this folder, the
#'   intermediate results are stored. These intermediate results can be loaded
#'   by \code{\link{load.parlist}}. These are removed on successfull completion
#'   of mstrust. In this case, the final list of fit parameters
#'   (parameterList.Rda) and the fit log (mstrust.log) are found instead.
#'   
#' @return A parlist holding errored and converged fits.
#'   
#' @seealso \code{\link{trust}}, \code{\link{rnorm}}, \code{\link{runif}}, 
#'   \code{\link{msnarrow}}, \code{\link{msbest}}, \code{\link{as.parframe}}
#'   
#' @author Wolfgang Mader, \email{Wolfgang.Mader@@fdm.uni-freiburg.de}
#'   
#' @export
mstrust <- function(objfun, center, studyname, rinit = .1, rmax = 10, fits = 20, cores = 1,
                    samplefun = "rnorm", resultPath = ".", stats = FALSE,
                    narrowing = NULL, ...) {

  # Argument parsing, sorting, and enhancing
  # Gather all function arguments
  varargslist <- list(...)

  argslist <- as.list(formals())
  argslist <- argslist[names(argslist) != "..."]

  argsmatch <- as.list(match.call(expand.dots = TRUE))
  namesinter <- intersect(names(argslist), names(argsmatch))

  argslist[namesinter] <- argsmatch[namesinter]
  argslist <- c(argslist, varargslist)

  # Add extra arguments
  argslist$n <- length(center) # How many inital values do we need?

  # Determine target function for each function argument.
  # First, define argument names used locally in mstrust().
  # Second, check what trust() and samplefun() accept and check for name clashes.
  # Third, whatever is unused is passed to the objective function objfun().
  nameslocal <- c("studyname", "center", "fits", "cores", "samplefun",
                  "resultPath", "stats", "narrowing")
  namestrust <- intersect(names(formals(trust)), names(argslist))
  namessample <- intersect(names(formals(samplefun)), names(argslist))
  if (length(intersect(namestrust, namessample) != 0)) {
    stop("Argument names of trust() and ", samplefun, "() clash.")
  }
  namesobj <- setdiff(names(argslist), c(namestrust, namessample, nameslocal))


  # Assemble argument lists common to all calls in mclapply
  # Sample function
  argssample <- structure(vector("list", length = length(namessample)), names = namessample)
  for (name in namessample) {
    argssample[[name]] <- argslist[[name]]
  }

  # Objective function
  argsobj <- structure(vector("list", length = length(namesobj)), names = namesobj)
  for (name in namesobj) {
    argsobj[[name]] <- argslist[[name]]
  }

  # Trust optimizer, except for initial values
  argstrust <- structure(vector("list", length = length(namestrust)), names = namestrust)
  for (name in namestrust) {
    argstrust[[name]] <- argslist[[name]]
  }


  # Assemble and create output filenames, folders and files
  m_timeStamp <- paste0(format(Sys.time(), "%d-%m-%Y-%H%M%S"))
  
  # Folders
  resultFolderBase <- file.path(argslist$resultPath, argslist$studyname)
  m_trial <- paste0("trial-", length(dir(resultFolderBase, pattern = "trial*")) + 1)
  resultFolder <- file.path(resultFolderBase, paste0(m_trial, "-", m_timeStamp))
  
  interResultFolder <- file.path(resultFolder, "interRes")
  dir.create(path = interResultFolder, showWarnings = FALSE, recursive = TRUE)
  
  # Files
  fileNameLog <- paste0("mstrust.log")
  fileNameParList <- paste0("parameterList.Rda")
  fileLog <- file.path(resultFolder, fileNameLog)
  fileParList <- file.path(resultFolder, fileNameParList)
  
  
  
  # Apply trust optimizer in parallel
  # The error checking leverages that mclappy runs each job in a try().
  logfile <- file(fileLog, open = "w")
  
  # Parameter assignment information
  if (is.null(narrowing) || narrowing[1] == 1) {
    msg <- paste0("Parameter assignment information\n",
                  strpad("mstrust", 12),                        ": ", paste0(nameslocal, collapse = ", "), "\n",
                  strpad("trust", 12),                          ": ", paste0(namestrust, collapse = ", "), "\n",
                  strpad(as.character(argslist$samplefun), 12), ": ", paste0(namessample, collapse = ", "), "\n",
                  strpad(as.character(argslist$objfun), 12),    ": ", paste0(namesobj, collapse = ", "), "\n\n")
    writeLines(msg, logfile)
    flush(logfile)
  }

  # Write narrowing status information to file
  if (!is.null(narrowing)) {
    msg <- paste0("--> Narrowing, run ", narrowing[1], " of ", narrowing[2], "\n",
                  "--> " , fits, " fits to run\n")
    writeLines(msg, logfile)
    flush(logfile)
  }

  m_parlist <- as.parlist(mclapply(1:fits, function(i) {
    argstrust$parinit <- center + do.call(samplefun, argssample)
    fit <- do.call(trust, c(argstrust, argsobj))
    
    # In some crashes a try-error object is returned which is not a list. Since
    # each element in the parlist is assumed to be a list, we wrap these cases.
    if (!is.list(fit)) {
      f <- list()
      f$error <- fit
      fit <- f
    }
    
    fit$parinit <- argstrust$parinit

    # Write current fit to disk
    saveRDS(fit, file = file.path(interResultFolder, paste0("fit-", i, ".Rda")))

    # Reporting
    # With concurent jobs and everyone reporting, this is a classic race
    # condition. Assembling the message beforhand lowers the risk of interleaved
    # output to the log.
    msgSep <- "-------"
    if (any(names(fit) == "error")) {
      msg <- paste0(msgSep, "\n",
                    "Fit ", i, " failed after ", fit$iterations, " iterations with error\n",
                    "--> ", fit$error,
                    msgSep, "\n")

      writeLines(msg, logfile)
      flush(logfile)
    } else {
      msg <- paste0(msgSep, "\n",
                    "Fit ", i, " completed\n",
                    "--> iterations : ", fit$iterations, "\n",
                    "-->  converged : ", fit$converged, "\n",
                    "--> obj. value : ", round(fit$value, digits = 2), "\n",
                    msgSep)

      writeLines(msg, logfile)
      flush(logfile)
    }

    return(fit)
  }, mc.preschedule = FALSE, mc.silent = FALSE, mc.cores = cores))
  close(logfile)


  # Cull failed and completed fits Two kinds of errors occure. The first returns
  # an object of class "try-error". The reason for these failures are unknown to
  # me. The second returns a list of results from trust(), where one name of the
  # list is error holding an object of class "try-error". These abortions are 
  # due to errors which are captured within trust(). Completed fits return with 
  # a valid result list from trust(), with "error" not part of its names. These
  # fits, can still be unconverged, if the maximim number of iterations was the
  # reason for the return of trust(). Be also aware of fits which converge due
  # to the trust radius hitting rmin. Such fits are reported as converged but
  # are not in truth.
  m_trustFlags.converged = 0
  m_trustFlags.unconverged = 1
  m_trustFlags.error = 2
  m_trustFlags.fatal = 3
  idxStatus <- sapply(m_parlist, function(fit) {
    if (inherits(fit, "try-error") || any(names(fit) == "error")) {
      return(m_trustFlags.error)
    } else if (!any(names(fit) == "converged")) {
      return(m_trustFlags.fatal)
    } else if (fit$converged) {
      return(m_trustFlags.converged)
    } else {
      return(m_trustFlags.unconverged)
    }
  })

  
  # Wrap up
  # Write out results
  saveRDS(m_parlist, file = fileParList)

  # Remove temporary files
  unlink(interResultFolder, recursive = TRUE)
  

  # Show summary
  sum.error <- sum(idxStatus == m_trustFlags.error)
  sum.fatal <- sum(idxStatus == m_trustFlags.fatal)
  sum.unconverged <- sum(idxStatus == m_trustFlags.unconverged)
  sum.converged <- sum(idxStatus == m_trustFlags.converged)
  msg <- paste0("Mutli start trust summary\n",
                "Outcome     : Occurrence\n",
                "Error       : ", sum.error, "\n",
                "Fatal       : ", sum.fatal, " must be 0\n",
                "Unconverged : ", sum.unconverged, "\n",
                "Converged   : ", sum.converged, "\n",
                "           -----------\n",
                "Total       : ", sum.error + sum.fatal + sum.unconverged + sum.converged, paste0("[", fits, "]"), "\n")
  logfile <- file(fileLog, open = "a")
  writeLines(msg, logfile)
  flush(logfile)
  close(logfile)

  if (stats) {
    cat(msg)
  }

  return(m_parlist)
}



#' Discover promissing regions in parameter space
#'
#' @description
#' Use consecutive multi start (ms) fits to arrive in a well-behaved region in
#' parameter space.
#'
#' @param center Parameter values for the first ms fit.
#' @param spread Vector giving the spread of initial parameters for consecutive
#'        ms fits. The length of <spread> defines how many consecutive ms fits
#'        are run.
#' @param fits Number of fits carried out per ms fit. If <fits> is a integer the
#'        same number of fits are used for all ms fits. If a different number of
#'        fits are desired for each ms fit, <fits> must be a vector of the same
#'        length as <spread>.
#' @param safety If a non-empty string is given, the fitlist of each mstrust is
#'        written to a file <safety> with the number of the current run
#'        appended.
#' @param ... Parameters handed to the mulit start optimizer. Right now
#'        \code{\link{mstrust}} is the only option available.
#'
#' @details
#' It might be desirable to start a multi start (ms) fit in a region in
#' parameter space yielding reasonable fit results. If such a region is unknown,
#' it can be hoped, that the best fit of a ms fit with a large spread of inital
#' values identifies such a region. In order to explore the region in greater
#' detail, a consecutive ms fit about the best fit can be used. The procedure of
#' consecutive ms-fits about the best fit of the last can be iterated. Thus, it
#' is expected to arrive in a region in parameter space which gives reasonable
#' fits.
#'
#' In case any of the ms fits return without a single valid result, the
#' algorithm stops, and the fitlist of the last ms fit is returned. Moreover, a
#' warning is issued. In case the first fit failes in such a way, NULL is
#' returned.
#'
#' @return fitlist A data frame of the fits of the last multi start fit.
#'
#' @author Wolfgang Mader, \email{Wolfgang.Mader@@fdm.uni-freiburg.de}
#'
#' @export
msnarrow <- function(center, spread, fits = 100, safety = "", ...) {
  nnarrow <- length(spread)

  # Parse arguments
  if (length(fits) == 1) {
    fits <- rep(fits, nnarrow)
  } else if (length(fits) != nnarrow) {
    stop("<fits> is a vector of different length than <spread>.")
  }

  # Assemble inital parameter list
  trustargs <- list(...)
  trustargs$center <- center

  # Fitting: run mstrust for all values in spread.
  for (ms in 1:length(spread)) {
    cat("Narrowing step ", ms, " of ", nnarrow, ", ", fits[ms], " fits to run.\n", sep = "")

    trustargs$sd <- spread[ms]
    trustargs$fits <- fits[ms]
    trustargs$narrowing <- c(ms, nnarrow)
    fitlist <- do.call(mstrust, trustargs)

    if (nchar(safety) > 0) {
      outname <- paste0(safety, ms, ".rda")
      save(fitlist, file = outname)
    }

    trustargs$center <- msbest(fitlist)
    
    if (ms == 1) {
      fitlistAll <- fitlist
    } else {
      fitlistAll <- rbind.fitlist(fitlistAll, fitlist)
    }

    if (is.null(trustargs$center)) {
      cat("Narrowing aborted at run", ms)
      if (ms == 1) {
        return(NULL)
      } else {
        return(fitlistAll)
      }
    }
  }

  return(fitlistAll)
}



#' Construct fitlist from temporary files.
#'
#' @description An aborted \code{\link{mstrust}} or \code{\link{msnarrow}}
#'   leaves behind results of already completed fits. This command loads these
#'   fits into a fitlist.
#'
#' @param folder Path to the folder where the fit has left its results.
#'
#' @details The commands \code{\link{mstrust}} or \code{\link{msnarrow}} save
#'   each completed fit along the multi-start sequence such that the results can
#'   be resurected on abortion. This commands loads a fitlist from these
#'   intermediate results.
#'
#' @return A fitlist as data frame.
#'
#' @seealso \code{\link{mstrust}}, \code{\link{msnarrow}}
#'
#' @author Wolfgang Mader, \email{Wolfgang.Mader@@fdm.uni-freiburg.de}
#'
#' @export
load.parlist <- function(folder) {
  # Read in all fits
  m_fileList <- dir(folder, pattern = "*.Rda")
  m_parVec <- lapply(m_fileList, function(file) {
    return(readRDS(file.path(folder, file)))
  })
  
  return(as.parlist(m_parVec))
}






#' Select a parameter vector from a parameter frame.
#' 
#' @description Obtain a parameter vector from a parameter frame.
#' 
#' @param parframe A parameter frame, e.g., the output of
#'   \code{\link{as.parframe}}.
#' @param index Integer, the parameter vector with the \code{index}-th lowest
#'   objective value.
#'   
#' @details With this command, additional information included in the parameter
#'   frame as the objective value and the convergence state are removed and a
#'   parameter vector is returned. This parameter vector can be used to e.g.,
#'   evaluate an objective function.
#'   
#'   On selection, the parameters in the parameter frame are ordered such, that
#'   the parameter vector with the lowest objective value is at \option{index}
#'   1. Thus, the parameter vector with the \option{index}-th lowest objective
#'   value is easily obtained.
#'   
#' @return The parameter vector with the \option{index}-th lowest objective
#'   value.
#'   
#' @author Wolfgang Mader, \email{Wolfgang.Mader@@fdm.uni-freiburg.de}
#'   
#' @export
as.parvec.parframe <- function(parframe, index = 1) {
  m_order <- order(parframe$value)
  metanames <- attr(parframe, "metanames")
  best <- as.parvec(unlist(parframe[m_order[index], attr(parframe, "parameters")]))
  if ("converged" %in% metanames && !parframe[m_order[index],]$converged) {
    warning("Parameter vector of an unconverged fit is selected.", call. = FALSE)
    }
  return(best)
}



#' Select attributes.
#' 
#' @description Select or discard attributes from an object.
#'   
#' @param x The object to work on
#' @param atr An optional list of attributes which are either kept or removed. 
#'   This parameter defaults to dim, dimnames, names,  col.names, and row.names.
#' @param keep For keep = TRUE, atr is a positive list on attributes which are 
#'   kept, for keep = FALSE, \option{atr} are removed.
#'   
#' @return x with selected attributes.
#'   
#' @author Wolfgang Mader, \email{Wolfgang.Mader@@fdm.uni-freiburg.de}
#' @author Mirjam Fehling-Kaschek, \email{mirjam.fehling@@physik.uni-freiburg.de}
#'   
#' @export
attrs <- function(x, atr = NULL, keep = TRUE) {

  if (is.null(atr)) {
    atr <- c("class", "dim", "dimnames", "names", "col.names", "row.names")
  }
  
  xattr <- names(attributes(x))
  if (keep == TRUE) {
    attributes(x)[!xattr %in% atr] <- NULL
  } else {
    attributes(x)[xattr %in% atr] <- NULL
  }
  
  return(x)
}



#' Print object and its "default" attributes only.
#' 
#' @param x Object to be printed
#' @param list_attributes Prints the names of all attribute of x, defaults to 
#'   TRUE
#'   
#' @details Before the \option{x} is printed by print.default, all its arguments
#'   not in the default list of \code{\link{attrs}} are removed.
#'   
#' @author Wolfgang Mader, \email{Wolfgang.Mader@@fdm.uni-freiburg.de}
#' @author Mirjam Fehling-Kaschek, 
#'   \email{mirjam.fehling@@physik.uni-freiburg.de}
#'   
#' @export
print0 <- function(x, list_attributes = TRUE ) {
  if (list_attributes == TRUE) {
    cat("List of all attributes: ", names(attributes(x)), "\n")
  }
  
  print.default(attrs(x))
}



#' Reduce replicated measurements to mean and standard deviation
#'
#' @description
#' Obtain the mean and standard deviation from replicates per condition.
#'
#' @param file Data file of csv. See Format for details.
#' @param select Names of the columns in the data file used to define
#'        conditions, see Details.
#' @param datatrans Character vector describing a function to transform data.
#'        Use \kbd{x} to refere to data.
#'
#'
#' @format
#' The following columns are mandatory for the data file.
#' \describe{
#'  \item{Species}{Name of the observed species.}
#'  \item{Time}{Measurement time point.}
#'  \item{Value}{Measurement value.}
#'  \item{Condition}{The condition under which the observation was made.}
#' }
#'
#' In addition to these columns, any number of columns can follow to allow a
#' fine grained definition of conditions. The values of all columns named in
#' \option{select} are then merged to get the set of conditions.
#'
#' @details
#' Experiments are usually repeated multiple times possibly under different
#' conditions leading to replicted measurements. The column "Condition" in the
#' data allows to group the data by their condition. However, sometimes, a more
#' fine grained grouping is desirable. In this case, any number of additional
#' columns can be append to the data. These columns are referred to as
#' "condition identifier". Which of the condition identifiers are used to do the
#' grouping is user defined by anouncing the to \option{select}. The mandatory
#' column "Condition" is always used. The total set of different conditions is
#' thus defined by all combinations of values occuring in the selected condition
#' identifiers. The replicates of each condition are then reduced to mean and
#' variance.New conditions names are derived by merging all conditions which
#' were used in mean and std.
#'
#' @return
#' A data frame of the following variables
#' \describe{
#'  \item{Species}{Name of the observed species.}
#'  \item{Time}{Measurement time point.}
#'  \item{Mean}{Mean of replicates.}
#'  \item{Sd}{Standard deviation of replicates, NA for single measurements.}
#'  \item{df}{Degrees of freedom. The number of replictes reduced.}
#'  \item{Condition}{The condition for which the mean and sd were calculated. If
#'        more than one column were used to define the condition, this variable
#'        holds the effecive condition which is the combination of all applied
#'        single conditions. }
#' }
#'
#' @author Wolfgang Mader, \email{Wolfgang.Mader@@fdm.uni-freiburg.de}
#'
#' @export
reduceReplicates <- function(file, select = "Condition", datatrans = NULL) {

  # File format definiton
  fmtnames <- c("Species", "Time",  "Value", "Condition")
  fmtnamesnumber <- length(fmtnames)

  # Read data and sanity checks
  data <- read.csv(file)
  if (length(intersect(names(data), fmtnames)) != fmtnamesnumber) {
    stop(paste("Mandatory column names are:", paste(fmtnames, collapse = ", ")))
  }

  # Transform data if requested
  if (is.character(datatrans)) {
    x <- data$Value
    data$Value <- eval(parse(text = datatrans))
  }

  # Experiments are usually repeated multiple times possibly under different
  # conditions. The column "Condition" in the data thus groups the data per
  # condition. However, sometimes, a more fine grained grouping is desirable. In
  # this case, any number of additional columns can be append to the data. These
  # columns are referred to as "condition identifier". Which of the condition
  # identifiers are used to do the grouping is user defined by giving their
  # names in <select>. The mandatory column "Condition" is always used. The
  # total set of different conditions is thus defined by all combinations of
  # values occuring in the condition identifiers named for grouping. Mean and
  # variance is computed for each condition by averaging over measurements
  # recorded at the same time point. New conditions names are derived by merging
  # all conditions which were used in mean and std.
  select <- unique(c("Species", "Time", "Condition", select))
  condidnt <- Reduce(paste, subset(data, select = select))
  conditions <- unique(condidnt)
  reduct <- do.call(rbind, lapply(conditions, function(cond) {
    conddata <- data[condidnt == cond,]
    mergecond <- Reduce(paste, conddata[1, setdiff(select, c("Species", "Time"))])
    data.frame(Species = conddata[1, "Species"],
               Time = conddata[1, "Time"],
               Mean = mean(conddata[, "Value"]),
               Sd = sd(conddata[, "Value"]),
               df = nrow(conddata) - 1,
               Condition = mergecond)
  }))

  return(reduct)
}



#' Fit an error model
#'
#' @description Fit an error model to reduced replicate data, see
#'   \code{\link{reduceReplicates}}.
#'
#' @param data Reduced replicate data, see \code{\link{reduceData}}.
#' @param factors \option{data} is pooled with respect to the columns named
#'   here, see Details.
#' @param errorModel Character vector defining the error model. Use \kbd{x} to
#'   reference the independend variable, see Details.
#' @param par Inital values for the parameters of the error model.
#' @param plotting If TRUE, a plot of the pooled variance together with the fit
#'   of the error model is shown.
#' @param ... Parameters handed to the optimizer \code{\link{optim}}.
#'
#' @details The variance estimator using \eqn{n-1} data points is \eqn{chi^2}
#'   distributed with \eqn{n-1} degrees of freedom. Given replicates for
#'   consecutive time points, the sample variance can be assumed a function of
#'   the sample mean. By defining an error model which must hold for all time
#'   points, a maximum likelihood estimator for the parameters of the error
#'   model can be derived. The parameter \option{errorModel} takes the error
#'   model as a character vector, where the mean (independent variable) is
#'   refered to as \kbd{x}.
#'
#'   It is desireable to estimate the variance from many replicates. The
#'   parameter \option{data} must provide one or more columns which define the
#'   pooling of data. In case more than one column is announced by
#'   \option{factors}, all combinations are constructed. If, e.g.,
#'   \option{factors = c("Condition", "Species")} is used, where "Condition" is
#'   "a", "b", "c" and repeating and Species is "d", "e" and repeating, the
#'   effective conditions used for pooling are "a d", "b e", "c d", "a e", "b
#'   d", and "c e".
#'
#'   By default, a plot of the pooled data, Sigma and its confidence bound at
#'   68\% and 95\% is shown.
#'
#' @return Returned is a data frame with the first columns identical to
#'   \option{data}. Appended are fit values of the parameters of the error
#'   model, with the column names equal to the parameter names and the estimated
#'   uncertainty Sigma. Sigma is derived by evaluating the error model with the
#'   fit parameters. The error model is appended as the attribute "errorModel".
#'
#'   Confidence bounds for Sigma at confidence level 68\% and 95\% are
#'   calculated, Their values come next in the returned data frame. Finally, the
#'   effective conditions are appended to easily check how the pooling was done.
#'
#' @author Wolfgang Mader, \email{Wolfgang.Mader@@fdm.uni-freiburg.de}
#'
#' @export
fitErrorModel <- function(data, factors, errorModel = "exp(s0)+exp(srel)*x^2",
                          par = c(s0 = 1, srel = .1), plotting = TRUE, ...) {

  # Assemble conditions
  condidnt <- Reduce(paste, subset(data, select = factors))
  conditions <- unique(condidnt)


  # Fit error model
  nColData <- ncol(data)
  data <- cbind(data, as.list(par), Sigma = NA)

  for (cond in conditions) {
    subdata <- data[condidnt == cond,]
    x <- subdata$Mean
    y <- subdata$Sd
    df <- subdata$df

    obj <- function(par) {
      value <- with(as.list(par), {
        z <- eval(parse(text = errorModel))
        sum(log(z)-log(dchisq((df)*(y^2)/z, df = df)), na.rm = TRUE)
      })
      return(value)
    }

    fit <- optim(par = par, fn = obj, ...)
    sigma <- sqrt(with(as.list(fit$par), eval(parse(text = errorModel))))
    data[condidnt == cond, -(nColData:1)] <- data.frame(as.list(fit$par), Sigma = sigma)
  }


  # Calculate confidence bounds about sigma
  p68 <- (1-.683)/2
  p95 <- (1-.955)/2
  data$cbLower68 <- sqrt(data$Sigma^2*qchisq(p = p68, df = data$df)/data$df)
  data$cbUpper68 <- sqrt(data$Sigma^2*qchisq(p = p68, df = data$df, lower.tail = FALSE)/data$df)
  data$cbLower95 <- sqrt(data$Sigma^2*qchisq(p = p95, df = data$df)/data$df)
  data$cbUpper95 <- sqrt(data$Sigma^2*qchisq(p = p95, df = data$df, lower.tail = FALSE)/data$df)


  # Assemble result
  data <- cbind(data, condidnt)
  attr(data, "errorModel") <- errorModel


  # Plot if requested
  if (plotting) {
    print(ggplot(data, aes(x=Mean)) +
            geom_point(aes(y=Sd)) +
            geom_line(aes(y=Sigma)) +
            geom_ribbon(aes(ymin=cbLower95, ymax=cbUpper95), alpha=.3) +
            geom_ribbon(aes(ymin=cbLower68, ymax=cbUpper68), alpha=.3) +
            facet_wrap(~condidnt, scales = "free") +
            scale_y_log10() +
            theme_dMod()
    )}

  return(data)
}



#' Concatenate parameter lists
#'
#' @description Fitlists carry an fit index which must be held unique on merging
#' multiple fitlists.
#'
#' @param ... Fitlists
#' @param shiftrownames If true (default) the row names of the returned data frame equal the fit index.

#' @author Wolfgang Mader, \email{Wolfgang.Mader@@fdm.uni-freiburg.de}
#'
#' @export
#' @export c.parlist
c.parlist <- function(...) {
    m_fits <- lapply(list(...), unclass)
    m_fits <- do.call(c, m_fits)
    m_parlist <- mapply(function(fit, idx) {
      if (is.list(fit)) fit$index <- idx
      return(fit)
      }, fit = m_fits, idx = seq_along(m_fits))
    
    return(as.parlist(m_parlist))
  }



#' @export
python.version.sys <- function(version = NULL) {
  
  # Which python versions are installed on the system
  m_sysPath <- strsplit(Sys.getenv("PATH"), ":")
  m_sysPath <- m_sysPath[[1]]
  m_python <- do.call(rbind, lapply(m_sysPath, function(p) {
    m_py <- dir(p, pattern = "^python[0-9,.]+$")
    if (length(m_py) != 0) {
      return(file.path(p, m_py))
    } else {
      return(NULL)
    }
    }))
  
  m_version <- strsplit(m_python, "python")
  m_version <- lapply(m_version, function(p) {
    return(p[2])
    })
  
  m_python <- as.data.frame(m_python)
  names(m_python) <- m_version
  
  
  if (is.null(version)) {
    return(m_python)  
  } else {
    # Is requested version available
    if (any(m_version == version)) {
      return(as.character(m_python[[version]]))
      attr(out, "version") <- m_version
    } else {
      return(NULL)
    }
  }
}



#' @export
python.version.rpython <- function() {
  python.exec(c("def ver():", "\timport sys; return list(sys.version_info)"))
  m_info <- as.data.frame(python.call("ver"))
  names(m_info) <- c("major", "minor", "micro", "releselevel", "serial")
  
  m_version <- paste0(m_info[[1]], ".", m_info[[2]])
  attr(m_version, "info") <- m_info

  return(m_version)
}



#' @export
python.version.request <- function(version) {
  
  # Is rPythen installed and linked against requested python version?
  m_installed <- if (inherits(try(library(rPython)), "try-error")) FALSE else TRUE
  if (m_installed) {
    m_curVersion <- python.version.rpython()
    if (m_curVersion == version)
      return(TRUE)
    }
  
  # rPython not installed or linked agains wrong python version.
  m_sysVersion <- python.version.sys(version)
  if (is.null(m_sysVersion)) {
    msg <- paste0("Requested python version ", version, " not available\n",
                 "Your options are\n")
    cat(msg)
    print(python.version.sys())
  } else {
    # If rPython is already installed, double check with user
    if (m_installed) {
      msg <- paste0("rPython is installed on your system using python ", m_curVersion, "\n",
                    "Proceeding the installation enables ", version, " for R\n",
                    "This will prevent python programms needing version ", m_curVersion, " from executing\n",
                    "Do you want to abort [1] or procede [2] with the installation?\n")
      cat(msg)
      m_go <- scan(nmax = 1, what = integer(), quiet = TRUE)
      if (m_go != 2) {
        stop("Installation aborted")
      }
    }
    
    try(detach("package:rPython", unload = TRUE), silent = TRUE)
    Sys.setenv(RPYTHON_PYTHON_VERSION = version)
    install.packages("rPython")
    library(rPython)
  }
}
dlill/dMod2ndsens documentation built on May 30, 2019, 1:37 p.m.