R/autoHelpers.R

Defines functions tidyCoefNames ordinalIndicator getTransFun fivenumBy readData makeAutoEvmStuff readAutoInputs

Documented in fivenumBy getTransFun makeAutoEvmStuff ordinalIndicator readAutoInputs readData tidyCoefNames

#' Read user input from text file in order to set up automated extreme value modelling
#'
#' @aliases makeAutoEvmStuff
#' @param file Character string giving the name of the file.
#' @param make Whether or not to make additional objects based on the input file
#'   and system information. Defaults to \code{make=TRUE}.
#' @param assign. Whether or not to assign the values to R objects. Defaults to \code{assign.=TRUE}.
# @param x In \code{makeAutoEvmStuff}, a named list, presumably passed by \code{readAutoInputs}.
#' @return Returns a names list, invisibly, containing the values of the named fields
#'   in the input file. However, the named items in the list are also assigned to
#'   position 1 unless the call specifies \code{assign.=FALSE}.
#' @export readAutoInputs
readAutoInputs <- function(file, make=TRUE, assign.=TRUE){
  input <- readLines(file)
  ii <- substr(input, 1, 1)
  input <- input[ii != "#"] # drop comment lines

  input <- strsplit(input, "#")

  # i is a list with 1 element for each line of the input file
  # i[[j]][1] gives the object name and value, anything else is junk

  input <- unlist(lapply(input, function(x) x[1]))

  output <- strsplit(input, ": ")
  nms <- unlist(lapply(output, function(x) x[1]))
  output <- unlist(lapply(output, function(x) x[2]))
  names(output) <- nms
  # Drop empty lines
  output <- output[!is.na(names(output))]

  # Strip trailing and leading whitespace, turn into list (so that the next few
  # lines will work)
  output <- as.list(strstrip(output))

  # Make numerics of things that should be numeric
  output$basevisit <- as.numeric(output$basevisit)
  output$maxvisit <- as.numeric(output$maxvisit)
  output$iter <- as.numeric(output$iter)
  output$thin <- as.numeric(output$thin)
  output$burn <- as.numeric(output$burn)
  output$ULN <- as.numeric(output$ULN)
  
  # Deal with things that should be vectors (separator is comma):
  # popyes, returnLevels, multiplesOfULN
  output$popyes <- strstrip(unlist(strsplit(output$popyes, ",")))
  output$models <- strstrip(unlist(strsplit(output$models, ",")))
  if (length(output$models) > 1)
    stop("Currently, only a single model formula is allowed")
  output$returnLevels <- as.numeric(unlist(strsplit(output$returnLevels, ",")))
  output$multiplesOfULN <- as.numeric(unlist(strsplit(output$multiplesOfULN, ",")))

  output$stopgoULN <- eval(parse(text=output$stopgoULN))
  output$stopProb <- eval(parse(text=output$stopProb))
  output$goProb <- eval(parse(text=output$goProb))
  
  if (make) output <- makeAutoEvmStuff(output)
  
  for (i in 1:length(output)){
    assign(names(output)[i], output[[i]], pos=1)
  }

  invisible(output)
}

makeAutoEvmStuff <- function(x){
  x$dataPath <- file.path(x$dataRoot, x$drug, x$study)
  x$author <- paste0("Author: ", Sys.getenv("LOGNAME"))
  x$titleString <- paste0("Autogenerated extreme value modelling report: ",
                          x$drug, ", Study ", x$study)
  
  tfun <- getTransFun(x$trans)
  x$itfun <- tfun[[2]]
  x$tfun <- tfun[[1]]
  x
}

#' Strip leading and trailing whitespace from a character string
#' 
#' @param x A vector of character strings.
#' @param leading Whether or not to remove leading whitespace. Defaults to \code{leading=TRUE}.
#' @param trailing Whether or not to remove trailing whitespace. Defaults to \code{trailing=TRUE}.
#' @details Code copied from http://stackoverflow.com/questions/2261079/how-to-trim-leading-and-trailing-whitespace-in-r,
#'   2014-11-26.
#' @export strstrip
strstrip <- function (x, leading=TRUE, trailing=TRUE){
  if(!leading & !trailing) { x
  } else if (!leading) { sub("\\s+$", "", x)
  } else if (!trailing) {sub("^\\s+", "", x)
  } else { gsub("^\\s+|\\s+$", "", x)
  }
}


#' Read a data file, figuring out if it is a SAS file or something else
#' 
#' @param file The name, including the full path, of the data file
#' @param type The type, as indicated by the file extension. At present, must
#'             be either 'sas7bdat', 'csv' or NULL (the default). If NULL, the
#'             function uses the file extension to guess.
#
#' @details If the file appears to be a SAS file, the function uses read.sas7bdat
#'          from Matt Shotwell's sas7bdat package. Currently, compressed SAS
#'          data files are not supported (but see Shotwell's sas7bdat.parso
#'          package).
#' @importFrom tools file_ext
#' @export
readData <- function(file, type=NULL){
  if (is.null(type)) type <- file_ext(file)
  if (type == "sas7bdat"){
    res <- read.sas7bdat(file)
  } else if (type == ".csv") {
    res <- read.csv(file)
  } else {
    stop("File type (extension) must be sas7bdat or csv")
  }
  invisible(res)
}

#' Five number summary for a variable in a data.frame, by a subsetting variable
#' 
#' @param x A \code{data.frame}
#' @param what A character string giving the name of the variable to be summarized
#' @param which A character string giving the name of the subsetting variable
#' @param type A number passd into \code{quantile} telling it how to compute the
#'        quantiles. Defaults to \code{type=3} which should give output that
#'        matches the default output from SAS.
#' @param N Whether to include the sample size in the summary (making it a 6 number
#'        summary). Defaults to \code{N=FALSE}.
#' @return A matrix, each row of which gives the five number summary of
#'         \code{x[, what]} for each value of \code{x[, which]}.
#' @details The 'five number summary' is the minimum, quartiles and maximum,
#'          where the quartiles are computed using SAS's default approach. These
#'          will generally not be the same as the hinges as originally described
#'          by Tukey.
#' @export
fivenumBy <- function(x, what, which, type=3, N=FALSE){
  if (!N) fun <- function(x) quantile(x, type=type) # type 3 should match SAS
  else fun <- function(x) c(N=length(x), quantile(x, type=type))

  res <- tapply(x[, what], x[, which], FUN=fun)
  res <- t(sapply(res, as.vector))
  if (!N) colnames(res) <- c("Min.", "Q1", "Median", "Q3", "Max.")
  else colnames(res) <- c("N", "Min.", "Q1", "Median", "Q3", "Max.")
  res
} 

#' Get a transformation function and its inverse from a character string
#' 
#' @param x A character string indicating 'log', 'sqrt' or 'I'.
#' @return A names list with 2 elements, the first of which, 'tfun' is the
#'         appropriate transformation function; the second of which, 'itfun', is
#'         the inverse of 'tfun'
#' @export
getTransFun <- function(x){
  if (x == "log"){
    tfun <- log; itfun <- exp
  } else if (x == "sqrt"){
    tfun <- sqrt; itfun <- function(x) x*x
  } else if (x == "I"){
    tfun <- itfun <- I
  } else {
    stop("only log, sqrt and identity transformations are supported")
  }
  list(tfun=tfun, itfun=itfun)
}

#' Get the ordinal indicator for an integer
#' 
#' @param x An integer
#' @return A character string giving the ordinal indicator for x: 'st' if the last
#'         digit of x is 1, 'nd' if it is 2, 'rd' if it is 3, 'th' otherwise.
#' @export
ordinalIndicator <- function(x){
  if (!is.integer(x)) stop("x must by an integer")
  x <- as.character(x)
  x <- substring(x, nchar(x))
  switch(x, "1"="st", "2"="nd", "3"="rd", "4"=, "5"=, "6"=, "7"=, "8"=, "9"=, "0"="th")
}

#' Remove the factor name from the levels in the names of model coefficients
#' 
#' @param x A model object with an 'xlevels' element
#' @details If there are no factors in the model, the coefficient names are
#'          returned. Otherwise, the function strips the factor name of each
#'          coefficient name, for each factor. If two factors have overlapping
#'          level names, the function will fail.
#' @export
tidyCoefNames <- function(x){
  if (! "xlevels" %in% names(x)) stop("x should be an object with an 'xlevels' element")
  
  nms <- names(coef(x))
  nf <- length(x$xlevels) # number of factors in the model

  if (nf == 0) { # no factors
    nms
  } else {
    # Check if any factors have matching levels
    levs <- unlist(x$xlevels)
    if (length(levs) > length(unique(levs)))
      stop("At least one level is used in more than one factor so stripping the factor name would leave non-unique names")
    for (i in 1:length(x$xlevels)){
      r <- paste0(names(x$xlevels)[i], x$xlevels[[i]])
      whn <- na.omit(match(nms, r))
      whr <- na.omit(match(r, nms))
      nms[whr] <- x$xlevels[[i]][whn]
    }
    nms
  }
}
harrysouthworth/margarita documentation built on Aug. 19, 2021, 5 a.m.