# R/auc_wide.R

#' Calculate AUC from a wide-format data frame
#'
#' Calculates the area under the curve (AUC) for repeated measures stored in
#' WIDE format, where each row encodes an individual subject and each time
#' point is held in its own column. Each row is integrated with
#' sfsmisc::integrate.xy(), which can handle unordered time input.
#' Variables used must follow the naming convention "xxxmeasure_123".
#'
#' @param dfWide A data frame with repeated measures in "wide" format.
#' @param measure Defines the measure to assess, and must be encoded in the
#'   variable name. Must be quoted. It is used as part of a regular expression.
#'   Also defines the outcome variable name, unless 'varname' is defined as
#'   something else.
#' @param sep Defines the separator between the measure and time. Defaults to
#'   "_". It is used as part of a regular expression. Time must be encoded as
#'   a whole number AT THE END of the variable name
#'   (e.g., "measure_000, measure_001, measure_002...").
#' @param varname Defines the output variable name, to be appended to the prefix
#'   "auc_". An existing column with that name is overwritten.
#' @param use.spline Whether to use the linear trapezoid rule
#'   (use.spline=FALSE), or to integrate under a cubic spline
#'   (use.spline=TRUE). Defaults to FALSE.
#' @param ... Passed on to `sfsmisc::integrate.xy()` (for example the
#'   integration limits `a` and `b`, or `xtol`); see its documentation for the
#'   available options.
#' @return The input data frame with an added variable (column) for AUC. The
#'   new variable has the attributes "method", "sourceVars", and "times",
#'   which capture how the value was calculated. If `dfWide` is not a data
#'   frame, a warning is raised and `NULL` is returned invisibly.
#' @export
#' @examples
#' # A very clean dataframe
#' dfWide1 <- data.frame(
#'   glucose_0  =c(120, 120, 140, 140, 150),
#'   glucose_30 =c(250, 250, 270, 325, 400),
#'   glucose_60 =c(150, 210, 300, 350, 275))
#' auc_wide(dfWide1, "glucose")
#'
#' # A messier dataframe: glucose from different study type, time unordered
#' dfWide5 <- data.frame(
#'   glucose_ogtt_030=c(250, 250, 270, 325, 400),
#'   XXXglucose_ogtt_000=c(120, 120, 140, 140, 150),
#'   glucose_ogtt_060 =c(150, 210, 300, 350, 275),
#'   glucose_hg_060 =c(150, 210, 300, 350, 275))
#'
#' auc_wide(dfWide5, "glucose", sep="_ogtt_")  # works
#' @seealso \code{\link[sfsmisc]{integrate.xy}}
auc_wide <- function(dfWide, measure, sep = "_", varname = measure,
                     use.spline = FALSE, ...) {
  # Kept as a warning (not an error) so existing callers are not broken.
  if (!is.data.frame(dfWide)) {
    warning("Data needs to be a dataframe")
    return(invisible(NULL))
  }

  # Locate the columns named "<measure><sep><digits>".
  col_names <- names(dfWide)
  value_cols <- grep(paste0(measure, sep, "[0-9]+$"), col_names)
  if (length(value_cols) == 0L) {
    stop(
      "No variables found matching '", measure, sep,
      "' followed by a numeric time",
      call. = FALSE
    )
  }
  source_vars <- col_names[value_cols]

  # Take the TRAILING digits as the time. Extracting the first digit run would
  # pick up digits that belong to the measure name itself (e.g. "il6_030").
  # Every selected name ends in digits, so regmatches() returns one value each.
  times <- as.numeric(regmatches(source_vars, regexpr("[0-9]+$", source_vars)))

  # Integrate each subject (row) separately. `use.spline` must be passed by
  # name: the third positional argument of integrate.xy() is the lower
  # integration limit `a`, not `use.spline`.
  n_rows <- nrow(dfWide)
  auc <- rep(NA_real_, n_rows)
  for (i in seq_len(n_rows)) {
    auc[i] <- sfsmisc::integrate.xy(
      times,
      as.numeric(dfWide[i, value_cols]),
      use.spline = use.spline,
      ...
    )
  }

  out_name <- paste0("auc_", varname)

  # Output the method details
  cat("Calculated AUC (uses sfsmisc::integrate.xy, use.spline=", use.spline, ")", "\n")
  cat("  input variables = ", source_vars, sep=" ", fill=TRUE)
  cat("  output variable =", out_name, "\n")
  cat("  times =", times,"\n")
  cat("\n")

  # Assign attributes to capture source and method
  attr(auc, "method") <- c("sfsmisc::integrate.xy", use.spline)
  attr(auc, "sourceVars") <- source_vars
  attr(auc, "times") <- times

  # Write straight to the final column name so that no temporary column can
  # clobber an existing user variable.
  dfWide[[out_name]] <- auc
  dfWide
}
# JMLuther/metabolicR documentation built on May 7, 2019, 10:12 a.m.