# STANDARD PACKAGES ####################################################### library(magrittr) # mainly for %>% ) and conenient aliases (eg add) library(Hmisc) # also loads ggplot2, lattice, survival, Formula library(dplyr) # for dataframe cleaing/manipulation; includes the pipe library(tidyr) # for dataframe cleaing/manipulation library(viridis) # Nice color scales library(pander) # format wide tables library(knitr) # format simple tables # AUC PACKAGES ####################################################### library(MESS) # for trapezoidal and spline AUC function: auc() library(pracma) # for trapezoidal AUC calculation: trapz() library(Bolstad2) # for Simpon AUC function: sintegrate() library(PKNCA) # for PK-based AUC fx: pk.calc.auc() library(sfsmisc) # for linear/spline AUC fx: integrate.xy() library(metabolicR) ###MY GGPLOT THEME theme_set(theme_bw() + theme(axis.title.x=element_text(face='bold',size=rel(1.25)), axis.title.y=element_text(face='bold',size=rel(1.25)))) ### DAta to use for examples data <- data.frame( id=rep(1,7), genotype=rep("WT",7), time=c(0,15,30,45,60,90,120), glucose=c(120, 343, 297, 277, 253, 166, 162)) dataUnordered <- data.frame( id=rep(1,7), genotype=rep("WT",7), time=c(0,30,15,45,60,90,120), # switched t=15,30 glucose=c(120, 297, 343, 277, 253, 166, 162)) # switched glucose for t=15,30 to keep raw data the same data2 <- data.frame( # for id=1, data is the same as data and dataUnordered id=c(rep(1,7), rep(2,7),rep(3,7)), genotype=c(rep("WT",7),rep("WT",7),rep("KO",7)), time=rep(c(0,15,30,45,60,90,120), 3), glucose=c(120, 343, 297, 277, 253, 166, 162, 100, 242, 235, 189, 156, 160, 119, 142, 369, 377, 335, 316, 217, 184))
Calculating the area-under-the-curve (AUC) is a common task for statistical analysis of longitudinal data and for pharmacokinetic analysis. For a measurement repeated over time, the AUC can be estimated using several different methods:
This summary details functions useful for calculating the AUC using raw data for individual subjects. It does not focus on AUC for an ROC or for population data AUC for pharmacokinetics (see PKNCA package). Table 1 summarizes a few available packages, along with the result obtained with the simple example:
To estimate AUC based on actaul raw data (time, y), several packages provide functions to calculate AUC.
metabolicR::auc_wide
). This function expects the "measure" and the "time" to be encoded in the variable name (e.g. "glucose_120"). view code to see examples for each function
compare Result with ResultX (unordered time) to determine if unordered time values are processed correctly. Some functions throw an error, but some return an incorrect result.
AUCPackages <- read.csv("~/R Working folder/AUCPackages.csv") AUCPackages <- within(AUCPackages, { Result<-c( MESS::auc(data$time, data$glucose), sfsmisc::integrate.xy(data$time, data$glucose, use.spline = FALSE), pracma::trapz(data$time, data$glucose), pracma::cumtrapz(data$time, data$glucose)[7,], caTools::trapz(data$time, data$glucose), PKNCA::pk.calc.auc.last(data$glucose, data$time, method="linear"), PKNCA::pk.calc.auc.last(data$glucose, data$time, method='lin up/log down'), Bolstad2::sintegral(data$time, data$glucose)$int, MESS::auc(data$time, data$glucose, type="spline"), sfsmisc::integrate.xy(data$time, data$glucose, use.spline = TRUE)) } ) AUCPackages <- within(AUCPackages, { ResultX<-c( MESS::auc(dataUnordered$time, dataUnordered$glucose), sfsmisc::integrate.xy(dataUnordered$time, dataUnordered$glucose, use.spline = FALSE), pracma::trapz(dataUnordered$time, dataUnordered$glucose), pracma::cumtrapz(dataUnordered$time, dataUnordered$glucose)[7,], caTools::trapz(dataUnordered$time, dataUnordered$glucose), NA, #PKNCA::pk.calc.auc.last(dataUnordered$glucose, dataUnordered$time, method="linear"), NA, #PKNCA::pk.calc.auc.last(dataUnordered$glucose, dataUnordered$time, method='lin up/log down'), Bolstad2::sintegral(dataUnordered$time, dataUnordered$glucose)$int, MESS::auc(dataUnordered$time, dataUnordered$glucose, type="spline"), sfsmisc::integrate.xy(dataUnordered$time, dataUnordered$glucose, use.spline = TRUE)) } ) knitr::kable(AUCPackages, digits=0)
We will use a simple glucose tolerance test dataframe.
pander(data)
gttPoly <- data %>% select(id, time, glucose) %>% rbind(list(.$id[.$time==0], max(.$time), .$glucose[.$time==0])) %>% rbind(list(.$id[.$time==0], 0, .$glucose[.$time==0])) data %>% ggplot(aes(time, glucose)) + geom_line(color="red", size=1.2) + geom_point(size=3) + xlab("Time (minutes)") + ylab("Glucose (mg/dL)") + ylim(0,350)
The area under a curve can be estimated simply by breaking the curve up into multiple time intervals, and using the trapezoid formula to calculate the area of each interval (Area = 1/2 (base1 + base2) * height
). This method is standard and should produce the same result regardless of the package or function used. Some formulas provide smoothing options which create a cubic spline that deviates from the standard trapezoid method.
I have chosen the sfsmisc::integrate.xy
function because it will properly handle unordered time points, and also provides the flexibility to fit a spline if desired. Other functions listed in the summary table provide additional features, and could be substituted as well, but I would avoid functions which return a number without warning that the time is unordered.
AUC including baseline
AUC <- with(data, sfsmisc::integrate.xy(time, glucose, use.spline = FALSE)) pFull<- with(data, data.frame( time= c(time, max(time), 0, 0), glucose=c(glucose, 0, 0, glucose[data$time==0]) )) data %>% ggplot(aes(time, glucose)) + geom_polygon(data=pFull, aes(time, glucose), fill="red", alpha=0.3) + geom_line(color="red", size=1.2) + geom_point(size=3) + xlab("Time (minutes)") + ylab("Glucose (mg/dL)") + ylim(0,350) + geom_text(aes(25,200), label=paste("AUC = ", AUC))
When the response is the main value of interest, it may not make sense to include the contribution from varying baselin. To baseline adjust, the t=0 value * time_max_ needs to be subtracted from the AUC.
with(data, sfsmisc::integrate.xy(time, glucose, use.spline = FALSE) - glucose[time==0]*max(time))
Baseline-corrected AUC
pBlCorr<- with(data, data.frame( time= c(time, max(time), 0), glucose=c(glucose, glucose[data$time==0], glucose[data$time==0]) )) polyBL <- data %$% data.frame( time=c(0, 120, 120, 0), glucose=c(glucose[time==0], glucose[time==0], 0, 0)) BL <- data %$% glucose[time==0]*120 data %>% ggplot(aes(time, glucose)) + geom_polygon(data=pBlCorr, aes(time, glucose), fill="red", alpha=0.3) + geom_polygon(data=polyBL, aes(time, glucose), fill="grey", alpha=0.5) + geom_line(color="red", size=1.2) + geom_point(size=3) + xlab("Time (minutes)") + ylab("Glucose (mg/dL)") + ylim(0,350) + geom_text(aes(25,200), label=paste("AUC_adjusted = ", round(AUC-BL,0))) + geom_text(aes(25,60), label=paste("AUC_bl = ", BL))
pk.calc.auc
The PKNCA Package (by Pfizer) is designed for Pharmacokinetic data, and most functions require the data to be converted to a special format to be interpreted by the package functions. Calculation using the linear trapezoidal method is required by the OGD and FDA, and is the standard for bioequivalence trials.
By default the function uses a 'lin up/log down' method, so you must specify method="linear"
to obtain the standard trapezoidal calculation:
PKNCA::pk.calc.auc.last(data$glucose, data$time, method="linear")
PKNCA pk.calc.auc
uses a method other than the standard trapezoidal calculation. The default method is the "linear up-log down" method, which is useful for PK modeling. When concentrations are increasing, the linear trapezoid method is used. When concentrations are decreasing, it uses logarithmic trapezoidal estimation. This is felt to better represent the declining values typical in pharmacokinetics, because elimination is typically best expressed logarithmically.
Default method:
PKNCA::pk.calc.auc.last(data$glucose, data$time, method='lin up/log down')
For a dataframe with multiple subjects arranged in Longitudinal format, this can be summarised as follows.
plot <- data2 %>% group_by(id, genotype) %>% ggplot(aes(time, glucose, group=as.factor(id), color=as.factor(id))) + geom_line(size=1.2) + scale_color_brewer(palette="Set1") + xlab("Time (minutes)") + ylab("Glucose (mg/dL)") plot
data2 %>% group_by(id, genotype) %>% summarise(glucoseAUC=sfsmisc::integrate.xy(time, glucose, use.spline=FALSE), glucose_0=glucose[time==0], bl=glucose_0*max(time), glucoseAUCbl=glucoseAUC-bl) %>% knitr::kable(digits=0) # formats for printing table
Many datasets come in wide format, and to calculate AUC for multiple measures, it may be easier to hand calculate the summary AUC without converting to "long format." This can be reshaped to "long" format using the tidyr
package, but this may not be needed to run a quick calculation. To calculate simply and quickly, the auc_wide
function should handle most simple applications.
auc_wide
Requirements:
dataframe in a wide format
variable naming:
Results are appended to the end of the dataframe, with the default name "auc_measure."
varname
= something else, which is appended to "auc_" dfAUCWide1 <- data.frame( id=1:5, xxx_glucose_000=c(120, 120, 140, 140, 150), GIBBERISHglucose_030=c(250, 250, 270, 325, 400), GIBBERISHglucose_030x=c(250, 250, 270, 325, 400), ZzZglucose_060 =c(150, 210, 300, 350, 275)) dfAUCWide1
auc_wide(dfAUCWide1, "glucose")
auc_wide(dfAUCWide1, "glucose", varname="GLUC")
dfAUCWide5 <- data.frame( NAME=c("Marcus", "DeAndre", "Derrick", "Bishop", "Reggie"), glucose_ogtt_000=c(120, 120, 140, 140, 150), glucose_ogtt_030=c(250, 250, 270, 325, 400), glucose_ogtt_060 =c(150, 210, 300, 350, 275), glucose_hg_060 =c(150, 210, 300, 350, 275))
The default separator doesn't work because "ogtt" is in the variable name.
# auc_wide(dfAUCWide5, "glucose") # Do not run- creates an error
This can be fixed by indicating the separator explicitly.
auc_wide(dfAUCWide5, "glucose", sep="_ogtt_")
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.