# STANDARD PACKAGES #######################################################
library(magrittr)   # mainly for %>% ) and conenient aliases (eg add)
library(Hmisc)      # also loads ggplot2, lattice, survival, Formula
library(dplyr)      # for dataframe cleaing/manipulation; includes the pipe
library(tidyr)      # for dataframe cleaing/manipulation
library(viridis)    # Nice color scales
library(pander)     # format wide tables
library(knitr)      # format simple tables

# AUC PACKAGES #######################################################
library(MESS)       # for trapezoidal and spline AUC function: auc()
library(pracma)     # for trapezoidal AUC calculation: trapz()
library(Bolstad2)   # for Simpon AUC function: sintegrate() 
library(PKNCA)      # for PK-based AUC fx: pk.calc.auc()
library(sfsmisc)    # for linear/spline AUC fx: integrate.xy()

library(metabolicR)

###MY GGPLOT THEME
theme_set(theme_bw() +
  theme(axis.title.x=element_text(face='bold',size=rel(1.25)),
        axis.title.y=element_text(face='bold',size=rel(1.25))))

### DAta to use for examples
data <- data.frame(
  id=rep(1,7), 
  genotype=rep("WT",7),
  time=c(0,15,30,45,60,90,120),
  glucose=c(120, 343, 297, 277, 253, 166, 162))
dataUnordered <- data.frame(
  id=rep(1,7), 
  genotype=rep("WT",7),
  time=c(0,30,15,45,60,90,120), # switched t=15,30
  glucose=c(120, 297, 343, 277, 253, 166, 162)) # switched glucose for t=15,30 to keep raw data the same

data2 <- data.frame( # for id=1, data is the same as data and dataUnordered
  id=c(rep(1,7), rep(2,7),rep(3,7)), 
  genotype=c(rep("WT",7),rep("WT",7),rep("KO",7)),
  time=rep(c(0,15,30,45,60,90,120), 3),
  glucose=c(120, 343, 297, 277, 253, 166, 162,
            100, 242, 235, 189, 156, 160, 119,
            142, 369, 377, 335, 316, 217, 184))

Introduction

Calculating the area-under-the-curve (AUC) is a common task for statistical analysis of longitudinal data and for pharmacokinetic analysis. For a measurement repeated over time, the AUC can be estimated using several different methods:

This summary details functions useful for calculating the AUC using raw data for individual subjects. It does not focus on AUC for an ROC or for population data AUC for pharmacokinetics (see PKNCA package). Table 1 summarizes a few available packages, along with the result obtained with the simple example:

To estimate AUC based on actaul raw data (time, y), several packages provide functions to calculate AUC.

Summary Table:

view code to see examples for each function
compare Result with ResultX (unordered time) to determine if unordered time values are processed correctly. Some functions throw an error, but some return an incorrect result.

AUCPackages <- read.csv("~/R Working folder/AUCPackages.csv")
AUCPackages <- within(AUCPackages, { Result<-c(
       MESS::auc(data$time, data$glucose),
       sfsmisc::integrate.xy(data$time, data$glucose, use.spline = FALSE),
       pracma::trapz(data$time, data$glucose),
       pracma::cumtrapz(data$time, data$glucose)[7,],
       caTools::trapz(data$time, data$glucose),
       PKNCA::pk.calc.auc.last(data$glucose, data$time, method="linear"),
       PKNCA::pk.calc.auc.last(data$glucose, data$time, method='lin up/log down'),
       Bolstad2::sintegral(data$time, data$glucose)$int,
       MESS::auc(data$time, data$glucose, type="spline"),
       sfsmisc::integrate.xy(data$time, data$glucose, use.spline = TRUE))
}     )
AUCPackages <- within(AUCPackages, { ResultX<-c(
       MESS::auc(dataUnordered$time, dataUnordered$glucose),
       sfsmisc::integrate.xy(dataUnordered$time, dataUnordered$glucose, use.spline = FALSE),
       pracma::trapz(dataUnordered$time, dataUnordered$glucose),
       pracma::cumtrapz(dataUnordered$time, dataUnordered$glucose)[7,],
       caTools::trapz(dataUnordered$time, dataUnordered$glucose),
       NA, #PKNCA::pk.calc.auc.last(dataUnordered$glucose, dataUnordered$time, method="linear"),
       NA, #PKNCA::pk.calc.auc.last(dataUnordered$glucose, dataUnordered$time, method='lin up/log down'),
       Bolstad2::sintegral(dataUnordered$time, dataUnordered$glucose)$int,
       MESS::auc(dataUnordered$time, dataUnordered$glucose, type="spline"),
       sfsmisc::integrate.xy(dataUnordered$time, dataUnordered$glucose, use.spline = TRUE))
}     )
knitr::kable(AUCPackages, digits=0)

Example data

We will use a simple glucose tolerance test dataframe.

The data:

pander(data)

Plot:

gttPoly <-   data %>% select(id, time, glucose) %>% 
  rbind(list(.$id[.$time==0], max(.$time), .$glucose[.$time==0])) %>% 
  rbind(list(.$id[.$time==0], 0, .$glucose[.$time==0]))
data %>% ggplot(aes(time, glucose)) +
  geom_line(color="red", size=1.2) +
  geom_point(size=3) +
  xlab("Time (minutes)") + ylab("Glucose (mg/dL)") + ylim(0,350) 

AUC Trapezoid Rule Calculation

The area under a curve can be estimated simply by breaking the curve up into multiple time intervals, and using the trapezoid formula to calculate the area of each interval (Area = 1/2 (base1 + base2) * height). This method is standard and should produce the same result regardless of the package or function used. Some formulas provide smoothing options which create a cubic spline that deviates from the standard trapezoid method.

I have chosen the sfsmisc::integrate.xy function because it will properly handle unordered time points, and also provides the flexibility to fit a spline if desired. Other functions listed in the summary table provide additional features, and could be substituted as well, but I would avoid functions which return a number without warning that the time is unordered.

Plot of standard AUC:

AUC including baseline

AUC <- with(data, sfsmisc::integrate.xy(time, glucose, use.spline = FALSE))
pFull<- 
  with(data,
       data.frame(
         time= c(time, max(time), 0, 0),
         glucose=c(glucose, 0, 0, glucose[data$time==0])
         ))
data %>% ggplot(aes(time, glucose)) +
  geom_polygon(data=pFull, aes(time, glucose), fill="red", alpha=0.3) +
  geom_line(color="red", size=1.2) +
  geom_point(size=3) +
  xlab("Time (minutes)") + ylab("Glucose (mg/dL)") + ylim(0,350) +
  geom_text(aes(25,200), label=paste("AUC = ", AUC))

Baseline Adjustment

When the response is the main value of interest, it may not make sense to include the contribution from varying baselin. To baseline adjust, the t=0 value * time_max_ needs to be subtracted from the AUC.

with(data, sfsmisc::integrate.xy(time, glucose, use.spline = FALSE) - 
       glucose[time==0]*max(time))

Plot of Baseline-corrected AUC:

Baseline-corrected AUC

pBlCorr<- 
  with(data,
       data.frame(
         time= c(time, max(time), 0),
         glucose=c(glucose, glucose[data$time==0], glucose[data$time==0])
         ))
polyBL <- data %$% data.frame(
  time=c(0, 120, 120, 0),
  glucose=c(glucose[time==0], glucose[time==0], 0, 0))
BL <-   data %$% glucose[time==0]*120
data %>% ggplot(aes(time, glucose)) +
  geom_polygon(data=pBlCorr, aes(time, glucose), fill="red", alpha=0.3) +
  geom_polygon(data=polyBL, aes(time, glucose), fill="grey", alpha=0.5) +
  geom_line(color="red", size=1.2) +
  geom_point(size=3) +
  xlab("Time (minutes)") + ylab("Glucose (mg/dL)") + ylim(0,350) +
  geom_text(aes(25,200), label=paste("AUC_adjusted = ", round(AUC-BL,0))) +
  geom_text(aes(25,60), label=paste("AUC_bl = ", BL))

PKNCA Package pk.calc.auc

The PKNCA Package (by Pfizer) is designed for Pharmacokinetic data, and most functions require the data to be converted to a special format to be interpreted by the package functions. Calculation using the linear trapezoidal method is required by the OGD and FDA, and is the standard for bioequivalence trials.
By default the function uses a 'lin up/log down' method, so you must specify method="linear" to obtain the standard trapezoidal calculation:

PKNCA::pk.calc.auc.last(data$glucose, data$time, method="linear")

PKNCA pk.calc.auc uses a method other than the standard trapezoidal calculation. The default method is the "linear up-log down" method, which is useful for PK modeling. When concentrations are increasing, the linear trapezoid method is used. When concentrations are decreasing, it uses logarithmic trapezoidal estimation. This is felt to better represent the declining values typical in pharmacokinetics, because elimination is typically best expressed logarithmically.
Default method:

PKNCA::pk.calc.auc.last(data$glucose, data$time, method='lin up/log down')

Multiple subjects

For a dataframe with multiple subjects arranged in Longitudinal format, this can be summarised as follows.

Plot:

plot <- data2 %>% group_by(id, genotype) %>% 
  ggplot(aes(time, glucose, group=as.factor(id), color=as.factor(id))) +
  geom_line(size=1.2) + scale_color_brewer(palette="Set1") +
  xlab("Time (minutes)") + ylab("Glucose (mg/dL)") 
plot 

Summary:

data2 %>% group_by(id, genotype) %>%  
  summarise(glucoseAUC=sfsmisc::integrate.xy(time, glucose, use.spline=FALSE),
            glucose_0=glucose[time==0],
            bl=glucose_0*max(time),
            glucoseAUCbl=glucoseAUC-bl) %>% 
  knitr::kable(digits=0)  # formats for printing table

Wide Format Data

Many datasets come in wide format, and to calculate AUC for multiple measures, it may be easier to hand calculate the summary AUC without converting to "long format." This can be reshaped to "long" format using the tidyr package, but this may not be needed to run a quick calculation. To calculate simply and quickly, the auc_wide function should handle most simple applications.

metabolicR Package auc_wide

Requirements:

Results are appended to the end of the dataframe, with the default name "auc_measure."

sample data:

dfAUCWide1 <- data.frame(
  id=1:5,
  xxx_glucose_000=c(120, 120, 140, 140, 150),
  GIBBERISHglucose_030=c(250, 250, 270, 325, 400),
  GIBBERISHglucose_030x=c(250, 250, 270, 325, 400),
    ZzZglucose_060 =c(150, 210, 300, 350, 275))
dfAUCWide1

application

auc_wide(dfAUCWide1, "glucose")

change the output variable name

auc_wide(dfAUCWide1, "glucose", varname="GLUC")

example with different separator

dfAUCWide5 <- data.frame(
   NAME=c("Marcus", "DeAndre", "Derrick", "Bishop", "Reggie"),
   glucose_ogtt_000=c(120, 120, 140, 140, 150),
   glucose_ogtt_030=c(250, 250, 270, 325, 400),
   glucose_ogtt_060 =c(150, 210, 300, 350, 275),
   glucose_hg_060 =c(150, 210, 300, 350, 275))

The default separator doesn't work because "ogtt" is in the variable name.

# auc_wide(dfAUCWide5, "glucose") # Do not run- creates an error

This can be fixed by indicating the separator explicitly.

auc_wide(dfAUCWide5, "glucose", sep="_ogtt_") 

Session Info

sessionInfo()


JMLuther/metabolicR documentation built on May 7, 2019, 10:12 a.m.