In dereksonderegger/BurkPx: Burkholderia Assay

library(BurkPx)
library(tidyverse)
library(modelr)
#library(broom)

# First load the Test and training sets
data('Human_BurkPx_test', package='BurkPx')
data('Human_BurkPx_train', package='BurkPx')

# Make one data frame that has both the test and training observations
Human_BurkPx <- rbind( 
  Human_BurkPx_test  %>% mutate(Set = 'Test'),
  Human_BurkPx_train %>% mutate(Set = 'Train') )
## Do I just want to do this for the Humans or for the NHP as well?

# For a given data frame for a particular antigen: 
#   1. Make a model using the training observations
#   2. Confront it with the test observations
#   3. Record the AUC taken from just the test observations.
#   4. Do a bootstrap procedure so as to get a 95% CI for the AUC

#df <- Human_BurkPx %>% filter(Antigen == 'LPSA', Type == 'IgG')
Results <- NULL
for( type in c('IgG', 'IgM') ){
  for( antigen in unique(Human_BurkPx$Antigen) ){
    for( timeGroup in c('Week1', 'Week2', 'Week3', 'Week4') ){
      for( rep in 1:40 ){

        train <- Human_BurkPx_train %>% filter( Antigen == antigen ) %>%
          filter( TimeGroup %in% c('Prior','Healthy','Week1','Week2') ) 
        test <- Human_BurkPx_test %>% filter( Antigen == antigen ) %>%
          filter( TimeGroup %in% c('Prior','Healthy', timeGroup) ) %>%
          sample_frac(1, replace = TRUE)

        auc <- tryCatch({
          model <- glm( Status=='Melioid' ~ Value,  family=binomial,
                      data = train )

          test %>% add_predictions(model, type='response') %>%
            pROC::roc(Status=='Melioid' ~ pred, data=.) %>% 
            magrittr::extract2('auc') %>% as.numeric()
        },  
        error=function(err){NA}, warning=function(war){NA})

        Results <- rbind(Results, data.frame(Type = type, Antigen = antigen, TimeGroup = timeGroup, rep=rep, AUC = auc))

      }
    }
  }
}

Human Single Antigen Predictors

For the human test/training data sets, we fit a model using only a single antigen on the training set (which included all the negatives but only positives from that time point) and then calculated the AUC values using the corresponding test set. The following table gives the top 10 antigens for the IgG and IgM serum AUC values.

# Print out a nice looking table that shows the best antigens.
Results %>% 
  group_by(Type, Antigen, TimeGroup) %>%
  summarize( lwr=quantile(AUC,.025, na.rm=TRUE), upr=quantile(AUC,.975, na.rm=TRUE), AUC=mean(AUC, na.rm=TRUE) ) %>% 
  mutate( AUC_string = str_c(round(AUC,3), ' (',round(lwr,3),',',round(upr,3),')') ) %>%
  filter( Type == 'IgG' ) %>%
  select(-lwr, -upr, -AUC) %>%
  spread(TimeGroup, AUC_string) %>%
  mutate( best = Week1 %>% str_sub(1, 5) %>% as.numeric() ) %>%
  arrange(desc(best)) %>% select(-best) %>% 
  group_by() %>% slice(1:10) %>%
  pander::pander()

Results %>%  
  group_by(Type, Antigen, TimeGroup) %>%
  summarize( lwr=quantile(AUC,.025, na.rm=TRUE), upr=quantile(AUC,.975, na.rm=TRUE), AUC=mean(AUC, na.rm=TRUE) ) %>% 
  mutate( AUC_string = str_c(round(AUC,3), ' (',round(lwr,3),',',round(upr,3),')') ) %>%
  filter( Type == 'IgM' ) %>%
  select(-lwr, -upr, -AUC) %>%
  spread(TimeGroup, AUC_string) %>%
  mutate( best = Week1 %>% str_sub(1, 5) %>% as.numeric() ) %>%
  arrange(desc(best)) %>% select(-best) %>% 
  group_by() %>% slice(1:10) %>%
  pander::pander()

Nonhuman Primates - Single Antigen Predictors

For the non-human primates we do something similar, and we utilize the NHP version 2 datasets. Here the single antigen model

data('NHP_BurkPx_train2', package='BurkPx')
data('NHP_BurkPx_test2',  package='BurkPx')


# Now actually do the work for each antigen!
NHP_Results <- NULL
for( type in c('IgG', 'IgM') ){
  for( antigen in unique(NHP_BurkPx2$Antigen) ){
    for( timeGroup in c('Week1', 'Week2', 'Week3', 'Week4') ){

      train <- NHP_BurkPx_train2 %>% filter( Antigen == antigen ) %>%
        filter( TimeGroup %in% c('Prior','Healthy','Week1','Week2') ) 
      test <- NHP_BurkPx_test2 %>% filter( Antigen == antigen ) %>%
        filter( TimeGroup %in% c('Prior','Healthy', timeGroup) )

      auc <- tryCatch({
        model <- glm( Status=='Melioid' ~ Value,  family=binomial,
                    data = train )

        test %>% add_predictions(model, type='response') %>%
          pROC::roc(Status=='Melioid' ~ pred, data=.) %>% 
          magrittr::extract2('auc') %>% as.numeric()
      },  
      error=function(err){NA}, warning=function(war){NA})

      NHP_Results <- rbind(NHP_Results, data.frame(Type = type, Antigen = antigen, TimeGroup = timeGroup, AUC = auc))
    }
  }
}

NHP_Results %>%
  group_by(Type, Antigen, TimeGroup) %>%
  summarize( lwr=quantile(AUC,.025, na.rm=TRUE), upr=quantile(AUC,.975, na.rm=TRUE), AUC=mean(AUC, na.rm=TRUE) ) %>% 
  mutate( AUC_string = str_c(round(AUC,3), ' (',round(lwr,3),',',round(upr,3),')') ) %>%
  filter( Type == 'IgG' ) %>%
  select(-lwr, -upr, -AUC) %>%
  spread(TimeGroup, AUC_string) %>%
  mutate( best = Week1 %>% str_sub(1, 5) %>% as.numeric() ) %>%
  arrange(desc(best)) %>% select(-best) %>%  
  group_by() %>% slice(1:10) %>%
  pander::pander()

NHP_Results %>%
  group_by(Type, Antigen, TimeGroup) %>%
  summarize( lwr=quantile(AUC,.025, na.rm=TRUE), upr=quantile(AUC,.975, na.rm=TRUE), AUC=mean(AUC, na.rm=TRUE) ) %>% 
  mutate( AUC_string = str_c(round(AUC,3), ' (',round(lwr,3),',',round(upr,3),')') ) %>%
  filter( Type == 'IgM' ) %>%
  select(-lwr, -upr, -AUC) %>%
  spread(TimeGroup, AUC_string) %>%
  mutate( best = Week1 %>% str_sub(1, 5) %>% as.numeric() ) %>%
  arrange(desc(best)) %>% select(-best) %>%  
  group_by() %>% slice(1:10) %>%
  pander::pander()