library(BurkPx) library(tidyverse) library(modelr) #library(broom)
# First load the Test and training sets data('Human_BurkPx_test', package='BurkPx') data('Human_BurkPx_train', package='BurkPx') # Make one data frame that has both the test and training observations Human_BurkPx <- rbind( Human_BurkPx_test %>% mutate(Set = 'Test'), Human_BurkPx_train %>% mutate(Set = 'Train') ) ## Do I just want to do this for the Humans or for the NHP as well?
# For a given data frame for a particular antigen: # 1. Make a model using the training observations # 2. Confront it with the test observations # 3. Record the AUC taken from just the test observations. # 4. Do a bootstrap procedure so as to get a 95% CI for the AUC #df <- Human_BurkPx %>% filter(Antigen == 'LPSA', Type == 'IgG') Results <- NULL for( type in c('IgG', 'IgM') ){ for( antigen in unique(Human_BurkPx$Antigen) ){ for( timeGroup in c('Week1', 'Week2', 'Week3', 'Week4') ){ for( rep in 1:40 ){ train <- Human_BurkPx_train %>% filter( Antigen == antigen ) %>% filter( TimeGroup %in% c('Prior','Healthy','Week1','Week2') ) test <- Human_BurkPx_test %>% filter( Antigen == antigen ) %>% filter( TimeGroup %in% c('Prior','Healthy', timeGroup) ) %>% sample_frac(1, replace = TRUE) auc <- tryCatch({ model <- glm( Status=='Melioid' ~ Value, family=binomial, data = train ) test %>% add_predictions(model, type='response') %>% pROC::roc(Status=='Melioid' ~ pred, data=.) %>% magrittr::extract2('auc') %>% as.numeric() }, error=function(err){NA}, warning=function(war){NA}) Results <- rbind(Results, data.frame(Type = type, Antigen = antigen, TimeGroup = timeGroup, rep=rep, AUC = auc)) } } } }
For the human test/training data sets, we fit a model using only a single antigen on the training set (which included all the negatives but only positives from that time point) and then calculated the AUC values using the corresponding test set. The following table gives the top 10 antigens for the IgG and IgM serum AUC values.
# Print out a nice looking table that shows the best antigens. Results %>% group_by(Type, Antigen, TimeGroup) %>% summarize( lwr=quantile(AUC,.025, na.rm=TRUE), upr=quantile(AUC,.975, na.rm=TRUE), AUC=mean(AUC, na.rm=TRUE) ) %>% mutate( AUC_string = str_c(round(AUC,3), ' (',round(lwr,3),',',round(upr,3),')') ) %>% filter( Type == 'IgG' ) %>% select(-lwr, -upr, -AUC) %>% spread(TimeGroup, AUC_string) %>% mutate( best = Week1 %>% str_sub(1, 5) %>% as.numeric() ) %>% arrange(desc(best)) %>% select(-best) %>% group_by() %>% slice(1:10) %>% pander::pander() Results %>% group_by(Type, Antigen, TimeGroup) %>% summarize( lwr=quantile(AUC,.025, na.rm=TRUE), upr=quantile(AUC,.975, na.rm=TRUE), AUC=mean(AUC, na.rm=TRUE) ) %>% mutate( AUC_string = str_c(round(AUC,3), ' (',round(lwr,3),',',round(upr,3),')') ) %>% filter( Type == 'IgM' ) %>% select(-lwr, -upr, -AUC) %>% spread(TimeGroup, AUC_string) %>% mutate( best = Week1 %>% str_sub(1, 5) %>% as.numeric() ) %>% arrange(desc(best)) %>% select(-best) %>% group_by() %>% slice(1:10) %>% pander::pander()
For the non-human primates we do something similar, and we utilize the NHP version 2 datasets. Here the single antigen model
data('NHP_BurkPx_train2', package='BurkPx') data('NHP_BurkPx_test2', package='BurkPx') # Now actually do the work for each antigen! NHP_Results <- NULL for( type in c('IgG', 'IgM') ){ for( antigen in unique(NHP_BurkPx2$Antigen) ){ for( timeGroup in c('Week1', 'Week2', 'Week3', 'Week4') ){ train <- NHP_BurkPx_train2 %>% filter( Antigen == antigen ) %>% filter( TimeGroup %in% c('Prior','Healthy','Week1','Week2') ) test <- NHP_BurkPx_test2 %>% filter( Antigen == antigen ) %>% filter( TimeGroup %in% c('Prior','Healthy', timeGroup) ) auc <- tryCatch({ model <- glm( Status=='Melioid' ~ Value, family=binomial, data = train ) test %>% add_predictions(model, type='response') %>% pROC::roc(Status=='Melioid' ~ pred, data=.) %>% magrittr::extract2('auc') %>% as.numeric() }, error=function(err){NA}, warning=function(war){NA}) NHP_Results <- rbind(NHP_Results, data.frame(Type = type, Antigen = antigen, TimeGroup = timeGroup, AUC = auc)) } } }
NHP_Results %>% group_by(Type, Antigen, TimeGroup) %>% summarize( lwr=quantile(AUC,.025, na.rm=TRUE), upr=quantile(AUC,.975, na.rm=TRUE), AUC=mean(AUC, na.rm=TRUE) ) %>% mutate( AUC_string = str_c(round(AUC,3), ' (',round(lwr,3),',',round(upr,3),')') ) %>% filter( Type == 'IgG' ) %>% select(-lwr, -upr, -AUC) %>% spread(TimeGroup, AUC_string) %>% mutate( best = Week1 %>% str_sub(1, 5) %>% as.numeric() ) %>% arrange(desc(best)) %>% select(-best) %>% group_by() %>% slice(1:10) %>% pander::pander() NHP_Results %>% group_by(Type, Antigen, TimeGroup) %>% summarize( lwr=quantile(AUC,.025, na.rm=TRUE), upr=quantile(AUC,.975, na.rm=TRUE), AUC=mean(AUC, na.rm=TRUE) ) %>% mutate( AUC_string = str_c(round(AUC,3), ' (',round(lwr,3),',',round(upr,3),')') ) %>% filter( Type == 'IgM' ) %>% select(-lwr, -upr, -AUC) %>% spread(TimeGroup, AUC_string) %>% mutate( best = Week1 %>% str_sub(1, 5) %>% as.numeric() ) %>% arrange(desc(best)) %>% select(-best) %>% group_by() %>% slice(1:10) %>% pander::pander()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.