In sgosline/mpnstXenoModeling: MPNST Xenograft Modeling

knitr::opts_chunk$set(echo = TRUE)
if(!require(mpnstXenoModeling)){
  remotes::install_github('sgosline/mpnstXenoModeling')
  library(mpnstXenoModeling)
}
if(!require('dplyr')){
  BiocManager::install('dplyr')
  library(dplyr)
}
if(!require('tidyr')){
  BiocManager::install('tidyr')
  library(tidyr)
}
if(!require('drc')){
  BiocManager::install('drc')
  library(drc)
}
if(!require('ggplot2')){
  BiocManager::install('ggplot2')
  library(ggplot2)
}
if(!require('data.table')){
  BiocManager::install('data.table')
  library(data.table)
}
if(!require('pROC')){
  BiocManager::install('pROC')
  library(pROC)
}

Load all data from Synapse

Let's load all the data using the mpnstXenoModeling package function. This will pull the data from synapse and format it accordingly.

#this function simply loads all the data into memory
#loadPDXData()
loadPDXData()
loadMicrotissueMetadata()
##let's clean up drug Dat
#head(drugData)

#library(reticulate)
library(dplyr)
#sync<-reticulate::import('synapseclient')
#syn=sync$login()

Load microtissue data and compare

The first thing we want to do is to load the microtissue data and carry out the bayesian correction. This is pretty straightforward.

cleanupMTDrugData<-function(mt.meta){
  mt.drug<-mt.meta[mt.meta$experimentalCondition!='DMSO',]
  #mt.drug$modelSystemName<-gsub('.*CL','CL',mt.drug$modelSystemName)
  #mt.drug$modelSystemName[mt.drug$modelSystemName=="NaN"]<-NA
  #mt.drug<-mt.drug%>%tidyr::unite(individualID,c(individualID,modelSystemName), sep=" ", na.rm=TRUE)
  mt.drug$modelSystemName<-unlist(mt.drug$modelSystemName)
  mt.drug$specimenID<-gsub('MN3-001','001',mt.drug$specimenID)
  mt.drug$specimenID[mt.drug$specimenID!="001"]<-NA
  mt.drug<-mt.drug%>%tidyr::unite(individualID,c(individualID,specimenID), sep="-", na.rm=TRUE)
  mt.drug$individualID<-gsub('MN-3$','MN-3-002',mt.drug$individualID)
  return(mt.drug)
}
cleanupMTDMSOData<-function(mt.meta){
  mt.dmso<-mt.meta[mt.meta$experimentalCondition=='DMSO',]
  mt.dmso<-mt.dmso%>%dplyr::select(-c(specimenID,modelSystemName))
  return(mt.dmso)
}

mt.drug<-cleanupMTDrugData(mt.meta)%>%
  subset(modelSystemName=='NaN')
mt.dmso<-cleanupMTDMSOData(mt.meta)

process_DMSO <- function(mt.dmso){
  # TODO unite(CellLine, ExperimentID) #when uploaded to metadata folder
  res = mpnstXenoModeling::getMicroTissueDrugData(mt.dmso) 
  #res=aggregate(Viabilities ~ CellLine, data=res, FUN=mean)
  return(res)
}
#dmso.dat<-process_DMSO(syn,mt.dmso)

###TODO###
# mutate dmso.dat column name to MaxViability
# unite(mt.drg,dmso.dat) by experimentID and individualID
# mt.drug now has a column of MaxViability

 # res = mpnstXenoModeling::
 #   res<-getMicroTissueDrugData(syn,mt.dmso) 

# return(res)
#}

dmso.dat<-process_DMSO(mt.dmso)
dmso.stat<- dmso.dat%>%
 # aggregate(Viabilities ~ CellLine, data=., FUN=mean)
  group_by(DrugCol,CellLine,experimentId)%>%
  summarize(meanViab=mean(Viabilities,rm.na=T))

#' filter_byDrug: Retrieves the drug sensitivity data for a 
#' subset of drug experiments filtered by a particular drug name
#'
#' @param syn 
#' @param mtdrug 
#' @param drugID 
#'
#' @return a data.frame of drug data
#' @export
#'
#' @examples
filter_byDrug <- function(syn, mtdrug, drugID) {
  mtd <- subset(mtdrug, experimentalCondition == drugID)
  res = mpnstXenoModeling::getMicroTissueDrugData(mtd)
  # does this need to not be sorted for comparison to DMSO
  out = res[order(res$Conc),]
  # Assumes log(M) concentration
  return(out)
}


#' TryFit
#'
#' @param dose_response 
#' @param fixed 
#' @param names 
#' @param nan.handle 
#' @param curve_type 
#'
#' @return
#' @export
#'
#' @examples
TryFit <- function(dose_response, 
                   fixed = c(NA, NA, NA, NA), 
                   names = c(NA, NA, NA, NA), 
                   nan.handle = c("LL4", "L4"), 
                   curve_type){

    if (var(dose_response$Viabilities) == 0) {
        dose_response$Viabilties[nrow(dose_response)] <- dose_response$Viabilities[nrow(dose_response)] + 10^-10
    }
    #dose_response[dose_response == 0] <- 10^-10
    nan.handle <- match.arg(nan.handle)
    #, logDose = exp(10)
    drug.model <- tryCatch({
        drcmod(dose_response, LL.4(fixed = fixed, names = names), curve_type)
    }, warning = function(w) {
        if(nan.handle == "L4"){
            drcmod(dose_response, L.4(fixed = fixed, names = names), curve_type)
    } else {
        drcmod(dose_response, LL.4(fixed = fixed, names = names), curve_type)
    }
    }, error = function(e) {
        drcmod(dose_response, L.4(fixed = fixed, names = names), curve_type)
    })
    return(drug.model = drug.model)
}


#' drcmod Creates a dose response model object 
#'
#' @param dose_resp 
#' @param fctval 
#' @param curve_type 
#'
#' @return
#' @export
#'
#' @examples
drcmod <- function(dose_resp, fctval, curve_type){
  drm(formula   = Viabilities ~ Conc
      , curveid   = CellLine
      , data      = dose_resp
      , fct       = fctval
      , na.action = na.omit
      , control   = drmc(errorm = FALSE)
  )
}


#' generate_DR_plots
#'
#' @param res 
#' @param drugID 
#'
#' @return
#' @export
#'
#' @examples
generate_DR_plots <- function(res, drugID) {
  require(data.table)
  dr.df <- res %>% dplyr::select(Conc, Viabilities, CellLine)%>%
    rowwise()%>%
    mutate(meanViability=median(Viabilities,na.rm=T))%>%
    unnest(Viabilities)#%>% setDT()

  #dr.df <- tidyr::unnest(dr.df, Viabilities)
  dr.df$Viabilities<- na_if(dr.df$Viabilities, "NA")
  dr.df <- dr.df[complete.cases(dr.df), ]

  dr.dt <- data.table(dr.df)
  dt2 <- data.table()
  dat.dt <- data.table(Drug=character(),CellLine=character(),Hill=numeric(),ec50=numeric(),
                       MinViability=numeric(),MaxViability=numeric(),ic50=numeric(),auc=numeric(),
                       auc_hi=numeric(),auc_low=numeric())
  # factor by CellLine
  dr.dt[,CellLine:=as.factor(CellLine)]
  scale.num <- nlevels(dr.dt$CellLine)
  # iterate over CellLine: Conc, Viabilities
  for (i in 1:nlevels(dr.dt$CellLine)) {
    # subset data.table by CellLine level
    dt <- dr.dt[CellLine %in% levels(CellLine)[i]]
    dt[, CellLine := as.character(CellLine)]
    # TryFit of subsetted data.table
    #dft <- dt[,.(Viabilities = unlist(Viabilities)), by = setdiff(names(dt), 'Viabilities')]
    df <- as.data.frame(dt)
#    min_value <- min(df$Viabilities)
#    max_value <- max(df$Viabilities)

    ## TODO add MaxViability here for normalization to DMSO##
    min_value <- min(df$meanViability)
    max_value <-max(df$meanViability)

    ##here we create the actual fit values
    fit.LL4 <- TryFit(df, fixed = c(NA, min_value, max_value, NA), 
                      names = c("hill", "min_value", "max_value", "ec_50"), 
                      nan.handle = "L4", "CellLine")
    # hill from fit.LL4
    # ic50 code from https://rstudio-pubs-static.s3.amazonaws.com/378543_5b5bda32bf0541a485ccc49efbea680a.html
    coefs <- setNames(
      c(fit.LL4$coefficients, min_value, max_value),
      c("hill", "ec_50", "min_value","max_value"
      ))

## updated to de-log ec50
    #https://rstudio-pubs-static.s3.amazonaws.com/378543_5b5bda32bf0541a485ccc49efbea680a.html
    ic_50 <- with(as.list(coefs),
      exp(ec_50 + (1 / hill) * log(max_value / (max_value - 2* min_value)))) #log(ec_50)
          #cbind(auc,ED(fit.LL4,50))
    #rbindlist(dt.dat,auc)

    dt[, Pred := predict(object=fit.LL4)]
    sd.lst = dt[, list(SD=sd(Viabilities)/4),by=Conc]
    df <- as.data.frame(dt)
    cells <- unique(df$CellLine)
    # pROC::auc() https://cran.r-project.org/web/packages/pROC/pROC.pdf
    auc <- pROC::auc(df$Viabilities,df$Conc)
    ci <- pROC::ci.auc(auc,method='bootstrap',progress='none')
    #print(ci)
    meta2 <- data.table(Drug=drugID,CellLine=cells,
                        Hill=coefs['hill'],
                        ec50=coefs['ec_50'],
                       MinViability=min_value,
                       MaxViability=max_value,
                       ic50=ic_50,auc=as.numeric(auc),
                       auc_low=ci[1], #2.5% confidence interval
                       auc_hi=ci[3]) #97.5% confidence interval

    dat.dt <- rbind(dat.dt,meta2)
    dt = merge(dt,sd.lst)
    dt2 <- rbind(dt2, dt)
  }
  temp.plot = ggplot(dt2, aes(x=Conc, y=Viabilities, group=CellLine, color=CellLine, shape=CellLine)) +
            geom_ribbon(aes(y=Pred, ymin=Pred-SD, ymax=Pred+SD, fill=CellLine), alpha=0.2, color=NA) +
            geom_line(aes(y = Pred)) +
            scale_shape_manual(values=seq(0, scale.num)) +
            labs(x = "Conc log(M)", y = "Viabilities %", shape="Temp", color="Temp") +
            theme_bw() +
            ggtitle(paste("Dose-response curves for Drug:", drugID))
  return(list('plot'=temp.plot,'table'=dat.dt))
}


#' Title
#'
#' @param syn 
#' @param master.drug 
#' @param master.dmso 
#' @param upload 
#' @param path 
#' @param parentID 
#'
#' @return
#' @export
#'
#' @examples
process_MicroTissueDrugData <- function(syn, master.drug, master.dmso=NULL, 
                                        upload=FALSE, path='.', parentID=NULL){

  ##final table
  meta.dt <- data.table(Drug=character(),CellLine=character(),Hill=numeric(),ec50=numeric(),MinViability=numeric(),MaxViability=numeric(),ic50=numeric(),auc=numeric(),auc_hi=numeric(),auc_low=numeric())
  plt.list <- list()

  ##drugs to process
  drugIDs <- unique(master.drug$experimentalCondition)

  for (drug in drugIDs) {
    message(drug)
    res <- filter_byDrug(syn, master.drug, drug)

    if(!is.null(master.dmso)) ## if we have DMSO data we can normalize viabilities
      res<-res%>%
        left_join(dplyr::select(ungroup(master.dmso),'CellLine','experimentId','meanViab'))%>%
        mutate(Viabilities=100*(Viabilities/meanViab))

    plt = generate_DR_plots(res,drug)
    myplt <- plt$plot
    mytab <- plt$table
    meta.dt <- rbind(meta.dt, plt$table)
    plt.list[[drug]] = plt$plotsub
  }

  meta.dt<-meta.dt%>%
    tidyr::replace_na(list(ic50=0.0))%>%as.data.frame()

  if(is.null(master.dmso)){
    fwrite(meta.dt,file="doseResponse_table.csv")
      pdf("doseResponsePlots.pdf", onefile = TRUE)
  }else{
    fwrite(meta.dt,file="doseResponseNorm_table.csv")
    pdf("doseResponsePlotsNorm.pdf", onefile = TRUE)

  }
  for (drug in drugIDs) {print(plt.list[[drug]])}
  dev.off()

  return(meta.dt)
}

Reprocess MT data

How does the MT data look? how does the DMSO change things?

#unique(mt.drug$individualID)
meta.dt<-process_MicroTissueDrugData(syn, mt.drug, NULL, upload=FALSE)
meta.with.dmso<-process_MicroTissueDrugData(syn, mt.drug, dmso.stat, upload=FALSE)

full<-meta.dt%>%left_join(meta.with.dmso,by=c('Drug','CellLine'),suffix=c('.orig','.norm'))


ggplot(full,aes(x=auc.norm,y=auc.orig,col=CellLine))+geom_point()

ggplot(full,aes(x=ic50.norm,y=ic50.orig,col=CellLine))+geom_point()+scale_x_log10()+scale_y_log10()

ggplot(full,aes(x=MinViability.norm,y=MinViability.orig,col=CellLine))+geom_point()

ggplot(full,aes(x=MaxViability.norm,y=MaxViability.orig,col=CellLine))+geom_point()

Now we store to synapse

#meta.dt<-process_MicroTissueDrugData(syn, mt.meta, upload=TRUE, parentID='syn26066928')
res  = syn$tableQuery('select * from syn26136282')
syn$delete(res)
res  = syn$tableQuery('select * from syn26136282')
#synTableStore(tabname='Miccrotissue Response Valsue',tab=meta.dt,parentId='syn21984813')
pd <- sync$build_table('Microtissue Response Values','syn21984813','doseResponse_table.csv')
syn$store(pd)

res  = syn$tableQuery('select * from syn26485995')
syn$delete(res)
res  = syn$tableQuery('select * from syn26485995')
pd <- sync$build_table('Microtissue Response Values with DMSO','syn21984813','doseResponseNorm_table.csv')
syn$store(pd)