R/TRIM_1_metabolic_pipeline.R

Defines functions regulatory.pipeline calc_auc_pval calc.joint.auc assemble_data processRP

#' @importFrom magrittr %>%
#' @import data.table
processRP <- function(cistrome.location){
  rpdata.id <- rhdf5::h5read(cistrome.location,"IDs")
  rpdata.rp <- rhdf5::h5read(cistrome.location,"RPnormalize")
  rpdata.ref <- rhdf5::h5read(cistrome.location,"RefSeq")
  rpdata.rp<- as.data.frame(rpdata.rp)
  rownames(rpdata.rp)<- rpdata.id
  colnames(rpdata.rp)<- rpdata.ref
  
  rpdata.rp$sample_id = rownames(rpdata.rp)
  rpdata.rp.melt = reshape2::melt(rpdata.rp,id= "sample_id",variable.name = "gene",value.name = "RP")
  rpdata.rp.melt$gene <-stringr::str_remove(pattern = ".*:",rpdata.rp.melt$gene) 
  setDT(rpdata.rp.melt)[, mean(RP), by = c("sample_id","gene")]->rpdata.rp.pcd
  
  return(list(rpdata.rp.pcd=rpdata.rp.pcd,
              rpdata.rp=rpdata.rp))
  
}



assemble_data <- function(cistrome.info,rpdata.rp.pcd,pathways,rpdata.rp){
  #ca: atac-seq and dnase data
  cistrome.info_used <- cistrome.info[,1:4] %>%
    dplyr::filter(!(factor_type%in% c("None","not sure","other","hm","ca")))  %>%
    dplyr::mutate(DCid=as.character(DCid))
  
  cistrome.intgrated<- dplyr::inner_join(rpdata.rp.pcd,cistrome.info_used[,c(1,3)],
                                  by=c("sample_id"="DCid"))   %>%
    dplyr::mutate(oxphos_gene = if_else(gene%in% pathways,1,0)) %>%
    dplyr::rename(RP="V1",TF="factor") 
  
  
  rpdata.rp.used = rpdata.rp[rownames(rpdata.rp)%in% cistrome.info_used$DCid,]
  rpdata.rp.used=tidyr::drop_na(rpdata.rp.used)
  
  all.genes = sapply(colnames(rpdata.rp.used), function(tt) strsplit(tt, split=":")[[1]][5])
  pathways.indicator = (all.genes %in% pathways)+0
  
  out.list=list(pathways.indicator=pathways.indicator,
                cistrome.intgrated=cistrome.intgrated,
                rpdata.rp.used=rpdata.rp.used)
  return(out.list)
}

calc.joint.auc <- function(mat, response){
  xx = apply(mat, 1, function(tt) 
    c(pROC::roc(response, as.numeric(tt), ci=T)$ci)
  )
  if(ncol(xx)== 1) return(c(xx[2], xx[2] - xx[1]))
  ci = sqrt(sum((xx[2,] - xx[1,])^2))/ncol(xx)
  auc = mean(xx[2,])
  return(c(auc, ci))
}


calc_auc_pval<- function(cistrome.intgrated, rpdata.rp.used,pathways.indicator,
                         filename=NULL, debug = FALSE) {
  tf_oxphos_effect=list()
  
  n=1
  TFs =unique(cistrome.intgrated$TF) 
  if(debug) TFs = TFs[1:2]
  for (tf in TFs) {
    print(paste(n,tf,sep = ":"))
    cistrome.intgrated[cistrome.intgrated$TF==tf,] ->tc.cancer
    if (length(unique(tc.cancer$sample_id))==1) {
      stats::glm(oxphos_gene~RP,data = tc.cancer,family = binomial)->mod.tmp
    }else{
      lme4::glmer(oxphos_gene~RP+(1|sample_id),family = binomial,data = tc.cancer)->mod.tmp
    }
    
    #get pvalue
    tmp<-as.data.frame(coef(summary(mod.tmp) ))
    #get auc : auc indicates the regulatory potential of a TF for this signature
    sample.id = tc.cancer$sample_id
    common.id= intersect(rownames(rpdata.rp.used) ,sample.id)
    if (!is_empty(common.id)) {
      
      calc.joint.auc(as.matrix(rpdata.rp.used[common.id,]),pathways.indicator)->tmp.auc
      tmp$auc=tmp.auc[1]
      tmp$CI=tmp.auc[2]
    }else{
      next
    }
    tmp[2,]->tf_oxphos_effect[[tf]]
    if(!is.null(filename)) saveRDS(tf_oxphos_effect,file = filename)
    n=n+1
  }
  
  return(tf_oxphos_effect)
  
}

#' @export
regulatory.pipeline <- function(pathways,
                    cistrome.location = "data/human_100kRP.hd5", debug = FALSE) {
  out_filename <- NULL 
  #cistrome.info available in R/sysdata.rda which is automatically loaded
  print("Identifying regulators of input pathways...")
  rpdata.rp.data <- processRP(cistrome.location=cistrome.location)
  outlist<- assemble_data(cistrome.info = cistrome.info, 
                          pathways = pathways,
                          rpdata.rp = rpdata.rp.data$rpdata.rp,
                          rpdata.rp.pcd = rpdata.rp.data$rpdata.rp.pcd)
  auc.res<- calc_auc_pval(cistrome.intgrated = outlist$cistrome.intgrated,
                          rpdata.rp.used = outlist$rpdata.rp.used,
                          pathways.indicator = outlist$pathways.indicator,
                          filename = out_filename, debug=debug)
  regulation.result = do.call(rbind,auc.res) %>% as.data.table %>% .[,TF:=names(auc.res)]
  regulation.result[,auc_ci:=auc-CI]
  setnames(regulation.result, 1:4, c("Estimate", "SE", "zval", "p.val"))

  print("regulation module calculation completed!")
  return(regulation.result)
}
vinash85/TRIM documentation built on Dec. 23, 2021, 3:12 p.m.