R/pathwaySelect.R

Defines functions pathSelect pathEnrich metaPath

Documented in pathSelect

##' Select pathways of interest 
##' The \code{pathSelect} is function to select pathways of interest for 
##' pathway-level resemblance analysis
##' @title Select pathways of interest for pathway-level resemblance analysis
##' @param mcmc.merge.list: a list of merged MCMC output matrices.
##' @param pathway.list: list of pathway database. 
##' @param pathwaysize.lower.cut: pathway size lower bound cutoff;
##' @param pathwaysize.upper.cut: pathway size upper bound cutoff;
##' @param overlapsize.cut: lower bound cutoff of overlap size between genes 
##' from input data and genes from a pathway.
##' @param med.de.cut: lower bound cutoff of median number of DE genes in a 
##' pathway. 
##' @param qfisher.cut: fisher q-value cutoff from the meta enrichment 
##' analysis.

##' @return an vector of selected pathway names.
##' @export
##' @examples
##' \dontrun{
##' #mcmc.merge.list from the merge step
##' data(pathwayDB) ## include pathway.list
##' select.pathway <- pathSelect(mcmc.merge.list,pathway.list)
##' select.pathway.list <- pathway.list[select.pathway]
##' }


pathSelect <- function(mcmc.merge.list,pathway.list, 
                       pathwaysize.lower.cut=10,
                       pathwaysize.upper.cut=200,
                       overlapsize.cut=10,med.de.cut=5,
                       qfisher.cut = 0.01){
  M <- length(mcmc.merge.list)
  summary.list <- vector("list",M) 
  for(m in 1:M){
    print(paste("dataset:",m))
    dat <- mcmc.merge.list[[m]]
    deIndex <-  attr(dat,"DEindex") 
    summary.list[[m]] <- pathEnrich(dat,deIndex,pathway.list)
  }
  
  meta.summary <- metaPath(summary.list,pathway.list)
  
  select.ind <- which(meta.summary$pathway.size >= pathwaysize.lower.cut & 
                        meta.summary$pathway.size < pathwaysize.upper.cut &
                        meta.summary$min.overlap.size >=overlapsize.cut &
                        meta.summary$med.DE.inpathway >=med.de.cut &
                        meta.summary$q.fisher < qfisher.cut) 
  select.pathways <- rownames(meta.summary)[select.ind]
  return(select.pathways)
}

pathEnrich <- function(dat, deIndex, pathway.list){
  ## dat is data matrix (either rawData or pData) from the individual model for pathway analysis, deIndex is the DE index
  geneDat <- rownames(dat)
  K <- length(pathway.list)
  pathwayName <- names(pathway.list)
  pathwaySize <- sapply(pathway.list,length)
  
  bk <- intersect(unique(unlist(pathway.list)),geneDat)
  de <- intersect(unique(unlist(pathway.list)),geneDat[deIndex])
  
  out <- gsa.fisher(x= de, background = bk ,
                    pathway = pathway.list)
  
  overlapSize <- as.integer(out$DE_in_Set)+as.integer(out$NonDE_in_Set)
  DEnum <- as.integer(out$DE_in_Set)
  OR <- (as.numeric(out$DE_in_Set)*as.numeric(out$NonDE_not_in_Set))/
    (as.numeric(out$DE_not_in_Set)*as.numeric(out$NonDE_in_Set))
  OR <- sapply(OR, function(x) {
    if(is.na(x) || is.infinite(x)) {
      x <- 0 } else{
        x <- x
      }
  }, simplify = T)
  
  logOR <- ifelse(OR==0,1,log(OR))
  pvalue <- as.numeric(out$pvalue)
  
  summary <- data.frame(pathway.size= pathwaySize, overlap.size= overlapSize, 
                        DE.inpathway = DEnum, Odds.ratio = OR, 
                        log.odds.ratio = logOR, p.value=pvalue)
  rownames(summary) <- pathwayName
  return(summary)
}


metaPath <- function(pathSummary, pathway.list){
  
  M <- length(pathSummary)
  K <- length(pathway.list)
  pathwayName <- names(pathway.list)
  pathwaySize <- sapply(pathway.list,length)
  
  pdat <- Reduce("cbind",lapply(pathSummary,function(x) x$p.value))
  rownames(pdat) <- pathwayName
  p.fisher <- apply(pdat,1,fisher)
  q.fisher <- p.adjust(p.fisher,method="BH")
  
  overlapMinSize <- apply(Reduce("cbind",lapply(pathSummary,
                                                function(x) x$overlap.size)),
                          1,min)
  
  overlapMedDE <-  apply(Reduce("cbind",lapply(pathSummary,
                                               function(x) x$DE.inpathway)),1,
                         function(x) (
                           if(M %% 2 ==0){
                             (sort(x)[M/2]+sort(x)[(M/2+1)])/2
                           } else { sort(x)[(M+1)/2] } ))
  
  meta.summary <- data.frame(pathway.size= pathwaySize, 
                             min.overlap.size= overlapMinSize, 
                             med.DE.inpathway = overlapMedDE, 
                             p.fisher=as.numeric(p.fisher), 
                             q.fisher=as.numeric(q.fisher))
  rownames(meta.summary) <- pathwayName
  return(meta.summary)
  
}
matianzhou/CAMO documentation built on May 21, 2019, 10:12 a.m.