R/aggregateGeneSet.R

#' Combine individual gene differential expresseion for each pathway (Neg) ~ 1 minute
#' @param geneResults A QSarray object, as generated by makeComparison 
#' @param geneSets a list of pathways to be compared, each item in the list is a vector of names that correspond to the gene names from Baseline/PostTreatment
#' @param n.points the number of points to sample the convoluted t-distribution at.
#' @param silent If false, print a "." every fifth pathway, as a way to keep track of progress
#' @export
#' @return gene set aggregations
aggregateGeneSet<-function(geneResults,  ##A QSarray object, as generated by makeComparison 
                           geneSets,     ##a list of pathways to be compared, each item in the list is a vector of names that correspond to the gene names from Baseline/PostTreatment
                           n.points=2^12, ##the number of points to sample the convoluted t-distribution at.
                           silent=TRUE   ##If false, print a "." every fifth pathway, as a way to keep track of progress
                          ){

#   NumSDs<-c(20,20,20,10,5,2,2,2,2,1,1,rep(.5,220))*30
  NumSDs<-abs(qt(10^-10,1:250))
  NumSDs[NumSDs>750] = 750
#   ,rep(abs(qt(10^-8,50)),220))  

  Means = geneResults$mean
  SD = geneResults$SD
  DOF=geneResults$dof
  COLS = names(Means)

  if(is.vector(geneSets) & !is.list(geneSets)){
    n = deparse(substitute(geneSets))
    geneSets = list(geneSets)
    names(geneSets) = n
  }
  if(is.null(names(geneSets))){names(geneSets) = 1:length(geneSets)}
  geneSets = lapply(geneSets,function(x){
    if(is.numeric(x)){
      if(any(!(x %in% 1:length(COLS)))){stop("Numeric gene set indices out of bounds")}
      return(x)
    }
 which(COLS%in%x)
  })

  #########First set MaxDiff to adjust to data:
  ##calculate standard deviation
  SumSigma<-sapply(names(geneSets),function(i){
      Indexes = geneSets[[i]]
      x<-sqrt(sum((SD^2*(DOF/(DOF-2)))[Indexes]))
      return(x)
  })

  MinDof<-sapply(names(geneSets),function(i){
      Indexes = geneSets[[i]]
      if(length(Indexes)==0){return(NA)}
      return(floor(min(DOF[Indexes])))
      })
#   MaxDiff<-pmax(NumSDs[floor(min(DOF))]*SumSigma,1)  
  MaxDiff<-pmax(NumSDs[MinDof]*SumSigma,1,na.rm=TRUE)
  PDFs = pathMeans = Sizes = NULL
  for(i in 1:length(geneSets)){
    if(!silent & i%%5==0){cat(".")}
    Indexes = geneSets[[i]]
    if(length(Indexes)!=0){
      Norm<-(2*MaxDiff[i]/{n.points-1}) #normalize 
      PDF<-sapply(Indexes,function(j){
          x = SD[j]
          MaxDiffInt<-MaxDiff[i]/x
          #y<-dt(seq(-MaxDiffInt,MaxDiffInt,length.out=n.points),DOF)  
          x1<-seq(-MaxDiffInt,MaxDiffInt,length.out=n.points)
          y<-dt(x1[1:(n.points/2)],DOF[j])
          y<-c(y,rev(y))
          y/sum(y)/Norm
      })
  Tmp<-multi_conv(PDF)
      Tmp = Tmp*(n.points-1)/MaxDiff[i]/2
    }else{
      warning(paste("Gene set: (index ",i, ") has 0 overlap with eset.",sep=""))
      Tmp = rep(NA, n.points)
    }
    PDFs = cbind(PDFs,Tmp)
    pathMeans = c(pathMeans, mean(Means[Indexes]))
    Sizes = c(Sizes, length(Indexes))
  }

  colnames(PDFs) = names(pathMeans) = names(Sizes) = names(geneSets)
  ##add the new data to the existing QSarray object
  geneResults$pathways = geneSets
  geneResults$path.mean = pathMeans
  geneResults$path.size = Sizes
  geneResults$ranges = MaxDiff/Sizes
  geneResults$n.points=n.points
  geneResults$path.PDF = PDFs
  return(geneResults)
}
arcolombo/junk documentation built on May 10, 2019, 12:49 p.m.