R/absoluteTest.genePvals.R

#' A function to calculate the P values for each gene as compared with either  0, the gene set mean, or the full PDF of the gene set.
#' @param QSarray A QSarray object, as generated by aggregateGeneSet and possibly modified by calcVIF
#' @param path.index The pathways to calculate the pVals for.
#' @param silent If false, print a "." every fifth pathway, as a way to keep track of progress  
#' @param FastApproximated fast mode which uses normal approximations for the PDF's
#' @param addVIF Whether to include the VIF in the calculation of the p-values. By default, if the VIF has been calculated, it will be used
#' @param NPoints points to use
#' @param compareTo the null hypothesis that each gene set is tested against.
#' @export
#' @return gene set associated P values
absoluteTest.genePvals<-function(QSarray,  ##A QSarray object, as generated by aggregateGeneSet and possibly modified by calcVIF
                                 path.index=1:numPathways(QSarray), ##The pathways to calculate the pVals for.
                                 silent=TRUE,   ##If false, print a "." every fifth pathway, as a way to keep track of progress  
                                 FastApproximated=TRUE, ##fast mode which uses normal approximations for the PDF's
                                 addVIF=!is.null(QSarray$vif), ##Whether to include the VIF in the calculation of the p-values. By default, if the VIF has been calculated, it will be used
                                 NPoints=QSarray$n.points/8, ##,
                                 compareTo=c("zero","mean","pdf") ##the null hypothesis that each gene set is tested against.
                                ){
  ##check path.index
  if(is.character(path.index)){
    path.index = match(path.index, names(QSarray$pathways))
  }

  NumSDs<-c(1000,abs(qt(10^-8,2:250)))
  if(is.null(QSarray$pathways)){stop("Pathway Information not found. Please run aggregateGeneSet first.")}
  geneSets = QSarray$pathways
  if(addVIF==TRUE){addVIF=!is.null(QSarray$vif)}
  Means = QSarray$mean
  SD = QSarray$SD
  DOF=QSarray$dof
  Ps = list()
  if (FastApproximated) {
   SDPath = apply(calcBayesCI(QSarray,low=0.5,up=0.8413448)[,path.index,drop=F],2,function(x)x[2]-x[1])
    if(!addVIF)SDPath = SDPath / sqrt(QSarray$vif[path.index])
  }

  compareTo = match.arg(compareTo)
  if(compareTo=="pdf"){
    #NPoints=QSarray$n.points/8
    for(i in path.index){
      XPath = getXcoords(QSarray,i,addVIF=addVIF)
      if(!silent & i%%5==0){cat(".")}
      Indexes = geneSets[[i]]
      if(!FastApproximated){
        XGene <- seq(-1,1,length.out=NPoints) * NumSDs[floor(min(DOF[Indexes]))]
        Min<-min(c(XGene[1]+ Means[Indexes],XPath[1]))
        Max<-max(c(XGene[NPoints]+ Means[Indexes],XPath[QSarray$n.points]))
        X_Sample<-seq(Min,Max,length.out=NPoints)
        PDFPath<-approx( XPath, QSarray$path.PDF[,i],X_Sample,rule=2)$y
        if(length(Indexes)!=0){
          PS<-NULL
          for(j in Indexes){
            y<-dt(XGene[1:(NPoints/2)]/ SD[j],DOF[j])
            PDFGene<-c(y,rev(y))
            PDFGene<-approx( XGene+ Means[j], PDFGene,X_Sample,rule=2)$y
            PS<-c(PS,compareTwoDistsFaster(PDFGene,PDFPath, alternative="two.sided"))
          }
          Ps<-c(Ps,list(PS))
        }
      }
 else{
        if(length(Indexes)!=0){
          PS<-NULL
          for(j in Indexes){
            TMP<-pnorm( ( Means[j] - QSarray$path.mean[i]  ) / sqrt( SDPath[i]^2 + (DOF[j])/(DOF[j]-2)*SD[j]^2) )
            if(TMP>0.5) TMP <- TMP - 1
            TMP <- TMP*2
            PS<-c(PS,TMP)
          }
          Ps<-c(Ps,list(PS))
        }
      }
    }
  }
  else{
    for(i in path.index){
      if(!silent & i%%5==0){cat(".")}
      Indexes = geneSets[[i]]
      if(length(Indexes)!=0){
        PS<-NULL
        for(j in Indexes){
          SUBSTRACT=0
          if(compareTo=="mean")SUBSTRACT=QSarray$path.mean[i]
          p<-pt((Means[j]-SUBSTRACT)/SD[j],DOF[j])
          p[which(p>0.5)] = -1+p[which(p>0.5)]   ### turn it into a two-tailed p
          p = p*2
          PS<-c(PS,p)
        }
        Ps<-c(Ps,list(PS))
      }
    }
 }
  names(Ps) = names(geneSets)[i]
  return(Ps)
}
arcolombo/junk documentation built on May 10, 2019, 12:49 p.m.