R/calcPCor.R

#' A function to calculate the Variance Inflation Factor (VIF) for each of the gene sets input in geneSets This function depends on which method you used to calculate the variance originally.  If you assumed pooled variance, then the variance will be calculated using LIMMA's "interGeneCorrelation" method (see Wu and Smyth, Nucleic Acids Res. 2012). Otherwise, the method will calculate the VIF separately for each group and return the average of each group's vif.
#' @param eset a matrix of log2(expression values). This must be the same dataset that was used to create geneResults
#' @param geneResults A QSarray object, as generated by either makeComparison or aggregateGeneSet
#' @param useCAMERA The method used to calculate variance. See the description for more details. By default, it uses the parameter stored in the geneResults
#' @param useAllData Boolean parameter determining whether to use all data in eset to calculated the VIF, or whether to only use data from the groups being contrasted. Only used if useCAMERA==FALSE
#' @import limma
#' @export
#' @return correlation of vif between gene sets
calcPCor = function(eset,         ##a matrix of log2(expression values). This must be the same dataset that was used to create geneResults
                    geneResults,  ##A QSarray object, as generated by either makeComparison or aggregateGeneSet
                    useCAMERA = geneResults$var.method=="Pooled",  ##The method used to calculate variance. See the description for more details. By default, it uses the parameter stored in the geneResults
#                    geneSets=NULL, ##a list of pathways calculate the vif for, each item in the list is a vector of names that correspond to the gene names from Baseline/PostTreatment
                    useAllData = TRUE ##Boolean parameter determining whether to use all data in eset to calculated the VIF, or whether to only use data from the groups being contrasted. Only used if useCAMERA==FALSE
){
  if(is.null(geneResults$pathways)){stop("Pathway Information not found. Please provide a list of gene sets.")}
  geneSets = geneResults$pathways
  if(class(geneResults) != "QSarray"){stop("geneResults must be a QSarray object, as created by makeComparison")}

  ##create design matrix
  if(useCAMERA){
    labels = geneResults$labels
    paired=F
    if(!is.null(geneResults$pairVector)){paired=T; pairVector = geneResults$pairVector}

    f = "~0+labels"
    designNames = levels(labels)
    if(paired){
      f = paste(f,"+pairVector",sep="")
 designNames = c(designNames, paste("P",levels(pairVector)[-1],sep=""))
    }
    design <- model.matrix(formula(f))
    colnames(design) <- designNames
  }

  Cor = sapply(names(geneSets),function(i){
    GNames<-names(geneResults$mean)[geneSets[[i]]]
    gs.i = which(rownames(eset)%in%GNames)
    #     gs.i = geneSets[[i]]
    if(length(gs.i)<2){warning("GeneSet '",i,"' contains one or zero overlapping genes. NAs produced.");return(NA)}
    if(useCAMERA){
      return(interGeneCorrelation(eset[gs.i,],design)$vif)
    }
    else{
      ##pooled covariance matrix
      grps = split(1:ncol(eset),geneResults$labels)
      if(!useAllData){
        toInclude = sub("\\s","",strsplit(geneResults$contrast,"-")[[1]])  ##only calc vif for the groups that were compared
        grps = grps[toInclude]
      }
      covar.mat = cov(t(eset[gs.i,grps[[1]]])) * (length(grps[[1]])-1)
      if(length(grps)>1){
        for(i in 2:length(grps)){
          covar.mat = covar.mat + ( cov(t(eset[gs.i,grps[[i]]])) * (length(grps[[i]])-1) )
  }
      }
      covar.mat = covar.mat / (ncol(eset) - length(grps))

      ##multiply matrix by the sd.alpha vectors
      if(!is.null(geneResults$sd.alpha)){
        a = geneResults$sd.alpha[rownames(eset)[gs.i]]
        covar.mat = t(covar.mat*a)*a
      }
      Cor = (covar.mat)
      Cor = sapply(1:ncol(Cor),function(i)Cor[i,]/sqrt(covar.mat[i,i]))
      Cor = sapply(1:ncol(Cor),function(i)Cor[,i]/sqrt(covar.mat[i,i]))
      Cor[!is.finite(Cor)] = 0
      return(Cor)
    }
  })

  return(Cor)
}
arcolombo/junk documentation built on May 10, 2019, 12:49 p.m.