R/ScoreGSEA.R

#' Compute pairwise distances between samples with method in package GSEA
#' 
#' Compute pairwise distances between sample according to their (Prototype
#' Ranked List) PRL, a N x N distance matrix is generated by calling this
#' function, N is the length of PRL.
#' 
#' Once the PRL obtained for each sample, the distances between samples are
#' calculated base on gene signature, including the expression of genes that
#' seemed to consistently vary in response to the across different experimental
#' conditions (e.g., different cell lines and different dosages). We take two
#' distance measurements between PRLs: the Average Enrichment-Score Distance
#' Davg=(TESx,y+TESy,x)/2, and the Maximum Enrichment-Score Distance
#' Dmax=Min(TESx,y,TESy,x)/2.The avg is more stringent than max, where max is
#' more sensitive to weak similarities, with lower precision but large recall.
#' 
#' @param MergingSet an ExpressionSet object. The assay data represents the
#' PRLs of the samples, each column represents one PRL. The number of sample of
#' this argument must be greater than 1, otherwise, this function is not
#' meaningful.
#' @param SignatureLength the length of "gene signature". In order to compute
#' pairwise distances among samples, genes lists are ranked according to the
#' gene expression ratio (fold change). And the "gene signature" includes the
#' most up-regulated genes (near the top of the list) and the most
#' down-regulated genes (near the bottom of the list).
#' @param ScoringDistance the distance measurements between PRLs: the Average
#' Enrichment Score Distance (avg), and the Maximum Enrichment Score Distance
#' (max).
#' @param verbose verbose
#' @param p.value logical, if TRUE return a matrix of p.values of the distance
#' matrix, default FALSE
#' @return an distance-matrix, the max distance is more sensitive to weak
#' similarities, providing a lower precision but a larger recall.
#' 
#' If p.value is set to TRUE, then a list is returned that consists of the
#' distance matrix as well as their p.values, otherwise, without p.vlues in the
#' result.
#' @seealso \code{\link{ScorePGSEA}}, \code{\link{SignatureDistance}}
#' @import Biobase
#' @import PGSEA
#' @examples
#' 
#' # load the sample expressionSet
#' data(exampleSet)
#' 
#' # Merging each group of the ranked lists in the exampleSet with the 
#' # same phenotypic data into a single PRL
#' MergingSet=RankMerging(exampleSet,"Spearman")
#' 
#' # get the distance matrix
#' ds=ScoreGSEA(MergingSet,250,"avg")
#' 
#' @export ScoreGSEA
ScoreGSEA <-
function(MergingSet,SignatureLength,ScoringDistance=c("avg", "max"),p.value=F, verbose=FALSE){

        #compute the distance of ranked PRL
        PRL.Distance <-function(PRL,SignatureLength,ScoringDistance=c("avg", "max"), verbose=verbose)
       {
           ScoringDistance=match.arg(ScoringDistance,c("avg","max"))
	   ES=matrix(1) 
	   if (ncol(exprs(PRL))<=1){
		stop("the number of column of argument PRL must be greater than 1")
           }
           KBR=PRL[,1]
	   PRLcol=ncol(exprs(PRL))
	   for (i in 2:PRLcol){
	    if (verbose) {
	        print (paste(i, "out of ", ncol(exprs(PRL) )))
	    }
		newPRL=PRL[,i]
		d=integratePRL(ES,KBR,newPRL,SignatureLength,ScoringDistance)
		ES=d[[2]]
		KBR=d[[1]]
	   }
	   return((d[[3]]))
        }

       #compute the distance between two random permutations(RP)
       RP.Distance <- function(RP1,RP2,SignatureLength,ScoringDistance=c("avg", "max"))
      {
           up1=which(RP1<=SignatureLength)
           down1=which(RP1>=length(RP1)-SignatureLength+1)
           up2=which(RP2<=SignatureLength)
           down2=which(RP2>=length(RP2)-SignatureLength+1)
           ES1=quickenrichmentscore(up1,down1,RP2)
           ES2=quickenrichmentscore(up2,down2,RP1)
           TES1=1-ES1
           TES2=1-ES2
           if (ScoringDistance=="avg")
              distances = (TES1+TES2)/2
           else
              distances= min(TES1+TES2)/2 
       } 

        ScoringDistance=match.arg(ScoringDistance,c("avg","max"))
	PRL=MergingSet
	if (is.null(ncol(exprs(PRL)))==TRUE){
		stop("the class of argument MergingSet is incorrect")
	}
	if (ncol(exprs(PRL))<=1){
		stop("the number of samples of argument MergingSet must be greater than 1")
	}
        dis=PRL.Distance(PRL,SignatureLength,ScoringDistance, verbose=verbose)
        if (p.value){
           p.results=matrix(0,nrow(dis),ncol(dis))
           prb_num=nrow(exprs(PRL))
           RPdis=c()
           RP1=sample(prb_num);
           for (i in 1:10000){
              RP2=sample(prb_num);
              RPdis[i]=RP.Distance(RP1,RP2,SignatureLength,ScoringDistance)
           }
           RPdis=sort(RPdis)
           for (i in 1:nrow(dis)){
               for (j in 1:ncol(dis)){
                  p.results[i,j]=(length(which(RPdis<=dis[i,j]))+1)/10001
               }
           }
           return(list(distances=dis,p.results=p.results))
        }
        return(dis)
}
zhilongjia/GeneExpressionSignature documentation built on May 4, 2019, 11:21 p.m.