R/ScorePGSEAm.R

#' Compute pairwise distances between samples with method in package PGSEA
#' 
#' Compute pairwise distances between sample according to their (Prototype
#' Ranked List) PRL, get a N x N distance matrix is generated by calling this
#' function , N is the length of PRL.
#' 
#' This function is the parallel version of ScorePGSEAm.
#' 
#' @param MergingSet an ExpressionSet object. The assay data represents the
#' PRLs of the samples, each column represents one PRL. The number of sample
#' must be greater than 1, oherwise, this function is not meaningful.
#' @param SignatureLength the length of "gene signature". In order to compute
#' pairwise distances among samples, genes lists are ranked according to the
#' gene expression ratio (fold change). And the "gene signature" includes the
#' most up-regulated genes (near the top of the list) and the most
#' down-regulated genes (near the bottom of the list).
#' @param ScoringDistance the distance measurements between PRLs: the Average
#' Enrichment Score Distance (avg), or the Maximum Enrichment Score Distance
#' (max).
#' @param ncore number of cores
#' @param p.value logical, if TRUE return a matrix of p.values of the distance
#' matrix, default FALSE
#' @param verbose verbose
#' @seealso \code{\link{ScoreGSEA}}, \code{\link{SignatureDistance}}
#' @import methods
#' @import PGSEA
#' @import parallel
#' @import foreach
#' @import doParallel
#' @examples
#' 
#' # load the sample expressionSet
#' data(exampleSet)
#' 
#' # Merging each group of the ranked lists in the exampleSet with the same 
#' # phenotypic data into a single PRL
#' MergingSet=RankMerging(exampleSet,"Spearman")
#' 
#' # get the distance matrix
#' ds=ScorePGSEAm(MergingSet,250, ScoringDistance="avg", ncore=2)
#' 
#' @export ScorePGSEAm
ScorePGSEAm <-
function(MergingSet, SignatureLength, ScoringDistance=c("avg", "max"), ncore=2, p.value=FALSE, verbose=FALSE){
    ScoringDistance=match.arg(ScoringDistance,c("avg","max"))
    PRLs=exprs(MergingSet)
    n=ncol(PRLs)
    m=nrow(PRLs)
    # pgscores=matrix(0,1,n)
    
    ############################################################################
    # parallel pgscores caculation
    cl <- parallel::makeCluster(ncore)
    doParallel::registerDoParallel(cl)
    if (verbose) {print(paste("getDoParWorkers:", foreach::getDoParWorkers()))}
    nc=NULL
    pgscores <- foreach::foreach (nc = 1:n, .combine=rbind, .verbose=verbose) %dopar% {
        if (verbose) {print (paste("Starting nClust:", nc))}
        # setClass("smc",representation(reference="character",desc="character",source="character",design="character",identifier="character",species="character",data="character",private="character", creator="character",ids="vector"))
        
        prls=as.matrix(PRLs[,nc])
        up=which(prls<=SignatureLength)
        down=which(prls>=m-SignatureLength+1)
        upsmc=new("smc",ids=up)
        downsmc=new("smc",ids=down)
        uppgscore=PGSEA::PGSEA(MergingSet,list(upsmc), p.value=NA)
        downpgscore=PGSEA::PGSEA(MergingSet,list(downsmc),p.value=NA)
        pgscore=(uppgscore-downpgscore)/2
    }
    parallel::stopCluster(cl)
    
    if (verbose) {print ("Paralleling Done.")}
    ############################################################################
    if (any(is.na(pgscores))) {warning("NA appears in the results."); return (pgscores)}
    
    Mvalue=max(abs(pgscores), na.rm=TRUE)
    pgscores=pgscores/Mvalue
    # the distance of oneself (pgscores[1,1])
    if (pgscores[1,1]>0){
        pgscores=pgscores
    } else{
        pgscores=-pgscores
    } 
     rownames(pgscores)=colnames(pgscores)
     
     
     if (ScoringDistance=="avg"){ 
         distances=1-(pgscores+t(pgscores))/2 
     } else {
         # the orignial distances for "max" is incorrect.
         # distances=pmin(1-pgscores, t(1-pgscores) )/2
         distances=1 - pmax(pgscores, t(pgscores) )    
     }
     
     # P-value
     if (isTRUE(p.value) & ScoringDistance=="avg"){
        p.results=matrix(0,n,n)
        for (i in 1:n){
           for (j in 1:n){
               if (distances[i,j]<1)
                  p.results[i,j]=2 * pnorm(distances[i,j],mean=1,sd=1/(2*Mvalue))
               else
                  p.results[i,j]=2 * (1-pnorm(distances[i,j],mean=1,sd=1/(2*Mvalue)))
            }
        }
     } else if (isTRUE(p.value) & ScoringDistance=="max") {
    
       p.results=matrix(0,nrow(distances),ncol(distances))
       for (i in 1:n){
           for (j in 1:n){
               if (distances[i,j]<1/2)
                   p.results[i,j]=2 * pnorm(distances[i,j],mean=1/2,sd=1/(8^(1/2)*Mvalue))
               else
                   p.results[i,j]=2 * (1-pnorm(distances[i,j],mean=1/2,sd=1/(8^(1/2)*Mvalue)))
            }
        }
     }
     if (p.value==T)
         return(list(distances = distances, p.results = p.results))
     return(distances)
}
zhilongjia/GeneExpressionSignature documentation built on May 4, 2019, 11:21 p.m.