#' Compute pairwise distances between samples with method in package PGSEA
#'
#' Compute pairwise distances between sample according to their (Prototype
#' Ranked List) PRL, get a N x N distance matrix is generated by calling this
#' function , N is the length of PRL.
#'
#' This function is the parallel version of ScorePGSEAm.
#'
#' @param MergingSet an ExpressionSet object. The assay data represents the
#' PRLs of the samples, each column represents one PRL. The number of sample
#' must be greater than 1, oherwise, this function is not meaningful.
#' @param SignatureLength the length of "gene signature". In order to compute
#' pairwise distances among samples, genes lists are ranked according to the
#' gene expression ratio (fold change). And the "gene signature" includes the
#' most up-regulated genes (near the top of the list) and the most
#' down-regulated genes (near the bottom of the list).
#' @param ScoringDistance the distance measurements between PRLs: the Average
#' Enrichment Score Distance (avg), or the Maximum Enrichment Score Distance
#' (max).
#' @param ncore number of cores
#' @param p.value logical, if TRUE return a matrix of p.values of the distance
#' matrix, default FALSE
#' @param verbose verbose
#' @seealso \code{\link{ScoreGSEA}}, \code{\link{SignatureDistance}}
#' @import methods
#' @import PGSEA
#' @import parallel
#' @import foreach
#' @import doParallel
#' @examples
#'
#' # load the sample expressionSet
#' data(exampleSet)
#'
#' # Merging each group of the ranked lists in the exampleSet with the same
#' # phenotypic data into a single PRL
#' MergingSet=RankMerging(exampleSet,"Spearman")
#'
#' # get the distance matrix
#' ds=ScorePGSEAm(MergingSet,250, ScoringDistance="avg", ncore=2)
#'
#' @export ScorePGSEAm
ScorePGSEAm <-
function(MergingSet, SignatureLength, ScoringDistance=c("avg", "max"), ncore=2, p.value=FALSE, verbose=FALSE){
ScoringDistance=match.arg(ScoringDistance,c("avg","max"))
PRLs=exprs(MergingSet)
n=ncol(PRLs)
m=nrow(PRLs)
# pgscores=matrix(0,1,n)
############################################################################
# parallel pgscores caculation
cl <- parallel::makeCluster(ncore)
doParallel::registerDoParallel(cl)
if (verbose) {print(paste("getDoParWorkers:", foreach::getDoParWorkers()))}
nc=NULL
pgscores <- foreach::foreach (nc = 1:n, .combine=rbind, .verbose=verbose) %dopar% {
if (verbose) {print (paste("Starting nClust:", nc))}
# setClass("smc",representation(reference="character",desc="character",source="character",design="character",identifier="character",species="character",data="character",private="character", creator="character",ids="vector"))
prls=as.matrix(PRLs[,nc])
up=which(prls<=SignatureLength)
down=which(prls>=m-SignatureLength+1)
upsmc=new("smc",ids=up)
downsmc=new("smc",ids=down)
uppgscore=PGSEA::PGSEA(MergingSet,list(upsmc), p.value=NA)
downpgscore=PGSEA::PGSEA(MergingSet,list(downsmc),p.value=NA)
pgscore=(uppgscore-downpgscore)/2
}
parallel::stopCluster(cl)
if (verbose) {print ("Paralleling Done.")}
############################################################################
if (any(is.na(pgscores))) {warning("NA appears in the results."); return (pgscores)}
Mvalue=max(abs(pgscores), na.rm=TRUE)
pgscores=pgscores/Mvalue
# the distance of oneself (pgscores[1,1])
if (pgscores[1,1]>0){
pgscores=pgscores
} else{
pgscores=-pgscores
}
rownames(pgscores)=colnames(pgscores)
if (ScoringDistance=="avg"){
distances=1-(pgscores+t(pgscores))/2
} else {
# the orignial distances for "max" is incorrect.
# distances=pmin(1-pgscores, t(1-pgscores) )/2
distances=1 - pmax(pgscores, t(pgscores) )
}
# P-value
if (isTRUE(p.value) & ScoringDistance=="avg"){
p.results=matrix(0,n,n)
for (i in 1:n){
for (j in 1:n){
if (distances[i,j]<1)
p.results[i,j]=2 * pnorm(distances[i,j],mean=1,sd=1/(2*Mvalue))
else
p.results[i,j]=2 * (1-pnorm(distances[i,j],mean=1,sd=1/(2*Mvalue)))
}
}
} else if (isTRUE(p.value) & ScoringDistance=="max") {
p.results=matrix(0,nrow(distances),ncol(distances))
for (i in 1:n){
for (j in 1:n){
if (distances[i,j]<1/2)
p.results[i,j]=2 * pnorm(distances[i,j],mean=1/2,sd=1/(8^(1/2)*Mvalue))
else
p.results[i,j]=2 * (1-pnorm(distances[i,j],mean=1/2,sd=1/(8^(1/2)*Mvalue)))
}
}
}
if (p.value==T)
return(list(distances = distances, p.results = p.results))
return(distances)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.