#' Compute pairwise distances between samples with method in package GSEA
#'
#' Compute pairwise distances between sample according to their (Prototype
#' Ranked List) PRL, a N x N distance matrix is generated by calling this
#' function, N is the length of PRL.
#'
#' Once the PRL obtained for each sample, the distances between samples are
#' calculated base on gene signature, including the expression of genes that
#' seemed to consistently vary in response to the across different experimental
#' conditions (e.g., different cell lines and different dosages). We take two
#' distance measurements between PRLs: the Average Enrichment-Score Distance
#' Davg=(TESx,y+TESy,x)/2, and the Maximum Enrichment-Score Distance
#' Dmax=Min(TESx,y,TESy,x)/2.The avg is more stringent than max, where max is
#' more sensitive to weak similarities, with lower precision but large recall.
#'
#' @param MergingSet an ExpressionSet object. The assay data represents the
#' PRLs of the samples, each column represents one PRL. The number of sample of
#' this argument must be greater than 1, otherwise, this function is not
#' meaningful.
#' @param SignatureLength the length of "gene signature". In order to compute
#' pairwise distances among samples, genes lists are ranked according to the
#' gene expression ratio (fold change). And the "gene signature" includes the
#' most up-regulated genes (near the top of the list) and the most
#' down-regulated genes (near the bottom of the list).
#' @param ScoringDistance the distance measurements between PRLs: the Average
#' Enrichment Score Distance (avg), and the Maximum Enrichment Score Distance
#' (max).
#' @param verbose verbose
#' @param p.value logical, if TRUE return a matrix of p.values of the distance
#' matrix, default FALSE
#' @return an distance-matrix, the max distance is more sensitive to weak
#' similarities, providing a lower precision but a larger recall.
#'
#' If p.value is set to TRUE, then a list is returned that consists of the
#' distance matrix as well as their p.values, otherwise, without p.vlues in the
#' result.
#' @seealso \code{\link{ScorePGSEA}}, \code{\link{SignatureDistance}}
#' @import Biobase
#' @import PGSEA
#' @examples
#'
#' # load the sample expressionSet
#' data(exampleSet)
#'
#' # Merging each group of the ranked lists in the exampleSet with the
#' # same phenotypic data into a single PRL
#' MergingSet=RankMerging(exampleSet,"Spearman")
#'
#' # get the distance matrix
#' ds=ScoreGSEA(MergingSet,250,"avg")
#'
#' @export ScoreGSEA
ScoreGSEA <-
function(MergingSet,SignatureLength,ScoringDistance=c("avg", "max"),p.value=F, verbose=FALSE){
#compute the distance of ranked PRL
PRL.Distance <-function(PRL,SignatureLength,ScoringDistance=c("avg", "max"), verbose=verbose)
{
ScoringDistance=match.arg(ScoringDistance,c("avg","max"))
ES=matrix(1)
if (ncol(exprs(PRL))<=1){
stop("the number of column of argument PRL must be greater than 1")
}
KBR=PRL[,1]
PRLcol=ncol(exprs(PRL))
for (i in 2:PRLcol){
if (verbose) {
print (paste(i, "out of ", ncol(exprs(PRL) )))
}
newPRL=PRL[,i]
d=integratePRL(ES,KBR,newPRL,SignatureLength,ScoringDistance)
ES=d[[2]]
KBR=d[[1]]
}
return((d[[3]]))
}
#compute the distance between two random permutations(RP)
RP.Distance <- function(RP1,RP2,SignatureLength,ScoringDistance=c("avg", "max"))
{
up1=which(RP1<=SignatureLength)
down1=which(RP1>=length(RP1)-SignatureLength+1)
up2=which(RP2<=SignatureLength)
down2=which(RP2>=length(RP2)-SignatureLength+1)
ES1=quickenrichmentscore(up1,down1,RP2)
ES2=quickenrichmentscore(up2,down2,RP1)
TES1=1-ES1
TES2=1-ES2
if (ScoringDistance=="avg")
distances = (TES1+TES2)/2
else
distances= min(TES1+TES2)/2
}
ScoringDistance=match.arg(ScoringDistance,c("avg","max"))
PRL=MergingSet
if (is.null(ncol(exprs(PRL)))==TRUE){
stop("the class of argument MergingSet is incorrect")
}
if (ncol(exprs(PRL))<=1){
stop("the number of samples of argument MergingSet must be greater than 1")
}
dis=PRL.Distance(PRL,SignatureLength,ScoringDistance, verbose=verbose)
if (p.value){
p.results=matrix(0,nrow(dis),ncol(dis))
prb_num=nrow(exprs(PRL))
RPdis=c()
RP1=sample(prb_num);
for (i in 1:10000){
RP2=sample(prb_num);
RPdis[i]=RP.Distance(RP1,RP2,SignatureLength,ScoringDistance)
}
RPdis=sort(RPdis)
for (i in 1:nrow(dis)){
for (j in 1:ncol(dis)){
p.results[i,j]=(length(which(RPdis<=dis[i,j]))+1)/10001
}
}
return(list(distances=dis,p.results=p.results))
}
return(dis)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.