Nothing
##' DCSA
##'
##'
##' @title Dynamic Changed Subpathway Analysis (DCSA)
##' @description This function will perform the Dynamic Changed Subpathway Analysis (DCSA) method to estimate the dynamic
##' changed subpathways associated with the sample phenotypes (like the developmental stage of cancer).
##' @param expr Matrix of gene expression values (rows are genes, columns are samples).
##' @param input.cls Input sample phenotype class vector file in CLS format.
##' @param subpathwaylist Character string denoting the gene label of the subpathway list is `Entrezid` (default) or `Symbol`.
##' Users can also enter their own subpathway list data. This list should be consistent with the gene label in the input gene expression profile.
##' @param kcdf Character string denoting the kernel to use during the non-parametric estimation of the cumulative
##' distribution function of expression levels across samples when method="gsva". By default, `kcdf="Gaussian"` which
##' is suitable when input expression values are continuous, such as microarray fluorescent units in logarithmic scale,
##' RNA-seq log-CPMs, log-RPKMs or log-TPMs. When input expression values are integer counts, such as those derived from
##' RNA-seq experiments, then this argument should be set to `kcdf="Poisson"`.
##' @param method Method to employ in the estimation of subpathway enrichment scores per sample. By default, this is set to
##' `gsva` (Hänzelmann et al, 2013) and other options are `ssgsea` (Barbie et al, 2009).
##' @param min.sz Minimum size of the resulting subpathway.
##' @param max.sz Maximum size of the resulting subpathway.
##' @param nperm Number of random permutations (default: 100).In practice, the users can set their own values as needed, and more than 1000 times may be fine in general.
##' @param fdr.th Cutoff value for FDR. The only subpathway with lower FDR.th are listed (default: 1).
##' @param mx.diff Offers two approaches to calculate the sample enrichment score (SES) from the KS random walk statistic.
##' `mx.diff=FALSE`: SES is calculated as the maximum distance of the random walk from 0. `mx.diff=TRUE` (default): SES is
##' calculated as the magnitude difference between the largest positive and negative random walk deviations.
##' @param parallel.sz Number of processors to use when doing the calculations in parallel. If this argument is left with its
##' default value (parallel.sz=0) then it will use all available core processors unless we set this argument with a smaller number.
##' @details
##' This function calculates the subpathway activity profile based on the gene expression profile and subpathway list by 'gsva' or 'ssgssea'.
##' Next, we used the information-theoretic measure of statistical dependence,
##' mutual information (MI), to estimate the dynamically changed subpathways associated with the sample phenotypes.
##' Finally we used the perturbation analysis of the gene label rearrangement to estimating the statistical significance.
##'
##' @return A list containing the results of DCSA and subpathway activity profile.
##' @author Xudong Han,
##' Junwei Han,
##' Qingfei Kong
##' @examples
##' # load depend package.
##' require(GSVA)
##' require(parallel)
##' require(mpmi)
##' # get ACC disease stage gene expression profiling.
##' # ACCgenematrix<-get("DCgenematrix")
##' # get path of the sample disease stage phenotype files.
##' # Stagelabels<-system.file("extdata", "DClabels.cls", package = "psSubpathway")
##' # perform the DCSA method.
##' # DCSA(ACCgenematrix,input.cls=Stagelabels,nperm=50,fdr.th=0.01,parallel.sz=2)
##' # get the result of the SubSEA function
##' # DCSAresult<-get("DCspwresult")
##' # str(DCSAresult)
##' # head(DCSAresult$DCSA)
##'
##' # Simulated gene matrix.
##' genematrix <- matrix(rnorm(500*40), nrow=500, dimnames=list(1:500, 1:40))
##' # Construct subpathway list data.
##' subpathwaylist <- as.list(sample(2:100, size=20, replace=TRUE))
##' subpathwaylist <- lapply(subpathwaylist, function(n) sample(1:500, size=n, replace=FALSE))
##' names(subpathwaylist)<-c(paste(rep("spw",20),c(1:20)))
##' # Construct sample labels data.
##' stagelabel<-list(phen=c("stage1","stage2","stage3","stage4"),
##' class.labes=c(rep("stage1",10),rep("stage2",10),
##' rep("stage3",10),rep("stage4",10)))
##' DCSAcs<-DCSA(genematrix,stagelabel,subpathwaylist,nperm=0,parallel.sz=1)
##'
##'
##' @importFrom parallel parLapply
##' @importFrom parallel detectCores
##' @importFrom parallel makeCluster
##' @importFrom parallel clusterExport
##' @importFrom parallel clusterEvalQ
##' @importFrom parallel stopCluster
##' @importFrom GSVA gsva
##' @importFrom GSVA filterGeneSets
##' @importFrom mpmi mminjk.pw
##' @importFrom stats na.omit
##' @importFrom stats p.adjust
##'
##' @useDynLib psSubpathway,.registration = TRUE
##' @export
DCSA<-function(expr,input.cls="",subpathwaylist="Symbol",kcdf="Gaussian",
method="gsva",min.sz=1,max.sz=Inf,nperm=100,fdr.th=1,
mx.diff=TRUE,parallel.sz=0){
if(is.list(subpathwaylist)==TRUE){
spwlist1<-subpathwaylist
}else{
if(subpathwaylist=="Entrezid"){
spwlist1<-get("spwentrezidlist")
}else if(subpathwaylist=="Symbol"){
spwlist1<-get("spwsymbollist")
}
}
if (kcdf == "Gaussian") {
rnaseq <- FALSE
kernel <- TRUE
} else if (kcdf == "Poisson") {
rnaseq <- TRUE
kernel <- TRUE
}
if(is.list(input.cls)) {
CLS <- input.cls
} else {
CLS <- ReadClsFile(file=input.cls)
}
phen <- rev(CLS$phen)
samples.v <-CLS$class.labes
expr<-data.matrix(expr)
allgenes<-row.names(expr)
spwlist <- lapply(spwlist1,
function(x ,y) na.omit(match(x, y)),
allgenes)
spwlist <- filterGeneSets(spwlist,
min.sz=max(1, min.sz),
max.sz=max.sz)
spwnames<-names(spwlist)
N<-length(allgenes)
n.samples <- ncol(expr)
mclapp <- get('mclapply', envir=getNamespace('parallel'))
detCor <- get('detectCores', envir=getNamespace('parallel'))
if(parallel.sz==0){
nCores <- parallel::detectCores()
}else{
nCores<-parallel.sz
}
if(method=="gsva"){
sample.idxs<-c(1:n.samples)
gene.density <- getgenedensity(expr, sample.idxs, rnaseq, kernel)
rank.scores <- rep(0, N)
sort.sgn.idxs <- apply(gene.density, 2, order, decreasing=TRUE)
rank.scores <- apply(sort.sgn.idxs, 2, compute_rank_score,N)
#ture spw matrix
spw_matrix<-t(sapply(spwlist, ks_test_m, rank.scores, sort.sgn.idxs,
mx.diff=mx.diff, abs.ranking=FALSE,
tau=1))
row.names(spw_matrix)<-spwnames
colnames(spw_matrix) <- samples.v
#perturbation
if(nperm>0){
zgindex<-seq(1,N)
n.gset<-length(spwlist)
cl <- makeCluster(nCores)
clusterExport(cl,"ks_test_m")
clusterEvalQ(cl,library(GSVA))
rdeslist<-parLapply(cl,1:nperm, function(n,zgindex,n.gset,spwlist,rank.scores,sort.sgn.idxs,mx.diff){
rdgs.list<-lapply(1:n.gset, function(s){
ng<-length(spwlist[[s]])
rdgs<-sample(zgindex,ng,replace = F)
return(rdgs)
})
m <- t(sapply(rdgs.list, ks_test_m, rank.scores, sort.sgn.idxs,
mx.diff=mx.diff, abs.ranking=FALSE,
tau=1))
return(m)
},zgindex,n.gset,spwlist,rank.scores,sort.sgn.idxs,mx.diff)
stopCluster(cl)
}
}
if(method=="ssgsea"){
genename<-row.names(expr)
spw_matrix<-ssgsea(expr,spwlist,parallel.sz=nCores)
if(nperm>0){
rdeslist<-lapply(1:nperm,function(n){
expr1<-expr
row.names(expr1)<-sample(genename,replace = F)
m<-ssgsea(expr1,spwlist,parallel.sz=nCores)
return(m)
})}
}
hxx<-apply(spw_matrix,1,mminjk.pw,samples.v)
if(nperm>0){
cl1 <- makeCluster(nCores)
clusterEvalQ(cl1,library(mpmi))
rdh<-parLapply(cl1,1:nperm,function(n,rdeslist,samples.v){
rdspwmatrix<-rdeslist[[n]]
yrdhxx<-apply(rdspwmatrix,1,function(hang){
hxx1<-mminjk.pw(hang,samples.v)
return(hxx1)
})
return(yrdhxx)
},rdeslist,samples.v)
stopCluster(cl1)
rdhxx<-do.call(cbind,rdh)
pz<-NULL
for(i in 1:length(rdhxx[,1])){
pz[i]<-sum(rdhxx[i,] >= hxx[i])/nperm
}
fdr<-p.adjust(pz,method = "BH",length(pz))
sxindex<-which(fdr<=fdr.th)
if(is.list(subpathwaylist)==TRUE){
pp<-match(names(spwlist),names(spwlist1))
spwtitle<-names(spwlist)
}else{
pp<-match(names(spwlist),names(spwlist1))
spwtitle<-get("spwtitle")
spwtitle<-spwtitle[pp]
}
genetag<-unlist(lapply(spwlist1[pp],
function(x){
x<-as.character(x)
a<-paste(unlist(x),collapse=" ")
return(a)
}))
gene1<- genetag[sxindex]
spwtitle1<-spwtitle[sxindex]
}
if(nperm>0){
report<-data.frame(SubpahwayID=names(spwlist)[sxindex],PahwayName=spwtitle1,SubpathwayGene=gene1,"D(M)"=hxx[sxindex],Pvalue=pz[sxindex],FDR=fdr[sxindex],
check.names = F)
hxxreport<-list(report,spw_matrix)
names(hxxreport)<-c("DCSA","spwmatrix")
}else{
report<-data.frame(SubpahwayID=names(spwlist),"D(M)"=hxx,
check.names = F)
hxxreport<-list(report,spw_matrix)
names(hxxreport)<-c("DCSA","spwmatrix")
}
return(hxxreport)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.