Nothing
#' @title Calculate the single-sample mutation-based pathway enrichment score.
#' @description The function `get_RWR_ES` is used to calculate the single-sample mutation-based pathway enrichment score. Using somatic mutation data,PPI network and pathway data.
#' @param mut_status A binary mutation matrix.The file can be generated by the function `get_mut_status`.
#' @param min_sample The minimum number of mutated genes contained in a sample,default to 0.
#' @param max_sample The maximum number of mutated genes contained in a sample.
#' @param net_data A list of the PPI network information, including nodes and edges.
#' @param pathway_data A data frame containing the pathways and their corresponding genes. The first column is the names of pathways and the second column is the genes included in the pathways.
#' @param r A numeric value between 0 and 1. r is a certain probability of continuing the random walk or restarting from the restart set. Default to 0.7.
#' @param Numcore The number of threads when running programs with multiple threads,default to 2 .
#' @param BC_Num Number of background genes required to calculate seed node weight.
#' @param cut_point The threshold of indicator function .
#' @importFrom igraph V
#' @importFrom igraph get.adjacency
#' @importFrom Matrix colSums
#' @importFrom parallel makeCluster
#' @importFrom parallel clusterExport
#' @importFrom parallel clusterEvalQ
#' @importFrom parallel parApply
#' @importFrom parallel stopCluster
#' @importFrom stats median
#' @return A single-sample mutation-based pathway enrichment score profiles, where each element represents the enrichment score of a pathway in a sample.
#' @export
#' @examples
#' #load the data
#' data(mut_status)
#' net_path <- system.file("extdata","ppi_network.Rdata",package = "ssMutPA")
#' load(net_path)
#' pathway_path<-system.file("extdata","kegg_323_gmt.Rdata",package = "ssMutPA")
#' load(pathway_path)
#' samp_name<-c("TCGA-06-0881-01A","TCGA-76-4934-01A")
#' examp_data<-mut_status[,samp_name]
#' #perform the function `get_RWR_ES`.
#' \donttest{Path_ES<-get_RWR_ES(examp_data,net_data=ppi_network,pathway_data=kegg_323_gmt,BC_Num=12436)}
get_RWR_ES<-function(mut_status,min_sample=0,max_sample=dim(mut_status)[1],net_data,pathway_data,
r=0.7,Numcore=2,BC_Num=length(V(net_data)$name),cut_point=0){
col_0<-colSums(mut_status)
mut_status<-as.data.frame(mut_status)
mut_status<-mut_status[,names(col_0[col_0>=min_sample&max_sample>=col_0])]
pathway_list<-split(pathway_data[,2],pathway_data[,1])
net_AdjMatr<-as.matrix(get.adjacency(net_data))
net_AdjMatrNorm <- t(t(net_AdjMatr)/(Matrix::colSums(net_AdjMatr, na.rm = FALSE, dims = 1)))
samp_matr<-as.data.frame(colnames(mut_status),row.names = colnames(mut_status))
clus <- makeCluster(Numcore)
clusterExport(clus,"samp_matr",envir = environment())
clusterExport(clus,"mut_status",envir = environment())
clusterExport(clus,"net_data",envir = environment())
clusterExport(clus,"net_AdjMatrNorm",envir = environment())
clusterExport(clus,c("r","BC_Num","cut_point"),envir = environment())
clusterExport(clus,c("MRWR","get_seeds_score","get_indicate"),envir = environment())
clusterEvalQ(clus,library(igraph))
RWR_Res<-parApply(clus,samp_matr,1,function(x){
samp_mut<-mut_status[,x]
names(samp_mut)<-rownames(mut_status)
seed<-intersect(names(samp_mut)[which(samp_mut!=0)],V(net_data)$name)
mut_gene<-intersect(names(samp_mut)[which(samp_mut!=0)],V(net_data)$name)
if(length(seed)!=0){
a<-MRWR(net_AdjMatrNorm,Seeds=seed,net_data = net_data,mut_gene = mut_gene,r=r,
BC_Num=BC_Num,cut_point=cut_point)
walk_score<-a$RWRM_Results$Score
walk_score<- log10(walk_score)
walk_score<-walk_score-median(walk_score)
names(walk_score)<-a$RWRM_Results$NodeNames
}else{
walk_score<-rep(0,length(V(net_data)$name))
names(walk_score)<-V(net_data)$name
}
return(walk_score)
})
stopCluster(clus)
clus <- makeCluster(Numcore)
clusterExport(clus,"samp_matr",envir = environment())
clusterExport(clus,"pathway_list",envir = environment())
clusterExport(clus,"RWR_Res",envir = environment())
clusterExport(clus,"FastSEAscore",envir = environment())
RWR_Path<-parApply(clus,as.matrix(samp_matr),1,function(rp){
samp_RWR<-RWR_Res[,rp]
if(length(which(samp_RWR%in%0))==length(samp_RWR)){
pathway_ES<-rep(0,length(pathway_list))
names(pathway_ES)<-names(pathway_list)
}else{
gene_list<-sort(samp_RWR,decreasing=T)
pathway_ES<-sapply(pathway_list, function(s){
tag.indicator <- sign(match(names(gene_list), s, nomatch = 0))
ES <- FastSEAscore(tag.indicator,correl_vector = gene_list)
if(ES%in%NA|ES<0){ES<-0}
return(ES)
})
}
return(pathway_ES)
})
stopCluster(clus)
return(RWR_Path)
}
#' @title Calculate the local wight of seed nodes.
#' @description The function `get_seeds_score` is used to calculate the local wight of seed nodes in a single sample.
#' @param net_data A list of the PPI network information, including nodes and edges.
#' @param seed A vector containing the gene symbols of the seed nodes.
#' @param mut_gene A vector containing the gene symbols of the mutated genes in a single sample.
#' @param BC_Num Number of background genes.
#' @param cut_point The threshold of indicator function.
#' @importFrom igraph neighbors
#' @return A data frame containing the weight of seed nodes.
#' @export
#' @examples
#' #load the data
#' net_path <- system.file("extdata","ppi_network.Rdata",package = "ssMutPA")
#' load(net_path)
#' data(mut_status)
#' seed<-intersect(names(mut_status[,1])[which(mut_status[,1]!=0)],igraph::V(ppi_network)$name)
#' mut_gene<-intersect(names(mut_status[,1])[which(mut_status[,1]!=0)],igraph::V(ppi_network)$name)
#' #perform the function `get_seeds_score`.
#' Seeds_Score<-get_seeds_score(net_data=ppi_network,seed=seed,mut_gene,BC_Num=12436,cut_point=0)
get_seeds_score<-function(net_data,seed,mut_gene,BC_Num,cut_point=0){
seed_frame<-as.data.frame(seed,row.names = seed)
seeds_score<-apply(seed_frame, 1, function(x){
seed_neig<-c(x,names(neighbors(net_data,x)))
score<-((length(seed_neig))*length(mut_gene))/BC_Num
weig<-length(intersect(seed_neig,mut_gene))-score
weig_score<-log2(weig*(get_indicate(weig,cut_point))+2)
return(weig_score)
})
seeds_score<-as.data.frame(cbind(Seeds_ID=names(seeds_score),Score=seeds_score))
seeds_score[,"Score"]<-as.numeric(seeds_score[,"Score"])
rownames(seeds_score)<-NULL
return(seeds_score)
}
#' @title Indicator function.
#' @param vector A numerical value.
#' @param cutpoint The threshold of the indicator function.
#' @return An integer with the value 1 or 0.
#' @export
get_indicate<-function(vector,cutpoint){
if(vector>cutpoint){
vector<-1
}else{
vector<-0
}
return(vector)
}
#' @title A global propagation algorithm, random walk with restart (RWR), to predict probable influence of nodes in the network by seed nodes.
#' @description The function `MRWR` is used to predict probable influence of nodes in the network by seed nodes.
#' @param net_AdjMatrNorm Row normalized network adjacency matrix.
#' @param Seeds A vector containing the gene symbols of the seed nodes.
#' @param net_data A list of the PPI network information,including nodes and edges .
#' @param mut_gene A vector containing the gene symbols of the mutated genes in a sample.
#' @param r A numeric value between 0 and 1. r is a certain probability of continuing the random walk or restarting from the restart set. Default to 0.7.
#' @param BC_Num Number of background genes required to calculate seed node weight.
#' @param cut_point The threshold of indicator function .
#' @importFrom igraph V
#' @importFrom igraph get.adjacency
#' @return An matrix of global weight, where the row names are genes in the network and the column names are samples.
#' @export
#' @examples
#' #load the data
#' net_path <- system.file("extdata","ppi_network.Rdata",package = "ssMutPA")
#' load(net_path)
#' net_AdjMatr<-as.matrix(igraph::get.adjacency(ppi_network))
#' net_AdjMatrNorm <- t(t(net_AdjMatr)/(Matrix::colSums(net_AdjMatr, na.rm = FALSE, dims = 1)))
#' data(mut_status)
#' mut_gene<-intersect(names(mut_status[,1])[which(mut_status[,1]!=0)],igraph::V(ppi_network)$name)
#' seed<-intersect(names(mut_status[,1])[which(mut_status[,1]!=0)],igraph::V(ppi_network)$name)
#' #perform the function `MRWR`.
#' \donttest{RWR_res<-MRWR(net_AdjMatrNorm,Seeds=seed,net_data=ppi_network,mut_gene,BC_Num = 12436)}
MRWR<- function(net_AdjMatrNorm,Seeds,net_data,mut_gene,r=0.7,BC_Num = length(V(net_data)$name),cut_point=0){
N<-nrow(net_AdjMatrNorm)
Seeds <- as.character(Seeds)
if (length(Seeds) < 1 | length(Seeds) >= N){
stop("The length of the vector containing the seed nodes is not
correct")
} else {
if (!all(Seeds %in% rownames(net_AdjMatrNorm))){
stop("Some of the seeds are not nodes of the network")
}
}
if (r >= 1 || r <= 0) {
stop("Restart partameter should be between 0 and 1")
}
Threeshold <- 1e-10
NetworkSize <- ncol(net_AdjMatrNorm)
residue <- 1
iter <- 1
Seeds_Score<-get_seeds_score(net_data,Seeds,mut_gene,BC_Num=BC_Num,cut_point=cut_point)
prox_vector <- matrix(0,nrow = NetworkSize,ncol=1)
prox_vector[match(Seeds_Score[,1],colnames(net_AdjMatrNorm))] <- (Seeds_Score[,2])
prox_vector <- prox_vector/sum(prox_vector)
restart_vector <- prox_vector
while(residue >= Threeshold){
old_prox_vector <- prox_vector
prox_vector <- (1-r)*(net_AdjMatrNorm %*% prox_vector) + r*restart_vector
residue <- sqrt(sum((prox_vector-old_prox_vector)^2))
iter <- iter + 1;
}
NodeNames <- character(length = N)
Score = numeric(length = N)
rank_global <- data.frame(NodeNames = NodeNames, Score = Score)
rank_global$NodeNames <- row.names(prox_vector)
rank_global$Score<-prox_vector[,1]
rownames(rank_global) <- c()
RWRM_ranking <- list(RWRM_Results = rank_global,Seed_Nodes = Seeds)
return(RWRM_ranking)
}
#' @title Calculate the enrichment score of the pathways .
#' @description The function `FastSEAscore` is used to calculate the pathway enrichment score.
#' @param labels.list The position of the genes in the pathway in the ranked gene list .
#' @param correl_vector A ranked list of all genes in the PPI network.
#' @return Enrichment scores of pathways based on predefined gene ranked lists.
#' @export
#' @examples
#' #load the data
#' pathway_path<-system.file("extdata","kegg_323_gmt.Rdata",package = "ssMutPA")
#' load(pathway_path)
#' pathway_list<-split(kegg_323_gmt[,2],kegg_323_gmt[,1])
#' data(RWR_res)
#' gene_list<-sort(RWR_res,decreasing=TRUE)
#' tag.indicator <- sign(match(names(gene_list), pathway_list[[1]], nomatch = 0))
#' #perform the function `FastSEAscore`.
#' Path_ES<-FastSEAscore(labels.list=tag.indicator,correl_vector = gene_list)
#' names(Path_ES)<-names(pathway_list)[1]
FastSEAscore<-function(labels.list,correl_vector){
tag.indicator <- labels.list
no.tag.indicator <- 1 - tag.indicator
N <- length(labels.list)
Nh <- length(labels.list[labels.list==1])
Nm <- N - Nh
correl_vector <- abs(correl_vector)
sum.correl.tag <- sum(correl_vector[tag.indicator == 1])
norm.tag <- 1.0/sum.correl.tag
norm.no.tag <- 1.0/Nm
RES <- cumsum(tag.indicator * correl_vector * norm.tag - no.tag.indicator * norm.no.tag)
max.ES <- max(RES)
min.ES <- min(RES)
ES <- signif(ifelse(max.ES > - min.ES, max.ES, min.ES), digits=5)
return(ES)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.