Nothing
#' @title Converts MAF file into mutation matrix.
#' @description The function 'get_mut_status' uses to convert MAF file into mutation matrix.
#' @param mutvariant A nx3 data frame of patients' somatic mutation data,the first line is gene symbol,the second line is sample ID and the third line is mutation classification.
#' @param nonsynonymous Logical, tell if extract the non-synonymous somatic mutations (nonsense mutation, missense mutation, frame-shif indels, splice site, nonstop mutation, translation start site, inframe indels).
#' @return A binary mutations matrix, in which 1 represents that a particular gene has mutated in a particular sample, and 0 represents that gene has no mutation in a particular sample.
#' @importFrom utils read.delim
#' @export
#' @examples
#' \donttest{maf<-system.file("extdata","maffile.maf",package = "ProgModule")
#' maf_data<-read.delim(maf)
#' mutvariant<-maf_data[,c("Hugo_Symbol","Tumor_Sample_Barcode","Variant_Classification")]
#' #perform the function `get_mut_status`.
#' mut_status.example<-get_mut_status(mutvariant,nonsynonymous = TRUE)}
get_mut_status<-function(mutvariant,nonsynonymous = TRUE){
if(nonsynonymous){
mafmut<-mutvariant[which(
mutvariant$Variant_Classification == "Missense_Mutation" |
mutvariant$Variant_Classification == "Frame_Shift_Del" |
mutvariant$Variant_Classification == "Frame_Shift_Ins" |
mutvariant$Variant_Classification == "In_Frame_Del" |
mutvariant$Variant_Classification == "Nonsense_Mutation" |
mutvariant$Variant_Classification == "In_Frame_Ins" |
mutvariant$Variant_Classification == "Splice_Site" |
mutvariant$Variant_Classification == "Nonstop_Mutation" |
mutvariant$Variant_Classification == "Translation_Start_Site"
),]
}else{
mafmut<-mutvariant
}
mut_status<-matrix(data=0,nrow=length(unique(mafmut[,1])),ncol=length(unique(mafmut[,2])))
colnames(mut_status)<-unique(mafmut[,2])
rownames(mut_status)<-unique(mafmut[,1])
for(i in 1:dim(mut_status)[2]){
un_sname<-table(mafmut[which(mafmut[,2]%in%colnames(mut_status)[i]),1])
mut_status[match(names(un_sname),rownames(mut_status)),i]<-un_sname
}
mut_status[mut_status>1]<-1
return(mut_status)
}
#' @title Calculate mutual information.
#' @description The function `MI` is used to calculate the mutual information score between samples' survival status and mutation status.
#' @param mylist1 Is input a list of samples' survival status.
#' @param mylist2 Is input a list of samples' mutation status.
#' @return The mutual information score
#' @importFrom infotheo entropy
#' @importFrom utils read.delim
#' @export
#' @examples
#' \donttest{#load the data
#' data(mut_status)
#' sur<-system.file("extdata","sur.csv",package = "ProgModule")
#' sur<-read.delim(sur,sep=",",header=TRUE,row.names = 1)
#' #perform the function 'MI'
#' mut_matrix<-MI(mylist1 = as.numeric(mut_status[1,]),mylist2 = sur[,1])}
MI<-function(mylist1,mylist2){
return(entropy(mylist1)+entropy(mylist2)-entropy(cbind(mylist1,mylist2)))
}
#' @title Extract the local networks from the PPI network.
#' @description The function `get_local_network` is used to search local network of each gene by breadth-first algorithm.
#' @param network The PPI network.
#' @param freq_matrix The mutations matrix,generated by `get_mut_status`.
#' @param max.size The size of maximum connected local network,default is 500.
#' @return local nerwork.
#' @importFrom igraph get.vertex.attribute
#' @importFrom igraph bfs
#' @export
#' @examples
#' \donttest{#load the data
#' data(mut_status)
#' data(subnet)
#' #perform the function `get_local_network`.
#' localnetwork.example<-get_local_network(network=subnet,freq_matrix=mut_status,max.size=500)}
get_local_network<-function(network,freq_matrix,max.size=500){
node<-get.vertex.attribute(network)[[1]]
inter<-intersect(node,rownames(freq_matrix))
node<-inter
freq_matrix<-freq_matrix[inter,]
re<-list()
for(i in 1:length(node)){
ne<-bfs(network,node[i],order = TRUE, unreachable = FALSE,dist=TRUE,mode= "all")
m=0
l=0
r1<-c()
while(l<length(ne$dist)){
l<-l+length(which(ne$dist==m))
if(l>max.size){
break
}
r1<-c(r1,names(which(ne$dist==m)))
m=m+1
}
re[[i]]<-r1
names(re)[[i]]<-node[i]
}
return(re)
}
#' @title Extract the mutually exclusive module.
#' @description The function `get_mutual_module` is used to determine if neighbor genes should be added to the module by calculating the score.
#' @param module The Original modular gene set.
#' @param net The local network extracted from PPI network.
#' @param freq_matrix The mutations matrix,generated by `get_mut_status`.
#' @param sur A nx2 data frame of samples' survival data,the first line is samples' survival event and the second line is samples' overall survival.
#' @param module_sig A label for whether the module is a risk factor or a protective factor for survival.
#' @param univarCox_result The result of Cox univariate analysis,generated by `get_univarCox_result`.
#' @param rate The rate of increase in score,default is 0.05.
#' @return The mutually exclusive module.
#' @importFrom igraph neighbors
#' @importFrom utils read.delim
#' @export
#' @examples
#' \donttest{#load the data
#' data(mut_status)
#' sur<-system.file("extdata","sur.csv",package = "ProgModule")
#' sur<-read.delim(sur,sep=",",header=TRUE,row.names = 1)
#' data(net)
#' data(module)
#' data(univarCox_result)
#' #perform the function `get_mutual_module`.
#' mutuallyexclusivemodule.example<-get_mutual_module(module,net,freq_matrix=mut_status,sur,
#' module_sig="risk",univarCox_result,rate=0.05)}
get_mutual_module<-function(module,net,freq_matrix,sur,module_sig,univarCox_result,rate){
inter<-intersect(colnames(freq_matrix),rownames(sur))
if(length(inter)==0){
stop("Plese input the same patients' mutation and survival data!")
}else{
freq_matrix<-freq_matrix[,inter]
sur<-sur[inter,]
}
nodes<-get.vertex.attribute(net)$name
freq_matrix<-freq_matrix[intersect(rownames(freq_matrix),nodes),]
sample_mut<-colSums(freq_matrix)
if(length(module)==1){
MI_score<-MI(mylist1 = as.numeric(freq_matrix[module,]),mylist2 = sur[,1])
CovEs<-length(intersect(names(which(sample_mut==1)),names(freq_matrix[module,])[which(freq_matrix[module,]==1)]))/length(which(freq_matrix[module,]==1))
S_score<-MI_score*CovEs*(length(which(freq_matrix[module,]!=0))/length(freq_matrix[module,]))
}else{
CovEs<-c()
module_matrix<-freq_matrix[module,]
for(i in 1:length(module)){
CovE1<-length(intersect(names(which(sample_mut==1)),names(freq_matrix[module[i],])[which(freq_matrix[module[i],]==1)]))/length(which(freq_matrix[module[i],]==1))
CovEs<-c(CovEs,CovE1)
}
module_mut<-colSums(module_matrix)
module_mut[which(module_mut>0)]<-1
MI_score<-MI(mylist1 = as.numeric(module_mut),mylist2 = sur[,1])
CovE_score<-mean(CovEs)
S_score<-MI_score*CovE_score*(length(which(colSums(freq_matrix[module,])!=0))/dim(freq_matrix)[2])
}
if(S_score==0){
final_s_score<-0
diff_s_score<-0
diff_s_score_rate<-0
}else{
neig<-names(neighbors(net,module))
neig<-setdiff(neig,module)
if(length(neig)!=0){
univarCox_result_tem<-univarCox_result[match(neig,names(univarCox_result))]
if(module_sig=="favor"){
neig<-intersect(neig,names(univarCox_result_tem[which(univarCox_result_tem<1)]))
}
if(module_sig=="risk"){
neig<-intersect(neig,names(univarCox_result_tem[which(univarCox_result_tem>1)]))
}
}
if(length(neig)!=0){
S_score1<-c()
for(i in 1:length(neig)){
new_module_mut<-colSums(freq_matrix[c(module,neig[i]),])
new_module_mut[which(new_module_mut>0)]<-1
m1<-MI(mylist1 = as.numeric(new_module_mut),mylist2 = sur[,1])
cove1<-length(intersect(names(which(sample_mut==1)),names(freq_matrix[neig[i],])[which(freq_matrix[neig[i],]==1)]))/length(which(freq_matrix[neig[i],]==1))
S_score1[i]<-m1*mean(c(CovEs,cove1))*(length(which(colSums(freq_matrix[c(module,neig[i]),])!=0))/dim(freq_matrix)[2])
}
if((max(S_score1)-S_score)/S_score>rate){
module<-c(module,neig[which.max(S_score1)])
diff_s_score<-max(S_score1)-S_score
diff_s_score_rate<-(max(S_score1)-S_score)/S_score
final_s_score<-max(S_score1)
}else{
final_s_score<-S_score
diff_s_score<-0
diff_s_score_rate<-0
}
}else{
final_s_score<-S_score
diff_s_score<-0
diff_s_score_rate<-0
}}
return(list(module=module,final_s_score=final_s_score,diff_s_score=diff_s_score,diff_s_score_rate=diff_s_score_rate))
}
#' @title Get candidate module.
#' @description The function `get_candidate_module` is used to search candidate module of each local network using greedy algorithm.
#' @param local_network The local networks,generated by `get_local_network`.
#' @param network The maximum connected subnet,extracted by mapping all mutated genes to the PPI network.
#' @param freq_matrix The mutations matrix,generated by `get_mut_status`.
#' @param sur A nx2 data frame of samples' survival data,the first line is samples' survival event and the second line is samples' overall survival.
#' @param seed The canonical drivers from NCG database, which use as the starting node of the greedy algorithm.
#' @param max.size The maximum size of the module,default is 200.
#' @param rate The rate of increase in score,default is 0.05.
#' @return candidate module.
#' @importFrom igraph delete_vertices
#' @importFrom igraph decompose
#' @importFrom igraph simplify
#' @importFrom utils read.delim
#' @importFrom utils read.table
#' @export
#' @examples
#' \donttest{#load the data
#' data(local_network)
#' data(mut_status)
#' data(subnet)
#' canonical_drivers<-system.file("extdata","canonical_drivers.txt",package = "ProgModule")
#' seed_gene<-read.table(canonical_drivers,header=FALSE)
#' sur<-system.file("extdata","sur.csv",package = "ProgModule")
#' sur<-read.delim(sur,sep=",",header=TRUE,row.names=1)
#' #perform the function `get_candidate_module`.
#' candidatemodule.example<-get_candidate_module(local_network=local_network,network=subnet,
#' freq_matrix=mut_status,sur=sur,seed=seed_gene[,1],max.size=200,rate=0.05)}
get_candidate_module<-function(local_network,network,freq_matrix,sur,seed,max.size,rate){
if(!is.null(seed)){
local_network<-local_network[unlist(lapply(seed,function(x){which(names(local_network)==x)}))]
}
univarCox_result<-suppressWarnings(get_univarCox_result(freq_matrix=freq_matrix,sur))
if(sum(is.na(univarCox_result))!=0){
univarCox_result<-univarCox_result[-which(univarCox_result%in%NA)]
}
module_set<-list()
final_score<-c()
final_diff_s_score<-c()
final_diff_s_score_rate<-c()
for(i in 1:length(local_network)){
module<-names(local_network[i])
net<-delete_vertices(network, setdiff(names(network[[]]),local_network[[i]]))#local_network[[i]]$name
net<-simplify(decompose(net)[[1]])
diff_s_score_rate<-1
final_s_score<-0
diff_s_score<-0
if(length(which(names(univarCox_result)%in%module))!=0){
if(univarCox_result[which(names(univarCox_result)%in%module)]!=1){
if(univarCox_result[which(names(univarCox_result)%in%module)]<1){module_sig="favor"}
if(univarCox_result[which(names(univarCox_result)%in%module)]>1){module_sig="risk"}
while(diff_s_score_rate > rate & length(module)<max.size & length(net)>1){
get_module_res<-get_mutual_module(module=module,net=net,freq_matrix=freq_matrix,sur=sur,module_sig=module_sig,univarCox_result=univarCox_result,rate)
if(length(module)==length(get_module_res$module)){
module<-get_module_res$module
final_s_score<- get_module_res$final_s_score
diff_s_score<-get_module_res$diff_s_score
diff_s_score_rate<-get_module_res$diff_s_score_rate
}else{
module<-unique(c(module,get_module_res$module))
final_s_score<- get_module_res$final_s_score
diff_s_score<-get_module_res$diff_s_score
diff_s_score_rate<-get_module_res$diff_s_score_rate
}
}
}
}
module_set[[i]]<-module
final_score[i]<-final_s_score
final_diff_s_score[i]<-diff_s_score
final_diff_s_score_rate [i]<-diff_s_score_rate
}
return(list(module_set=module_set,final_score=final_score,final_diff_s_score=final_diff_s_score,final_diff_s_score_rate=final_diff_s_score_rate))
}
#' @title Get perturbed p-value.
#' @description The function `get_Pvalue_S` is used to calculate the perturbed p-value.
#' @param local_network The local network gene sets,generated by `get_local_network`.
#' @param freq_matrix The mutations matrix,generated by `get_mut_status`.
#' @param sur A nx2 data frame of samples' survival data,the first line is samples' survival event and the second line is samples' overall survival.
#' @param module The candidate module,generated by `get_candidate_module`.
#' @param perms The perturbation number,default is 1000.
#' @return Perturbed p-value.
#' @importFrom utils read.delim
#' @export
#' @examples
#' \donttest{#load the data
#' data(local_network)
#' data(mut_status)
#' data(candidate_module)
#' sur<-system.file("extdata","sur.csv",package = "ProgModule")
#' sur<-read.delim(sur,sep=",",header=TRUE,row.names = 1)
#' #perform the function `get_Pvalue_S`.
#' turbulence.example<-get_Pvalue_S(module=candidate_module,freq_matrix=mut_status,
#' sur=sur,perms=100,local_network)}
get_Pvalue_S<-function(module,freq_matrix,sur,perms=1000,local_network){
pvalue<-c()
for(i in 1:length(module$module_set)){
lo<-which(names(local_network)%in%module$module_set[[i]][1])
if(length(local_network[[lo]])==1){
sample_mut<-freq_matrix[local_network[[lo]],]
}else{
sample_mut<-colSums(freq_matrix[local_network[[lo]],])
}
freq_matrix_part<-freq_matrix[module$module_set[[i]],]
S_score<-c()
for(j in 1:perms){
a<-freq_matrix_part
if(length(module$module_set[[i]])==1){a<-as.data.frame(a)}
colnames(a)<-sample(colnames(freq_matrix_part))
inter<-intersect(colnames(a),rownames(sur))
a<-a[,inter]
sur<-sur[inter,]
if(dim(a)[1]==1){
MI_score<-MI(mylist1 = as.numeric(a),mylist2 = sur[,1])
CovE_score<-length(intersect(names(which(sample_mut==1)),names(a)[which(a==1)]))/length(which(a==1))
res<-MI_score*CovE_score*(length(which(a!=0))/length(a))
}else{
CovEs<-c()
for(n in 1:dim(a)[1]){
CovE1<-length(intersect(names(which(sample_mut==1)),names(a[n,])[which(a[n,]==1)]))/length(which(a[n,]==1))
CovEs<-c(CovEs,CovE1)
}
module_mut<-colSums(a)
module_mut[which(module_mut>0)]<-1
MI_score<-MI(mylist1 = as.numeric(module_mut),mylist2 = sur[,1])
CovE_score<-mean(CovEs)
res<-MI_score*CovE_score*(length(which(colSums(a)!=0))/dim(a)[2])
}
S_score[j]<-res
}
if(length(which(S_score>module$final_score[[i]]))==0){
pvalue[i]<-1e-16
}else{
pvalue[i]<-length(which(S_score>module$final_score[[i]]))/perms
}
names(pvalue)[i]<-module$module_set[[i]][1]
}
return(pvalue)
}
#' @title Get final module.
#' @description The function `get_final_module` is used to identify the final module.
#' @param index A index file of PPI networks,downloaded from http://compbio-research.cs.brown.edu/pancancer/hotnet2/.
#' @param edge The edge lists of PPI networks,downloaded from http://compbio-research.cs.brown.edu/pancancer/hotnet2/.
#' @param mut_status The mutations matrix,generated by `get_mut_status`.
#' @param sur A nx2 data frame of samples' survival data,the first line is samples' survival event and the second line is samples' overall survival.
#' @param seed The canonical drivers from NCG database, which use as the starting node of the greedy algorithm.
#' @param cutoff The perturbed p-value cutoff point,default is 0.05.
#' @param max.size.candidate The maximum size of the candidate module,default is 200.
#' @param max.size.local The size of maximum connected local network,default is 500.
#' @param rate The rate of increase in score,default is 0.05.
#' @param perm The perturbation number,default is 1000.
#' @return The final module.
#' @importFrom igraph graph_from_data_frame
#' @importFrom igraph delete_vertices
#' @importFrom igraph decompose
#' @importFrom igraph simplify
#' @importFrom igraph V
#' @importFrom stats p.adjust
#' @importFrom utils read.delim
#' @importFrom utils read.table
#' @export
#' @examples
#' \donttest{#load the data
#' indexdata<-system.file("extdata","hint+hi2012_index_file.txt",package="ProgModule")
#' index<-read.table(indexdata,sep="\t",header=FALSE)
#' edgedata<-system.file("extdata","hint+hi2012_edge_file.txt",package="ProgModule")
#' edge<-read.table(edgedata,sep="\t",header=FALSE)
#' data(mut_status)
#' sur<-system.file("extdata","sur.csv",package ="ProgModule")
#' sur<-read.delim(sur,sep=",",header=TRUE,row.names=1)
#' canonical_drivers<-system.file("extdata","canonical_drivers.txt",package="ProgModule")
#' seed_gene<-read.table(canonical_drivers,header=FALSE)
#' #perform the function `get_final_module`.
#' finalmodule.example<-get_final_module(index,edge,mut_status,sur,seed=seed_gene,
#' cutoff=0.05,max.size.local=500,max.size.candidate=200,rate=0.05,perm=100)}
get_final_module<-function(index,edge,mut_status,sur,seed,cutoff=0.05,max.size.local=500,max.size.candidate=200,rate=0.05,perm=1000){
edge<-edge[,1:2]
edge[,1]<-index[match(edge[,1],index[,1]),2]
edge[,2]<-index[match(edge[,2],index[,1]),2]
if(sum(is.na(edge[,1]))!=0){
edge<-edge[-which(edge[,1]%in%NA),]
}
if(sum(is.na(edge[,2]))!=0){
edge<-edge[-which(edge[,2]%in%NA),]
}
net <- igraph::graph_from_data_frame(d=edge,directed =FALSE)
mut_status<- mut_status[rowSums(mut_status)>0,]
subnet<-delete_vertices(net,setdiff(names(net[[]]),rownames(mut_status)))
components <- igraph::clusters(subnet)
biggest_cluster_id <- which.max(components$csize)
vert_ids <- V(subnet)[components$membership == biggest_cluster_id]
subnet<-igraph::induced_subgraph(subnet, vert_ids)
#save(subnet,file="subnet.Rdata")
local_network<-get_local_network(network=subnet,freq_matrix=mut_status,max.size=max.size.local)
#save(local_network,file="local_network.Rdata")
gene<-intersect(seed[,1],names(local_network))
if(length(gene)!=0){
candidate_module<-get_candidate_module(local_network,network=subnet,freq_matrix=mut_status,sur,seed=gene,max.size=max.size.candidate,rate=rate)
#save(candidate_module,file="candidate_module.Rdata")
turbulence<-get_Pvalue_S(freq_matrix=mut_status,sur,module=candidate_module,perms=perm,local_network)
#save(turbulence,file="turbulence.Rdata")
}
final_module<-candidate_module[["module_set"]][which(p.adjust(turbulence,method = "fdr")<cutoff)]
final_module<-final_module[which(sapply(final_module,function(x) length(x)>1)==TRUE)]
return(final_module)
}
#' @title Get univarCox result.
#' @description The function `get_univarCox_result` is used to calculate the result of Cox univariate analysis.
#' @param freq_matrix The mutations matrix,generated by `get_mut_status`.
#' @param sur A nx2 data frame of samples' survival data,the first line is samples' survival event and the second line is samples' overall survival.
#' @return The result of Cox univariate analysis.
#' @importFrom survival coxph
#' @importFrom utils read.delim
#' @importFrom stats as.formula
#' @export
#' @examples
#' \donttest{#load the data
#' data(mut_status)
#' sur<-system.file("extdata","sur.csv",package ="ProgModule")
#' sur<-read.delim(sur,sep=",",header=TRUE,row.names=1)
#' #perform the function `get_univarCox_result`.
#' univarCoxresult.example<-get_univarCox_result(freq_matrix=mut_status,sur)}
get_univarCox_result<-function (freq_matrix,sur)
{
DE_path_sur<-cbind(t(freq_matrix),sur)
path_name <- cbind(colnames(DE_path_sur), paste0("a", 1:length(colnames(DE_path_sur))))
colnames(DE_path_sur)[1:(dim(DE_path_sur)[2] - 2)] <- path_name[1:(dim(path_name)[1]-2),2]
covariates <- colnames(DE_path_sur)[1:(length(DE_path_sur[1,]) - 2)]
univ_formulas <- sapply(covariates, function(x) as.formula(paste("Surv(survival, event) ~",x)))
univ_models <- lapply(univ_formulas, function(x) {
coxph(x, data = DE_path_sur)
})
univ_results <- sapply(univ_models, function(x) {
x <- summary(x)
HR <- signif(x$coef[2], digits = 2)
return(HR)
})
result<-univ_results
names(result) <- path_name[match(names(result), path_name[,2]), 1]
return(result)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.