#' Searching for mutations related to signature score
#'
#' @param mutation_matrix mutation matrix with sample name in the row and genes in the column
#' @param signature_matrix signature data frame with identifier and target signatures
#' @param id_signature_matrix column name of identifier
#' @param signature name of target signature
#' @param min_mut_freq minimum frequency of mutations
#' @param plot logical variable, if TRUE, plot will be save in the `save_path`
#' @param method multi or Wilcoxon test only, if `multi` is applied, both `cuzick test` and `wilcoxon` will be performed
#' @param save_path path to save plot and statistical analyses
#' @param palette palette of box plot
#' @param show_plot logical variable, if TRUE, plot will be printed.
#' @param show_col show code of palette
#' @param width width of oncoprint
#' @param height height of oncoprint
#' @param oncoprint_group_by signature must be group by mean or quantile
#' @param oncoprint_col color of mutation
#' @param jitter if true, each point will be drawn in the box plot with jitter
#' @param gene_counts define the number of genes which will be shown in the oncoprint
#' @param genes genes for drawing
#' @param point_size default is 4.5
#' @param point.alpha point.alpha of boxplot
#'
#' @author Dongqiang Zeng
#' @return
#' @export
#'
#' @examples
#' # MAF data download from UCSC Xena hub
#' maf_file<-"E:/18-Github/Organization/TCGA.STAD.mutect.c06465a3-50e7-46f7-b2dd-7bd654ca206b.DR-10.0.somatic.maf"
#' mut_list<-make_mut_matrix(maf = maf_file, isTCGA = T, category = "multi")
#' mut<-mut_list$snp
#' res<-find_mutations(mutation_matrix = mut, signature_matrix = tcga_stad_sig, id_signature_matrix = "ID",signature = "CD_8_T_effector",min_mut_freq = 0.01, plot = TRUE, jitter = TRUE, point.alpha = 0.25
find_mutations<-function(mutation_matrix, signature_matrix, id_signature_matrix = "ID", signature,
min_mut_freq = 0.05, plot = TRUE, method = "multi", point.alpha = 0.1,
save_path = NULL,palette = "jco", show_plot = TRUE,
show_col = FALSE, width = 8, height = 4, oncoprint_group_by = "mean",
oncoprint_col = "#224444", gene_counts = 10, jitter = FALSE, genes = NULL, point_size = 4.5){
if(is.null(save_path)){
file_name<-paste0(signature,"-relevant-mutations")
}else{
file_name<-save_path
}
if (!file.exists(file_name)) dir.create(file_name)
abspath<-paste0(getwd(),"/",file_name,"/" )
#######################################################
signature_matrix<-as.data.frame(signature_matrix)
if(max(mutation_matrix)>4){
mutation_matrix[mutation_matrix>=3&mutation_matrix<=5]<-3
mutation_matrix[mutation_matrix>5]<-4
}
mut2<-mutation_matrix
mut2[mut2>=1]<-1
mut_onco<-mut2
if(is.null(genes)){
mutfreq<-data.frame(head(sort(colSums(mut2),decreasing = T),500))
colnames(mutfreq)<-"Freq"
index<-which(mutfreq$Freq>=dim(mut2)[1]*min_mut_freq)
index<-max(index)
input_genes<-names(head(sort(colSums(mut2),decreasing = T),index))
input_genes<-unique(input_genes)
input_genes<-input_genes[!is.na(input_genes)]
}else{
input_genes<-genes
}
mutation_matrix<-mutation_matrix[,colnames(mutation_matrix)%in%input_genes]
genes<-genes[genes%in%colnames(mutation_matrix)]
########################################################
colnames(signature_matrix)[which(colnames(signature_matrix)==id_signature_matrix)]<-"ID"
mutation_matrix<-as.data.frame(mutation_matrix)
mutation_matrix<-tibble:: rownames_to_column(mutation_matrix,var = "ID")
sig_mut<-merge(signature_matrix,mutation_matrix,by = "ID",all = F)
sig_mut<-tibble:: column_to_rownames(sig_mut,var = "ID")
mytheme<-theme_light()+
theme(
plot.title=element_text(size=rel(2.3),hjust=0.5,face="italic"),
axis.title.y=element_text(size=rel(1.8)),
axis.title.x= element_blank(),
axis.text.x= element_text(face="plain",size=18,angle=0,color="black"),#family="Times New Roman"
axis.text.y = element_text(face="plain",size=15,angle=90,color="black"),#family="Times New Roman"
axis.line=element_line(color="black",size=0.6))+theme(
legend.key.size=unit(.3,"inches"),
legend.title=element_blank(),
legend.position="none",
legend.direction="horizontal",
legend.justification=c(.5,.5),
legend.box="vertical",
legend.box.just="top",
legend.text=element_text(colour="black",size=10,face = "plain")
)
if(method == "multi"){
if(!is.null(genes)){
if(length(genes)<10){
print(genes)
stop(paste0("Please provide at least 10 genes with mutaion freq larger than ",min_mut_freq))
}
input_genes<-genes
}
input_genes<-input_genes[input_genes%in%colnames(sig_mut)]
input<-sig_mut[,c(signature,input_genes)]
##############################
aa<-lapply(input[,input_genes], function(x) PMCMRplus::cuzickTest(input[,1]~x))
res1<-data.frame(p.value = sapply(aa, getElement, name = "p.value"),
names=input_genes,
statistic = sapply(aa, getElement, name = "statistic"))
res1$adjust_pvalue<-p.adjust(res1$p.value,method = "BH",n=length(res1$p.value))
print(">>>> Result of Cuzick Test")
res1<-res1[order(res1$p.value,decreasing = F),]
print(res1[1:10,])
write.csv(res1,paste0(abspath,"1-cuzickTest-test-relevant-mutations.csv"))
if(plot){
####################################
top10_genes<-res1$names[1:10]
top10_genes<- top10_genes[!is.na(top10_genes)]
top10_genes<-as.character(top10_genes[top10_genes%in%colnames(input)])
input_long<-input[,c(signature,top10_genes)]
input_long<-reshape2:: melt(input_long,id.vars = 1,
variable.name = "Gene",
value.name = "mutation")
input_long[,signature]<-as.numeric(input_long[,signature])
input_long$mutation<-as.factor(input_long$mutation)
####################################
pl<-list()
for(i in 1:length(top10_genes)){
gene<- top10_genes[i]
dd<-input_long[input_long$Gene==gene,]
pl[[i]]<-ggplot(dd, aes(x=mutation, y = !!sym(signature), fill=mutation)) +
geom_boxplot(outlier.shape = NA,outlier.size = -0.5)+
# geom_jitter(width = 0.25,size= 5.9,alpha=0.75,color ="black")+
scale_fill_manual(values = palettes(category = "box",palette = palette,show_col = show_col))+
mytheme+theme(legend.position="none")+
ggtitle(paste0(top10_genes[i]))+
mytheme+stat_compare_means(comparisons = combn(as.character(unique(dd[,"mutation"])), 2, simplify=F),size=6)+
stat_compare_means(size=6)
if(jitter){
pl[[i]]<-pl[[i]]+geom_jitter(width = 0.25,size= point_size, alpha=point.alpha,color ="black")
}
ggsave(pl[[i]],filename = paste0(4+i,"-1-",gene,"-continue.pdf"),
width = 4,height = 5.8,path = file_name)
}
com_plot<- cowplot::plot_grid(pl[[1]],pl[[2]],pl[[3]],pl[[4]],pl[[5]],pl[[6]],pl[[7]],pl[[8]],pl[[9]],pl[[10]],
labels = "AUTO",ncol = 5,nrow = 2,label_size = 32)
if(show_plot) print(com_plot)
#####################################
ggsave(com_plot,filename = "3-Relevant_mutations_Continue.pdf",width = 14,height = 10.5,path = file_name)
#####################################
}
sig_mut2<- rownames_to_column(sig_mut,var = "ID")
patr1<-sig_mut2[,c("ID",signature)]
part2<-sig_mut2[,input_genes]
part2[part2>=1]<-1
sig_mut2<-cbind(patr1,part2)
sig_mut2<-column_to_rownames(sig_mut2,var = "ID")
input2<-sig_mut2
# print(input2[1:5,1:5])
aa<-lapply(input2[,input_genes], function(x) wilcox.test(input2[,1]~x))
res2<-data.frame(p.value = sapply(aa, getElement, name = "p.value"),
names = input_genes,
statistic = sapply(aa, getElement, name = "statistic"))
res2$adjust_pvalue<-p.adjust(res2$p.value,method = "BH",n=length(res2$p.value))
res2<-res2[order(res2$p.value,decreasing = F),]
print(">>> Result of Wilcoxon test (top 10)")
print(res2[1:10,])
write.csv(res2, paste0(abspath,"2-Wilcoxon-test-relevant-mutations.csv"))
result<-list("cuzick_test" = res1, "wilcoxon_test" = res2,
'sig_mut_data1' = input, "sig_mut_data2" = input2)
if(plot){
####################################
top10_genes<-res2$names[1:10]
top10_genes<- top10_genes[!is.na(top10_genes)]
top10_genes<-as.character(top10_genes[top10_genes%in%colnames(input2)])
input_long<-input2[,c(signature,top10_genes)]
input_long<-reshape2:: melt(input_long,id.vars = 1,
variable.name = "Gene",
value.name = "mutation")
input_long[,signature]<-as.numeric(input_long[,signature])
input_long$mutation<-as.factor(input_long$mutation)
input_long$mutation<-ifelse(input_long$mutation==0,"WT","Mutated")
####################################
pl<-list()
for(i in 1:length(top10_genes)){
gene<- top10_genes[i]
dd<-input_long[input_long$Gene==gene,]
pl[[i]]<-ggplot(dd, aes(x=mutation, y = !!sym(signature), fill=mutation)) +
geom_boxplot(outlier.shape = NA,outlier.size = -0.5)+
# geom_jitter(width = 0.25,size=5.5,alpha=0.75,color ="black")+
scale_fill_manual(values= palettes(category = "box", palette = palette, show_col = show_col))+
theme(legend.position="none")+
ggtitle(paste0(top10_genes[i]))+
mytheme+
stat_compare_means(comparisons = combn(as.character(unique(dd[,"mutation"])), 2, simplify=F),size=6)
if(jitter){
pl[[i]]<-pl[[i]]+geom_jitter(width = 0.25,size = point_size,alpha=point.alpha,color ="black")
}
ggsave(pl[[i]],filename = paste0(4+i,"-2-",gene,"-binary.pdf"),
width = 4,height = 5.8,path = file_name)
}
com_plot<-cowplot:: plot_grid(pl[[1]],pl[[2]],pl[[3]],pl[[4]],pl[[5]],pl[[6]],pl[[7]],pl[[8]],pl[[9]],pl[[10]],
labels = "AUTO",ncol = 5,nrow = 2,label_size = 32)
if(show_plot) print(com_plot)
#####################################
ggsave(com_plot,filename = "4-Relevant_mutations_binary.pdf",width = 14,height = 10.5,path = file_name)
#####################################
}
}else if(tolower(method) =="wilcoxon" ){
if(!is.null(genes)){
if(length(genes)<10){
print(genes)
stop(paste0("Please provide at least 10 genes with mutaion freq larger than ",min_mut_freq))
}
input_genes<-genes
}
sig_mut2<- rownames_to_column(sig_mut,var = "ID")
patr1<-sig_mut2[,c("ID",signature)]
input_genes<-input_genes[input_genes%in%colnames(sig_mut2)]
part2<-sig_mut2[,input_genes]
part2[part2>=1]<-1
sig_mut2<-cbind(patr1,part2)
sig_mut2<-column_to_rownames(sig_mut2,var = "ID")
input2<-sig_mut2
# print(input2[1:5,1:5])
##############################
aa<-lapply(input2[,input_genes], function(x) wilcox.test(input2[,1]~x))
res<-data.frame(p.value = sapply(aa, getElement, name = "p.value"),
names=input_genes,
statistic = sapply(aa, getElement, name = "statistic"))
res$adjust_pvalue<-p.adjust(res$p.value,method = "BH",n=length(res$p.value))
res<-res[order(res$p.value,decreasing = F),]
print(">>> Result of Wilcoxon test (top 10): ")
print(res[1:10,])
write.csv(res, paste0(abspath,"0-Wilcoxon-test-relevant-mutations.csv"))
result<-list("wilcoxon_test" = res,"sig_mut_data" = input2)
#################################
if(plot){
####################################
top10_genes<-res$names[1:10]
top10_genes<- top10_genes[!is.na(top10_genes)]
top10_genes<-as.character(top10_genes[top10_genes%in%colnames(input2)])
input_long<-input2[, c(signature,top10_genes)]
input_long<-reshape2:: melt(input_long,
id.vars = 1,
variable.name = "Gene",
value.name = "mutation")
input_long[,signature]<-as.numeric(input_long[,signature])
input_long$mutation<-as.factor(input_long$mutation)
input_long$mutation<-ifelse(input_long$mutation==0,"WT","Mutated")
####################################
pl<-list()
for(i in 1:length(top10_genes)){
gene<- top10_genes[i]
dd<-input_long[input_long$Gene==gene,]
pl[[i]]<-ggplot(dd, aes(x=mutation, y = !!sym(signature), fill=mutation)) +
geom_boxplot(outlier.shape = NA,outlier.size = NA)+
# geom_jitter(width = 0.25,size= 3.5,alpha=0.75,color ="black")+
scale_fill_manual(values= palettes(category = "box",palette = palette, show_col = show_col))+
mytheme+theme(legend.position="none")+
ggtitle(paste0(top10_genes[i]))+
mytheme+
stat_compare_means(comparisons = combn(as.character(unique(dd[,"mutation"])), 2, simplify=F),size=6)
if(jitter){
pl[[i]]<-pl[[i]]+geom_jitter(width = 0.25,size=point_size, alpha = point.alpha,color ="black")
}
ggsave(pl[[i]],filename = paste0(i,"-1-",gene,"-binary.pdf"),
width = 4,height = 5.8,path = file_name)
}
com_plot<-cowplot:: plot_grid(pl[[1]],pl[[2]],pl[[3]],pl[[4]],pl[[5]],pl[[6]],pl[[7]],pl[[8]],pl[[9]],pl[[10]],
labels = "AUTO",ncol = 5,nrow = 2,label_size = 32)
if(show_plot) print(com_plot)
#####################################
ggsave(com_plot,filename = "0-Relevant_mutations_binary.pdf",width = 14,height = 10.5,path = file_name)
#####################################
}
}
# if(!is.null(genes)){
# if(length(genes)<10) stop("Please provide at least 10 genes")
# genes_for_oncoprint<-genes
# genes_for_oncoprint<-genes_for_oncoprint[1:gene_counts]
# genes_for_oncoprint<-genes_for_oncoprint[!is.na(genes_for_oncoprint)]
# }else{
# genes_for_oncoprint<-result$wilcoxon_test$names
# genes_for_oncoprint<-genes_for_oncoprint[1:gene_counts]
# genes_for_oncoprint<-genes_for_oncoprint[!is.na(genes_for_oncoprint)]
# }
#
genes_for_oncoprint<-result$wilcoxon_test$names
genes_for_oncoprint<-genes_for_oncoprint[1:gene_counts]
genes_for_oncoprint<-genes_for_oncoprint[!is.na(genes_for_oncoprint)]
signature_matrix<-signature_matrix[!duplicated(signature_matrix$ID),]
signature_matrix<-signature_matrix[signature_matrix$ID%in%rownames(mut_onco),]
mut_onco<-mut_onco[rownames(mut_onco)%in%signature_matrix$ID,]
####################################
if(oncoprint_group_by!="mean"&oncoprint_group_by!="quantile3"){
pdata_group<- signature_matrix[,c("ID",signature,oncoprint_group_by)]
pdata_group[,signature]<-as.numeric(pdata_group[,signature])
}else{
pdata_group<- signature_matrix[,c("ID",signature)]
pdata_group[,signature]<-as.numeric(pdata_group[,signature])
}
max_sig<-max(pdata_group[,signature],na.rm = TRUE )
min_sig<-min(pdata_group[,signature],na.rm = TRUE)
#################################
if(oncoprint_group_by=="mean"){
if(!"group2"%in%colnames(pdata_group)){
pdata_group$group<-ifelse(pdata_group[,signature]>=mean(pdata_group[,signature]),"High","Low")
}
}else if(oncoprint_group_by=="quantile3"){
if(!"group3"%in%colnames(pdata_group)){
q1<-quantile(pdata_group[,signature],probs = 1/3)
q2<-quantile(pdata_group[,signature],probs = 2/3)
pdata_group$group<-ifelse(pdata_group[,signature]<=q1,"Low",ifelse(pdata_group[,signature]>=q2,"High","Middle"))
}
}else{
if("group"%in%colnames(pdata_group)&"group"!=oncoprint_group_by) stop(" Variable:`group` already exist in colname of pdata!")
if(!"group"%in%colnames(pdata_group)){
colnames(pdata_group)[which(colnames(pdata_group)==oncoprint_group_by)]<-"group"
}
# print(head(pdata_group))
if(nlevels(as.factor(pdata_group$group))>2) stop("Levels of `oncoprint_group_by` must less than 3")
if(!"High"%in%unique(pdata_group$group)) {
print(summary(as.factor(pdata_group$group)))
stop("Levels of `oncoprint_group_by` must be `High` or `Low`")
}
}
# print(head(pdata_group))
idh<-pdata_group[pdata_group$group=="High","ID"]
idl<-pdata_group[pdata_group$group=="Low","ID"]
pdata1<-pdata_group[pdata_group$ID%in%idh, ]
pdata2<-pdata_group[pdata_group$ID%in%idl, ]
# library(ComplexHeatmap)
group_col<-palettes(category = "box",palette = palette, show_col = show_col)
h1<-ComplexHeatmap:: HeatmapAnnotation(Signature_score = anno_barplot(as.numeric(pdata1[,signature]),
border=FALSE,
gp = gpar(fill="#2D004B"),
axis = TRUE,
ylim = c(min_sig,max_sig)),
Group = pdata1$group,
annotation_height=unit.c(rep(unit(1.5, "cm"), 1), rep(unit(0.5, "cm"), 1)), #unit.c(rep(unit(0.9, "cm"), 5))
annotation_legend_param=list(labels_gp = gpar(fontsize = 10),
title_gp = gpar(fontsize = 10, fontface = "bold"),
ncol=1),
gap=unit(c(2,2), "mm"),
col=list(Group = c("High" = group_col[1],"Low" = group_col[2])),
show_annotation_name = TRUE,
#annotation_name_side="left",
annotation_name_gp = gpar(fontsize = 12))
h2<-ComplexHeatmap:: HeatmapAnnotation(Signature_score = anno_barplot(as.numeric(pdata2[,signature]),
border=FALSE,
gp = gpar(fill="#2D004B"),
axis = TRUE,
ylim = c(min_sig,max_sig)),
Group = pdata2$group,
annotation_height=unit.c(rep(unit(1.5, "cm"), 1), rep(unit(0.5, "cm"), 1)), #unit.c(rep(unit(0.9, "cm"), 5))
annotation_legend_param=list(labels_gp = gpar(fontsize = 10),
title_gp = gpar(fontsize = 10, fontface = "bold"),
ncol=1),
gap=unit(c(2,2), "mm"),
col=list(Group = c("High" = group_col[1],"Low" = group_col[2])),
show_annotation_name = TRUE,
#annotation_name_side="left",
annotation_name_gp = gpar(fontsize = 12))
col = c(mut = oncoprint_col)
mut1<-t(mut_onco[rownames(mut_onco)%in%idh, colnames(mut_onco)%in%genes_for_oncoprint])
mut2<-t(mut_onco[rownames(mut_onco)%in%idl, colnames(mut_onco)%in%genes_for_oncoprint])
mut1<-list(mut = mut1)
mut2<-list(mut = mut2)
# width_h<- c(length(idh)/c(length(idh)+length(idl)))*width
# width_l<- c(length(idl)/c(length(idh)+length(idl)))*width
#########################################
ho1<-ComplexHeatmap:: oncoPrint(mut1,
alter_fun_is_vectorized = FALSE,
alter_fun = list(mut = function(x, y, w, h) grid.rect(x, y, w*0.9, h*0.88,
gp = gpar(fill = oncoprint_col, col = oncoprint_col))),
column_title = paste0(" High ", signature),
show_heatmap_legend = FALSE,
heatmap_legend_param = list(title = "", labels = ""),
col = col,
row_names_gp = gpar(fontface = "italic"),
# width = unit(width_h, "cm"),
top_annotation = h1)
ho2<-ComplexHeatmap:: oncoPrint(mut2,
alter_fun_is_vectorized = FALSE,
alter_fun = list(mut = function(x, y, w, h) grid.rect(x, y, w*0.9, h*0.88,
gp = gpar(fill = oncoprint_col, col = oncoprint_col))),
column_title = paste0(" Low ", signature),
show_heatmap_legend = FALSE,
heatmap_legend_param = list(title = "", labels = "Mutation"),
col = col,
row_names_gp = gpar(fontface = "italic"),
# width = unit(width_l, "cm"),
top_annotation = h2)
p<-ho1+ho2
# fig.path<-paste0(getwd(),"/",save_path)
# save to pdf
pdf(file.path(abspath, paste0("0-OncoPrint-",signature,".pdf")), width = width, height = height)
draw(p)
invisible(dev.off())
# print to screen
# draw(p)
if(show_plot){
print(p)
}
return(result)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.