knitr::opts_chunk$set(echo = TRUE)
## pre-step: set directory and Initialization library(readxl) rm(ls()) setwd("D:/Project/Xinyu/test_TCGA") data() #################################################################################### ## Step1 (option) : run Ccube @ccube_test_v2.R | results already saved in test folder #################################################################################### ## Step2: produce input for rank estimate (build post_summary and ccf count matrix) # from scratch library(EvoSig) Ccube_folder = "D:/Project/Data/TCGA/Ccube/TCGA_0106/" #typefile= "D:/Project/Data/TCGA/Ccube/TCGA_cancertype.csv" output = "D:/Project/Xinyu/EvoSig.test/3.10_x100/" EvoSig_input(Ccube_folder=Ccube_folder,output=output,ccfMatBuild=T,postSummaryBuild=T,TCGA=T,ccfupper=1,minsample=30) ############################################################################### #============================= go to python ==================================# ## Step 3: Rank estimate ==> output rank_summary ("4.rank_summary.xlsx") # modify python script manually # -test.py: set path = `rank_estimate_folder`, set output = `rank_result_folder` # -estimate_plot.py : set output = `rank_result_folder`, set export_csv = output # run python script: 1.test.py 2.estimate_plot.py | results already saved in `rank_result_folder` # Generate rank estimate plots (go back to R) and "4.rank_summary_test.csv" with suggested rss rank rank_file_blank <- paste0(output,"rank_summary_test_blank.csv") rank_result_folder <- paste0(output,"rank_result/") rank_estimate_plot(outputFolder = rank_result_folder,rankfilepath = rank_file_blank) # * fill manually $rank in "4.rank_summary_test_blank.csv" and change name to "4.rank_summary_test.csv" ###################################################### ## Step 5: NMF for each type based on specified ranks # Notice: must fill the rank_summary sheet first to specify ranks ## make sure first column(cancertype) and 2nd column(rank) rank_summary <- read.csv(file= rank_file_blank)[,-1] nmf_folder <- paste0(output,"NMF_result/") Matrix_folder <- paste0(output,"ccfMat/") # run NMF for each cancer type nmf_sig_all_plot(input_folder=Matrix_folder,output=nmf_folder,rank_summary=rank_summary,MatType="count",unit=30) # Manually adjust rank from plot nmf_sig_plot_type <- function(type,MatType="fraction",input_folder,output,rank,unit) ######################################################## ## Step 6: HC + LCD ## Combine signatures extracted from differernt cancer types, run Hierarchical cluster and plot (save hc_heatmap, consensus sig and plot to output folder) ## combine signatures combine_sig <- combine_sig_nmf(input_folder= nmf_folder,cancertype=rank_summary$cancertype) %>% apply(2,function(x) x/sum(x)) ## test the number of cluster lista.methods = c("kl","ch","hartigan", "cindex","db","silhouette","ratkowsky","ball", "ptbiserial","gap", "frey", "mcclain", "gamma", "gplus", "tau", "dunn", "sdindex", "sdbw") # "hubert","dindex" lista.distance = c("metodo","euclidean", "maximum", "manhattan", "canberra") hc_cluster_test(data=combine_sig,methods=lista.methods,distance=lista.distance,max=10,min=2) #=================== specify cluster numbers ==============================# ## run hierarchical cluster based on specified cluster and use LCD to get the exposure (save in folder `6.HC_consensus`) ## check all the results in folder `6.HC_consensus` ## Construct ccf matrix for all samples ccfMat <- CountMatBuild(samplelist=post_summary$samplename,upper=1,input_folder=Ccube_folder) save(ccfMat,file=paste0(Matrix_folder,"CCFMatrix_overall.RData")) #load(file=paste0(Matrix_folder,"CCFMatrix_overall.RData")) consensus_sig_eu <- hc_consensus(combine_sig,cluster=6,ccfMatrix=ccfMat[[3]],output=HC_folder,distance="euclidean") consensus_sig_correlation <- hc_consensus(combine_sig,cluster=6,ccfMatrix=ccfMat[[3]],output=HC_folder,distance="correlation") ###NMF load(file=paste0(HC_folder,"correlation_5_consensus_sig.RData")) samplelist <- read.csv(paste0(HC_folder,"correlation_5_sample_list_random.csv"))[,-1] Extract_sig(ccfMat=ccfMat[[3]],consensus_sig=consensus_sig,samplelist=samplelist) ## extract 6 signature library(EvoSig) library(dplyr) library(magrittr) data("SNV_exposure") data("TCGAtypefile") data("immune_landscape_measure") lcd_sig_new <- cbind(cluster=c(1,2,3,4,5,6),t(lcd_sig)) EvoDynamics_exposure <- Extract_sig(ccfMat=ccfMat[[3]],consensus_sig=lcd_sig_new,samplelist=samplelist) n_evo_sig <- ncol(EvoDynamics_exposure) -1 n_snv_sig <- nrow(SNV_exposure) SNV_exposure <- as.data.frame(t(SNV_exposure)) %>% set_colnames(paste0("snv_sig_",1:n_snv_sig)) %>% mutate(sample=rownames(.)) %>% file_format(n_snv_sig+1) sample_type_map <- post_summary[,c("samplename","cancertype")] immune <- immune_landscape_measure %>% file_format(1) Type <- TCGAtypefile %>% file_format(1) colnames(Type)[2] <-"cancertype" ### Merge all datasets # EvoExposure_merge_snv_immune_new <- Reduce(function(...) merge(...,by="samplename",all.x=TRUE),list(EvoDynamics_exposure,SNV_exposure,immune,sample_type_map)) EvoExposure_merge_snv_immune <- left_join(EvoDynamics_exposure,SNV_exposure,by="samplename") %>% left_join(.,immune,by="samplename") %>% left_join(.,Type,by="samplename") save(EvoExposure_merge_snv_immune,file=paste0(Exposure_folder,"EvoExposure_merge_snv_immune_6_sig.RData")) ### No need for Normalization #Absolutely not, correlation analysis describe the nature of the relationship between two variables what ever the range and the measurement units of them. plot_output <- "6_signature/facet_cancertype/" setwd("D:/Project/Xinyu/test_TCGA") plot_output <- "6_signature/facet_signature/snv/" Cor_heatmap(EvoExposure_merge_snv_immune,8:37,output=plot_output,n_sig=6,minsample=10,width = 15,height = 30,facet="signature") plot_output <- "6_signature/facet_signature/immune/" Cor_heatmap(EvoExposure_merge_snv_immune,41:100,output=plot_output,n_sig=6,minsample=10,width = 15,height = 48,facet="signature") plot_output <- "6_signature/facet_signature/snv/" Cor_heatmap(EvoExposure_merge_snv_immune,8:37,output=plot_output,n_sig=6,minsample=10,width = 48,height = 12,facet="cancertype") plot_output <- "6_signature/facet_cancertype/immune/" Cor_heatmap(EvoExposure_merge_snv_immune,41:100,output=plot_output,n_sig=6,minsample=10,width = 48,height = 20,facet="cancertype") plot_output <- "6_signature/facet_measure/snv/" Cor_heatmap(EvoExposure_merge_snv_immune,8:37,output=plot_output,n_sig=6,minsample=10,width = 48,height = 15,facet="measure") plot_output <- "6_signature/facet_measure/immune/" Cor_heatmap(EvoExposure_merge_snv_immune,41:100,output=plot_output,n_sig=6,minsample=10,width = 49 ,height = 8,facet="measure")
## Step 8: run Hierarchical clustering and plot (save hc_heatmap, consensus sig and plot to output folder) library("RColorBrewer") library("pheatmap") library("cowplot") # parameter distance="correlation" output=test ccfMatrix=ccfMat[[3]] cluster=5 #library(factoextra) upper = quantile(combine_sig,0.95) breaksList = seq(0, upper, by = 0.01) col <- colorRampPalette(rev(brewer.pal(n = 6, name = "RdYlBu")))(length(breaksList)) ## set `cutree_cols` based on suggested cluster number out <- pheatmap(combine_sig, cutree_cols = cluster, fontsize_col = 5,fontsize_row = 0.4,color = col, breaks = breaksList,clustering_distance_cols=distance, cluster_rows=F,filename=paste0(output,"/",distance,"_",cluster,"_hc_heatmap.pdf"),clustering_method = "ward.D2") ## Manually adjustment out$tree_col$ grid.draw(out) sig_label <- as.data.frame(cutree(out$tree_col,k=cluster)) %>% set_colnames("cluster") %>% mutate(sig=rownames(.)) write.csv(sig_label,file=paste0(output,"sig_label.csv")) ## Manually adjustment sig_label sig_label[which(sig_label$sig=="PAAD_sig5"),]$cluster <- ## Compute consensu signatures for each cluster consensus_sig <- as.data.frame(t(combine_sig)) %>% mutate(sig=rownames(.)) %>% left_join(.,sig_label,by="sig") %>% group_by(cluster) %>% mutate(sig=NULL) %>% summarise_all(mean) save(consensus_sig,file=paste0(output,"/",distance,"_",cluster,"_consensus_sig.RData")) consensus_sig <- apply(t(consensus_sig[,2:101]),2,as.numeric) p1 <- plot_grid(sig_plot(consensus_sig)) save_plot(paste0(output,"/",distance,"_",cluster,"_consensus_sig.pdf"),p1,base_asp = cluster) if (!requireNamespace("BiocManager", quietly = TRUE)) install.packages("BiocManager") if (!requireNamespace("YAPSA",quietly = TRUE)) BiocManager::install("YAPSA") library(YAPSA) load(paste0(HC_folder,dir(HC_folder)[grep("samples",dir(HC_folder))])) ccfMatrix[is.na(ccfMatrix)] = 0 idx_random <- sample(1:ncol(ccfMatrix)) ccfMatrix <- ccfMatrix[,idx_random] samplelist <- post_summary$samplename[idx_random] write.csv(samplelist,file=paste0(HC_folder,distance,"_",cluster,"_sample_list_random.csv")) exposure <- YAPSA::LCD(ccfMatrix,consensus_sig) table(rowSums(ccfMatrix)>0) exposure <- as.data.frame(t(exposure)) %>% set_colnames(paste0("sig_",1:ncol(.))) save(exposure,file=paste0(output,"/",distance,"_",cluster,"_lcd_exposure.RData")) return(consensus_sig)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.