R/plotMutSigProfile.R

Defines functions plotMutSigProfile

Documented in plotMutSigProfile

#'  plotMutSigProfile
#' 
#' @param sig_input Result generated by function \code{\link{fitSignatures}} or \code{\link{triMatrix}}.
#' @param patient.id Select the specific patients. Default NULL, all patients are included.
#' @param mode Type of mutation spectrum. Default NULL. Options:'Original','Reconstructed' or 'Difference'
#' @param contribution.type Type of Signature contribution. Default 'realative'. Options:'realative' or 'absolute'.
#' @param use.tumorSampleLabel Logical (Default: FALSE). Rename the 'Tumor_Sample_Barcode' by 'Tumor_Sample_Label'.
#' @return Mutational signature profile of patients
#' 
#' @examples
#' ## input from fitSignatures
#' maf.File <- system.file("extdata/", "CRC_HZ.maf", package = "MesKit")
#' clin.File <- system.file("extdata/", "CRC_HZ.clin.txt", package = "MesKit")
#' ccf.File <- system.file("extdata/", "CRC_HZ.ccf.tsv", package = "MesKit")
#' maf <- readMaf(mafFile=maf.File, clinicalFile = clin.File, ccfFile=ccf.File, refBuild="hg19")
#' phyloTree <- getPhyloTree(maf, patient.id = 'V402')
#' 
#' ## Load a reference genome.
#' library(BSgenome.Hsapiens.UCSC.hg19)
#' 
#' tri_matrix <- triMatrix(phyloTree)
#' fit_out <- fitSignatures(tri_matrix)
#' plotMutSigProfile(fit_out)
#' ## input from treeMatrix
#' plotMutSigProfile(tri_matrix)
#' 
#' @export  plotMutSigProfile


plotMutSigProfile <- function(sig_input, 
                              patient.id = NULL,
                              mode = NULL,
                              contribution.type = "relative",
                              use.tumorSampleLabel = FALSE){
    
    ## patient filter
    if(!is.null(patient.id)){
        patient.setdiff <- setdiff(patient.id, names(sig_input))
        if(length(patient.setdiff) > 0){
            stop(paste0(patient.setdiff, " can not be found in your data"))
        }
        sig_input <- sig_input[names(sig_input)  %in% patient.id] 
    }
    
    contribution.type <- match.arg(contribution.type, choices =  c("relative", "absolute"), several.ok = FALSE)
    
    ## set group level
    group_level <- c("C>A", "C>G", "C>T", "T>A", "T>C", "T>G")
    ## set color
    group_color <- c("#E64B35FF", "#4DBBD5FF", "#00A087FF",
                     "#3C5488FF", "#F39B7FFF", "#8491B4FF")
    names(group_color) <- group_level
    background_color <- rep(c("#fce7e4","#ecf8fa","#dbfff9",
                              "#e4e8f3","#fdefeb","#e5e8ef"))
    names(background_color) <- paste(group_level,1,sep = "")
    all_color <- c(group_color,background_color)
    
    result <- list()
    
    if(is(sig_input[[1]], "matrix") | all(is(sig_input[[1]], "list") & length(sig_input[[1]]) <=2)){
        
        processMSP1 <- function(i){
            patient <- names(sig_input)[[i]]
            
            if(is(sig_input[[i]], "list")){
                origin_matrix <- sig_input[[i]]$tri_matrix
                tsb.label <- sig_input[[i]]$tsb.label
            }else{
                origin_matrix <- sig_input[[i]]
                tsb.label <- data.frame()
            }
            
            ## convert mutation number to proportion
            origin_matrix <- t(apply(origin_matrix,1,function(x)x/sum(x)))
            ## convert matrix to data frame
            origin_df  <- as.data.frame(origin_matrix) 
            origin_df$Branch <- as.character(row.names(origin_df)) 
            rownames(origin_df) <- NULL
            origin_spectrum <- tidyr::pivot_longer(origin_df,
                                                   -"Branch",
                                                   names_to = "Type",
                                                   values_to = "Proportion") %>%
                dplyr::mutate(spectrum_type = "Original") %>% 
                dplyr::rowwise() %>% 
                ## get mutation group :"C>A", "C>G", "C>T", "T>A", "T>C", "T>G"
                dplyr::mutate(Group = paste(
                    strsplit(.data$Type,"")[[1]][3:5],
                    collapse = ''
                )) %>% as.data.frame() %>% 
                dplyr::mutate(group_background = paste(.data$Group,1,sep = ""))
            ## set levels
            type_level <- unique(origin_spectrum$Type)
            origin_spectrum$Type <- factor(origin_spectrum$Type, levels = type_level)
            origin_spectrum$Group <- factor(origin_spectrum$Group, levels = group_level)
            
            if(use.tumorSampleLabel){
                if(nrow(tsb.label) == 0){
                    stop("Tumor_Sample_Label was not found. Please check clinical data or let use.tumorSampleLabel be 'FALSE'")
                }
                tsb_tl <- tsb.label$Tumor_Sample_Label
                names(tsb_tl) <- tsb.label$Tumor_Sample_Barcode
                origin_spectrum$Branch <- lapply(origin_spectrum$Branch,
                                            function(x){
                                                split_tsb <- strsplit(x, "&")[[1]]
                                                tsb2tl <- tsb_tl[split_tsb]
                                                tl <- paste(tsb2tl, collapse = "&")
                                            })
            }
            
            
            ## plot signature profile for each branches
            pic_list <- list()
            
            ## initialize
            Type <- NULL
            Proportion <- NULL
            Group <- NULL
            group_background <- NULL
            
            processMSPB1 <- function(branch){
                name <- paste0(patient,": ",branch)
                branch_spectrum <- subset(origin_spectrum, origin_spectrum$Branch == branch)
                pic <- ggplot(branch_spectrum, aes(x=Type, y=Proportion, group=Group, fill=Group))+ 
                    geom_rect(data = branch_spectrum,
                              aes(fill = group_background),
                              xmin = -Inf,
                              xmax = Inf,
                              ymin = min(branch_spectrum$Proportion),
                              ymax = max(branch_spectrum$Proportion)) +
                    geom_bar(stat="identity",color = "#252525",width = 1) +
                    # geom_hline(yintercept = 0)+
                    theme(
                        legend.position='none', 
                        strip.background = element_rect(colour = "black", fill = "grey"),
                        strip.text.x=element_text(size=13),
                        strip.text.y=element_text(size=13),
                        panel.spacing.y = unit(0.5, "lines"),
                        panel.spacing.x = unit(0, "lines"),
                        plot.title = element_text(size = 13.5, hjust = 0,vjust = 0,face = "bold"),
                        plot.subtitle = element_text(size = 13.5, hjust = 0,vjust = 0),
                        axis.text.x= element_text(angle = 90,vjust = 0.2,size = 5,colour = "black"),
                        axis.ticks.x=element_line(color = "black"),
                        axis.ticks.length.y = unit(0.2, "cm"),
                        axis.text.y=element_text(size=6, color = "black"))+
                    facet_grid(. ~ Group,scales =  "free") + 
                    ## color setting
                    scale_fill_manual(values= all_color) +
                    scale_y_continuous(expand = c(0,0)) + 
                    xlab("Mutational type") + 
                    ylab("Mutation probability") + 
                    ggtitle(label = name)
                return(pic)
            }
            
            pic_list <- lapply(unique(origin_spectrum$Branch), processMSPB1)
            names(pic_list) <- unique(origin_spectrum$Branch)
            return(pic_list)
        }
        
        result <- lapply(seq_len(length(sig_input)), processMSP1)
        names(result) <- names(sig_input)
        
    }else if(is(sig_input[[1]], "list")){
        if(!is.null(mode)){
            mode.options <- c("Original","Reconstructed","Difference")
            if(!mode %in% mode.options){
                stop("'mode' should be 'NULL', 'Original', 'Reconstructed' or 'Difference'") 
            }
        }
        
        processMSP2 <- function(i){
            fit <- sig_input[[i]]
            patient <- names(sig_input)[[i]]
            RSS <- fit$RSS
            signatures_etiology <- fit$signatures.etiology
            total_cosine_similarity <- fit$total.cosine.similarity
            # tsb.label <- sig_input[[i]]$tsb.label
            
            ## convert reconstructed matrix to data frame 
            reconstructed.mat <- apply(fit$reconstructed.mat, 1, function(x)x/sum(x)) %>% t() 
            recon_df  <- as.data.frame(reconstructed.mat) 
            recon_df$Branch <- as.character(row.names(recon_df)) 
            rownames(recon_df) <- NULL
            recon_spectrum <- tidyr::pivot_longer(recon_df,
                                                  -"Branch",
                                                  names_to = "Type",
                                                  values_to = "Proportion") %>%
                dplyr::mutate(spectrum_type = "Reconstructed")
            ## convert origin matrix to data frame
            original.mat <- apply(fit$original.mat, 1, function(x)x/sum(x)) %>% t() 
            
            origin_df  <- as.data.frame(original.mat) 
            origin_df$Branch <- as.character(row.names(origin_df)) 
            rownames(origin_df) <- NULL
            origin_spectrum <- tidyr::pivot_longer(origin_df,
                                                   -"Branch",
                                                   names_to = "Type",
                                                   values_to = "Proportion") %>%
                dplyr::mutate(spectrum_type = "Original")
            
            ## diff proportion = reconstructed proportion - original proportion
            diff_spectrum <- origin_spectrum
            diff_spectrum$Proportion <- diff_spectrum$Proportion - recon_spectrum$Proportion
            diff_spectrum$spectrum_type <- "Difference" 
            
            mut_spectrum <- dplyr::bind_rows(recon_spectrum, origin_spectrum, diff_spectrum) %>% 
                dplyr::rowwise() %>% 
                dplyr::mutate(Group = paste(
                    strsplit(.data$Type,"")[[1]][3:5],
                    collapse = ''
                )) %>% as.data.frame()
            
            if(use.tumorSampleLabel){
                if(!"tsb.label" %in% names(sig_input[[i]])){
                    stop("There is no information about the Tumor_Sample_Label. Please check clinical data in readMaf or let use.tumorSampleLabel be FALSE")
                }
                tsb.label <- sig_input[[i]]$tsb.label
                tsb_tl <- tsb.label$Tumor_Sample_Label
                names(tsb_tl) <- tsb.label$Tumor_Sample_Barcode
                mut_spectrum$Branch <- lapply(mut_spectrum$Branch,
                                                 function(x){
                                                     split_tsb <- strsplit(x, "&")[[1]]
                                                     tsb2tl <- tsb_tl[split_tsb]
                                                     tl <- paste(tsb2tl, collapse = "&")
                                                 })
            }
            
            ## set levels
            type_level <- unique(mut_spectrum$Type)
            mut_spectrum$Type <- factor(mut_spectrum$Type, levels = type_level)
            mut_spectrum$Group <- factor(mut_spectrum$Group, levels = group_level)
            
            branch_list <- unique(mut_spectrum$Branch)
            pic_list <- list()
            
            processMSPB2 <- function(branch){
                branch_spectrum <- subset(mut_spectrum, mut_spectrum$Branch == branch)
                ## group of background color
                branch_spectrum$group_background <- paste(branch_spectrum$Group,1,sep = "")
                odata <- subset(branch_spectrum, branch_spectrum$spectrum_type == "Original")
                rdata <- subset(branch_spectrum, branch_spectrum$spectrum_type == "Reconstructed")
                ddata <- subset(branch_spectrum, branch_spectrum$spectrum_type == "Difference")
                
                ## calculate cosine similarity between original and reconstructed
                p1 <- odata$Proportion
                p2 <- rdata$Proportion
                # cos_sim <- round(sum(p1*p2)/(sqrt(sum(p1^2))*sqrt(sum(p2^2))),3)
                cos_sim <- total_cosine_similarity$cosine_similarity[which(total_cosine_similarity$Level_ID == branch)]
                
                ## select specific mode
                if(is.null(mode)){
                    branch_spectrum$spectrum_type <- factor(branch_spectrum$spectrum_type,
                                                            levels = c("Original","Reconstructed","Difference"))
                    branch_spectrum <- dplyr::arrange(branch_spectrum, .data$spectrum_type)
                }else{
                    branch_spectrum <- subset(branch_spectrum, branch_spectrum$spectrum_type %in% mode)
                    branch_spectrum$spectrum_type <- factor(branch_spectrum$spectrum_type,
                                                            levels = unique(branch_spectrum$spectrum_type)) 
                }
                
                ## get signature title
                sig_names <- signatures_etiology[signatures_etiology$Level_ID == branch,]$Signature
                
                if(contribution.type == "relative"){
                    sig_con <- signatures_etiology[signatures_etiology$Level_ID == branch,]$Contribution_relative 
                }else{
                    sig_con <- signatures_etiology[signatures_etiology$Level_ID == branch,]$Contribution_absolute
                }
                names(sig_con) <- sig_names
                ## sort by contribution
                sig_con <- sort(sig_con, decreasing = TRUE)
                sig_title <- ''
                sig_title_list <- vapply(seq_len(length(sig_con)), function(i){
                    c <- round(sig_con[i], 2)
                    n <- names(sig_con)[i]
                    if(i == 1){
                        t <- paste0(n,": ", c)
                    }else{
                        t <- paste0(" & ", n, ": ", c)
                    }
                    return(t)
                }, FUN.VALUE = character(1))
                sig_title <- paste(sig_title_list, collapse = "")
                
                # for(i in seq_len(length(sig_con))){
                #     c <- round(sig_con[i], 2)
                #     n <- names(sig_con)[i]
                #     if(i == 1){
                #         sig_title <- paste0(n,": ", c) 
                #     }else{
                #         sig_title <- paste0(sig_title, " & ", n, ": ", c)
                #     }
                # }
                
                # if(!is.null(mode)){
                #     branch_spectrum <- branch_spectrum[branch_spectrum$spectrum_type == mode, ]
                # }
                name <- paste0(patient,": ",branch)
                ## adjust Y axis
                adjY_table <- data.frame()
                
                ## initialize
                Type <- NULL
                Proportion <- NULL
                Group <- NULL
                group_background <- NULL
                
                
                pic <- ggplot(branch_spectrum, aes(x=Type, y=Proportion, group=Group, fill=Group))
                if("Original" %in% branch_spectrum$spectrum_type){
                    pic <- pic + geom_rect(data = odata,
                                           aes(fill = group_background),
                                           xmin = -Inf,
                                           xmax = Inf,
                                           ymin = -Inf,
                                           ymax = Inf)
                    odata$Proportion[which.max(odata$Proportion)] <- odata$Proportion[which.max(odata$Proportion)]+0.02
                    adjY_table <- rbind(adjY_table, odata)
                }
                if("Reconstructed" %in% branch_spectrum$spectrum_type){
                    pic <- pic + geom_rect(data = rdata,
                                           aes(fill = group_background),
                                           xmin = -Inf,
                                           xmax = Inf,
                                           ymin = -Inf,
                                           ymax = Inf)
                    rdata$Proportion[which.max(rdata$Proportion)] <- rdata$Proportion[which.max(rdata$Proportion)]+0.02
                    adjY_table <- rbind(adjY_table, rdata)
                }
                if("Difference" %in% branch_spectrum$spectrum_type){
                    pic <- pic + geom_rect(data = ddata,
                                           aes(fill = group_background),
                                           xmin = -Inf,
                                           xmax = Inf,
                                           ymin = -Inf,
                                           ymax = Inf)
                    ddata$Proportion[which.max(ddata$Proportion)] <- ddata$Proportion[which.max(ddata$Proportion)]+0.02
                    ddata$Proportion[which.min(ddata$Proportion)] <- ddata$Proportion[which.min(ddata$Proportion)]-0.02
                    adjY_table <- rbind(adjY_table, ddata)
                    
                }
                pic <- pic + 
                    geom_bar(stat="identity",color = "#252525",width = 1) +
                    ## adjust ylab
                    geom_point(data = adjY_table,alpha = 0) +
                    # geom_hline(yintercept = 0)+
                    theme(
                        legend.position='none',
                        strip.background = element_rect(colour = "black"),
                        strip.text.x=element_text(size=13),
                        strip.text.y=element_text(size=13),
                        panel.spacing.y = unit(0.5, "lines"),
                        panel.spacing.x = unit(0, "lines"),
                        plot.title = element_text(size = 13.5, hjust = 0,vjust = 0,face = "bold"),
                        plot.subtitle = element_text(size = 13.5, hjust = 0,vjust = 0),
                        axis.text.x= element_text(angle = 90,vjust = 0.2,size = 5,colour = "black"),
                        axis.ticks.x=element_line(color = "black"),
                        axis.ticks.length.y = unit(0.2, "cm"),
                        axis.text.y=element_text(size=6, color = "black"))+
                    facet_grid(factor(spectrum_type)  ~ Group,scales =  "free") +
                    # facet_grid(factor(spectrum_type)  ~ Group,scales =  "free") + 
                    ## color setting
                    scale_fill_manual(values= all_color) +
                    scale_y_continuous(expand = c(0,0)) + 
                    xlab("Mutational type") + 
                    ylab("Mutation probability") + 
                    ggtitle(label = name,
                            subtitle =  paste(
                                "\nRSS = ", as.character(round(RSS[branch],3) ),
                                "; Cosine similarity = ", round(cos_sim,3),"\n",
                                sig_title,
                                sep = ""))
                return(pic)
            }
            pic_list <- lapply(branch_list ,processMSPB2)
            names(pic_list) <- branch_list
            return(pic_list)
        }
        
        result <- lapply(seq_len(length(sig_input)), processMSP2)
        names(result) <- names(sig_input)
        
    }else{
        stop("Input data should be the result of function fitSignatures or triMatrix")
    }
    
   if(length(result) == 1){
       return(result[[1]])
   }else if(length(result) == 0){
       return(NA)
   }else{
       return(result)
   }
}

Try the MesKit package in your browser

Any scripts or data that you put into this service are public.

MesKit documentation built on March 28, 2021, 6 p.m.