R/mutHeatmap.R

Defines functions mutHeatmap

Documented in mutHeatmap

#' @title mutHeatmap
#' @description plot binary or CCF heatmap of somatic mutations.
#'
#' @param maf Maf or MafList object generated by \code{\link{readMaf}} function.
#' @param patient.id Select the specific patients. Default NULL, all patients are included.
#' @param min.vaf The minimum value of VAF. Default 0. Option: on the scale of 0 to 1.
#' @param min.ccf The minimum value of CCF. Default 0. Option: on the scale of 0 to 1.
#' @param use.adjVAF Use adjusted VAF in analysis when adjusted VAF or CCF is available. Default FALSE. 
#' @param use.ccf Logical. If FALSE (Default: FALSE), print a binary heatmap of mutations. 
#' Otherwise, print a cancer cell frequency (CCF) heatmap.
#' @param geneList List of genes to restrict the analysis. Default NULL.
#' @param plot.geneList Logical (Default: FALSE). If TRUE, plot heatmap with genes on geneList when geneList is not NULL.
#' @param show.geneList Show the names of gene on the geneList. Default TRUE.
#' @param mut.threshold show.gene and show.geneList will be FALSE when patient have more mutations than threshold. Default 150.
#' @param sample.text.size Size of sample name.Default 9.
#' @param legend.title.size Size of legend title.Default 10.
#' @param gene.text.size Size of gene text. Default 9.
#' @param sampleOrder A named list which contains the sample order used in plotting the heatmap. Default NULL.
#' @param use.tumorSampleLabel Logical (Default: FALSE). Rename the 'Tumor_Sample_Barcode' by 'Tumor_Sample_Label'.
#' @param classByTumor Logical Default: FALSE. Classify mutations based on "Tumor_ID".
#' @param ... Other options passed to \code{\link{subMaf}}
#' 
#' @examples
#' maf.File <- system.file("extdata/", "CRC_HZ.maf", package = "MesKit")
#' clin.File <- system.file("extdata/", "CRC_HZ.clin.txt", package = "MesKit")
#' ccf.File <- system.file("extdata/", "CRC_HZ.ccf.tsv", package = "MesKit")
#' maf <- readMaf(mafFile=maf.File, clinicalFile = clin.File, ccfFile=ccf.File, refBuild="hg19")
#' mutHeatmap(maf)
#' 
#' @return heatmap of somatic mutations
#' @importFrom grDevices colors
#' @export mutHeatmap

mutHeatmap <- function(maf,
                       patient.id = NULL,
                       min.vaf = 0,
                       min.ccf = 0,
                       use.adjVAF = FALSE,
                       use.ccf = FALSE,
                       geneList = NULL,
                       plot.geneList = FALSE,
                       show.geneList = TRUE,
                       mut.threshold = 50,
                       sample.text.size = 9,
                       legend.title.size = 10,
                       gene.text.size = 9,
                       sampleOrder = NULL,
                       use.tumorSampleLabel = FALSE,
                       classByTumor = FALSE,
                       ...){
    
    maf_input <- subMaf(maf,
                        patient.id = patient.id,
                        use.tumorSampleLabel = use.tumorSampleLabel,
                        min.vaf = min.vaf,
                        use.adjVAF = use.adjVAF,
                        min.ccf = min.ccf,
                         mafObj = TRUE,
                        ...)
    
    
    ## check sampleOrder
    if(!is.null(sampleOrder)){
        patient.setdiff <- setdiff(names(sampleOrder), names(maf_input))
        if(length(patient.setdiff) > 0){
            stop(paste0(patient.setdiff,
                        " can not be found in your data. Please check sampleOrder!"))
        }
    }
    
    heatmap_list <- list()
    for(m in maf_input){
        maf_data <- getMafData(m)
        patient <- getMafPatient(m)
        if(nrow(maf_data) == 0){
            message("Warning: there was no mutations in ", patient, " after filtering.")
            next
        }

        ## get mutation matrix
        if(use.ccf){
            if("CCF" %in% colnames(maf_data)){
                mat <- getMutMatrix(maf_data, use.ccf = TRUE)
            }else{
                stop("Heatmap requires CCF data when use.ccf is TRUE")
            }
            type <- "CCF"
            value.name <- "CCF"
        }else{
            mat <- getMutMatrix(maf_data, use.ccf = FALSE)
            type  <- "Mutation"
            value.name <- "Mutation"
        }
        
        ## delete "NORMAL"
        if(!1 %in% mat[,"NORMAL"]){
            if(nrow(mat) == 1){
                name <- rownames(mat)
                mat <- t(mat[,which(colnames(mat)!= "NORMAL")])
                rownames(mat) <- name
            }else{
                mat <- mat[,which(colnames(mat)!= "NORMAL")]
            }
        }
        
        ## classify mutation
        maf_data <- maf_data %>% 
            do.classify(class = "SP",classByTumor = classByTumor) %>% 
            tidyr::unite("mutation_id", c("Hugo_Symbol", "Chromosome", "Start_Position", "Reference_Allele", "Tumor_Seq_Allele2"), sep = ":", remove = FALSE)
        
        # mat <- binary.matrix
        # type  <- "Mutation"
        # if(use.ccf){
        #     type <- "CCF"
        #     mat <- ccf.matrix
        # }
        
        ## get mutation type
        maf_mutation_type <- maf_data$Mutation_Type
        names(maf_mutation_type) <- maf_data$mutation_id
        mat_mutaion_id <- rownames(mat)
        mutation_type <- maf_mutation_type[mat_mutaion_id]
        public <- unique(mutation_type)[grep("Public", unique(mutation_type))] 
        shared <- sort(unique(mutation_type)[grep("Shared", unique(mutation_type))]) 
        private <- sort(unique(mutation_type)[grep("Private", unique(mutation_type))])
        mutation_type_level <- c(public, shared, private)
        
        ## reorder sample by sampleOrder
        if(!is.null(sampleOrder)){
            ## select the appropriate patients
            if(patient %in% names(sampleOrder)){
                sample_order <- sampleOrder[[patient]]
                samples <- colnames(mat)
                ## check order
                sample.setdiff <- setdiff(sample_order, samples)
                
                if(length(sample.setdiff) > 0){
                    stop(paste0("Sample ", paste(sample.setdiff, collapse = ","),
                                " can not be found in ", patient, ". Please check sampleOrder!"))
                }else if(length(sample.setdiff) == 0 & length(sample_order) != length(samples)){
                    sample.setdiff <- setdiff(samples, sample_order)
                    stop(paste0(paste(sample.setdiff, collapse = ","),
                                " can not be found in sampleOrder of ", patient, ". Please check sampleOrder!"))
                }
                
                mat <- mat[,sample_order]
            }
        }
        
        mat <- as.data.frame(mat)
        sample_num <- ncol(mat)
        sample_levels <- colnames(mat)
        mat$mutation_type <- factor(mutation_type, levels = mutation_type_level)
        
        
        ## get gene name
        genes <- lapply(rownames(mat),function(x){
            s <- strsplit(x,":")[[1]][1]
            return(s)
        }) %>% unlist()
        mat$Gene <- as.character(genes) 
        
        ## flit mutation in gene list
        if(!is.null(geneList)){
            if(plot.geneList){
                mat <- mat  %>%
                    dplyr::filter(.data$Gene %in% geneList) %>%
                    as.data.frame() %>% 
                    dplyr::distinct(.data$mutation_type, .data$Gene, .keep_all = TRUE)
                if(nrow(mat) == 0){
                    message("Warning: none of mutated genes map to genelist")
                    next
                }
            }else{
                mat <- mat  %>%
                    dplyr::mutate(Gene = dplyr::if_else(
                        .data$Gene %in% geneList,
                        .data$Gene,
                        "genelist.out"
                    )) %>%
                    as.data.frame()
                mut.genelist.num <- length(which(mat$Gene != "genelist.out"))
            }
        }
        
        ## distinc mutation within the sample mutation type
        # 
        # mat <- dplyr::distinct(mat, mutation_type, Gene, .keep_all = TRUE)
        ## sort mutation
        mut.num <- nrow(mat)        
        mat <- dplyr::arrange(mat,  .data$mutation_type)
        mat$mutation <- seq_len(nrow(mat))
        
        
        ## cumsum postion of axis y
        mat$ymin <- cumsum(c(0, rep(2, mut.num-1)))
        mat$ymax <- mat$ymin + 1.5
        
        # mut_dat <- reshape2::melt(mat,
        #                           id.vars = c("ymin","ymax","mutation","mutation_type", "Gene"),
        #                           variable.name = "sample",
        #                           value.name = value.name)
        mut_dat <- tidyr::pivot_longer(
            mat,
            col = -c("ymin","ymax","mutation","mutation_type", "Gene"),
            names_to = "sample",
            values_to = value.name
        )
        mut_dat$sample <- factor(mut_dat$sample, levels = sample_levels)
        mut_dat <- dplyr::arrange(mut_dat,.data$sample)
        mut_dat$xmin <- rep(cumsum(c(0, rep(0.5, sample_num-1))) ,each = nrow(mat))
        mut_dat$xmax <- mut_dat$xmin + 0.49
        if(nrow(mut_dat) == 0){
            next
        }
        
        ## Do not gene names if the number of mutations is greater than mut.threshold
        # if(show.gene){
        #     if(mut.num >= mut.threshold){
        #         message("Warning: the number of mutations is ", mut.num,
        #                 " which is greater than mut.threthold. Let mut.threshold be larger than ", mut.num," if you want to show gene")
        #         show.gene = FALSE
        #         # show.geneList = FALSE 
        #     }
        # }
        
        if(!is.null(geneList) & show.geneList){
            if(plot.geneList){
                if(mut.num >= mut.threshold){
                    message("Warning: the number of mutations is larger than mut.threshold.")
                    show.geneList = FALSE
                }
            }
            else{
                if(mut.genelist.num >= mut.threshold){
                    message("Warning: the number of mutations is larger than mut.threshold.")
                    show.geneList = FALSE
                }
            }
        }
        
        ## set table for annotation bar
        annotation.bar <- data.frame()
        annotation.bar.width <- (max(mut_dat$xmax)- max(mut_dat$xmin))*0.5
        
        ## position of annotation bar
        ab.xmax <- min(mut_dat$xmin)
        ab.xmin <- ab.xmax - annotation.bar.width
        ymin <- min(mut_dat$ymin)
        
        ## the number of each mutation type
        mutation_type_num <- c()
        mutation_type_sum <- length(mut_dat$mutation_type)/length(unique(mut_dat$sample))
        
        ## set colors
        mutation_type_colors <- sample(grDevices::colors(),length(mutation_type_level),replace = FALSE)
        ## get colors
        mutation_type_colors <- c("#7fc97f","#fdc086", "#E64B35FF", "#82166E",
                                  "#B77B42","#6349B7","#D5017D","#B77562",
                                   "#88A4FF", "#439F18", "#971D37","#8C9F3C")
        if(length(mutation_type_level) > length(mutation_type_colors)){
            left_colors <- sample(grDevices::colors(),
                                  length(mutation_type_level)-length(mutation_type_colors),
                                  replace = FALSE)
            mutation_type_colors <- append(mutation_type_colors, left_colors)
        }else{
            mutation_type_colors <- mutation_type_colors[seq_len(length(mutation_type_level))] 
        }
        names(mutation_type_colors) <- mutation_type_level
        
        type_colors <- c()
        type_name_percentage <- c()
        for(type in unique(mut_dat$mutation_type)){
            type_colors <- append(type_colors, mutation_type_colors[as.character(type)])
            ## percentage of type
            type.num <- length(which(mut_dat$mutation_type == type))/(mutation_type_sum*length(unique(mut_dat$sample)))
            
            name_percentage <- paste0(as.character(type),paste0(" (",round(type.num,3)*100,"%)"))
            type_name_percentage <- append(type_name_percentage, name_percentage)
            
            ymax <- max(mut_dat[mut_dat$mutation_type == type,]$ymax)
            sub <- data.frame(ab.xmin, ab.xmax, ymin, ymax, name_percentage)
            annotation.bar <- rbind(annotation.bar, sub)
            ymin <- ymax
        }
        names(type_colors) <- type_name_percentage
        # type_colors <- as.factor(type_colors)
        colnames(annotation.bar) <- c("xmin","xmax","ymin","ymax", "mutation_type")
        
        
        # ## label of annotation bar
        # type_label <- paste0(mutation_type_level,mutation_type_num)
        
        ## initialize
        xmin <- NULL
        xmax <- NULL
        
        p_basic <- ggplot() +
            labs(x = "", y = "") + theme_bw() +
            theme(panel.border = element_blank()) +
            theme(axis.text.y = element_blank())+
            theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank()) +
            theme(legend.title = element_text(size = legend.title.size))+
            
            ## set label for axis X
            theme(axis.text.x.bottom = element_text(angle = 90,
                                                    hjust = 1,
                                                    size = sample.text.size,
                                                    color = "black",
                                                    margin = margin(t = -15)))+
            scale_x_continuous(
                breaks = unique(mut_dat$xmin) + (unique(mut_dat$xmax) - unique(mut_dat$xmin))/2,
                labels = unique(mut_dat$sample),
                position = "bottom")+
            
            ggtitle(paste0(patient," (n=",mut.num,")")) + 
            theme(plot.title = element_text(size = 13.5,face = "bold",colour = "black", hjust = 0.5,vjust = -3))+
            
            theme(axis.ticks = element_blank()) +
            theme(legend.title = element_text(color = "black")) +
            theme(legend.text = element_text( color = "black")) +
            theme(legend.position = "right")
        
        if(use.ccf){
            ## initialize
            CCF <- NULL
            p <- p_basic + 
                geom_rect(data = mut_dat,
                          mapping = aes(xmin = xmin,xmax = xmax,ymin = ymin, ymax = ymax,fill = CCF))+
                scale_fill_gradient(low = "#deebf7", high = "#08306b", na.value="black", limit=c(0, 1))
            #ggsave(paste(patientID, "_mut_CCF.pdf", sep = ""), p, width = 4.5, height = 6.5)
        }else if(!use.ccf){
            mut_dat$Mutation <- as.character(mut_dat$Mutation)
            ## initialize
            Mutation <- NULL
            p <- p_basic + 
                geom_rect(data = mut_dat,
                          mapping = aes(xmin = xmin,xmax = xmax,ymin = ymin, ymax = ymax,fill = Mutation))+
                scale_fill_manual(name = "Mutation",values = c("0" = "#deebf7","1" = "#08306b"))
            
            #ggsave(paste(patientID, "_mut.pdf", sep = ""), p, width = 4.5, height = 6.5)
        }
        
        mutation_legend <- (p+ theme(legend.background = element_blank()))%>%    
            ggplotGrob()
        mutation_legend <- mutation_legend$grobs[[which(unlist(lapply(mutation_legend$grobs, function(x) {x$name})) == "guide-box")]]

        
        # if(is.null(geneList) & show.gene){
        #     breaks.gene <- unique(mut_dat$ymin + (mut_dat$ymax - mut_dat$ymin)/2)
        #     p <- p + scale_y_continuous(breaks = breaks.gene,
        #                                 labels = mut_dat[mut_dat$sample==unique(mut_dat$sample)[1],]$Gene,
        #                                 position = "right")+
        #         theme(axis.text.y.right = element_text(size = gene.text.size,
        #                                                colour = "black",
        #                                                face = "italic",
        #                                                margin = margin(l = -15),
        #                                                hjust = 0))
        if(!is.null(geneList)){
            if(plot.geneList & show.geneList){
                y.dat <- mut_dat %>% 
                    dplyr::mutate(y.breaks = .data$ymin + (.data$ymax - .data$ymin)/2) %>% 
                    dplyr::distinct(.data$y.breaks, .keep_all = TRUE)
                y.breaks <- y.dat$y.breaks
                y.labels <- y.dat$Gene
                p <- p + 
                    scale_y_continuous(breaks = y.breaks,
                                       labels = y.labels,
                                       position = "right") +
                    theme(axis.text.y.right = element_text(size = gene.text.size,
                                                           colour = "black",
                                                           face = "italic",
                                                           margin = margin(l = -15),
                                                           hjust = 0))
            }
            else if(!plot.geneList & show.geneList){
                y.dat <- mut_dat %>% 
                    dplyr::filter(.data$Gene != "genelist.out") %>% 
                    dplyr::mutate(y.breaks = .data$ymin + (.data$ymax - .data$ymin)/2) %>% 
                    dplyr::distinct(.data$y.breaks, .keep_all = TRUE) %>% 
                    dplyr::distinct(.data$mutation_type, .data$Gene, .keep_all = TRUE)
                y.breaks <- y.dat$y.breaks
                y.labels <- y.dat$Gene
                p <- p +
                    scale_y_continuous(breaks = y.breaks,
                                       labels = y.labels,
                                       position = "right") +
                    theme(axis.text.y.right = element_text(size = gene.text.size,
                                                           colour = "black",
                                                           face = "italic",
                                                           margin = margin(l = -15),
                                                           hjust = 0))
            
            }
        }
        
        ## side bar
        p <- p +
            ## annotation bar
            geom_rect(data = annotation.bar,
                      mapping = aes(xmin = xmin,xmax = xmax,ymin = ymin, ymax = ymax),fill = type_colors)
           # scale_fill_manual(values = type_colors,name = "Class")
        ## multi legend
        type_legend <- (
            ggplot()+
                geom_rect(data = annotation.bar,
                          mapping = aes(xmin = xmin,xmax = xmax,ymin = ymin, ymax = ymax, fill = factor(mutation_type))) +
                scale_fill_manual(breaks = type_name_percentage,
                                  values = type_colors,
                                  name = "Type") + 
                theme(legend.background = element_blank(),
                      legend.title = element_text(size = legend.title.size)))%>%
            ggplotGrob() 
        type_legend <-  type_legend$grobs[[which(unlist(lapply(type_legend$grobs, function(x) {x$name})) == "guide-box")]]

        legends_column <-
            plot_grid(
                mutation_legend + theme(legend.position = "none"),
                mutation_legend,
                mutation_legend + theme(legend.position = "none"),
                type_legend,
                type_legend + theme(legend.position = "none"),
                ncol = 1,
                rel_heights = c(.3,1,.1,1,.9),
                align = "v")
        
        heat_map <- plot_grid(p + theme(legend.position = "none"),
                              legends_column,
                              nrow = 1,
                              rel_widths = c(1,.4))
        
        
        heatmap_list[[patient]] <- heat_map
    }
    
    if(length(heatmap_list) == 1){
        return(heatmap_list[[1]])
    }
    
    return(heatmap_list)
}

Try the MesKit package in your browser

Any scripts or data that you put into this service are public.

MesKit documentation built on March 28, 2021, 6 p.m.