R/plotting.R

Defines functions .discrete_colors .gg_default_theme .addSmallLegend .pivot_signatures plot_sample_reconstruction_error plot_signatures plot_sample_counts

Documented in plot_sample_counts plot_sample_reconstruction_error plot_signatures

#' @import ggplot2 
#' @importFrom rlang .data
#' @import dplyr
#' @import tidyr
#' @import tidyverse 
NULL

#' Plot distribution of sample counts
#' 
#' Displays the proportion of counts for each mutation type across one
#' or more samples.
#'
#' @param musica A \code{\linkS4class{musica}} object.
#' @param sample_names Names of the samples to plot.
#' @param table_name Name of table used for plotting counts. If \code{NULL},
#' then the first table in the \code{\linkS4class{musica}} object will be used.
#' Default \code{NULL}.
#' @return Generates a ggplot object
#' @examples
#' data(musica_sbs96)
#' plot_sample_counts(musica_sbs96, sample_names = 
#' sample_names(musica_sbs96)[1])
#' @export
plot_sample_counts <- function(musica, sample_names, table_name = NULL) {

  if (is.null(table_name)) {
   table_name <- names(tables(musica))[1]
  }  
  
  # Extract counts for specific samples
  tab <- .extract_count_table(musica, table_name)
  ix <- match(sample_names, colnames(tab))
  if (all(is.na(ix))) {
    stop("The values in 'sample_names' did not match any sample IDs in table '",
         table_name, "'.")
  }
  else if (anyNA(ix)) {
    warning("The following samples in 'sample_names' were not found  in ",
            "table '", table_name, 
            "' and will ", "be exlcuded from the plot: ",
            paste(sample_names[is.na(ix)], collapse = ", "))
    ix <- ix[!is.na(ix)]
  }
  sample_counts <- tab[, ix, drop = FALSE]
  
  result <- methods::new("musica_result",
                          signatures = sample_counts, exposures = matrix(),
                          algorithm = "sample", musica = musica,
                          table_name = table_name)
  g <- plot_signatures(result) + ggplot2::ylab("Mutation Counts")
  return(g)
}


#' Plots the mutational signatures
#'
#' After mutational signature discovery has been performed, this function
#' can be used to display the distribution of each mutational signature. The
#' \code{color_variable} and \code{color_mapping} parameters can be used
#' to change the default color scheme of the bars. 
#'
#' @param result A \code{\linkS4class{musica_result}} object generated by
#' a mutational discovery or prediction tool.
#' @param plotly If \code{TRUE}, the the plot will be made interactive
#' using \code{\link[plotly]{plotly}}. Default \code{FALSE}.
#' @param color_variable Name of the column in the variant annotation data.frame
#' to use for coloring the mutation type bars. The variant annotation data.frame 
#' can be found within the count table of the \code{\linkS4class{musica}}
#' object. If \code{NULL}, then the default column specified in the count
#' table will be used. Default \code{NULL}.
#' @param color_mapping A character vector used to map items in the
#' \code{color_variable} to a color. The items in \code{color_mapping}
#' correspond to the colors. The names of the items in \code{color_mapping}
#' should correspond to the uniqeu items in \code{color_variable}. If
#' \code{NULL}, then the default \code{color_mapping} specified in the count
#' table will be used. Default \code{NULL}.
#' @param text_size Size of axis text. Default \code{10}.
#' @param show_x_labels If \code{TRUE}, the labels for the mutation types
#' on the x-axis will be shown. Default \code{TRUE}.
#' @param show_y_labels If \code{TRUE}, the y-axis ticks and labels will be 
#' shown. Default \code{TRUE}.
#' @param same_scale If \code{TRUE}, the scale of the probability for each
#' signature will be the same. If \code{FALSE}, then the scale of the y-axis
#' will be adjusted for each signature. Default \code{TRUE}.
#' @param y_max Vector of maximum y-axis limits for each signature. One value 
#' may also be provided to specify a constant y-axis limit for all signatures.
#' Vector length must be 1 or equivalent to the number of signatures. Default 
#' \code{NULL}.
#' @param annotation Vector of annotations to be displayed in the top right
#' corner of each signature. Vector length must be equivalent to the number of
#' signatures. Default \code{NULL}.
#' @return Generates a ggplot or plotly object
#' @examples
#' data(res)
#' plot_signatures(res)
#' @export
plot_signatures <- function(result, plotly = FALSE,
                            color_variable = NULL, color_mapping = NULL,
                            text_size = 10, 
                            show_x_labels = TRUE, show_y_labels = TRUE,
                            same_scale = TRUE, y_max = NULL, annotation = NULL) {
  
  loc_num <- NULL
  mutation_color <- NULL
  x <- NULL
  xend <- NULL
  y <- NULL
  yend <- NULL
  
  signatures <- signatures(result)
  sig_names <- colnames(signatures)
  table_name <- table_selected(result) 
  tab <- tables(result)[[table_name]]
  annot <- get_annot_tab(tab) 
  num_sigs <- length(sig_names)
  
  if (is.null(color_mapping)) {
    color_mapping <- get_color_mapping(tab) 
  }
  plot_dat <- .pivot_signatures(signatures, tab,
                               color_variable = color_variable) 
  
  width <- 0.45 
  motif_label_locations <- plot_dat$df[plot_dat$df$signature == plot_dat$df[1,2],] %>% 
    ungroup() %>% 
    mutate(loc_num = c(1:dim(plot_dat$df[plot_dat$df$signature == plot_dat$df[1,2],])[1])) %>%
    group_by(mutation_color) %>% 
    summarise(x = min(loc_num) - width, xend = max(loc_num) + width,
              y = 0, yend = 0.01)
  
  # Whether to re-scale y axis
  scales <- ifelse(isTRUE(same_scale), "fixed", "free_y")
  
  # If annotation supplied
  if (!is.null(annotation)){
    annotation_text <- data.frame(
      label = annotation,
      signature = names(plot_dat$names),
      mutation_color = c("T>G")
    )
  }
  
  # Rename signature labels
  sig_name_labels <- data.frame(
    label = sig_names,
    signature = names(plot_dat$names),
    mutation_color = c("C>A")
  )
  
  # Add potential forced y-axis max
  plot_dat$df$ymax <- rep(y_max, length(unique(plot_dat$df$motif)))
  
  # Convert exposure probabilities to percentages
  plot_dat$df$exposure <- plot_dat$df$exposure * 100
  
  # Plot signatures
  plot_dat$df %>%
    ggplot(aes_string(y = "exposure", x = "motif", fill = "mutation_color")) +
    geom_bar(stat = "identity") +
    facet_grid(factor(signature) ~ ., scales = scales) +
    ggplot2::xlab("Motifs") + ggplot2::ylab("Percent of Mutations") +
    ggplot2::guides(fill = ggplot2::guide_legend(nrow = 1)) +
    ggplot2::scale_fill_manual(values = color_mapping) +
    ggplot2::scale_x_discrete(labels = annot$context) +
    ggplot2::scale_y_continuous(expand = expansion(mult = c(0, 0.2)), 
                                limits = c(0, NA), n.breaks = 4) +
    ggplot2::geom_text(data = sig_name_labels, 
                       mapping = aes_string(x = -Inf, y = Inf, label = "label"), 
                       hjust = -0.075, vjust = 1.5, 
                       fontface = "bold")  -> p
  
  # Add annotations, if necessary
  if (exists("annotation_text") == TRUE){
    p <- p + ggplot2::geom_text(data = annotation_text,
                                mapping = aes_string(x = Inf, y = Inf, label = "label"),
                                hjust = 1.075, vjust = 1.5, 
                                fontface = "bold")
  }
  
  # Change y-axis scale, if necessary
  if (!is.null(y_max)){
    p <- p + geom_blank(aes_string(y = "ymax")) 
  }
  
  # Plot motif labels
  plot_dat$df$exposure_null <- rep(0, dim(plot_dat$df)[1])
  plot_dat$df %>%
    ggplot(aes_string(y = "exposure_null", x = "motif")) +
    geom_bar(stat = "identity") +
    ggplot2::scale_y_continuous(expand = expansion(mult = c(0, 0)), 
                                limits = c(0, NA), breaks = c(0, 0.01), 
                                labels = c("  ","  "), n.breaks = 4) +
    ggplot2::ylab("") +
    ggplot2::geom_rect(data = motif_label_locations, 
                       aes(xmin = x, xmax = xend, ymin = max(y), ymax = max(yend)), 
                       fill = color_mapping, color = "black", 
                       linewidth = 0.25, inherit.aes = FALSE) +
    ggplot2::geom_text(data=motif_label_locations, 
                       aes(x=x+(xend-x)/2, y=y+(yend-y)/2, 
                           label = stringr::str_to_title(mutation_color)), 
                       fontface = "bold", size = 4, 
                       color = c("black", "white", rep("black", 4))) -> p2
  
  
  # Adjust theme
  p <- .gg_default_theme(p, text_size = text_size) +
    theme(axis.text.x = element_text(angle = 90, vjust = 0.5)) +
    theme(legend.position = "none") +
    theme(plot.margin = margin(0,1,0,1)) + 
    theme(strip.background = element_blank(), strip.text.y = element_blank())
  
  p2 <- .gg_default_theme(p2, text_size = text_size) +
    theme(plot.margin = margin(0,1,0,1)) + # see function below 
    theme(axis.text.x = element_blank(),
          axis.ticks.x = element_blank(),
          axis.title.x = element_blank(),
          axis.ticks.y = element_blank(),
          panel.grid.major.x = element_blank(),
          legend.position = "none")
  
  if (!isTRUE(show_x_labels)) {
    p <- p + theme(axis.text.x = element_blank(),
                   axis.ticks.x = element_blank(),
                   panel.grid.major.x = element_blank())
  }
  
  if (!isTRUE(show_y_labels)){
    p <- p + theme(axis.text.y = element_blank(),
                   axis.ticks.y = element_blank(),
                   axis.title.y = element_blank())
    p2 <- p2 + theme(axis.text.y = element_blank(),
                     axis.title.y = element_blank())
  }
  
  
  figure <- ggpubr::ggarrange(p2, p, ncol = 1, nrow = 2, heights = c(1,15))
  
  if (isTRUE(plotly)) {
    figure <- plotly::ggplotly(p)
  }
  
  return(figure)
  
}


#' Plot reconstruction error for a sample
#' 
#' Displays the observed distribution of counts for each mutation type, 
#' the distribution of reconstructed counts for each mutation type using 
#' the inferred mutational signatures, and the difference between the 
#' two distributions. 
#'
#' @param result A \code{\linkS4class{musica_result}} object generated by
#' a mutational discovery or prediction tool.
#' @param sample Name of the sample within the
#' \code{\linkS4class{musica_result}} object.
#' @param plotly If \code{TRUE}, the the plot will be made interactive
#' using \code{\link[plotly]{plotly}}. Default \code{FALSE}.
#' @return Generates a ggplot or plotly object
#' @examples
#' data(res)
#' plot_sample_reconstruction_error(res, "TCGA-ER-A197-06A-32D-A197-08")
#' @export
plot_sample_reconstruction_error <- function(result, sample,
                                             plotly = FALSE) {
  signatures <- .extract_count_table(get_musica(result), 
                                     table_selected(result))[, sample, 
                                                             drop = FALSE]
  sample_name <- colnames(signatures)
  reconstructed <- reconstruct_sample(result, sample)
  sigs <- cbind(signatures, reconstructed, signatures - reconstructed)
  colnames(sigs) <- c("Counts", "Reconstructed", "Difference")
  
  recontruct_result <- methods::new("musica_result",
                      signatures = sigs,
                      exposures = matrix(), algorithm = "NMF",
                      musica = get_musica(result),
                      table_name = table_selected(result))
  plot_signatures(recontruct_result, same_scale = FALSE) +
    ggplot2::ggtitle("Reconstruction error", subtitle = sample_name) + ylab("")
}


# Utility functions -------------------------------
.pivot_signatures <- function(signatures, tab, sig_names = NULL,
                              color_variable = NULL) {
  if (is.null(sig_names)) {
    sig_names <- colnames(signatures)  
  }
  annot <- tab@annotation
  rownames(annot) <- annot$motif
  
  # Ensure signature colnames are unique
  # They can not be unique in the sig_compare function if one signature
  # is matched up against several others in the second result object
  colnames(signatures) <- paste0(colnames(signatures),
                                 "-", seq(ncol(signatures)))
  names(sig_names) <- colnames(signatures)
    
  # Rormat signature matrix into long data.frame
  signatures %>%
    as.data.frame %>%
    tibble::rownames_to_column(var = "motif") %>%
    tidyr::pivot_longer(cols = dplyr::all_of(names(sig_names)),
                        names_to = "signature",
                        values_to = "exposure",
                        names_repair = "minimal") -> df
  
  # Check for mutation color variable in annot table
  final_color_variable <- NULL
  if (is.null(color_variable) && !is.null(tab@color_variable)) {
    color_variable <- tab@color_variable
  }
  
  # Set up color variable if supplied as vector or the name of a column in
  # the table annotation
  if (length(color_variable) == 1 && color_variable %in% colnames(annot)) {
    final_color_variable <- annot[df$motif, tab@color_variable]
  } else if (length(color_variable) == nrow(signatures)) {
    final_color_variable <- color_variable
  } else {
    warning("'color_variable' must be a column in the table annotation: ",
            paste(colnames(annot), collapse = ", "), ". Or it must be the ",
            "same length as the number of motifs in the signatures: ",
            nrow(signatures))
  }
  
  # Save color variable to df if it was specified
  if (!is.null(final_color_variable)) {
    df <- cbind(df, mutation_color = final_color_variable)
  }

  # Make sure signature order is preserved using factor
  df$signature <- factor(df$signature, levels = names(sig_names))
  return(list(df = df, names = sig_names))
}

.addSmallLegend <- function(myPlot, pointSize = 2,
                            textSize = 10, spaceLegend = 0.5) {
  myPlot +
    ggplot2::guides(shape = ggplot2::guide_legend(override.aes =
                                                    list(size = pointSize)),
                    color = ggplot2::guide_legend(override.aes =
                                                    list(size = pointSize))) +
    ggplot2::theme(legend.title = element_text(size = textSize),
            legend.text  = element_text(size = textSize),
            legend.key.size = ggplot2::unit(spaceLegend, "lines"),
            legend.box.background = ggplot2::element_rect(colour = "black"),
            legend.spacing.x = ggplot2::unit(0.25, "cm"))
}

.gg_default_theme <- function(p, text_size = 10) {
  p <- p + theme_bw() + theme(
    panel.grid = element_blank(),
    text = element_text(size = text_size))
  
  if("mono" %in% names(grDevices::pdfFonts())) {
    p <- p + theme(text = element_text(family = "mono",
                                       size = text_size))
  }
  return(p)
}

.discrete_colors <- function(n) {
  hues <- seq(15, 375, length = n + 1)
  colors <- grDevices::hcl(h = hues, l = 65, c = 100)[seq_len(n)]
  return(colors)
}
campbio/musicatk documentation built on Oct. 22, 2023, 8:28 p.m.