R/plot_profile_region.R

Defines functions plot_profile_region

Documented in plot_profile_region

#' Plot 96 trinucleotide profile per subgroup
#'
#' Plot relative contribution of 96 trinucleotides per subgroup.
#' This can be genomic regions but could also be other subsets.
#' The function uses a matrix generated by 'lengthen_mut_matrix()'
#' as its input.
#'
#' @param mut_matrix Mutation matrix
#' @param mode 'relative_sample', 'relative_sample_feature' or 'absolute'
#' When 'relative_sample', the number of variants will be shown
#' divided by the total number of variants in that sample.
#' When 'relative_sample_feature', the number of variants will be shown
#' divided by the total number of variants in that sample. and genomic region.
#' @param colors 6 value color vector
#' @param ymax Y axis maximum value, default = 0.2
#' @param condensed More condensed plotting format. Default = FALSE.
#' @return 96 trinucleotide profile plot per region
#'
#' @import ggplot2
#' @importFrom magrittr %>%
#'
#' @examples
#' ## See the 'lengthen_mut_matrix()' example for how we obtained the
#' ## mutation matrix information:
#' mut_mat_long <- readRDS(system.file("states/mut_mat_longregions.rds",
#'   package = "MutationalPatterns"
#' ))
#'
#' ## Plot the 96-profile of three samples
#' plot_profile_region(mut_mat_long[, c(1, 4, 7)])
#' @seealso
#' \code{\link{mut_matrix}}
#' @family genomic_regions
#'
#' @export
#'
plot_profile_region <- function(mut_matrix, mode = c("relative_sample", "relative_sample_feature", "absolute"), colors = NULL, ymax = 0.2, condensed = FALSE) {

  # These variables use non standard evaluation.
  # To avoid R CMD check complaints we initialize them to NULL.
  context <- feature <- substitution <- freq <- NULL

  # Match argument
  mode <- match.arg(mode)

  # if colors parameter not provided, set to default colors.
  if (is.null(colors)) {
    colors <- COLORS6
  }
  if (length(colors) != 6) {
    stop("Provide colors vector with length 6", call. = FALSE)
  }

  if (condensed) {
    bar_width <- 1
  } else {
    bar_width <- 0.6
  }

  # Count number muts for labelling.
  nr_muts <- colSums(mut_matrix)
  facet_labs_y <- stringr::str_c(colnames(mut_matrix), " (n = ", nr_muts, ")")
  names(facet_labs_y) <- colnames(mut_matrix)

  # Split the rownames in context, substitution and features
  row_names <- rownames(mut_matrix)
  full_context <- stringr::str_remove(row_names, "_.*")
  context <- stringr::str_replace(full_context, "\\[.*\\]", "\\.")
  substitution <- full_context %>%
    stringr::str_remove(".*\\[") %>%
    stringr::str_remove("\\].*")
  feature <- stringr::str_remove(row_names, ".*_")
  feature <- factor(feature, levels = rev(unique(feature)))

  # Combine everything in a tb
  tb <- mut_matrix %>%
    as.data.frame() %>%
    tibble::as_tibble() %>%
    dplyr::mutate(context = context, feature = feature, substitution = substitution) %>%
    tidyr::gather(value = "freq", key = "sample", -context, -feature, -substitution)

  if (mode == "relative_sample") {
    tb <- tb %>%
      dplyr::group_by(sample) %>%
      dplyr::mutate(freq = freq / sum(freq)) %>%
      dplyr::ungroup()
    y_axis_break_interval <- 0.1
  } else if (mode == "relative_sample_feature") {
    tb <- tb %>%
      dplyr::group_by(sample, feature) %>%
      dplyr::mutate(freq = freq / sum(freq)) %>%
      dplyr::ungroup()
    y_axis_break_interval <- 0.1
  } else if (mode == "absolute") {
    y_axis_break_interval <- 10
  }
  tb <- dplyr::mutate(tb, freq = ifelse(is.nan(freq), 0, freq))

  # Create figure.
  # Suppresses alpha warning.
  withCallingHandlers(
    {
      fig <- ggplot(data = tb, aes(x = context, y = freq, fill = substitution, alpha = feature)) +
        geom_bar(stat = "identity", colour = "black", size = 0.2, width = bar_width) +
        scale_fill_manual(values = colors) +
        facet_grid(sample ~ substitution, labeller = labeller(sample = facet_labs_y)) +
        ylab("Relative contribution") +
        coord_cartesian(ylim = c(0, ymax)) +
        scale_alpha_discrete(range = c(0.4, 1)) +
        scale_y_continuous(breaks = seq(0, ymax, y_axis_break_interval)) +
        guides(fill = "none") +
        theme_bw() +
        theme(
          axis.title.y = element_text(size = 12, vjust = 1),
          axis.text.y = element_text(size = 8),
          axis.title.x = element_text(size = 12),
          axis.text.x = element_text(size = 5, angle = 90, vjust = 0.5),
          strip.text.x = element_text(size = 9),
          strip.text.y = element_text(size = 9),
          panel.grid.major.x = element_blank(),
          panel.spacing.x = unit(0, "lines")
        )
    },
    warning = function(w) {
      if (grepl("Using alpha for a discrete variable is not advised.", conditionMessage(w))) {
        invokeRestart("muffleWarning")
      }
    }
  )

  return(fig)
}
UMCUGenetics/MutationalPatterns documentation built on Nov. 24, 2022, 4:31 a.m.