# R/plot_indel_contexts.R

# Defines functions plot_indel_contexts

# Documented in plot_indel_contexts

#' Plot the indel contexts
#'
#' @details
#' Plots the number of indels per COSMIC context per sample.
#' It takes a tibble with counts as its input. This tibble can be generated by 'count_indel_contexts()'.
#' Each sample is plotted in a separate facet.
#' The same y axis can be used for all samples or a separate y axis can be used.
#' The facets at the top show the indel types. First the C and T deletions,
#' then the C and T insertions. Next are the multi base deletions and insertions.
#' Finally the deletions with microhomology (mh) are shown.
#' The x-axis at the bottom shows the number of repeat units.
#' For mh deletions the microhomology length is shown.
#'
#' @param counts A tibble containing the number of indels per COSMIC context.
#' @param same_y A boolean describing whether the same y axis should be used for all samples.
#' @param extra_labels A boolean describing whether extra labels should be added.
#'     These can clarify the plot, but will shift when different plot widths are used.
#'     We recommend saving a plot with a width of 12, when using this argument.
#' @param condensed More condensed plotting format. Default = FALSE.
#'
#' @return A ggplot figure.
#'
#' @examples
#' ## Get the indel counts
#' ## See 'count_indel_contexts()' for more info on how to do this.
#' indel_counts <- readRDS(system.file("states/blood_indel_counts.rds",
#'   package = "MutationalPatterns"
#' ))
#'
#' ## Plot contexts
#' plot_indel_contexts(indel_counts)
#'
#' ## Use the same y axis for all samples.
#' plot_indel_contexts(indel_counts, same_y = TRUE)
#'
#' ## Add extra labels to make plot clearer
#' plot_indel_contexts(indel_counts, extra_labels = TRUE)
#'
#' ## Create a more condensed plot
#' plot_indel_contexts(indel_counts, condensed = TRUE)
#' @import ggplot2
#' @importFrom magrittr %>%
#' @family Indels
#'
#' @seealso \code{\link{count_indel_contexts}}, \code{\link{plot_main_indel_contexts}}
#'
#' @export
plot_indel_contexts <- function(counts, same_y = FALSE, extra_labels = FALSE, condensed = FALSE) {
  # These variables use non standard evaluation.
  # To avoid R CMD check complaints we initialize them to NULL.
  count <- muttype <- muttype_sub <- muttype_total <- sample <- NULL

  # Reshape the input to long format.
  # The row names are split on the underscore that directly precedes a digit
  # into 'muttype' and 'muttype_sub'. Both 'muttype' and 'sample' are turned
  # into factors whose levels follow the order of first appearance, so the
  # facets keep the order of the input rows and columns.
  counts <- counts %>%
    as.data.frame() %>%
    tibble::rownames_to_column("muttype_total") %>%
    tidyr::separate(muttype_total, c("muttype", "muttype_sub"), sep = "_(?=[0-9])") %>%
    dplyr::mutate(muttype = factor(muttype, levels = unique(muttype))) %>%
    tidyr::gather(key = "sample", value = "count", -muttype, -muttype_sub) %>%
    dplyr::mutate(sample = factor(sample, levels = unique(sample)))

  # Count nr mutations per sample. (This is shown in the row facet labels.)
  nr_muts <- counts %>%
    dplyr::group_by(sample) %>%
    dplyr::summarise(nr_muts = round(sum(count)))

  # Row facet labels: "<sample> (n = <total>)", looked up by sample name.
  facet_labs_y <- stringr::str_c(nr_muts$sample, " (n = ", nr_muts$nr_muts, ")")
  names(facet_labs_y) <- nr_muts$sample

  # Column facet labels. These are assigned by position to the mutation types
  # in their order of appearance, so there can never be more mutation types
  # than labels. Fail with a clear message instead of a cryptic 'names' error.
  facet_labs_x <- c(
    "1: C", "1: T", "1: C", "1: T",
    "2", "3", "4", "5+",
    "2", "3", "4", "5+",
    "2", "3", "4", "5+"
  )
  muttype_levels <- levels(counts$muttype)
  if (length(muttype_levels) > length(facet_labs_x)) {
    stop(
      "`counts` contains ", length(muttype_levels), " mutation types, ",
      "but at most ", length(facet_labs_x), " are supported.",
      call. = FALSE
    )
  }
  # Only keep as many labels as there are mutation types, so that no label
  # ends up with a missing (NA) name.
  facet_labs_x <- facet_labs_x[seq_along(muttype_levels)]
  names(facet_labs_x) <- muttype_levels

  # With a shared y axis only the x scales are free; otherwise both are.
  facet_scale <- if (same_y) "free_x" else "free"

  # Add optional extra labels.
  # The padding inside these strings positions the words above/below the
  # matching facets, which is why the result depends on the plot width.
  if (extra_labels) {
    title <- stringr::str_c(
      "Deletion           ",
      "Insertion          ",
      "Deletion                                   ",
      "Insertion                                  ",
      "Deletion (MH)"
    )
    x_lab <- stringr::str_c(
      "Homopolymer length                            ",
      "Number of repeat units                                                                               ",
      "Microhomology length"
    )
  } else {
    title <- x_lab <- ""
  }

  # Change plotting parameters based on whether plot should be condensed:
  # full-width bars without space between the facets, or narrower bars
  # with some space between them.
  if (condensed) {
    width <- 1
    spacing <- 0
  } else {
    width <- 0.6
    spacing <- 0.5
  }

  # Create figure: one row of facets per sample, one column per mutation type.
  fig <- ggplot(counts, aes(x = muttype_sub, y = count, fill = muttype, width = width)) +
    geom_bar(stat = "identity") +
    facet_grid(sample ~ muttype,
      scales = facet_scale, space = "free_x",
      labeller = labeller(muttype = facet_labs_x, sample = facet_labs_y)
    ) +
    scale_fill_manual(values = INDEL_COLORS) +
    theme_bw() +
    labs(fill = "Mutation type", title = title, y = "Nr of indels", x = x_lab) +
    theme(
      panel.grid.major.x = element_blank(),
      panel.grid.minor.y = element_blank(),
      panel.spacing.x = unit(spacing, "lines")
    )

  fig
}
# CuppenResearch/MutationalPatterns documentation built on Nov. 23, 2022, 4:13 a.m.