R/mut-prev.R

Defines functions plot.mut_prev autoplot.mut_prev plot_mutation_prevalence mutation_prevalence `$<-.mut_prev` `names<-.mut_prev` `[.mut_prev` new_mut_prev

Documented in autoplot.mut_prev mutation_prevalence plot_mutation_prevalence plot.mut_prev

new_mut_prev <- function(x) {
  tibble::new_tibble(x, class = "mut_prev")
}

#' @export
`[.mut_prev` <- function(x, i, j, drop = FALSE) {
  mut_prev_reconstruct(NextMethod())
}

#' @export
`names<-.mut_prev` <- function(x, value) {
  mut_prev_reconstruct(NextMethod())
}

#' @export
`$<-.mut_prev` <- function(x, name, value) {
  mut_prev_reconstruct(NextMethod())
}

#------------------------------------------------
#' Compute prevalence of mutations
#'
#' Generate a table representing the prevalence of unique mutations. In order to
#' ensure confidence in the results, a threshold is provided indicating
#' confidence in genotype calls. All data that do not meet this threshold will
#' be removed from the computation.
#'
#' @param data A data frame, data frame extension (e.g. a tibble), or a lazy
#'   data frame (e.g. from dbplyr or dtplyr).
#' @param threshold A minimum UMI count which reflects the confidence in the
#'   genotype call. Data with a UMI count of less than the threshold will be
#'   filtered out from the analysis.
#'
#' @return
#' A [tibble][tibble::tibble()] with the extra class `mutation_prev`. The output
#' has the following columns:
#'
#' * `mutation_name`: The unique mutation sequenced.
#' * `n_total`: The number of samples for which a mutation site was sequenced.
#' * `n_mutant`: The number of samples for which a mutation occurred.
#' * `prevalence`: The prevalence of the mutation.
#'
#' @export
mutation_prevalence <- function(data, threshold) {
  # Ensure have a table with reference umi counts, alternate umi counts, and
  # coverage
  cols <- c("ref_umi_count", "alt_umi_count", "coverage")
  if (!all(cols %in% colnames(data))) {
    abort(c(
      "Data is mising required columns.",
      x = "Need a column for the reference UMI counts.",
      x = "Need a column for the alternate UMI counts.",
      x = "Need a column for the coverage."
    ))
  }

  # Use threshold to filter data
  total <- dplyr::filter(
    data,
    .data$coverage > threshold &
      (.data$alt_umi_count > threshold | .data$ref_umi_count > threshold)
  )

  mutant_data <- dplyr::filter(total, .data$alt_umi_count > threshold)

  # Need column mutation name
  if (!"mutation_name" %in% colnames(data)) {
    abort("Data needs the column `mutation_name`.")
  }

  # Get counts of mutations
  total_count <- total %>%
    dplyr::count(.data$mutation_name) %>%
    dplyr::rename(n_total = .data$n)

  mutant_count <- mutant_data %>%
    dplyr::count(.data$mutation_name) %>%
    dplyr::rename(n_mutant = .data$n)

  # Compute prevalence
  prevalence <- total_count %>%
    dplyr::full_join(mutant_count, by = "mutation_name") %>%
    dplyr::mutate(prevalence = .data$n_mutant / .data$n_total)

  # Assign a subclass "mut_prev"
  new_mut_prev(prevalence)
}

#------------------------------------------------
#' Plot prevalence of mutations
#'
#' Plot the prevalence of mutations generated by [mutation_prevalence()].
#' The prevalence is plotted on the y-axis and the amino acid change is plotted
#' on the x-axis. Data are grouped by the gene on which the mutation took place
#' and coloured according to their groupings.
#'
#' @param data An object of class `mutation_prev`. Derived from the output of
#' [mutation_prevalence()].
#'
#' @export
plot_mutation_prevalence <- function(data) {
  if (!inherits(data, "mut_prev")) {
    rlang::abort(c(
      "Data object must be of class `mut_prev`.",
      i = "Did you forget to run `mutation_prevalence()` first?"
    ))
  }

  plot(data)
}

#' @importFrom ggplot2 autoplot
#' @rdname plot_mutation_prevalence
#' @export
autoplot.mut_prev <- function(object, ...) {
  plot_data <- object %>%
    tidyr::drop_na() %>%
    tidyr::extract(
      col = .data$mutation_name,
      into = c("gene", "aa_change"),
      regex = "^(.*)(?=-)[-](.*)"
    ) %>%
    miplicorn::arrange_natural(.data$gene, .data$aa_change)

  ggplot2::ggplot(
    data = plot_data,
    mapping = ggplot2::aes(
      x = .data$aa_change,
      y = .data$prevalence,
      fill = .data$gene
    )
  ) +
    ggplot2::geom_col() +
    ggplot2::labs(
      x = "Amino Acid Change",
      y = "Prevalence",
      title = "Prevalence of Mutations"
    ) +
    ggplot2::scale_fill_viridis_d(name = "Gene") +
    miplicorn::theme_miplicorn() +
    ggplot2::theme(
      axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust = 1)
    )
}

#' @importFrom graphics plot
#' @rdname plot_mutation_prevalence
#' @export
plot.mut_prev <- function(x, ...) {
  print(autoplot(x, ...))
}
arisp99/s3examples documentation built on Jan. 15, 2022, 11:11 a.m.