R/mutation-frequency.R

Defines functions plot.mut_freq autoplot.mut_freq plot_mutation_frequency mutation_frequency.ref_alt_cov_tbl mutation_frequency.default mutation_frequency `$<-.mut_freq` `names<-.mut_freq` `[.mut_freq` new_mut_freq

Documented in autoplot.mut_freq mutation_frequency mutation_frequency.ref_alt_cov_tbl plot_mutation_frequency plot.mut_freq

new_mut_freq <- function(x) {
  tibble::new_tibble(x, class = "mut_freq")
}

#' @export
`[.mut_freq` <- function(x, i, j, drop = FALSE) {
  mut_freq_reconstruct(NextMethod())
}

#' @export
`names<-.mut_freq` <- function(x, value) {
  mut_freq_reconstruct(NextMethod())
}

#' @export
`$<-.mut_freq` <- function(x, name, value) {
  mut_freq_reconstruct(NextMethod())
}

#------------------------------------------------
#' Compute frequency of mutations
#'
#' Generate a table representing the frequency of unique mutations. In order to
#' ensure confidence in the results, a threshold is provided indicating
#' confidence in genotype calls. All data that do not meet this threshold will
#' be removed from the computation.
#'
#' @param data A data frame, data frame extension (e.g. a tibble), or a lazy
#'   data frame (e.g. from dbplyr or dtplyr).
#' @param threshold A minimum UMI count which reflects the confidence in the
#'   genotype call. Data with a UMI count of less than the threshold will be
#'   filtered out from the analysis.
#'
#' @return
#' A [tibble][tibble::tibble-package] with the extra class `mut_freq`. The
#' output has the following columns:
#'
#' * `mutation_name`: The unique mutation sequenced.
#' * `frequency`: The frequency of the mutation.
#'
#' @export
#' @seealso [plot_mutation_frequency()] for plotting the table.
#' @examples
#' # Read example data
#' data <- read_tbl_ref_alt_cov(
#'   miplicorn_example("reference_AA_table.csv"),
#'   miplicorn_example("alternate_AA_table.csv"),
#'   miplicorn_example("coverage_AA_table.csv"),
#'   gene == "atp6" | gene == "crt"
#' )
#'
#' # Compute mutation frequency
#' mutation_frequency(data, 5)
mutation_frequency <- function(data, threshold) {
  UseMethod("mutation_frequency")
}

#' @export
mutation_frequency.default <- function(data, threshold) {
  cli_abort(c(
    "Cannot compute mutation frequency with this data object.",
    "i" = "Object must be a reference, alternate, coverage table."
  ))
}

#' @rdname mutation_frequency
#' @export
mutation_frequency.ref_alt_cov_tbl <- function(data, threshold) {
  # Ensure table has the column mutation name
  if (!"mutation_name" %in% colnames(data)) {
    cli_abort("Data needs the column `mutation_name`.")
  }

  # Filter data to threshold
  filtered <- dplyr::filter(
    data,
    .data$coverage > threshold,
    .data$alt_umi_count > threshold | .data$ref_umi_count > threshold
  )

  # Compute weighted average of the alt umi count
  wt_average <- filtered %>%
    dplyr::mutate(
      alt_umi_count = ifelse(
        .data$alt_umi_count < threshold,
        0,
        .data$alt_umi_count
      ),
      alt_freq = .data$alt_umi_count / .data$coverage
    ) %>%
    dplyr::group_by(.data$mutation_name) %>%
    dplyr::summarise(
      frequency = sum(.data$alt_freq * .data$coverage) / sum(.data$coverage)
    )

  # Assign a subclass "mut_freq"
  new_mut_freq(wt_average)
}

#------------------------------------------------
#' Plot frequency of mutations
#'
#' Plot the frequency of mutations generated by [mutation_frequency()].
#' The frequency is plotted on the y-axis and the amino acid change is plotted
#' on the x-axis. Data are grouped by the gene on which the mutation took place
#' and coloured according to their groupings.
#'
#' @param data,object,x An object of class `mut_freq`. Derived from the output
#'   of [mutation_frequency()].
#' @param ...	Other arguments passed to specific methods.
#'
#' @return A [ggplot2][ggplot2::ggplot2-package] object.
#'
#' @export
#' @seealso [mutation_frequency()] for generating the data for plotting.
#' @examples
#' # Read example data
#' data <- read_tbl_ref_alt_cov(
#'   miplicorn_example("reference_AA_table.csv"),
#'   miplicorn_example("alternate_AA_table.csv"),
#'   miplicorn_example("coverage_AA_table.csv"),
#'   gene == "atp6" | gene == "crt"
#' )
#'
#' # Compute the mutation frequency
#' frequency <- mutation_frequency(data, 5)
#'
#' # Plot
#' plot(frequency)
plot_mutation_frequency <- function(data) {
  if (!inherits(data, "mut_freq")) {
    cli_abort(c(
      "Data object must be of class `mut_freq`.",
      "x" = "Its class{?es} {?is/are} {backtick(class(data))}.",
      "i" = "Did you forget to run `mutation_frequency()` first?"
    ))
  }

  plot(data)
}

#' @importFrom ggplot2 autoplot
#' @rdname plot_mutation_frequency
#' @export
autoplot.mut_freq <- function(object, ...) {
  plot_data <- object %>%
    tidyr::drop_na() %>%
    dplyr::filter(.data$frequency != 0) %>%
    tidyr::extract(
      col = .data$mutation_name,
      into = c("gene", "aa_change"),
      regex = "^(.*)(?=-)[-](.*)"
    ) %>%
    arrange_natural(.data$gene, .data$aa_change)

  ggplot2::ggplot(
    data = plot_data,
    mapping = ggplot2::aes(
      x = .data$aa_change,
      y = .data$frequency,
      fill = .data$gene
    )
  ) +
    ggplot2::geom_col() +
    ggplot2::labs(
      x = "Amino Acid Change",
      y = "Frequency",
      title = "Frequency of Mutations"
    ) +
    ggplot2::scale_fill_viridis_d(name = "Gene") +
    default_theme() +
    ggplot2::theme(
      axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust = 1)
    )
}

#' @importFrom graphics plot
#' @rdname plot_mutation_frequency
#' @export
plot.mut_freq <- function(x, ...) {
  print(autoplot(x, ...))
}
bailey-lab/miplicorn documentation built on March 19, 2023, 7:40 p.m.