new_mut_prev <- function(x) {
tibble::new_tibble(x, class = "mut_prev")
}
#' @export
`[.mut_prev` <- function(x, i, j, drop = FALSE) {
mut_prev_reconstruct(NextMethod())
}
#' @export
`names<-.mut_prev` <- function(x, value) {
mut_prev_reconstruct(NextMethod())
}
#' @export
`$<-.mut_prev` <- function(x, name, value) {
mut_prev_reconstruct(NextMethod())
}
#------------------------------------------------
#' Compute prevalence of mutations
#'
#' Generate a table representing the prevalence of unique mutations. In order to
#' ensure confidence in the results, a threshold is provided indicating
#' confidence in genotype calls. All data that do not meet this threshold will
#' be removed from the computation.
#'
#' @param data A data frame, data frame extension (e.g. a tibble), or a lazy
#' data frame (e.g. from dbplyr or dtplyr).
#' @param ... Other arguments passed to specific methods.
#' @param threshold A minimum UMI count which reflects the confidence in the
#' genotype call. Data with a UMI count of less than the threshold will be
#' filtered out from the analysis.
#'
#' @return
#' A [tibble][tibble::tibble-package] with the extra class `mut_prev`. The
#' output has the following columns:
#'
#' * `mutation_name`: The unique mutation sequenced.
#' * `n_total`: The number of samples for which a mutation site was sequenced.
#' * `n_mutant`: The number of samples for which a mutation occurred.
#' * `prevalence`: The prevalence of the mutation.
#'
#' @export
#' @seealso [plot_mutation_prevalence()] for plotting the table.
#' @examples
#' # Read example data
#' data <- read_tbl_ref_alt_cov(
#' miplicorn_example("reference_AA_table.csv"),
#' miplicorn_example("alternate_AA_table.csv"),
#' miplicorn_example("coverage_AA_table.csv"),
#' gene == "atp6" | gene == "crt"
#' )
#'
#' # Compute mutation prevalence
#' mutation_prevalence(data, threshold = 5)
mutation_prevalence <- function(data, ...) {
UseMethod("mutation_prevalence")
}
#' @export
mutation_prevalence.default <- function(data, ...) {
cli_abort(c(
"Cannot compute mutation prevalence with this data object.",
"i" = "Object must be a reference, alternate, coverage table or a genotype table."
))
}
#' @rdname mutation_prevalence
#' @export
mutation_prevalence.ref_alt_cov_tbl <- function(data, ..., threshold) {
# Use threshold to filter data
total <- dplyr::filter(
data,
.data$coverage > threshold &
(.data$alt_umi_count > threshold | .data$ref_umi_count > threshold)
)
mutant_data <- dplyr::filter(total, .data$alt_umi_count > threshold)
# Need column mutation name
if (!"mutation_name" %in% colnames(data)) {
cli_abort("Data needs the column `mutation_name`.")
}
# Get counts of mutations
total_count <- total %>%
dplyr::count(.data$mutation_name) %>%
dplyr::rename(n_total = n)
mutant_count <- mutant_data %>%
dplyr::count(.data$mutation_name) %>%
dplyr::rename(n_mutant = n)
# Compute prevalence
prevalence <- total_count %>%
dplyr::full_join(mutant_count, by = "mutation_name") %>%
dplyr::mutate(
n_mutant = tidyr::replace_na(.data$n_mutant, 0),
prevalence = .data$n_mutant / .data$n_total
)
# Assign a subclass "mut_prev"
new_mut_prev(prevalence)
}
#' @rdname mutation_prevalence
#' @export
mutation_prevalence.geno_tbl <- function(data, ...) {
# Get counts for all data
total_count <- data %>%
dplyr::filter(.data$genotype != -1) %>%
dplyr::count(.data$mutation_name) %>%
dplyr::rename(n_total = n)
# Get counts for mutant data
mutant_count <- data %>%
dplyr::filter(.data$genotype == 1 | .data$genotype == 2) %>%
dplyr::count(.data$mutation_name) %>%
dplyr::rename(n_mutant = n)
# Compute prevalence
prevalence <- total_count %>%
dplyr::full_join(mutant_count, by = "mutation_name") %>%
dplyr::mutate(
n_mutant = tidyr::replace_na(.data$n_mutant, 0),
prevalence = .data$n_mutant / .data$n_total
)
# Assign a subclass "mut_prev"
new_mut_prev(prevalence)
}
#------------------------------------------------
#' Plot prevalence of mutations
#'
#' Plot the prevalence of mutations generated by [mutation_prevalence()].
#' The prevalence is plotted on the y-axis and the amino acid change is plotted
#' on the x-axis. Data are grouped by the gene on which the mutation took place
#' and coloured according to their groupings.
#'
#' @param data,object,x An object of class `mut_prev`. Derived from the output
#' of [mutation_prevalence()].
#' @param ... Other arguments passed to specific methods.
#'
#' @return A [ggplot2][ggplot2::ggplot2-package] object.
#'
#' @export
#' @seealso [mutation_prevalence()] for generating the data for plotting.
#' @examples
#' # Read example data
#' data <- read_tbl_ref_alt_cov(
#' miplicorn_example("reference_AA_table.csv"),
#' miplicorn_example("alternate_AA_table.csv"),
#' miplicorn_example("coverage_AA_table.csv"),
#' gene == "atp6" | gene == "crt"
#' )
#'
#' # Compute mutation prevalence
#' prevalence <- mutation_prevalence(data, threshold = 5)
#'
#' # Plot
#' plot(prevalence)
plot_mutation_prevalence <- function(data) {
if (!inherits(data, "mut_prev")) {
cli_abort(c(
"Data object must be of class `mut_prev`.",
"x" = "Its class{?es} {?is/are} {backtick(class(data))}.",
"i" = "Did you forget to run `mutation_prevalence()` first?"
))
}
plot(data)
}
#' @importFrom ggplot2 autoplot
#' @rdname plot_mutation_prevalence
#' @export
autoplot.mut_prev <- function(object, ...) {
plot_data <- object %>%
tidyr::drop_na() %>%
tidyr::extract(
col = .data$mutation_name,
into = c("gene", "aa_change"),
regex = "^(.*)(?=-)[-](.*)"
) %>%
arrange_natural(.data$gene, .data$aa_change)
ggplot2::ggplot(
data = plot_data,
mapping = ggplot2::aes(
x = .data$aa_change,
y = .data$prevalence,
fill = .data$gene
)
) +
ggplot2::geom_col() +
ggplot2::labs(
x = "Amino Acid Change",
y = "Prevalence",
title = "Prevalence of Mutations"
) +
ggplot2::scale_fill_viridis_d(name = "Gene") +
default_theme() +
ggplot2::theme(
axis.text.x = ggplot2::element_text(angle = 90, vjust = 0.5, hjust = 1)
)
}
#' @importFrom graphics plot
#' @rdname plot_mutation_prevalence
#' @export
plot.mut_prev <- function(x, ...) {
print(autoplot(x, ...))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.