# R/label_mutations.R
#
# Defines functions: label_mutations.ref_alt_cov_tbl, label_mutations.default,
# label_mutations
#
# Documented in: label_mutations, label_mutations.ref_alt_cov_tbl

#------------------------------------------------
#' Label mutations
#'
#' @description
#' For each data point, label a mutation as `"ref"`, `"alt"`, `"ins"`, or
#' `"del"`. The label is derived from the number of reference (REF) and
#' alternate (ALT) calls (columns `ref_umi_count` and `alt_umi_count`) and from
#' the lengths of the REF and ALT sequences (columns `ref` and `alt`).
#'
#' * Data points that have more REF than ALT calls will be labeled as `"ref"`.
#' * Data points that have more ALT than REF calls and whose REF and ALT
#'   sequences are both one character long will be labeled as `"alt"`.
#' * Data points that have more ALT than REF calls and whose ALT sequence is
#'   longer than the REF sequence will be labeled as `"ins"`.
#' * Data points that have more ALT than REF calls and whose ALT sequence is
#'   shorter than the REF sequence will be labeled as `"del"`.
#' * All remaining data points are labeled `NA`. This includes data points with
#'   an equal number of REF and ALT calls, and data points with more ALT than
#'   REF calls whose REF and ALT sequences have the same length greater than
#'   one.
#'
#' @param .data The data set containing REF and ALT calls. The calls must be
#'   stored in columns named `ref` and `alt`, respectively.
#' @param .before,.after `r lifecycle::badge("experimental")`
#'   <[`tidy-select`][dplyr_tidy_select]> Optionally, control where new columns
#'   should appear (the default is to add to the right-hand side). Only one of
#'   the two may be supplied. See [`dplyr::relocate()`][dplyr::relocate()] for
#'   more details.
#'
#' @return
#' The object `.data` with an added column, `mutation_label`, that indicates the
#' mutation type of each row. The column is added to the right-hand side by
#' default, but this may be controlled by the `.before` and `.after` arguments.
#'
#' @export
#' @examples
#' # Read example data
#' data <- read_tbl_ref_alt_cov(
#'   miplicorn_example("reference_AA_table.csv"),
#'   miplicorn_example("alternate_AA_table.csv"),
#'   miplicorn_example("coverage_AA_table.csv"),
#'   gene == "atp6"
#' )
#'
#' # Add ref and alt calls to data
#' sequences <- c("A", "T", "C", "G", "AT", "TC", "TGC")
#' data <- dplyr::mutate(
#'   data,
#'   ref = sample(sequences, size = nrow(data), replace = TRUE),
#'   alt = sample(sequences, size = nrow(data), replace = TRUE)
#' )
#'
#' # Label the mutations
#' label_mutations(data)
#' label_mutations(data, .after = alt)
label_mutations <- function(.data, .before = NULL, .after = NULL) {
  UseMethod("label_mutations")
}

#' @export
label_mutations.default <- function(.data, .before = NULL, .after = NULL) {
  # Fallback method: it does no labelling and always signals an error
  # describing the kind of object that is expected.
  unsupported_msg <- c(
    "Cannot label the mutations of this data object.",
    "i" = "Object must be a reference, alternate, coverage table.",
    "i" = "Object must additionally contain reference and alternate calls."
  )

  cli_abort(unsupported_msg)
}

#' @importFrom stringr str_length
#' @rdname label_mutations
#' @export
label_mutations.ref_alt_cov_tbl <- function(.data,
                                            .before = NULL,
                                            .after = NULL) {
  # Capture the placement arguments unevaluated so they can be forwarded to
  # `dplyr::mutate()` below.
  before_quo <- enquo(.before)
  after_quo <- enquo(.after)

  # At most one of the two placement arguments may be supplied
  placement_supplied <- !c(
    rlang::quo_is_null(before_quo),
    rlang::quo_is_null(after_quo)
  )
  if (all(placement_supplied)) {
    cli_abort("Must supply only one of `.before` and `.after`.")
  }

  # The sequence columns are needed to compare REF and ALT lengths
  missing_calls <- setdiff(c("ref", "alt"), colnames(.data))
  if (length(missing_calls) > 0) {
    cli_abort(c(
      "Data object is missing reference and alternate calls.",
      "x" = "Call columns must be named `ref` and `alt`, respectively."
    ))
  }

  # Assign a label to every row. Rows with more REF calls are "ref"; rows with
  # more ALT calls are split by sequence length into "alt", "ins" and "del".
  # Rows matching no rule fall through to `NA`.
  dplyr::mutate(
    .data,
    mutation_label = dplyr::case_when(
      ref_umi_count > alt_umi_count ~ "ref",
      alt_umi_count > ref_umi_count &
        str_length(ref) == 1 & str_length(alt) == 1 ~ "alt",
      alt_umi_count > ref_umi_count &
        str_length(alt) > str_length(ref) ~ "ins",
      alt_umi_count > ref_umi_count &
        str_length(alt) < str_length(ref) ~ "del",
      TRUE ~ NA_character_
    ),
    .before = !!before_quo,
    .after = !!after_quo
  )
}
# bailey-lab/miplicorn documentation built on March 19, 2023, 7:40 p.m.