# R/mutation.R
#
# Defines functions: filter_mut_table, mut_add_column
#
# Documented in: filter_mut_table, mut_add_column

# Tools for mutation table data generated by pipeline

#' Add columns to the raw mutation tables
#'
#' Prepends a sample column extracted from sample_name and inserts a
#' normal_f column, computed as n_alt_count / (n_alt_count + n_ref_count),
#' before position 11 of the resulting table.
#'
#' @param table A dataframe - the mutation table downloaded from server. It
#'   must contain the columns sample_name, n_alt_count and n_ref_count.
#' @param pattern A string - the pattern to extract sample names
#'
#' @return A dataframe - the table with added columns
#' @importFrom tibble add_column
#' @export
mut_add_column <- function(table, pattern) {

    required <- c("sample_name", "n_alt_count", "n_ref_count")
    absent <- setdiff(required, names(table))
    if (length(absent) > 0) {
        stop("`table` is missing required column(s): ",
             paste(absent, collapse = ", "),
             call. = FALSE)
    }

    # Read the inputs straight from `table`. add_column() forwards its `...`
    # to tibble(), where `.data` denotes the columns being created rather
    # than the input table, so `.data$sample_name` cannot be used here.
    sample_id <- stringr::str_extract(table[["sample_name"]], pattern)
    normal_depth <- table[["n_alt_count"]] + table[["n_ref_count"]]
    normal_fraction <- table[["n_alt_count"]] / normal_depth

    # `sample` goes in first; position 11 for `normal_f` is therefore counted
    # with the new `sample` column already in place.
    table <- tibble::add_column(table, sample = sample_id, .before = 1)
    tibble::add_column(table, normal_f = normal_fraction, .before = 11)
}


#' Filter raw mutation tables
#'
#' Keeps only rows that are unambiguous, exonic, functionally annotated,
#' sufficiently covered in tumor and normal, above the tumor allele fraction
#' threshold, at or below the normal allele fraction threshold, and rare in
#' the population databases (a missing population frequency counts as rare).
#' The result is sorted by the sample column.
#'
#' @param table A dataframe - the mutation table downloaded from server. It
#'   must already contain the sample and normal_f columns (these are the
#'   columns added by mut_add_column).
#' @param t_cov An integer - the minimum coverage for tumor sample
#' @param n_cov An integer - the minimum coverage for normal sample
#' @param t_frac A double - the minimum allele fraction in tumor sample
#' @param n_frac A double - the maximum allele fraction in normal sample
#' @param pop_freq A double - the maximum population frequency (exclusive)
#'
#' @return A dataframe - the filtered table, ordered by sample
#' @export
filter_mut_table <- function(table,
                           t_cov = 15,
                           n_cov = 8,
                           t_frac = 0.02,
                           n_frac = 0.01,
                           pop_freq = 0.01) {

    required <- c("sample", "gene.knowngene", "func.knowngene",
                  "exonicfunc.knowngene", "t_alt_count", "t_ref_count",
                  "n_alt_count", "n_ref_count", "tumor_f", "normal_f",
                  "exac_all", "esp6500siv2_all", "x1kg2015aug_max")
    absent <- setdiff(required, names(table))
    if (length(absent) > 0) {
        stop("`table` is missing required column(s): ",
             paste(absent, collapse = ", "),
             call. = FALSE)
    }

    # dplyr::filter() keeps a row only when every condition is TRUE, so one
    # call with several conditions selects the same rows as a chain of
    # single-condition filters.
    filtered <- dplyr::filter(
        table,
        # remove ambiguous genes
        !stringr::str_detect(.data$gene.knowngene, ";"),
        # remove non-exonic mutations
        .data$func.knowngene == "exonic",
        # remove non-functional mutations
        .data$exonicfunc.knowngene != "unknown",
        # tumor coverage
        .data$t_alt_count + .data$t_ref_count >= t_cov,
        # normal coverage
        .data$n_alt_count + .data$n_ref_count >= n_cov,
        # tumor and normal allele fraction
        .data$tumor_f >= t_frac,
        .data$normal_f <= n_frac,
        # population frequency: a missing value passes the filter
        is.na(.data$exac_all) | .data$exac_all < pop_freq,
        is.na(.data$esp6500siv2_all) | .data$esp6500siv2_all < pop_freq,
        is.na(.data$x1kg2015aug_max) | .data$x1kg2015aug_max < pop_freq
    )

    # `.data$sample` pins the sort key to the column; a bare `sample` could
    # fall back to base::sample().
    dplyr::arrange(filtered, .data$sample)
}
# xmc811/xmcutil documentation built on June 4, 2021, 10:48 a.m.