R/mutFilterQual.R

Defines functions vafHelper mutFilterQual

Documented in mutFilterQual

#' mutFilterQual
#' @description Filter variants in low sequencing quality or low confidence.
#'
#' @param maf An MAF data frame, generated by \code{\link{vcfToMAF}} function.
#' @param panel The sequencing panel applied on the dataset. Parameters
#' for \code{\link{mutFilterQual}} function are set differently for different
#' panels. Default: "Customized". Options: "MSKCC", "WES".
#' @param tumorDP Threshold of tumor total depth. Default: 20
#' @param normalDP Threshold of normal total depth. Default: 10
#' @param tumorAD Threshold of tumor alternative allele depth. Default: 5
#' @param normalAD Threshold of normal alternative allele depth. Default: Inf
#' @param VAF Threshold of VAF value. Default: 0.05
#' @param VAFratio Threshold of VAF ratio (tVAF/nVAF). Default: 0
#'
#' @import dplyr
#' @importFrom methods is
#'
#' @return An MAF data frame where some variants
#' have Q tag in CaTag column for sequencing quality filtration
#'
#' @export mutFilterQual
#' @examples
#' maf <- vcfToMAF(system.file("extdata",
#' "WES_EA_T_1_mutect2.vep.vcf", package="CaMutQC"))
#' mafF <- mutFilterQual(maf)


mutFilterQual <- function(maf, panel = "Customized", tumorDP = 20, 
                          normalDP = 10, tumorAD = 5, normalAD = Inf, 
                          VAF = 0.05, VAFratio = 0) {
    # check user input
    if (!(is(maf, "data.frame"))) {
        stop("maf input should be a data frame, did you get it from vcfToMAF function?")
    }
    
    # set different parameters for different panels
    if (panel != "Customized"){
        normalAD <- 1
        if (panel == "MSKCC"){
            tumorAD <- 10
            VAFratio <- 5
        }else if (panel != "WES"){
            stop("Wrong panel input! Should be one of MSKCC, WES and Customized.")
        }
    }
    # check whether VAF, t_depth and n_depth column exists (has values in it)
    ## calculate t_depth based on alt and ref
    if (all(is.na(maf$t_depth))) {
        maf$t_depth <- maf$t_alt_count + maf$t_ref_count
    }
    ## calculate n_depth based on alt and ref
    if (all(is.na(maf$n_depth))) {
        maf$n_depth <- maf$n_alt_count + maf$n_ref_count
    }
    ## add VAF column
    if (!("VAF" %in% colnames(maf))) {
        maf$VAF <- vafHelper(maf$t_alt_count, maf$t_depth)
    }
    ## VAF filtering
    tags <- rownames(maf[maf$VAF < VAF, ])
    ## tumorAD, tumorDP, normalDP, normalAD, VAF ratio filtering
    tags <- union(tags, rownames(maf[((maf$t_alt_count < tumorAD) |
                                   (maf$t_depth < tumorDP) |
                                   (maf$n_depth < normalDP) |
                                   (maf$n_alt_count >= normalAD)), ]))
    VAFr <- (maf$t_alt_count/maf$t_depth)/(maf$n_alt_count/maf$n_depth)
    VAFr[which(is.na(VAFr))] <- 0
    # VAF ratio filtration
    tags <- union(tags, rownames(maf[VAFr < VAFratio, ]))
    maf[tags, 'CaTag'] <- paste0(maf[tags, 'CaTag'] , 'Q')
    return(maf)
}

# A helper function for calculating VAF and avoiding NA generation
vafHelper <- function(alt, depth) {
    res <- alt/depth
    # handle situation when depth is 0
    res[is.na(res)] <- 0
    return(res)
}
likelet/CaMutQC documentation built on Aug. 17, 2024, 4 a.m.