R/mutFilterTech.R

Defines functions mutFilterTech

Documented in mutFilterTech

#' mutFilterTech
#' @description Filter potential artifacts produced through technical issues,
#' including filtration for sequencing quality, strand bias, adjacent indel
#' tag, normal depth, panel of normal (PON) and FILTER field. The filters are
#' applied in that order, each one receiving the MAF returned by the previous
#' one.
#' @param maf An MAF data frame, generated by \code{\link{vcfToMAF}} function.
#' @param PONfile Panel-of-Normals files, which can be either obtained through 
#' GATK (https://gatk.broadinstitute.org/hc/en-us/articles/360035890631-Panel-of-Normals-PON-)
#' or generated by users. Should have at least four columns: CHROM, POS, REF, ALT
#' @param PONformat The format of PON file, either "vcf" or "txt". Default: "vcf"
#' @param panel The sequencing panel applied on the dataset. Parameters
#' for \code{\link{mutFilterQual}} function are set differently for different
#' panels. Default: "Customized". Options: "MSKCC", "WES".
#' @param tumorDP Threshold of tumor total depth. Default: 20
#' @param normalDP Threshold of normal total depth. Default: 10
#' @param tumorAD Threshold of tumor alternative allele depth. Default: 5
#' @param normalAD Threshold of normal alternative allele depth. Default: Inf
#' @param VAF Threshold of VAF value. Default: 0.05
#' @param VAFratio Threshold of VAF ratio (tVAF/nVAF). Default: 0
#' @param SBmethod Method will be used to detect strand bias,
#' including 'SOR' and 'Fisher'. Default: 'SOR'. SOR: StrandOddsRatio
#' (https://gatk.broadinstitute.org/hc/en-us/articles/360041849111-
#' StrandOddsRatio)
#' @param SBscore Cutoff strand bias score used to filter variants.
#' Default: 3
#' @param maxIndelLen Maximum length of indel accepted to be included.
#' Default: 50
#' @param minInterval Minimum length of interval between an SNV and an indel
#' accepted to be included. Default: 10
#' @param tagFILTER Variants with specific tag in FILTER column will be kept.
#' A character vector with several accepted tags may be supplied. Variants
#' whose FILTER value is not NA and is not one of these tags get an 'F'
#' appended to their CaTag. Set to NULL if you want to skip this filter.
#' Default: 'PASS'
#' @param progressbar Whether to show progress bar when running this function
#' Default: TRUE
#' @param verbose Whether to generate message/notification during the 
#' filtration process. Default: TRUE.
#'
#' @import dplyr
#' @importFrom methods is
#'
#' @return An MAF data frame after filtration for technical issue
#'
#' @export mutFilterTech
#' @examples
#' maf <- vcfToMAF(system.file("extdata",
#' "WES_EA_T_1_mutect2.vep.vcf", package="CaMutQC"))
#' mafF <- mutFilterTech(maf, PONfile=system.file("extdata",
#' "PON_test.txt", package="CaMutQC"), PONformat="txt")

mutFilterTech <- function(maf, PONfile, PONformat = "vcf", panel = "Customized", 
                          tumorDP = 20, normalDP = 10, tumorAD = 5, 
                          normalAD = Inf, VAF = 0.05, VAFratio = 0, 
                          SBmethod = 'SOR', SBscore = 3, maxIndelLen = 50, 
                          minInterval = 10, tagFILTER = 'PASS', 
                          progressbar = TRUE, verbose = TRUE){
    # check user input
    if (!(is(maf, "data.frame"))) {
        stop("maf input should be a data frame, did you get it from vcfToMAF function?")
    }

    # build a progress bar if asked; pb stays NULL when it is switched off
    pb <- NULL
    if (progressbar) {
        pb <- txtProgressBar(min=0, max=100, style=3)
        # make sure the bar is closed even when one of the filters below fails
        on.exit(if (!is.null(pb)) close(pb), add=TRUE)
    }
    # move the bar to the given percentage (no-op without a bar)
    advance <- function(value) {
        if (!is.null(pb)) {
            setTxtProgressBar(pb, value)
        }
        invisible(NULL)
    }

    # sequencing quality filtration
    maf <- mutFilterQual(maf, panel=panel, tumorDP=tumorDP, 
                         tumorAD=tumorAD, normalDP=normalDP, 
                         normalAD=normalAD, VAF=VAF, VAFratio=VAFratio)
    # strand bias filtration
    advance(30)
    maf <- mutFilterSB(maf, method=SBmethod, SBscore=SBscore)
    # adjacent indel tag filtration
    advance(50)
    maf <- mutFilterAdj(maf, maxIndelLen=maxIndelLen, minInterval=minInterval)
    # normalDP filtration
    advance(60)
    maf <- mutFilterNormalDP(maf, verbose=verbose)
    # PON filtration
    advance(80)
    maf <- mutFilterPON(maf, PONfile=PONfile, PONformat=PONformat, 
                        verbose=verbose)
    # FILTER field filtration
    if (!(is.null(tagFILTER))) {
        # %in% never returns NA and handles several accepted tags, whereas
        # `!=` would silently recycle a multi-tag vector across the rows
        flagged <- (!(is.na(maf$FILTER))) & (!(maf$FILTER %in% tagFILTER))
        if (any(flagged)) {
            maf$CaTag[flagged] <- paste0(maf$CaTag[flagged], 'F')
        }
    }
    # finish and close the progress bar before the final message is printed
    advance(100)
    if (!is.null(pb)) {
        close(pb)
        # cleared so the on.exit handler does not close the bar a second time
        pb <- NULL
    }
    # complete filtration
    if (verbose) {
        message('  Filtration for technical issue is done!')
    }
    maf
}
likelet/CaMutQC documentation built on Aug. 17, 2024, 4 a.m.