R/mutSelection.R

Defines functions mutSelection

Documented in mutSelection

#' mutSelection
#' @description Select candidate variants for cancer research.
#'
#' @details The selection runs three steps in order: germline database
#' selection (\code{mutFilterDB}), variant type selection
#' (\code{mutFilterType}) and region selection (\code{mutFilterReg}).
#' An error is raised if \code{maf} is not a data frame.
#'
#' @param maf An MAF data frame, generated by \code{\link{vcfToMAF}} function.
#' @param dbVAF Threshold of VAF of certain population for variants
#'  in database. Default: 0.01
#' @param ExAC Whether to filter variants listed in ExAC with VAF higher than
#' the cutoff (set by the \code{dbVAF} parameter). Default: TRUE.
#' @param Genomesprojects1000 Whether to filter variants listed in
#' Genomesprojects1000 with VAF higher than the cutoff (set by the
#' \code{dbVAF} parameter). Default: TRUE.
#' @param ESP6500 Whether to filter variants listed in ESP6500 with VAF higher
#' than the cutoff (set by the \code{dbVAF} parameter). Default: TRUE.
#' @param gnomAD Whether to filter variants listed in gnomAD with VAF higher
#' than the cutoff (set by the \code{dbVAF} parameter). Default: TRUE.
#' @param dbSNP Whether to filter variants listed in dbSNP. Default: FALSE.
#' @param keepCOSMIC Whether to keep variants in COSMIC even if
#' they are present in germline databases. Default: TRUE.
#' @param keepType A group of variant classifications will be kept,
#' including 'exonic', 'nonsynonymous' and 'all'. Default: 'exonic'.
#' @param bedFile A file in bed format that contains region information.
#' Default: NULL
#' @param bedHeader Whether the input bed file has a header or not.
#' Default: FALSE.
#' @param bedFilter Whether to filter the information in bed file or not, which
#' only leaves segments in Chr1-Chr22, ChrX and ChrY. Default: TRUE
#' @param progressbar Whether to show a progress bar when running this
#' function. Default: TRUE
#' @param verbose Whether to generate message/notification during the
#' filtration process. Default: TRUE.
#'
#' @import vcfR stringr
#' @importFrom  dplyr filter
#' @importFrom methods is
#'
#' @return An MAF data frame with variants after selection.
#'
#' @export mutSelection
#' @examples
#' maf <- vcfToMAF(system.file("extdata",
#' "WES_EA_T_1_mutect2.vep.vcf", package="CaMutQC"))
#' mafF <- mutSelection(maf)

mutSelection <- function(maf, dbVAF = 0.01, ExAC = TRUE,
                         Genomesprojects1000 = TRUE, ESP6500 = TRUE,
                         gnomAD = TRUE, dbSNP = FALSE,
                         keepCOSMIC = TRUE, keepType = 'exonic',
                         bedFile = NULL, bedHeader = FALSE,
                         bedFilter = TRUE, progressbar = TRUE, verbose = TRUE){
    # check user input
    if (!(is(maf, "data.frame"))) {
      stop("maf input should be a data frame, did you get it from vcfToMAF function?")
    }

    # build a progress bar only if asked; pb stays NULL otherwise
    pb <- NULL
    if (progressbar) {
        pb <- txtProgressBar(min=0, max=100, style=3)
        # safety net: terminate the bar's console line even when one of the
        # selection steps below raises an error
        on.exit(close(pb), add=TRUE)
    }
    # move the bar to `value` percent; does nothing when no bar was requested
    advance <- function(value) {
        if (!is.null(pb)) {
            setTxtProgressBar(pb, value)
        }
    }

    # database selection
    advance(40)
    maf <- mutFilterDB(maf, dbVAF=dbVAF, ExAC=ExAC,
                       Genomesprojects1000=Genomesprojects1000,
                       ESP6500=ESP6500, gnomAD=gnomAD, dbSNP=dbSNP,
                       keepCOSMIC=keepCOSMIC, verbose=verbose)

    # variant type selection
    advance(60)
    maf <- mutFilterType(maf, keepType=keepType)

    # region selection
    advance(80)
    maf <- mutFilterReg(maf, bedFile=bedFile, bedHeader=bedHeader,
                        bedFilter=bedFilter, verbose=verbose)

    # complete selection
    advance(100)
    if (!is.null(pb)) {
        # close now so the final message starts on a fresh line; the on.exit
        # close registered above then has nothing left to print
        close(pb)
    }
    if (verbose) {
        message('  Cancer somatic variant selection is done!')
    }
    return(maf)
}
likelet/CaMutQC documentation built on Aug. 17, 2024, 4 a.m.