R/mutFilterCan.R

Defines functions mutFilterCan

Documented in mutFilterCan

#' mutFilterCan
#' @description Apply common filtering strategies on a MAF data frame for 
#' different cancer types.
#' @details Each cancer type comes with a preset of filtering parameters that
#' is passed to \code{mutFilterCom}. Parameters that belong to the preset of
#' the selected cancer type are fixed, and values supplied by the user for
#' them are ignored; all remaining parameters are forwarded as given.
#' The presets are:
#' \itemize{
#'   \item BLCA: SBmethod='Fisher', SBscore=20, minInterval=30, tumorDP=10,
#'   normalDP=10, tumorAD=5, Genomesprojects1000=TRUE, ExAC=TRUE
#'   \item BRCA: tumorDP=6, normalDP=6, tumorAD=5, VAF=0.1, dbSNP=TRUE,
#'   Genomesprojects1000=TRUE, ESP6500=TRUE, keepCOSMIC=TRUE
#'   \item COADREAD: tumorDP=5, VAF=0.2, dbSNP=TRUE, Genomesprojects1000=TRUE
#'   \item UCEC: dbSNP=TRUE, Genomesprojects1000=TRUE
#'   \item UCS: tumorDP=12, normalDP=5, tumorAD=5, keepCOSMIC=TRUE
#'   \item KIRC: dbSNP=TRUE
#'   \item KIRP: tumorDP=8, normalDP=6, VAF=0.07, dbSNP=TRUE, keepCOSMIC=TRUE,
#'   Genomesprojects1000=TRUE, ExAC=TRUE
#'   \item LCML: VAF=0.2, Genomesprojects1000=TRUE
#'   \item LAML: tumorAD=3, dbSNP=TRUE, ExAC=TRUE, ESP6500=TRUE,
#'   tagFILTER='PASS'
#'   \item LIHC: tumorDP=15, normalDP=15, VAF=0.1, dbSNP=TRUE,
#'   keepCOSMIC=TRUE, Genomesprojects1000=TRUE
#' }
#' @param maf An MAF data frame.
#' @param cancerType Type of cancer whose filtering parameters
#' need to be referred to. Must be a single value. Options are: "COADREAD",
#' "BRCA", "LIHC", "LAML", "LCML", "UCEC", "UCS", "BLCA", "KIRC" and "KIRP"
#' @param PONfile Panel-of-Normals files, which can be either obtained through 
#' GATK (https://gatk.broadinstitute.org/hc/en-us/articles/360035890631-Panel-of-Normals-PON-)
#' or generated by users. Should have at least four columns: CHROM, POS, REF, ALT
#' @param PONformat The format of PON file, either "vcf" or "txt". Default: "vcf"
#' @param panel The sequencing panel applied on the dataset. Parameters
#' for \code{\link{mutFilterQual}} function are set differently for different
#' panels. Default: "Customized". Options: "MSKCC", "WES".
#' Note: this argument is currently not forwarded to \code{mutFilterCom} by
#' this function.
#' @param tumorDP Threshold of tumor total depth. Default: 0
#' @param normalDP Threshold of normal total depth. Default: 0
#' @param tumorAD Threshold of tumor alternative allele depth. Default: 0
#' @param normalAD Threshold of normal alternative allele depth. Default: Inf.
#' Note: this argument is currently not forwarded to \code{mutFilterCom} by
#' this function.
#' @param VAF Threshold of VAF value. Default: 0
#' @param VAFratio Threshold of VAF ratio (tVAF/nVAF). Default: 0
#' @param SBmethod Method will be used to detect strand bias,
#' including 'SOR' and 'Fisher'. Default: 'SOR'. SOR: StrandOddsRatio
#' (https://gatk.broadinstitute.org/hc/en-us/articles/360041849111-
#' StrandOddsRatio)
#' @param SBscore Cutoff strand bias score used to filter variants.
#' Default: Inf
#' @param maxIndelLen Maximum length of indel accepted to be included.
#' Default: Inf
#' @param minInterval Maximum length of interval between an SNV and an indel
#' accepted to be included. Default: 0
#' @param tagFILTER Variants with specific tag in the FILTER column will be
#' kept. Default: NULL
#' @param dbVAF Threshold of VAF of certain population for variants
#'  in database. Default: 0.01.
#' @param ExAC Whether to filter variants listed in ExAC with VAF higher than
#' cutoff(set in VAF parameter). Default: FALSE.
#' @param Genomesprojects1000 Whether to filter variants listed in
#' Genomesprojects1000 with VAF higher than cutoff(set in VAF parameter).
#' Default: FALSE.
#' @param ESP6500 Whether to filter variants listed in ESP6500 with VAF higher
#' than cutoff(set in VAF parameter). Default: FALSE.
#' @param gnomAD Whether to filter variants listed in gnomAD with VAF higher
#' than cutoff(set in VAF parameter). Default: FALSE.
#' @param dbSNP Whether to filter variants listed in dbSNP. Default: FALSE.
#' @param keepCOSMIC Whether to keep variants in COSMIC even if
#' they are present in germline database. Default: FALSE.
#' @param keepType A group of variant classifications will be kept,
#' including 'exonic', 'nonsynonymous' and 'all'. Default: 'all'.
#' @param bedFile A file in bed format that contains region information.
#' Default: NULL
#' @param bedHeader Whether the input bed file has a header or not. 
#' Default: FALSE.
#' @param bedFilter Whether to filter the information in bed file or not, which
#' only leaves segments in Chr1-Ch22, ChrX and ChrY. Default: TRUE
#' @param mutFilter Whether to directly return a filtered MAF data frame.
#' If FALSE, a simulation filtration process will be run, and the original MAF
#' data frame with tags in CaTag column, and  a filter report will be returned.
#' If TRUE, a filtered MAF data frame and a filter report will be generated.
#' Default: FALSE
#' @param selectCols Columns will be contained in the filtered data frame.
#' If TRUE, the first 13 columns and 'Tumor_Sample_Barcode' column.
#' Or a vector contains column names will be kept. Default: FALSE
#' @param report Whether to generate report automatically. Default: TRUE
#' @param reportFile File name of the report. Default: 'FilterReport.html'
#' @param reportDir Path to the output report file. Default: './'
#' @param TMB Whether to calculate TMB. Default: FALSE
#' @param progressbar Whether to show progress bar when running this function
#' Default: TRUE
#' @param codelog If TRUE, your code, along with the parameters you set, 
#' will be export in a log file. It will be convenient for users to repeat 
#' experiments. Default: FALSE
#' @param codelogFile Where to store the codelog, only useful when codelog is
#' set to TRUE. Default: "mutFilterCan.log"
#' @param verbose Whether to generate message/notification during the 
#' filtration process. Default: TRUE.
#' @importFrom methods is
#'
#' @return An MAF data frame after common strategy filtration for a cancer type.
#' @return A filter report in HTML format
#'
#' @export mutFilterCan
#' @examples
#' maf <- vcfToMAF(system.file("extdata",
#' "WES_EA_T_1_mutect2.vep.vcf", package="CaMutQC"))
#' mafF <- mutFilterCan(maf, cancerType='BRCA', 
#' PONfile=system.file("extdata", "PON_test.txt", package="CaMutQC"), 
#' PONformat="txt", TMB=FALSE)
mutFilterCan <- function(maf, cancerType, PONfile, PONformat = "vcf", 
                         panel = 'Customized', tumorDP = 0,
                         normalDP = 0, tumorAD = 0, normalAD = Inf,
                         VAF = 0, VAFratio = 0, SBmethod = 'SOR',
                         SBscore = Inf, maxIndelLen = Inf, minInterval = 0,
                         tagFILTER = NULL, dbVAF = 0.01, ExAC = FALSE,
                         Genomesprojects1000 = FALSE, ESP6500 = FALSE,
                         gnomAD = FALSE, dbSNP = FALSE, keepCOSMIC = FALSE,
                         keepType = 'all', bedFile = NULL, bedFilter = TRUE,
                         bedHeader = FALSE, mutFilter = FALSE, 
                         selectCols = FALSE, report = TRUE,
                         reportFile = 'FilterReport.html', reportDir = './',
                         TMB = FALSE, progressbar = TRUE, codelog = FALSE, 
                         codelogFile = "mutFilterCan.log", verbose = TRUE) {
  # check user input
  if (!(is(maf, "data.frame"))) {
    stop("maf input should be a data frame, did you get it from vcfToMAF function?")
  }
  # Reject a missing, empty, NA or multi-valued cancerType up front; otherwise
  # the dispatch below would fail with a cryptic base R error.
  if (missing(cancerType) || length(cancerType) != 1L || is.na(cancerType)) {
    stop("cancerType should be a single, non-missing cancer type, e.g. 'BRCA'.")
  }

  # NOTE(review): `panel` and `normalAD` are accepted by this function but are
  # not forwarded to mutFilterCom in any branch below -- confirm whether that
  # is intended before relying on them.

  # Dispatch on the cancer type. Literal values in each call are the preset of
  # that cancer type; every other argument is forwarded unchanged.
  # as.character() keeps non-character input (e.g. a factor) matching by label,
  # as the previous `==` comparisons did.
  mafFiltered <- switch(
    as.character(cancerType),
    # BLCA
    BLCA = mutFilterCom(maf, SBmethod='Fisher', SBscore=20,
                        minInterval=30, tumorDP = 10, Genomesprojects1000=TRUE,
                        ExAC=TRUE, normalDP=10, tumorAD=5, VAF=VAF,
                        VAFratio=VAFratio, maxIndelLen=maxIndelLen,
                        tagFILTER=tagFILTER, dbVAF=dbVAF, ESP6500=ESP6500,
                        gnomAD=gnomAD, dbSNP=dbSNP, keepCOSMIC=keepCOSMIC,
                        keepType=keepType, bedFile=bedFile,
                        bedHeader=bedHeader, bedFilter=bedFilter,
                        mutFilter=mutFilter, selectCols=selectCols,
                        report=report, reportFile=reportFile,
                        reportDir=reportDir, TMB=TMB,
                        cancerType=cancerType, progressbar=progressbar,
                        codelog=codelog, codelogFile=codelogFile,
                        PONformat=PONformat, PONfile=PONfile,
                        verbose=verbose),
    # BRCA
    BRCA = mutFilterCom(maf, tumorAD=5, VAF=0.1,
                        dbSNP=TRUE, Genomesprojects1000=TRUE, ESP6500=TRUE,
                        keepCOSMIC=TRUE, tumorDP=6, normalDP=6,
                        VAFratio=VAFratio, SBmethod=SBmethod, ExAC=ExAC,
                        SBscore=SBscore, maxIndelLen=maxIndelLen,
                        minInterval=minInterval, bedFile=bedFile,
                        bedHeader=bedHeader, tagFILTER=tagFILTER, dbVAF=dbVAF,
                        gnomAD=gnomAD, keepType=keepType, bedFilter=bedFilter,
                        mutFilter=mutFilter, selectCols=selectCols,
                        report=report, reportFile=reportFile,
                        reportDir=reportDir, TMB=TMB, cancerType=cancerType,
                        progressbar=progressbar, codelog=codelog,
                        codelogFile=codelogFile, PONformat=PONformat,
                        PONfile=PONfile, verbose=verbose),
    # COADREAD
    COADREAD = mutFilterCom(maf, tumorDP=5, VAF=0.2,
                            dbSNP=TRUE, Genomesprojects1000=TRUE,
                            normalDP=normalDP, tumorAD=tumorAD,
                            VAFratio=VAFratio,
                            SBmethod=SBmethod, SBscore=SBscore,
                            maxIndelLen=maxIndelLen, minInterval=minInterval,
                            ExAC=ExAC, tagFILTER=tagFILTER, dbVAF=dbVAF,
                            ESP6500=ESP6500, gnomAD=gnomAD,
                            keepCOSMIC=keepCOSMIC,
                            keepType=keepType, bedFile=bedFile,
                            bedFilter=bedFilter,
                            bedHeader=bedHeader, mutFilter=mutFilter,
                            selectCols=selectCols, report=report,
                            reportFile=reportFile, reportDir=reportDir,
                            TMB=TMB,
                            cancerType=cancerType, progressbar=progressbar,
                            codelog=codelog, codelogFile=codelogFile,
                            PONformat=PONformat, PONfile=PONfile,
                            verbose=verbose),
    # UCEC
    UCEC = mutFilterCom(maf, dbSNP=TRUE, Genomesprojects1000=TRUE,
                        tumorDP=tumorDP, VAF=VAF, normalDP=normalDP,
                        tumorAD=tumorAD, VAFratio=VAFratio, SBmethod=SBmethod,
                        SBscore=SBscore, maxIndelLen=maxIndelLen,
                        minInterval=minInterval, ExAC=ExAC,
                        tagFILTER=tagFILTER, dbVAF=dbVAF, ESP6500=ESP6500,
                        gnomAD=gnomAD, keepCOSMIC=keepCOSMIC,
                        keepType=keepType, bedFile=bedFile, TMB=TMB,
                        bedFilter=bedFilter, bedHeader=bedHeader,
                        mutFilter=mutFilter, selectCols=selectCols,
                        report=report, reportFile=reportFile,
                        reportDir=reportDir, cancerType=cancerType,
                        progressbar=progressbar, codelog=codelog,
                        codelogFile=codelogFile, PONformat=PONformat,
                        PONfile=PONfile, verbose=verbose),
    # UCS
    UCS = mutFilterCom(maf, tumorAD=5, tumorDP=12, normalDP=5,
                       keepCOSMIC=TRUE, dbSNP=dbSNP,
                       Genomesprojects1000=Genomesprojects1000,
                       VAF=VAF, VAFratio=VAFratio,
                       SBmethod=SBmethod, keepType=keepType,
                       SBscore=SBscore, maxIndelLen=maxIndelLen,
                       minInterval=minInterval, ExAC=ExAC,
                       tagFILTER=tagFILTER, dbVAF=dbVAF,
                       ESP6500=ESP6500, gnomAD=gnomAD,
                       bedFile=bedFile, bedFilter=bedFilter,
                       bedHeader=bedHeader, mutFilter=mutFilter,
                       selectCols=selectCols, report=report,
                       reportFile=reportFile, reportDir=reportDir,
                       TMB=TMB, cancerType=cancerType,
                       progressbar=progressbar, codelog=codelog,
                       codelogFile=codelogFile, verbose=verbose,
                       PONformat=PONformat, PONfile=PONfile),
    # KIRC
    KIRC = mutFilterCom(maf, dbSNP=TRUE,
                        Genomesprojects1000=Genomesprojects1000,
                        tumorDP=tumorDP, VAF=VAF,
                        normalDP=normalDP, tumorAD=tumorAD,
                        VAFratio=VAFratio, SBmethod=SBmethod,
                        SBscore=SBscore, maxIndelLen=maxIndelLen,
                        minInterval=minInterval, ExAC=ExAC,
                        tagFILTER=tagFILTER, dbVAF=dbVAF,
                        ESP6500=ESP6500, gnomAD=gnomAD,
                        keepCOSMIC=keepCOSMIC, keepType=keepType,
                        bedFile=bedFile, bedFilter=bedFilter,
                        bedHeader=bedHeader, mutFilter=mutFilter,
                        selectCols=selectCols, report=report,
                        reportFile=reportFile, reportDir=reportDir,
                        TMB=TMB, cancerType=cancerType,
                        progressbar=progressbar, codelog=codelog,
                        codelogFile=codelogFile, verbose=verbose,
                        PONformat=PONformat, PONfile=PONfile),
    # KIRP
    KIRP = mutFilterCom(maf, tumorDP=8, normalDP=6, VAF=0.07,
                        dbSNP=TRUE, keepCOSMIC=TRUE,
                        Genomesprojects1000=TRUE, ExAC=TRUE,
                        tumorAD=tumorAD, keepType=keepType,
                        VAFratio=VAFratio, SBmethod=SBmethod,
                        SBscore=SBscore, maxIndelLen=maxIndelLen,
                        minInterval=minInterval,
                        tagFILTER=tagFILTER, dbVAF=dbVAF,
                        ESP6500=ESP6500, gnomAD=gnomAD,
                        bedFile=bedFile, bedFilter=bedFilter,
                        bedHeader=bedHeader, mutFilter=mutFilter,
                        selectCols=selectCols, report=report,
                        reportFile=reportFile, reportDir=reportDir,
                        TMB=TMB, cancerType=cancerType,
                        progressbar=progressbar, codelog=codelog,
                        codelogFile=codelogFile, verbose=verbose,
                        PONformat=PONformat, PONfile=PONfile),
    # LCML
    LCML = mutFilterCom(maf, VAF=0.2, Genomesprojects1000=TRUE,
                        dbSNP=dbSNP, tumorDP=tumorDP,
                        normalDP=normalDP, tumorAD=tumorAD,
                        VAFratio=VAFratio, SBmethod=SBmethod,
                        SBscore=SBscore, maxIndelLen=maxIndelLen,
                        minInterval=minInterval, ExAC=ExAC,
                        tagFILTER=tagFILTER, dbVAF=dbVAF,
                        ESP6500=ESP6500, gnomAD=gnomAD,
                        keepCOSMIC=keepCOSMIC, keepType=keepType,
                        bedFile=bedFile, bedFilter=bedFilter,
                        bedHeader=bedHeader, mutFilter=mutFilter,
                        selectCols=selectCols, report=report,
                        reportFile=reportFile, reportDir=reportDir,
                        TMB=TMB, cancerType=cancerType,
                        progressbar=progressbar, codelog=codelog,
                        codelogFile=codelogFile, verbose=verbose,
                        PONformat=PONformat, PONfile=PONfile),
    # LAML
    LAML = mutFilterCom(maf, dbSNP=TRUE, tumorAD=3,
                        ExAC=TRUE, ESP6500=TRUE, tagFILTER='PASS',
                        tumorDP=tumorDP, VAF=VAF,
                        normalDP=normalDP, dbVAF=dbVAF,
                        VAFratio=VAFratio, SBmethod=SBmethod,
                        SBscore=SBscore, maxIndelLen=maxIndelLen,
                        minInterval=minInterval, gnomAD=gnomAD,
                        keepCOSMIC=keepCOSMIC, keepType=keepType,
                        bedFile=bedFile, bedFilter=bedFilter,
                        bedHeader=bedHeader, mutFilter=mutFilter,
                        selectCols=selectCols, report=report,
                        reportFile=reportFile, reportDir=reportDir,
                        TMB=TMB, cancerType=cancerType,
                        progressbar=progressbar, codelog=codelog,
                        codelogFile=codelogFile, verbose=verbose,
                        PONformat=PONformat, PONfile=PONfile),
    # LIHC
    LIHC = mutFilterCom(maf, tumorDP=15, normalDP=15, VAF=0.1,
                        dbSNP=TRUE, keepCOSMIC=TRUE,
                        Genomesprojects1000=TRUE,
                        tumorAD=tumorAD, keepType=keepType,
                        VAFratio=VAFratio, SBmethod=SBmethod,
                        SBscore=SBscore, maxIndelLen=maxIndelLen,
                        minInterval=minInterval, ExAC=ExAC,
                        tagFILTER=tagFILTER, dbVAF=dbVAF,
                        ESP6500=ESP6500, gnomAD=gnomAD,
                        bedFile=bedFile, bedFilter=bedFilter,
                        bedHeader=bedHeader, mutFilter=mutFilter,
                        selectCols=selectCols, report=report,
                        reportFile=reportFile, reportDir=reportDir,
                        TMB=TMB, cancerType=cancerType,
                        progressbar=progressbar, codelog=codelog,
                        codelogFile=codelogFile, verbose=verbose,
                        PONformat=PONformat, PONfile=PONfile),
    # anything else is not a supported cancer type
    stop('Invalid cancer type detected, please provide a valid cancer type. ',
         'Options are: "COADREAD", "BRCA", "LIHC", "LAML", "LCML", "UCEC", ',
         '"UCS", "BLCA", "KIRC" and "KIRP".')
  )
  mafFiltered
}
likelet/CaMutQC documentation built on April 3, 2024, 9:06 a.m.