inst/doc/VcfFilterRules.R

## ----filterRulesMotivation, echo=FALSE, results='asis'------------------------
motivations <- readRDS(
    system.file("misc", "motivations_rules.rds", package = "TVTB")
)
pander::pandoc.table(
    motivations,
    paste(
        "Motivation for each of the new classes extending `FilterRules`,
        to define VCF filter rules."
    ),
    style = "multiline",
    split.cells = c(15, 45),
    split.tables = c(Inf)
)

## ----FilterRules--------------------------------------------------------------
fr <- S4Vectors::FilterRules(list(
    mixed = function(x){
        VariantAnnotation::fixed(x)[,"FILTER"] == "PASS" &
            VariantAnnotation::info(x)[,"MAF"] >= 0.05
    }
))
fr

## ----makeCollapsedVCF---------------------------------------------------------
library(TVTB)
extdata <- system.file("extdata", package = "TVTB")
vcfFile <- file.path(extdata, "chr15.phase3_integrated.vcf.gz")
tabixVcf <- Rsamtools::TabixFile(file = vcfFile)
vcf <- VariantAnnotation::readVcf(file = tabixVcf)

## ----makeExpandedVCF----------------------------------------------------------
evcf <- VariantAnnotation::expand(x = vcf, row.names = TRUE)

## ----simpleRules--------------------------------------------------------------
fixedVcf <- colnames(fixed(vcf))
fixedVcf
infoVcf <- colnames(info(vcf))
infoVcf
csq <- ensemblVEP::parseCSQToGRanges(x = evcf)
vepVcf <- colnames(mcols(csq))
vepVcf

## ----simpleFixedRules---------------------------------------------------------
fixedRules <- VcfFixedRules(exprs = list(
    pass = expression(FILTER == "PASS"),
    qual20 = expression(QUAL >= 20)
))
active(fixedRules)["qual20"] <- FALSE
summary(evalSeparately(fixedRules, vcf))

## ----addFixedRule-------------------------------------------------------------
nucleotides <- c("A", "T", "G", "C")
SNPrule <- VcfFixedRules(exprs = list(
    SNP = expression(
    as.character(REF) %in% nucleotides &
        as.character(ALT) %in% nucleotides)
))
summary(evalSeparately(SNPrule, evcf, enclos = .GlobalEnv))

## ----simpleInfoRules----------------------------------------------------------
infoRules <- VcfInfoRules(exprs = list(
    samples = expression(NS > (0.9 * ncol(evcf))),
    avgSuperPopAF = expression(
        (EAS_AF + EUR_AF + AFR_AF + AMR_AF + SAS_AF) / 5 > 0.05
    )
))
summary(evalSeparately(infoRules, evcf, enclos = .GlobalEnv))

## ----infoFunctionRule---------------------------------------------------------
AFcutoff <- 0.05
popCutoff <- 2/3
filterFUN <- function(envir){
    # info(vcf) returns a DataFrame; rowSums below requires a data.frame
    df <- as.data.frame(envir)
    # Identify fields storing allele frequency in super-populations
    popFreqCols <- grep("[[:alpha:]]{3}_AF", colnames(df))
    # Count how many super-population have an allele freq above the cutoff
    popCount <- rowSums(df[,popFreqCols] > AFcutoff)
    # Convert the cutoff ratio to a integer count
    popCutOff <- popCutoff * length(popFreqCols)
    # Identifies variants where enough super-population pass the cutoff
    testRes <- (popCount > popCutOff)
    # Return a boolean vector, required by the eval method
    return(testRes)
}
funFilter <- VcfInfoRules(exprs = list(
    commonSuperPops = filterFUN
))
summary(evalSeparately(funFilter, evcf))

## ----applyInfoFunction--------------------------------------------------------
summary(filterFUN(info(evcf)))

## ----greplFilter--------------------------------------------------------------
missenseFilter <- VcfVepRules(
    exprs = list(
        exact = expression(Consequence == "missense_variant"),
        grepl = expression(grepl("missense", Consequence))
        ),
    vep = "CSQ")
summary(evalSeparately(missenseFilter, evcf))

## ----fixedInsertionFilter-----------------------------------------------------
fixedInsertionFilter <- VcfFixedRules(exprs = list(
    isInsertion = expression(
        Biostrings::width(ALT) > Biostrings::width(REF)
    )
))
evcf_fixedIns <- subsetByFilter(evcf, fixedInsertionFilter)
as.data.frame(fixed(evcf_fixedIns)[,c("REF", "ALT")])

## ----vepInsertionFilter-------------------------------------------------------
vepInsertionFilter <- VcfVepRules(exprs = list(
    isInsertion = expression(VARIANT_CLASS == "insertion")
))
evcf_vepIns <- subsetByFilter(evcf, vepInsertionFilter)
as.data.frame(fixed(evcf_vepIns)[,c("REF", "ALT")])

## ----fixedMultiallelicFilter--------------------------------------------------
multiallelicFilter <- VcfFixedRules(exprs = list(
    multiallelic = expression(lengths(ALT) > 1)
))
summary(eval(multiallelicFilter, vcf))

## ----combineRules-------------------------------------------------------------
vignetteRules <- VcfFilterRules(
    fixedRules,
    SNPrule,
    infoRules,
    vepInsertionFilter
)
vignetteRules
active(vignetteRules)
type(vignetteRules)
summary(evalSeparately(vignetteRules, evcf, enclos = .GlobalEnv))

## ----combinedRulesDeactivated-------------------------------------------------
active(vignetteRules)["SNP"] <- FALSE
summary(evalSeparately(vignetteRules, evcf, enclos = .GlobalEnv))

## ----combineFilterVep---------------------------------------------------------
combinedFilters <- VcfFilterRules(
    vignetteRules, # VcfFilterRules
    missenseFilter # VcfVepRules
)
type(vignetteRules)
type(combinedFilters)
active(vignetteRules)
active(missenseFilter)
active(combinedFilters)

## ----combineFilterFilter------------------------------------------------------
secondVcfFilter <- VcfFilterRules(missenseFilter)
secondVcfFilter

## ----combineMultipleRules-----------------------------------------------------
manyRules <- VcfFilterRules(
    vignetteRules, # VcfFilterRules
    secondVcfFilter, # VcfFilterRules
    funFilter # VcfInfoRules
)
manyRules
active(manyRules)
type(manyRules)
summary(evalSeparately(manyRules, evcf, enclos = .GlobalEnv))

## ----sessionInfo, echo=FALSE--------------------------------------------------
sessionInfo()

Try the TVTB package in your browser

Any scripts or data that you put into this service are public.

TVTB documentation built on Nov. 8, 2020, 6:09 p.m.