Nothing
## ----filterRulesMotivation, echo=FALSE, results='asis'------------------------
motivations <- readRDS(
system.file("misc", "motivations_rules.rds", package = "TVTB")
)
pander::pandoc.table(
motivations,
paste(
"Motivation for each of the new classes extending `FilterRules`,
to define VCF filter rules."
),
style = "multiline",
split.cells = c(15, 45),
split.tables = c(Inf)
)
## ----FilterRules--------------------------------------------------------------
fr <- S4Vectors::FilterRules(list(
mixed = function(x){
VariantAnnotation::fixed(x)[,"FILTER"] == "PASS" &
VariantAnnotation::info(x)[,"MAF"] >= 0.05
}
))
fr
## ----makeCollapsedVCF---------------------------------------------------------
library(TVTB)
extdata <- system.file("extdata", package = "TVTB")
vcfFile <- file.path(extdata, "chr15.phase3_integrated.vcf.gz")
tabixVcf <- Rsamtools::TabixFile(file = vcfFile)
vcf <- VariantAnnotation::readVcf(file = tabixVcf)
## ----makeExpandedVCF----------------------------------------------------------
evcf <- VariantAnnotation::expand(x = vcf, row.names = TRUE)
## ----simpleRules--------------------------------------------------------------
fixedVcf <- colnames(fixed(vcf))
fixedVcf
infoVcf <- colnames(info(vcf))
infoVcf
csq <- ensemblVEP::parseCSQToGRanges(x = evcf)
vepVcf <- colnames(mcols(csq))
vepVcf
## ----simpleFixedRules---------------------------------------------------------
fixedRules <- VcfFixedRules(exprs = list(
pass = expression(FILTER == "PASS"),
qual20 = expression(QUAL >= 20)
))
active(fixedRules)["qual20"] <- FALSE
summary(evalSeparately(fixedRules, vcf))
## ----addFixedRule-------------------------------------------------------------
nucleotides <- c("A", "T", "G", "C")
SNPrule <- VcfFixedRules(exprs = list(
SNP = expression(
as.character(REF) %in% nucleotides &
as.character(ALT) %in% nucleotides)
))
summary(evalSeparately(SNPrule, evcf, enclos = .GlobalEnv))
## ----simpleInfoRules----------------------------------------------------------
infoRules <- VcfInfoRules(exprs = list(
samples = expression(NS > (0.9 * ncol(evcf))),
avgSuperPopAF = expression(
(EAS_AF + EUR_AF + AFR_AF + AMR_AF + SAS_AF) / 5 > 0.05
)
))
summary(evalSeparately(infoRules, evcf, enclos = .GlobalEnv))
## ----infoFunctionRule---------------------------------------------------------
AFcutoff <- 0.05
popCutoff <- 2/3
filterFUN <- function(envir){
# info(vcf) returns a DataFrame; rowSums below requires a data.frame
df <- as.data.frame(envir)
# Identify fields storing allele frequency in super-populations
popFreqCols <- grep("[[:alpha:]]{3}_AF", colnames(df))
# Count how many super-population have an allele freq above the cutoff
popCount <- rowSums(df[,popFreqCols] > AFcutoff)
# Convert the cutoff ratio to a integer count
popCutOff <- popCutoff * length(popFreqCols)
# Identifies variants where enough super-population pass the cutoff
testRes <- (popCount > popCutOff)
# Return a boolean vector, required by the eval method
return(testRes)
}
funFilter <- VcfInfoRules(exprs = list(
commonSuperPops = filterFUN
))
summary(evalSeparately(funFilter, evcf))
## ----applyInfoFunction--------------------------------------------------------
summary(filterFUN(info(evcf)))
## ----greplFilter--------------------------------------------------------------
missenseFilter <- VcfVepRules(
exprs = list(
exact = expression(Consequence == "missense_variant"),
grepl = expression(grepl("missense", Consequence))
),
vep = "CSQ")
summary(evalSeparately(missenseFilter, evcf))
## ----fixedInsertionFilter-----------------------------------------------------
fixedInsertionFilter <- VcfFixedRules(exprs = list(
isInsertion = expression(
Biostrings::width(ALT) > Biostrings::width(REF)
)
))
evcf_fixedIns <- subsetByFilter(evcf, fixedInsertionFilter)
as.data.frame(fixed(evcf_fixedIns)[,c("REF", "ALT")])
## ----vepInsertionFilter-------------------------------------------------------
vepInsertionFilter <- VcfVepRules(exprs = list(
isInsertion = expression(VARIANT_CLASS == "insertion")
))
evcf_vepIns <- subsetByFilter(evcf, vepInsertionFilter)
as.data.frame(fixed(evcf_vepIns)[,c("REF", "ALT")])
## ----fixedMultiallelicFilter--------------------------------------------------
multiallelicFilter <- VcfFixedRules(exprs = list(
multiallelic = expression(lengths(ALT) > 1)
))
summary(eval(multiallelicFilter, vcf))
## ----combineRules-------------------------------------------------------------
vignetteRules <- VcfFilterRules(
fixedRules,
SNPrule,
infoRules,
vepInsertionFilter
)
vignetteRules
active(vignetteRules)
type(vignetteRules)
summary(evalSeparately(vignetteRules, evcf, enclos = .GlobalEnv))
## ----combinedRulesDeactivated-------------------------------------------------
active(vignetteRules)["SNP"] <- FALSE
summary(evalSeparately(vignetteRules, evcf, enclos = .GlobalEnv))
## ----combineFilterVep---------------------------------------------------------
combinedFilters <- VcfFilterRules(
vignetteRules, # VcfFilterRules
missenseFilter # VcfVepRules
)
type(vignetteRules)
type(combinedFilters)
active(vignetteRules)
active(missenseFilter)
active(combinedFilters)
## ----combineFilterFilter------------------------------------------------------
secondVcfFilter <- VcfFilterRules(missenseFilter)
secondVcfFilter
## ----combineMultipleRules-----------------------------------------------------
manyRules <- VcfFilterRules(
vignetteRules, # VcfFilterRules
secondVcfFilter, # VcfFilterRules
funFilter # VcfInfoRules
)
manyRules
active(manyRules)
type(manyRules)
summary(evalSeparately(manyRules, evcf, enclos = .GlobalEnv))
## ----sessionInfo, echo=FALSE--------------------------------------------------
sessionInfo()
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.