inst/doc/Introduction.R

## ----optChunkDefault, include=FALSE-------------------------------------------
stopifnot(
    requireNamespace("pander")
)
library(knitr)
opts_chunk$set( # Set these first, to make them default
    message = FALSE,
    warning=FALSE,
    error=FALSE
)
optChunkDefault <- opts_chunk$get()

## ----library------------------------------------------------------------------
library(TVTB)

## ----TVTBparamConstructor-----------------------------------------------------
tparam <- TVTBparam(Genotypes(
    ref = "0|0",
    het = c("0|1", "1|0", "0|2", "2|0", "1|2", "2|1"),
    alt = c("1|1", "2|2")),
    ranges = GenomicRanges::GRangesList(
        SLC24A5 = GenomicRanges::GRanges(
            seqnames = "15",
            IRanges::IRanges(
                start = 48413170, end = 48434757)
            )
        )
    )

## ----TVTBparamShow------------------------------------------------------------
tparam

## ----TVTBparamAsScanVcfParam--------------------------------------------------
svp <- as(tparam, "ScanVcfParam")
svp

## ----importPhenotypes---------------------------------------------------------
phenoFile <- system.file(
    "extdata", "integrated_samples.txt", package = "TVTB")
phenotypes <- S4Vectors::DataFrame(
    read.table(file = phenoFile, header = TRUE, row.names = 1))

## ----TabixFile----------------------------------------------------------------
vcfFile <- system.file(
    "extdata", "chr15.phase3_integrated.vcf.gz", package = "TVTB")
tabixVcf <- Rsamtools::TabixFile(file = vcfFile)

## ----svpTVTBparam-------------------------------------------------------------
VariantAnnotation::vcfInfo(svp(tparam)) <- vep(tparam)
VariantAnnotation::vcfGeno(svp(tparam)) <- "GT"
svp(tparam)

## ----readExpandVcf, message=FALSE---------------------------------------------
# Import variants as a CollapsedVCF object
vcf <- VariantAnnotation::readVcf(
    tabixVcf, param = tparam, colData = phenotypes)
# Expand into a ExpandedVCF object (bi-allelic records)
vcf <- VariantAnnotation::expand(x = vcf, row.names = TRUE)

## ----vcfShow, echo=FALSE------------------------------------------------------
vcf

## ----addOverallFrequencies----------------------------------------------------
initialInfo <- colnames(info(vcf))
vcf <- addOverallFrequencies(vcf = vcf)
setdiff(colnames(info(vcf)), initialInfo)

## ----addFrequenciesOverall----------------------------------------------------
vcf <- addFrequencies(vcf = vcf, force = TRUE)

## ----addPhenoLevelFrequencies-------------------------------------------------
initialInfo <- colnames(info(vcf))
vcf <- addPhenoLevelFrequencies(
    vcf = vcf, pheno = "super_pop", level = "AFR")
setdiff(colnames(info(vcf)), initialInfo)

## ----addFrequenciesPhenoLevel-------------------------------------------------
initialInfo <- colnames(info(vcf))
vcf <- addFrequencies(
    vcf,
    list(super_pop = c("EUR", "SAS", "EAS", "AMR"))
)
setdiff(colnames(info(vcf)), initialInfo)

## ----addFrequenciesPhenotype--------------------------------------------------
vcf <- addFrequencies(vcf, "pop")
head(grep("^pop_[[:alpha:]]+_REF", colnames(info(vcf)), value = TRUE))

## ----filterRulesMotivation, echo=FALSE, results='asis'------------------------
motivations <- readRDS(
    system.file("misc", "motivations_rules.rds", package = "TVTB")
)
pander::pandoc.table(
    motivations,
    paste(
        "Motivation for each of the new classes extending `FilterRules`,
        to define VCF filter rules."
    ),
    style = "multiline",
    split.cells = c(15, 45),
    split.tables = c(Inf)
)

## ----FilterRules--------------------------------------------------------------
S4Vectors::FilterRules(list(
    mixed = function(x){
        VariantAnnotation::fixed(x)[,"FILTER"] == "PASS" &
            VariantAnnotation::info(x)[,"MAF"] >= 0.05
    }
))

## ----VcfFixedRules------------------------------------------------------------
fixedR <- VcfFixedRules(list(
    pass = expression(FILTER == "PASS"),
    qual = expression(QUAL > 20)
))
fixedR

## ----VcfInfoRules-------------------------------------------------------------
infoR <- VcfInfoRules(
    exprs = list(
        rare = expression(MAF < 0.01 & MAF > 0),
        common = expression(MAF > 0.05),
        mac_ge3 = expression(HET + 2*ALT >= 3)),
    active = c(TRUE, TRUE, FALSE)
)
infoR

## ----VcfVepRules--------------------------------------------------------------
vepR <- VcfVepRules(exprs = list(
    missense = expression(Consequence %in% c("missense_variant")),
    CADD_gt15 = expression(CADD_PHRED > 15)
    ))
vepR

## ----VcfFilterRules-----------------------------------------------------------
vcfRules <- VcfFilterRules(fixedR, infoR, vepR)
vcfRules

## ----deactivateFilterRules----------------------------------------------------
active(vcfRules)["CADD_gt15"] <- FALSE
active(vcfRules)

## ----evalFilterRules----------------------------------------------------------
summary(eval(expr = infoR, envir = vcf))
summary(eval(expr = vcfRules, envir = vcf))
summary(evalSeparately(expr = vcfRules, envir = vcf))

## ----plotMAFCommon------------------------------------------------------------
plotInfo(
        subsetByFilter(vcf, vcfRules["common"]), "AAF",
        range(GenomicRanges::granges(vcf)),
        EnsDb.Hsapiens.v75::EnsDb.Hsapiens.v75,
        "super_pop",
        zero.rm = FALSE
    )

## ----plotMAFrare--------------------------------------------------------------
plotInfo(
        subsetByFilter(vcf, vcfRules["missense"]), "MAF",
        range(GenomicRanges::granges(vcf)),
        EnsDb.Hsapiens.v75::EnsDb.Hsapiens.v75,
        "super_pop",
        zero.rm = TRUE
    )

## ----pairsInfo----------------------------------------------------------------
pairsInfo(subsetByFilter(vcf, vcfRules["common"]), "AAF", "super_pop")

## ----sessionInfo, echo=FALSE--------------------------------------------------
sessionInfo()

Try the TVTB package in your browser

Any scripts or data that you put into this service are public.

TVTB documentation built on Nov. 8, 2020, 6:09 p.m.