inst/doc/derfinder-quickstart.R

## ----'installDer', eval = FALSE-----------------------------------------------
#  if (!requireNamespace("BiocManager", quietly = TRUE)) {
#        install.packages("BiocManager")
#    }
#  
#  BiocManager::install("derfinder")
#  
#  ## Check that you have a valid Bioconductor installation
#  BiocManager::valid()

## ----'citation'---------------------------------------------------------------
## Citation info
citation("derfinder")

## ----vignetteSetup, echo=FALSE, message=FALSE, warning = FALSE----------------
## Track time spent on making the vignette
startTime <- Sys.time()

## Bib setup
library("RefManageR")

## Write bibliography information
bib <- c(
    derfinder = citation("derfinder")[1],
    BiocStyle = citation("BiocStyle"),
    knitr = citation("knitr")[3],
    rmarkdown = citation("rmarkdown")[1],
    brainspan = RefManageR::BibEntry(
        bibtype = "Unpublished",
        key = "brainspan",
        title = "Atlas of the Developing Human Brain [Internet]. Funded by ARRA Awards 1RC2MH089921-01, 1RC2MH090047-01, and 1RC2MH089929-01.",
        author = "BrainSpan", year = 2011, url = "http://www.brainspan.org/"
    ),
    originalder = citation("derfinder")[2],
    R = citation(),
    IRanges = citation("IRanges"),
    sessioninfo = citation("sessioninfo"),
    testthat = citation("testthat"),
    GenomeInfoDb = RefManageR::BibEntry(
        bibtype = "manual",
        key = "GenomeInfoDb",
        author = "Sonali Arora and Martin Morgan and Marc Carlson and H. Pagès",
        title = "GenomeInfoDb: Utilities for manipulating chromosome and other 'seqname' identifiers",
        year = 2017, doi = "10.18129/B9.bioc.GenomeInfoDb"
    ),
    GenomicRanges = citation("GenomicRanges"),
    ggplot2 = citation("ggplot2"),
    bumphunter = citation("bumphunter")[1],
    TxDb.Hsapiens.UCSC.hg19.knownGene = citation("TxDb.Hsapiens.UCSC.hg19.knownGene"),
    AnnotationDbi = RefManageR::BibEntry(
        bibtype = "manual",
        key = "AnnotationDbi",
        author = "Hervé Pagès and Marc Carlson and Seth Falcon and Nianhua Li",
        title = "AnnotationDbi: Annotation Database Interface",
        year = 2017, doi = "10.18129/B9.bioc.AnnotationDbi"
    ),
    BiocParallel = citation("BiocParallel"),
    derfinderHelper = citation("derfinderHelper"),
    GenomicAlignments = citation("GenomicAlignments"),
    GenomicFeatures = citation("GenomicFeatures"),
    GenomicFiles = citation("GenomicFiles"),
    Hmisc = citation("Hmisc"),
    qvalue = citation("qvalue"),
    Rsamtools = citation("Rsamtools"),
    rtracklayer = citation("rtracklayer"),
    S4Vectors = RefManageR::BibEntry(
        bibtype = "manual", key = "S4Vectors",
        author = "Hervé Pagès and Michael Lawrence and Patrick Aboyoun",
        title = "S4Vectors: S4 implementation of vector-like and list-like objects",
        year = 2017, doi = "10.18129/B9.bioc.S4Vectors"
    ),
    bumphunterPaper = RefManageR::BibEntry(
        bibtype = "article",
        key = "bumphunterPaper",
        title = "Bump hunting to identify differentially methylated regions in epigenetic epidemiology studies",
        author = "Jaffe, Andrew E and Murakami, Peter and Lee, Hwajin and Leek, Jeffrey T and Fallin, M Daniele and Feinberg, Andrew P and Irizarry, Rafael A",
        year = 2012, journal = "International Journal of Epidemiology"
    ),
    derfinderData = citation("derfinderData"),
    RefManageR = citation("RefManageR")[1]
)

## ----'ultraQuick', eval = FALSE-----------------------------------------------
#  ## Load libraries
#  library("derfinder")
#  library("derfinderData")
#  library("GenomicRanges")
#  
#  ## Determine the files to use and fix the names
#  files <- rawFiles(system.file("extdata", "AMY", package = "derfinderData"),
#      samplepatt = "bw", fileterm = NULL
#  )
#  names(files) <- gsub(".bw", "", names(files))
#  
#  ## Load the data from disk -- On Windows you have to load data from Bam files
#  fullCov <- fullCoverage(files = files, chrs = "chr21", verbose = FALSE)
#  
#  ## Get the region matrix of Expressed Regions (ERs)
#  regionMat <- regionMatrix(fullCov, cutoff = 30, L = 76, verbose = FALSE)
#  
#  ## Get pheno table
#  pheno <- subset(brainspanPheno, structure_acronym == "AMY")
#  
#  ## Identify which ERs are differentially expressed, that is, find the DERs
#  library("DESeq2")
#  
#  ## Round matrix
#  counts <- round(regionMat$chr21$coverageMatrix)
#  
#  ## Round matrix and specify design
#  dse <- DESeqDataSetFromMatrix(counts, pheno, ~ group + gender)
#  
#  ## Perform DE analysis
#  dse <- DESeq(dse, test = "LRT", reduced = ~gender, fitType = "local")
#  
#  ## Extract results
#  mcols(regionMat$chr21$regions) <- c(mcols(regionMat$chr21$regions), results(dse))
#  
#  ## Save info in an object with a shorter name
#  ers <- regionMat$chr21$regions
#  ers

## ----'start', message=FALSE---------------------------------------------------
## Load libraries
library("derfinder")
library("derfinderData")
library("GenomicRanges")

## ----'locateAMYfiles'---------------------------------------------------------
## Determine the files to use and fix the names
files <- rawFiles(system.file("extdata", "AMY", package = "derfinderData"),
    samplepatt = "bw", fileterm = NULL
)
names(files) <- gsub(".bw", "", names(files))

## ----'getData', eval = .Platform$OS.type != "windows"-------------------------
## Load the data from disk
fullCov <- fullCoverage(
    files = files, chrs = "chr21", verbose = FALSE,
    totalMapped = rep(1, length(files)), targetSize = 1
)

## ----'getDataWindows', eval = .Platform$OS.type == "windows", echo = FALSE----
#  ## Load data in Windows case
#  foo <- function() {
#      load(system.file("extdata", "fullCov", "fullCovAMY.RData",
#          package = "derfinderData"
#      ))
#      return(fullCovAMY)
#  }
#  fullCov <- foo()

## ----'regionMatrix'-----------------------------------------------------------
## Get the region matrix of Expressed Regions (ERs)
regionMat <- regionMatrix(fullCov, cutoff = 30, L = 76, verbose = FALSE)

## ----'exploreRegMatRegs'------------------------------------------------------
## regions output
regionMat$chr21$regions

## ----'exploreRegMatBP'--------------------------------------------------------
## Base-level coverage matrices for each of the regions
## Useful for plotting
lapply(regionMat$chr21$bpCoverage[1:2], head, n = 2)

## Check dimensions. First region is 565 long, second one is 138 bp long.
## The columns match the number of samples (12 in this case).
lapply(regionMat$chr21$bpCoverage[1:2], dim)

## ----'exploreRegMatrix'-------------------------------------------------------
## Dimensions of the coverage matrix
dim(regionMat$chr21$coverageMatrix)

## Coverage for each region. This matrix can then be used with limma or other pkgs
head(regionMat$chr21$coverageMatrix)

## ----'phenoData'--------------------------------------------------------------
## Get pheno table
pheno <- subset(brainspanPheno, structure_acronym == "AMY")

## ----'identifyDERsDESeq2'-----------------------------------------------------
## Identify which ERs are differentially expressed, that is, find the DERs
library("DESeq2")

## Round matrix
counts <- round(regionMat$chr21$coverageMatrix)

## Round matrix and specify design
dse <- DESeqDataSetFromMatrix(counts, pheno, ~ group + gender)

## Perform DE analysis
dse <- DESeq(dse, test = "LRT", reduced = ~gender, fitType = "local")

## Extract results
mcols(regionMat$chr21$regions) <- c(
    mcols(regionMat$chr21$regions),
    results(dse)
)

## Save info in an object with a shorter name
ers <- regionMat$chr21$regions
ers

## ----'vennRegions', fig.cap = "Venn diagram showing ERs by annotation class."----
## Find overlaps between regions and summarized genomic annotation
annoRegs <- annotateRegions(ers, genomicState$fullGenome, verbose = FALSE)

library("derfinderPlot")
venn <- vennRegions(annoRegs,
    counts.col = "blue",
    main = "Venn diagram using TxDb.Hsapiens.UCSC.hg19.knownGene annotation"
)

## ----'nearestGene'------------------------------------------------------------
## Load database of interest
library("TxDb.Hsapiens.UCSC.hg19.knownGene")
txdb <- keepSeqlevels(TxDb.Hsapiens.UCSC.hg19.knownGene, "chr21")

## Find nearest feature
library("bumphunter")
genes <- annotateTranscripts(txdb)
annotation <- matchGenes(ers, subject = genes)

## Restore seqlevels
txdb <- restoreSeqlevels(txdb)

## View annotation results
head(annotation)
## You can use derfinderPlot::plotOverview() to visualize this information

## ----'firstfive', fig.cap = paste0("Base-pair resolution plot of differentially expressed region ", 1:5, ".")----
## Extract the region coverage
regionCov <- regionMat$chr21$bpCoverage
plotRegionCoverage(
    regions = ers, regionCoverage = regionCov,
    groupInfo = pheno$group, nearestAnnotation = annotation,
    annotatedRegions = annoRegs, whichRegions = seq_len(5), txdb = txdb,
    scalefac = 1, ask = FALSE, verbose = FALSE
)

## ----createVignette, eval=FALSE-----------------------------------------------
#  ## Create the vignette
#  library("rmarkdown")
#  system.time(render("derfinder-quickstart.Rmd", "BiocStyle::html_document"))
#  
#  ## Extract the R code
#  library("knitr")
#  knit("derfinder-quickstart.Rmd", tangle = TRUE)

## ----reproducibility1, echo=FALSE---------------------------------------------
## Date the vignette was generated
Sys.time()

## ----reproducibility2, echo=FALSE---------------------------------------------
## Processing time in seconds
totalTime <- diff(c(startTime, Sys.time()))
round(totalTime, digits = 3)

## ----reproducibility3, echo=FALSE-------------------------------------------------------------------------------------
## Session info
library("sessioninfo")
options(width = 120)
session_info()

## ----vignetteBiblio, results = 'asis', echo = FALSE, warning = FALSE, message = FALSE---------------------------------
## Print bibliography
PrintBibliography(bib, .opts = list(hyperlink = "to.doc", style = "html"))

Try the derfinder package in your browser

Any scripts or data that you put into this service are public.

derfinder documentation built on Dec. 20, 2020, 2 a.m.