# Global knitr chunk defaults: echo the source of every chunk and use a
# 5 x 5 figure size for all plots.
knitr::opts_chunk$set(
  echo = TRUE,
  fig.width = 5,
  fig.height = 5
)
# options(digits=3)

This is a vignette for the explainr R package with the Bioconductor theme. We consider the Bottomly data set available on ReCount. We find the differentially expressed genes using the R/Bioconductor packages edgeR and limma (including limma's voom transformation); the resulting object is an MArrayLM object. We then apply the explain() function to the MArrayLM object, which explains the methods used for finding the differentially expressed genes and provides some basic summary plots.

Load libraries and RNA-Seq data

# Load libraries
library(Biobase)
library(biomaRt)
library(edgeR)
library(limma)
library(dplyr)
library(explainr)

# Load data
# load() returns the *names* of the restored objects (not the objects
# themselves), so bottomly.local is a character vector; the data is restored
# into the workspace under its saved name and is used below as bottomly.eset.
# The connection is created explicitly so that it can be closed after loading.
bottomly.con <- url("http://bowtie-bio.sourceforge.net/recount/ExpressionSets/bottomly_eset.RData")
bottomly.local <- load(bottomly.con)
close(bottomly.con)

# Fail early, with a clear message, if the download did not provide the
# object the rest of the analysis depends on.
if (!("bottomly.eset" %in% bottomly.local)) {
  stop("'bottomly.eset' was not found in the downloaded .RData file",
       call. = FALSE)
}

Create the RNA-Seq count table (extracted from the ExpressionSet), the phenotypic information, and the design matrix.

# Count matrix extracted from the ExpressionSet.
eset <- exprs(bottomly.eset)

# Keep only the rows that have at least one non-zero count. The vectorized
# rowSums() mask also works for a zero-row matrix, where 1:nrow(eset) would
# iterate over c(1, 0) and fail.
keepMeID <- rowSums(eset != 0, na.rm = TRUE) > 0
# drop = FALSE keeps a matrix even if only a single row survives the filter.
eset <- eset[keepMeID, , drop = FALSE]

# Sample information about the experiment, via the Biobase accessor rather
# than direct slot access.
pd <- pData(bottomly.eset)

# Design matrix with an intercept and the strain effect. The formula is kept
# as ~pd$strain so the coefficient names stay the same.
design <- model.matrix(~pd$strain)

Calculate normalization factors to scale the raw library sizes, then apply the voom transformation

# Wrap the filtered counts in a DGEList and compute the normalization
# factors in a single pipeline.
dge <- DGEList(counts = eset) %>%
  calcNormFactors()

# Apply the voom transformation to the count data, using the design matrix
# built above.
v <- dge %>%
  voom(design = design)

Find the differentially expressed genes

Create the MArrayLM object using the R/Bioconductor limma package

# Fit a linear model for each gene on the voom output, then apply eBayes()
# to the fit; the result is the MArrayLM object passed to explain() below.
fit <- v %>%
  lmFit(design) %>%
  eBayes()

explain() the analysis and results

# Pipe the fitted MArrayLM object into explain() with the "bioconductor"
# theme. The result is not assigned, so it is printed when the chunk runs.
fit %>% explain(theme = "bioconductor")


hilaryparker/explainr documentation built on May 17, 2019, 3:58 p.m.