inst/doc/eisaR.R

## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----install_eisaR, eval=FALSE------------------------------------------------
#  # BiocManager is needed to install Bioconductor packages
#  if (!requireNamespace("BiocManager", quietly = TRUE))
#      install.packages("BiocManager")
#  
#  # Install eisaR
#  BiocManager::install("eisaR")

## ----availableOnline, eval=FALSE----------------------------------------------
#  pkgs <- c(BiocManager::available("TxDb")
#            BiocManager::available("EnsDb"))

## ----annotation, message=FALSE------------------------------------------------
# load package
library(eisaR)

# get TxDb object
txdbFile <- system.file("extdata", "hg19sub.sqlite", package = "eisaR")
txdb <- AnnotationDbi::loadDb(txdbFile)

## ----regions------------------------------------------------------------------
# extract filtered exonic and gene body regions
regS <- getRegionsFromTxDb(txdb = txdb, strandedData = TRUE)
regU <- getRegionsFromTxDb(txdb = txdb, strandedData = FALSE)

lengths(regS)
lengths(regU)

regS$exons

## ----exportregions------------------------------------------------------------
library(rtracklayer)
export(regS$exons, "hg19sub_exons_stranded.gtf")
export(regS$genebodies, "hg19sub_genebodies_stranded.gtf")

## ----extdata------------------------------------------------------------------
library(QuasR)
file.copy(system.file(package = "QuasR", "extdata"), ".", recursive = TRUE)

## ----align--------------------------------------------------------------------
sampleFile <- "extdata/samples_chip_single.txt"
genomeFile <- "extdata/hg19sub.fa"

proj <- qAlign(sampleFile = "extdata/samples_rna_single.txt", 
               genome = "extdata/hg19sub.fa",
               aligner = "Rhisat2", splicedAlignment = TRUE)
alignmentStats(proj)

## ----count--------------------------------------------------------------------
cntEx <- qCount(proj, regU$exons, orientation = "any")
cntGb <- qCount(proj, regU$genebodies, orientation = "any")
cntIn <- cntGb - cntEx
cntEx
cntIn

## ----loadcounts---------------------------------------------------------------
cntEx <- readRDS(system.file("extdata",
                             "Fig3abc_GSE33252_rawcounts_exonic.rds",
                             package = "eisaR"))
cntIn <- readRDS(system.file("extdata",
                             "Fig3abc_GSE33252_rawcounts_intronic.rds",
                             package = "eisaR"))

## ----runEISA------------------------------------------------------------------
# remove "width" column
Rex <- cntEx[, colnames(cntEx) != "width"]
Rin <- cntIn[, colnames(cntIn) != "width"]

# create condition factor (contrast will be TN - ES)
cond <- factor(c("ES", "ES", "TN", "TN"))

# run EISA
res <- runEISA(Rex, Rin, cond)

## ----compare------------------------------------------------------------------
res1 <- runEISA(Rex, Rin, cond, method = "Gaidatzis2015")
res2 <- runEISA(Rex, Rin, cond)

# number of quantifiable genes
nrow(res1$DGEList)
nrow(res2$DGEList)

# number of genes with significant post-transcriptional regulation
sum(res1$tab.ExIn$FDR < 0.05)
sum(res2$tab.ExIn$FDR < 0.05)

# method="Gaidatzis2015" results contain most of default results
summary(rownames(res2$contrasts)[res2$tab.ExIn$FDR < 0.05] %in%
        rownames(res1$contrasts)[res1$tab.ExIn$FDR < 0.05])

# comparison of deltas
ids <- intersect(rownames(res1$DGEList), rownames(res2$DGEList))
cor(res1$contrasts[ids,"Dex"], res2$contrasts[ids,"Dex"])
cor(res1$contrasts[ids,"Din"], res2$contrasts[ids,"Din"])
cor(res1$contrasts[ids,"Dex.Din"], res2$contrasts[ids,"Dex.Din"])
plot(res1$contrasts[ids,"Dex.Din"], res2$contrasts[ids,"Dex.Din"], pch = "*",
     xlab = expression(paste(Delta, "exon", -Delta, "intron for method='Gaidatzis2015'")),
     ylab = expression(paste(Delta, "exon", -Delta, "intron for default parameters")))

## ----modelSamples-------------------------------------------------------------
res3 <- runEISA(Rex, Rin, cond, modelSamples = FALSE)
res4 <- runEISA(Rex, Rin, cond, modelSamples = TRUE)
ids <- intersect(rownames(res3$contrasts), rownames(res4$contrasts))

# number of genes with significant post-transcriptional regulation
sum(res3$tab.ExIn$FDR < 0.05)
sum(res4$tab.ExIn$FDR < 0.05)

# modelSamples=TRUE results are a super-set of
# modelSamples=FALSE results
summary(rownames(res3$contrasts)[res3$tab.ExIn$FDR < 0.05] %in%
        rownames(res4$contrasts)[res4$tab.ExIn$FDR < 0.05])

# comparison of contrasts
diag(cor(res3$contrasts[ids, ], res4$contrasts[ids, ]))
plot(res3$contrasts[ids, 3], res4$contrasts[ids, 3], pch = "*",
     xlab = "Interaction effects for modelSamples=FALSE",
     ylab = "Interaction effects for modelSamples=TRUE")

# comparison of interaction significance
plot(-log10(res3$tab.ExIn[ids, "FDR"]), -log10(res4$tab.ExIn[ids, "FDR"]), pch = "*",
     xlab = "-log10(FDR) for modelSamples=FALSE",
     ylab = "-log10(FDR) for modelSamples=TRUE")
abline(a = 0, b = 1, col = "gray")
legend("bottomright", "y = x", bty = "n", lty = 1, col = "gray")

## ----plotEISA-----------------------------------------------------------------
plotEISA(res)

## ----normalization------------------------------------------------------------
# remove column "width"
Rex <- cntEx[,colnames(cntEx) != "width"]
Rin <- cntIn[,colnames(cntIn) != "width"]
Rall <- Rex + Rin
fracIn <- colSums(Rin)/colSums(Rall)
summary(fracIn)

# scale counts to the mean library size,
# separately for exons and introns
Nex <- t(t(Rex) / colSums(Rex) * mean(colSums(Rex)))
Nin <- t(t(Rin) / colSums(Rin) * mean(colSums(Rin)))

# log transform (add a pseudocount of 8)
NLex <- log2(Nex + 8)
NLin <- log2(Nin + 8)

## ----quantgenes---------------------------------------------------------------
quantGenes <- rownames(Rex)[ rowMeans(NLex) > 5.0 & rowMeans(NLin) > 5.0 ]
length(quantGenes)

## ----dIdE---------------------------------------------------------------------
Dex <- NLex[,c("MmTN_RNA_total_a","MmTN_RNA_total_b")] - NLex[,c("MmES_RNA_total_a","MmES_RNA_total_b")]
Din <- NLin[,c("MmTN_RNA_total_a","MmTN_RNA_total_b")] - NLin[,c("MmES_RNA_total_a","MmES_RNA_total_b")]
Dex.Din <- Dex - Din

cor(Dex[quantGenes,1], Dex[quantGenes,2])
cor(Din[quantGenes,1], Din[quantGenes,2])
cor(Dex.Din[quantGenes,1], Dex.Din[quantGenes,2])

## ----sig----------------------------------------------------------------------
# create DGEList object with exonic and intronic counts
library(edgeR)
cnt <- data.frame(Ex = Rex, In = Rin)
y <- DGEList(counts = cnt, genes = data.frame(ENTREZID = rownames(cnt)))

# select quantifiable genes and normalize
y <- y[quantGenes, ]
y <- calcNormFactors(y)

# design matrix with interaction term
region <- factor(c("ex","ex","ex","ex","in","in","in","in"), levels = c("in", "ex"))
cond <- rep(factor(c("ES","ES","TN","TN")), 2)
design <- model.matrix(~ region * cond)
rownames(design) <- colnames(cnt)
design

# estimate model parameters
y <- estimateDisp(y, design)
fit <- glmFit(y, design)

# calculate likelihood-ratio between full and reduced models
lrt <- glmLRT(fit)

# create results table
tt <- topTags(lrt, n = nrow(y), sort.by = "none")
head(tt$table[order(tt$table$FDR, decreasing = FALSE), ])

## ----plot---------------------------------------------------------------------
sig     <- tt$table$FDR < 0.05
sum(sig)
sig.dir <- sign(tt$table$logFC[sig])
cols <- ifelse(sig, ifelse(tt$table$logFC > 0, "#E41A1CFF", "#497AB3FF"), "#22222244")

# volcano plot
plot(tt$table$logFC, -log10(tt$table$FDR), col = cols, pch = 20,
     xlab = expression(paste("RNA change (log2 ",Delta,"exon/",Delta,"intron)")),
     ylab = "Significance (-log10 FDR)")
abline(h = -log10(0.05), lty = 2)
abline(v = 0, lty = 2)
text(x = par("usr")[1] + 3 * par("cxy")[1], y = par("usr")[4], adj = c(0,1),
     labels = sprintf("n=%d", sum(sig.dir == -1)), col = "#497AB3FF")
text(x = par("usr")[2] - 3 * par("cxy")[1], y = par("usr")[4], adj = c(1,1),
     labels = sprintf("n=%d", sum(sig.dir ==  1)), col = "#E41A1CFF")

# Delta I vs. Delta E
plot(rowMeans(Din)[quantGenes], rowMeans(Dex)[quantGenes], pch = 20, col = cols,
     xlab = expression(paste(Delta,"intron (log2 TN/ES)")),
     ylab = expression(paste(Delta,"exon (log2 TN/ES)")))
legend(x = "bottomright", bty = "n", pch = 20, col = c("#E41A1CFF","#497AB3FF"),
       legend = sprintf("%s (%d)", c("Up","Down"), c(sum(sig.dir == 1), sum(sig.dir == -1))))

## ----sessionInfo--------------------------------------------------------------
sessionInfo()

Try the eisaR package in your browser

Any scripts or data that you put into this service are public.

eisaR documentation built on Nov. 8, 2020, 8:26 p.m.