knitr::opts_chunk$set(fig.width=8, fig.height=6,echo=TRUE, warning=FALSE, message=FALSE)
pipeLIMMA Run analyses of differnetial expression in LIMMA. makeBinarySig Find and count the number of significantly differentially expressed genespqHists Plot the distribution of P and FDR-corrected P (Q) valuesvoom2PCA Conduct principal component analyses on voom (or otherwise) normalized expression matricesvolcanoPlot Generate log2 foldchange vs. P-value volcano plotsvolcanoPair Compare two sets of log2 fold changes, colored by p-valuespipeDESeq2 Runs an analysis of differential expression similar to that of pipeLIMMA, except through the DESeq2 package. The limmaDE2 packages is not on CRAN and can only be installed from github. Make sure the devtools package is installed on your system. The limmaDE2 package also requires several other R packages to be installed. These packages have functions upon which limmaDE2 depends:
limma contains all of the basic statistical elements that are called here.qvalue fdr-corrected p-valuesedgeR Differential expression package used for some transformationsSimSeq permits simulation of RNA-seq dataplyr summarize and manipulate lists and dataframesvegan some distance-based methodsHeatplus heatmap plottingtopGO gene ontology (GO) analysisggplot2 routines for complex plotshexbin methods to simplify data in plotslibrary(devtools) install_github("jtlovell/limmaDE2")
library("limmaDE2") library("ggplot2") library("reshape2") library("SimSeq") library("DESeq2")
counts: Raw transcript abundance countsinfo: Experimental design information
These two matrices must match exactly, where each row in the info matrix corresponds to each column in counts. For best performance, the name of each gene should be stored in the rownames of the counts matrix. data(kidney) # from simseq counts<-kidney$counts counts<-counts[sample(1:nrow(counts),1000),] info<-with(kidney, data.frame(id = paste(replic, treatment, sep = "_"), rep=replic, Treatment=ifelse(treatment == "Tumor","tumor","cntr"), stringsAsFactors=F)) colnames(counts)<-info$id
group.a<-c("4619", "4712", "4863", "4865", "5452", "5453", "5454", "5455", "5456", "5457","5458", "5461", "5462", "5463", "5465", "5467", "5468", "5469", "5470", "5549","5552", "5580", "5641", "5672", "5689", "5701", "5703", "5706", "5989", "6088") info$group<-ifelse(info$rep %in% group.a, "a","b") with(info, table(group, Treatment)) info$trt.grp<-with(info, paste(Treatment, group, sep="_"))
head(info)
counts[1:10,1:3]
In all statistical analyses, it is important to set the levels of the experimental factors. This is esspecially true in linear modelling, such as limma, where levels are tested against the base level. Here, we set the "Non-Tumor" treatment and "a" group as the base.
info$Treatment <- factor(info$Treatment, levels = c("cntr", "tumor")) info$group <- factor(info$group, levels = c("a", "b"))
stats <- pipeLIMMA(counts = counts, info = info, block = NULL, formula = "~ Treatment") lmStats<-stats$stats voom<-stats$voom$E
pipeLIMMA returns three elements: stats, voom and fstats. These are the statistical output of the limma functions: eBayes, voom and topTableF. Inspect the top few rows and columns of each.
stats: eBayes linear model statisticsknitr::kable(stats$stats[1:6,1:6])
voom: normalized expression matrixknitr::kable(stats$voom[["E"]][1:6,1:6])
fstats: F-statistics for each factor in the model, in this case, just treatmentknitr::kable(stats$fstats[1:6,])
duplicateCorrelation among blocks, then uses the blocking variable in the linear model fit.info$block <- rep(1:2,each=nrow(info)/2) stats.block <- pipeLIMMA(counts = counts, info = info, block = info$block, formula = "~ Treatment")
stats.factorial <- pipeLIMMA(counts = counts, info = info, block = NULL, formula = "~ Treatment + group + Treatment*group")
Sometimes, it may make more sense to use specific contrasts to test for differential expression. For example, let's say we are interested in how the tumors and non-tumor tissues differ for each of the two groups.
design <- with(info, model.matrix(~ 0 + trt.grp)) colnames(design)<-gsub("trt.grp","",colnames(design)) head(design)
contrast.matrix <- makeContrasts( tumor_a - cntr_a , tumor_b - cntr_b, levels = design) head(contrast.matrix)
stats <- pipeLIMMA(counts = counts, info = info, block = NULL, design = design, contrast.matrix = contrast.matrix) stats.contrasts <- stats$stats
The function makeBinarySig looks for a provided string (e.g. "Q.Value") and outputs a matrix with whether or not those columns have values <= the provided alpha
sigs <- makeBinarySig(stats.contrasts, what = "Qvalue", alpha = 0.05)
counts2Venn(x=sigs, cols=c(1,2), names=c("in.grpA","in.grpB"), colors=c("blue","red"),type="limma", legx=-3.3,legy=-3)
counts2Venn(x=sigs, cols=c(1,2), names=c("in.grpA","in.grpB"), colors=c("blue","red"),type="Euler", legx=-3.3,legy=-3)
Volcano plots are a good way to look at the differences between two experimental levels. Here, we compare the extent of differential expression between the "high" treatment to the "low" treatment.
with(lmStats, volcanoPlot(pval = ebayesPvalue_Treatmenttumor, lfc = Treatmenttumor_logFC, sig = ebayesQvalue_Treatmenttumor, main = "no tumor vs. tumor Volcano Plot", xlab = "tumor - no tumor Log2 Fold Change", bty = "n", legpos = "top", leginset = c(0,-.1)))
sigs <- data.frame(makeBinarySig(stats.contrasts, what = "Qvalue", alpha = 0.05)) names(sigs)<-c("sig.a","sig.b") sigs$sign.a<-sign(stats.contrasts$tumor_a...cntr_a_logFC) sigs$sign.b<-sign(stats.contrasts$tumor_b...cntr_b_logFC) cols<-with(sigs, ifelse(sig.a + sig.b == 0, "grey", ifelse(sig.a + sig.b == 2 & sign.a*sign.b == 1, "pink", ifelse(sig.a + sig.b == 2 & sign.a*sign.b == -1, "cornflowerblue", ifelse(sig.a == 1, "darkblue", "darkred")))))
with(stats.contrasts, volcanoPair(lfc1 = tumor_a...cntr_a_logFC, lfc2 = tumor_b...cntr_b_logFC, pt.col = cols, pt.pch = 19, pt.cex=.5, xlab = "Tumor - control LFC (group A)", ylab = "Tumor - control LFC (group B)"))
It is often a good idea to get an idea of how the individuals (libraries) are structured - how similar are they to eachother. Principal component analyses allow for this kind of inference.
pca12 <- counts2PCA(counts=voom, info = info, ids = info$id, pcas2return = 6) pca12.var <- pca12[[2]] pca12 <- pca12[[1]] gcols <- c("darkblue", "blue", "gold", "green", "pink", "red") ggplot(pca12, aes(x = PC1, y = PC2, col = group, shape = Treatment)) + geom_vline(xintercept = 0, lty = 2)+ geom_hline(yintercept = 0, lty = 2)+ geom_point() + theme_bw() + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), strip.background = element_blank()) + labs(x = paste("PCA1 (", pca12.var[1],"%)", sep = ""), y = paste("PCA2 (", pca12.var[2],"%)", sep = ""), title = "PCA analysis of voom-normalized expression")
For more information, visit lovelleeb.weebly.com/analytics
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.