# Set seed for reproducibility set.seed(1454944673) library(knitr) library(ggplot2) opts_chunk[["set"]]( autodep = TRUE, bootstrap.show.code = FALSE, cache = TRUE, cache.lazy = TRUE, dev = c("png", "pdf"), error = TRUE, fig.height = 10, fig.retina = 2, fig.width = 10, highlight = TRUE, message = FALSE, prompt = TRUE, # formatR required for tidy code tidy = TRUE, warning = FALSE) theme_set( theme_light(base_size = 14)) theme_update( legend.justification = "center", legend.position = "bottom") download.file("https://github.com/hbc/bcbioRNASeq/raw/master/inst/rmarkdown/shared/bibliography.bib", "bibliography.bib")
library(bcbioRNASeq) library(DEGreport) library(pheatmap) # Load bcbioRNASeq object bcbName <- load(params$bcbFile) bcb <- get(bcbName, inherits = FALSE) # Directory paths outputDir <- params$outputDir dataDir <- dirname(params$bcbFile) deDir <- file.path(outputDir, "results", "transcript_differential_expression") dir.create(deDir, showWarnings = FALSE, recursive = TRUE)
library(sleuth) sleuthify = function(bcb, design) { sampledirs = metadata(bcb)$sampleDirs caller = "salmon" quantdirs = file.path(sampledirs, caller) names(quantdirs) = Map(basename, sampledirs) sampledata = metrics(bcb) sampledata$sample = sampledata$sampleID sampledata$path = quantdirs return(sleuth_prep(sampledata, design)) } sl = sleuthify(bcb, ~knockdown)
sl = sleuth_fit(sl, ~knockdown, 'full') sl = sleuth_fit(sl, ~1, 'reduced')
sl = sleuth_lrt(sl, 'reduced', 'full')
sl = sleuth_wt(sl, 'knockdownyes')
library(dplyr) kdtable = sleuth_results(sl, 'knockdownyes', 'wald', show_all = TRUE)
Out of r nrow(kdtable)
transcripts we flag r kdtable %>% filter(qval < 0.05) %>% nrow()
differentially expressed, using a q-value cutoff of 0.05.
kdtable = kdtable %>% left_join(metadata(bcb)$tx2gene, by=c("target_id"="enstxp")) %>% na.omit() %>% group_by(ensgene) %>% mutate(reciprocal=(sum(b > 0 & qval < 0.05) > 0) & sum(b < 0 & qval < 0.05) > 0 & length(ensgene) > 1) %>% as.data.frame()
There are r sum(kdtable$reciprocal)
transcripts reciprocally spliced, meaning two transcripts
in a given gene are significantly moving in opposite directions.
kdtable_effsize = kdtable %>% group_by(ensgene) %>% mutate(reciprocal=(sum(b > 1 & qval < 0.05) > 0) & sum(b < -1 & qval < 0.05) > 0 & length(ensgene) > 1) %>% as.data.frame()
Since the replicates are very similar, it is helpful to filter by effect size as
well. If we filter with an effect size of 1 (in sleuth terms a LFC of e, since
everything is in natural log space) then we find
r sum(kdtable_effsize$reciprocal)
transcripts reciprocally DE.
Below is a MA plot for all transcripts with an abs(beta) > 1
and qval < 0.05
. You
can see there is a set of odd looking transcripts with high ln fold change which look
like an artifact. I haven't seen this before, we'd have to dig in a little bit to
uncover what is happening with those transcripts.
library(ggpubr) plotMA = function(res, contrast_name=NULL) { res = data.frame(res) res = subset(res, !is.na(qval)) p = ggplot(res, aes(mean_obs, b, color=qval < 0.05 & abs(b) > 1)) + geom_point(size=0.8, alpha=0.2) + scale_x_log10( breaks = scales::trans_breaks("log10", function(x) 10^x), labels = scales::trans_format("log10", scales::math_format(10^.x))) + annotation_logticks(sides='b') + xlab("mean expression across all samples") + ylab(expression(ln[]*" fold change")) + scale_color_manual(values=c("black", "red", "green")) + guides(color=FALSE) + theme_pubr(base_size=8, base_family="Gill Sans MT") if(!is.null(contrast_name)) { p = p + ggtitle(paste("MA-plot for ", contrast_name)) } return(p) } plotMA(kdtable)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.