#' Run DESeq2 analyses
#'
#' \code{RunDESeq2} performs a high-level differential gene expression analysis
#' with \code{DESeq2}. It, along with \link{ProcessDEGs}, form the crux of the
#' RNA-seq portion of this package.
#'
#' The default parameters should be an adequate starting place for most users,
#' but lazy folks can provide multiple thresholds to \code{padj.thresh}
#' and/or \code{fc.thresh} if they aren't sure how stringent or lenient they
#' need to be with their data.
#'
#' It's generally best to provide an empty directory as the output path, as
#' several directories will be generated.
#'
#' @param outpath Path to directory to be used for output. Additional
#' directories will be generated within this folder.
#' @param quants.path Path to directory containing a directory for each sample
#' with a salmon-generated \code{quant.sf} file inside.
#' @param samplesheet Path to samplesheet containing sample metadata.
#' @param tx2gene Path to file with transcript IDs in first column and gene
#' identifiers in second. Used for gene-level summarization.
#' @param level String defining variable of interest.
#' @param padj.thresh Number or numeric scalar indicating the adjusted p-value
#' cutoff(s) to be used for determining "significant" differential expression.
#' If multiple are given, multiple tables/plots will be generated using all
#' combinations of \code{padj.thresh} and \code{fc.thresh}.
#' @param fc.thresh Number or numeric scalar indicating the log2 fold-change
#' cutoff(s) to be used for determining "significant" differential expression.
#' If multiple are given, multiple tables/plots will be generated using all
#' combinations of \code{padj.thresh} and \code{fc.thresh}.
#' @param plot.annos String or character vector defining the column(s) in
#' \code{samplesheet} to use to annotate figures.
#' @param plot.box Boolean indicating whether box plots for DEGs should be
#' created for each comparison. If so, the \code{top.n} genes will be plotted.
#' This step is quite time-consuming with many genes.
#' @param plot.enrich Boolean indicating whether enrichment analyses for DEGs
#' should be run and plotted for each comparison.
#' @param enrich.libs Vector of valid \code{enrichR} libraries to test the
#' genes against.
#'
#' Available libraries can be viewed with \code{listEnrichrDbs} from the
#' \code{enrichR} package.
#' @param top.n Number of differentially expressed genes to create boxplots for,
#' ranked by adj. p-value after applying \code{padj.thresh} and
#' \code{fc.thresh} thresholds. If multiple thresholds are provided, the
#' lowest fold-change and highest adj. p-value thresholds will be used.
#' @param block String or character vector defining the column(s) in
#' \code{samplesheet} to use to block for unwanted variance, e.g. batch or
#' technical effects.
#' @param count.filt Number indicating read threshold. Genes with fewer counts
#' than this number summed across all samples will be removed from the
#' analysis.
#' @return Named List containing a list named 'res.list' containing
#' \linkS4class{DESeqResults} objects for all comparisons generated by
#' \code{\link{ProcessDEGs}}, a \linkS4class{DESeqDataSet} object named 'dds'
#' from running \code{\link[DESeq2]{DESeq}}, a
#' \linkS4class{RangedSummarizedExperiment} object named 'rld' from running
#' \code{\link[DESeq2]{rlog}}, and a \linkS4class{RangedSummarizedExperiment}
#' object from running \code{\link[DESeq2]{vst}} named 'vsd'.
#'
#' @import DESeq2
#' @importFrom tximport tximport
#' @importFrom utils read.table
#'
#' @export
#'
#' @author Jared Andrews
#'
#' @seealso
#' \code{\link[DESeq2]{DESeq}}, for more about differential expression analysis.
#' \code{\link{ProcessDEGs}}, for analyzing and visualizing the results.
#'
RunDESeq2 <- function(outpath, quants.path, samplesheet, tx2gene, level,
padj.thresh = 0.05, fc.thresh = 2, plot.annos = NULL, plot.box = TRUE,
plot.enrich = TRUE, enrich.libs = c("GO_Molecular_Function_2018",
"GO_Cellular_Component_2018", "GO_Biological_Process_2018", "KEGG_2019_Human",
"Reactome_2016", "BioCarta_2016", "Panther_2016"), top.n = 100, block = NULL,
count.filt = 10) {
message("### EXPLORATORY DATA ANALYSIS ###\n")
### CREATING DIRECTORY STRUCTURE ###
message("# CREATING DIRECTORY STRUCTURE AND MODEL DESIGN #\n")
# Create directory structure and set design formula.
base <- outpath
setup <- CreateOutputStructure(block, level, base)
base <- setup$base
design <- setup$design
message(paste0("\nDesign is: ", design, "\n"))
if (is.null(plot.annos)) {
plot.annos <- level
}
### FILE LOADING & PRE-FILTERING ###
message("# FILE LOADING & PRE-FILTERING #\n")
tx2gene <- read.table(tx2gene, sep = "\t")
samples <- read.table(samplesheet, header = TRUE)
rownames(samples) <- samples$name
files <- file.path(quants.path, samples$name, "quant.sf")
names(files) <- samples$name
# Read in our actual count files now.
txi <- tximport(files, type = "salmon", tx2gene = tx2gene)
# Create the DESeqDataSet object.
dds <- DESeqDataSetFromTximport(txi, colData = samples, design = design)
# Pre-filter transcripts with really low read counts.
message(paste0("\ndds starting with ", nrow(dds), " genes."))
dds <- dds[rowSums(counts(dds)) >= count.filt,]
message(paste0("\ndds has ", nrow(dds),
" genes after removing genes with under ", count.filt, " counts total.\n"))
### VARIANCE STABILIZATION COMPARISONS ###
message("\n# VARIANCE STABILIZATION COMPARISONS #\n")
vst.out <- paste0(base,"/EDAFigures/VarianceTransformations.pdf")
message(paste0("Output: ", vst.out))
trans <- PlotRNAVarianceTransformations(dds, vst.out)
rld <- trans$rld
vsd <- trans$vsd
### SAMPLE DISTANCES ###
message("\n# PLOTTING SAMPLE DISTANCES #\n")
dists.out <- paste0(base, "/EDAFigures/SampleDistances.pdf")
message(paste0("Output: ", dists.out))
PlotRNASampleDistances(rld, vsd, dists.out, level, plot.annos)
### PCA PLOTS ###
message("\n# PCA PLOTS #\n")
pca.out <- paste0(base, "/EDAFigures/PCA.pdf")
message(paste0("Output: ", pca.out))
PlotRNAEDAPCAs(rld, vsd, pca.out, level, plot.annos)
#======================================#
### DIFFERENTIAL EXPRESSION ANALYSIS ###
message("\n### DIFFERENTIAL EXPRESSION ANALYSIS ###\n")
dds <- DESeq(dds)
# All the actual processing occurs here.
full <- ProcessDEGs(dds, rld, vsd, base, level, plot.annos, padj.thresh,
fc.thresh, plot.box, plot.enrich, enrich.libs, top.n)
### SAVING OBJECTS ###
message("\n# SAVING ROBJECTS #\n")
message(paste0("DESeq2: ", paste0(base, "/Robjects/dds.rds")))
message(paste0("Regularized log transformation: ",
paste0(base, "/Robjects/rld.rds")))
message(paste0("Variance stabilized transformation: ",
paste0(base, "/Robjects/vsd.rds")))
saveRDS(dds, file=paste0(base, "/Robjects/dds.rds"))
saveRDS(rld, file=paste0(base, "/Robjects/rld.rds"))
saveRDS(vsd, file=paste0(base, "/Robjects/vsd.rds"))
return(full)
}
#' Extract, visualize, and save DEG lists
#'
#' \code{ProcessDEGs} wraps several plotting functions to generate figures
#' specifically for the differentially expressed genes between each possible
#' comparison.
#'
#' \code{ProcessDEGs} is called by \link{RunDESeq2} but can also be re-run with
#' the \link{RunDESeq2} output or its own output if you want to save time and
#' don't need to generate all of the exploratory data analysis figures again.
#'
#' This function will generate many figures in addition to saving the counts
#' and results tables.
#'
#' @param dds A \linkS4class{DESeqDataSet} object as returned by
#' \code{\link[DESeq2]{DESeq}}.
#' @param rld A \linkS4class{RangedSummarizedExperiment} object of
#' \code{\link[DESeq2]{rlog}} transformed counts as returned by
#' \link{RunDESeq2}.
#' @param vsd A \linkS4class{RangedSummarizedExperiment} object of
#' \code{\link[DESeq2]{vst}} transformed counts as returned by
#' \link{RunDESeq2}.
#' @param outpath Path to directory to be used for output.
#' @param level String defining variable of interest.
#' @param plot.annos String or character vector defining the column(s) in
#' \code{samplesheet} to use to annotate figures.
#' @param padj.thresh Number or numeric scalar indicating the adjusted p-value
#' cutoff(s) to be used for determining "significant" differential expression.
#' If multiple are given, multiple tables/plots will be generated using all
#' combinations of \code{padj.thresh} and \code{fc.thresh}.
#' @param fc.thresh Number or numeric scalar indicating the log2 fold-change
#' cutoff(s) to be used for determining "significant" differential expression.
#' If multiple are given, multiple tables/plots will be generated using all
#' combinations of \code{padj.thresh} and \code{fc.thresh}.
#' @param plot.box Boolean indicating whether box plots for DEGs should be
#' created for each comparison. If so, the \code{top.n} genes will be plotted.
#' @param plot.enrich Boolean indicating whether enrichment analyses for DEGs
#' should be run and plotted for each comparison.
#' @param enrich.libs A vector of valid \code{enrichR} libraries to test the
#' genes against.
#'
#' Available libraries can be viewed with
#' \code{\link[enrichR]{listEnrichrDbs}} from the \code{enrichR} package.
#' @param top.n Number of differentially expressed genes to create boxplots for,
#' ranked by adj. p-value after applying \code{padj.thresh} and
#' \code{fc.thresh} thresholds. If multiple thresholds are provided, the
#' lowest fold-change and highest adj. p-value thresholds will be used.
#' @return Named List containing a list named 'res.list' containing named lists
#' for each p-value threshold containing
#' \linkS4class{DESeqResults} objects for all comparisons generated by
#' \code{\link{ProcessDEGs}}, a \linkS4class{DESeqDataSet} object named 'dds'
#' from running \code{\link[DESeq2]{DESeq}}, a
#' \linkS4class{RangedSummarizedExperiment} object named 'rld' from running
#' \code{\link[DESeq2]{rlog}}, and a \linkS4class{RangedSummarizedExperiment}
#' object from running \code{\link[DESeq2]{vst}} named 'vsd'.
#'
#' @importFrom DESeq2 lfcShrink counts plotCounts plotMA
#'
#' @export
#'
#' @author Jared Andrews
#'
#' @seealso
#' \code{\link{RunDESeq2}}, for generating input for this function.
#'
ProcessDEGs <- function(dds, rld, vsd, outpath, level, plot.annos,
padj.thresh = 0.05, fc.thresh = 2, plot.box = TRUE, plot.enrich = TRUE,
enrich.libs = c("GO_Molecular_Function_2018",
"GO_Cellular_Component_2018", "GO_Biological_Process_2018", "KEGG_2019_Human",
"Reactome_2016", "BioCarta_2016", "Panther_2016"), top.n = 100) {
message("\n# COLLECTING RESULTS #\n")
# Get all possible sample comparisons.
combs <- combn(levels(colData(rld)[,level]), 2)
combs.seq <- seq(1, length(combs), by = 2)
res.list <- list()
for (p in padj.thresh) {
rezzes <- list()
for (samp in combs.seq) {
g1 <- combs[samp]
g2 <- combs[samp + 1]
r <- results(dds, contrast = c(level, g1, g2), alpha = p)
res <- suppressMessages(lfcShrink(dds, res = r, type='ashr'))
rezzes[[paste0(g1, "-v-", g2)]] <- res
}
res.list[[toString(p)]] <- rezzes
}
##### PLOTTING #####
### DEG BOXPLOTS, PCAS, VOLCANOES, HEATMAPS ###
message("\n# GENERATING PLOTS #\n")
for (p in padj.thresh) {
for (fc in fc.thresh) {
PlotRNADEGPCAs(res.list[[toString(p)]], rld, vsd, outpath, level,
plot.annos, p, fc)
PlotRNAVolcanoes(res.list[[toString(p)]], dds, outpath, p, fc)
PlotRNAHeatmaps(res.list[[toString(p)]], rld, vsd, level, outpath, p, fc,
plot.annos)
PlotRNACombinedHeatmaps(res.list[[toString(p)]], rld, vsd, outpath, p, fc,
plot.annos)
if (plot.enrich) {
PlotEnrichments(res.list[[toString(p)]], outpath, p, fc, enrich.libs)
}
}
}
if (plot.box) {
p <- max(padj.thresh)
fc <- min(fc.thresh)
PlotRNABoxplots(res.list[[toString(p)]], dds, rld, outpath, p, fc, top.n,
level)
}
### MA PLOTs ###
for (p in padj.thresh) {
rez <- res.list[[toString(p)]]
for (r in seq_along(rez)) {
res <- rez[[r]]
comp <- names(rez)[r]
message("Comparison set: ", comp)
message("Generating MA plots with p.adj cutoff of ", p)
pdf(paste0(outpath,"/MAPlots/", comp, ".padj.", p, ".MA_plot.pdf"))
plotMA(res, ylim = c(-6, 6), alpha = p)
dev.off()
}
}
### SAVING TABLES ###
message("\n# SAVING RESULTS TABLES #\n")
SaveResults(results = res.list, outpath = outpath, dds = dds)
return(list(res.list = res.list, dds = dds, rld = rld, vsd = vsd))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.