#!/usr/bin/env Rscript
#
# Copyright 2013 - 2022 Michael K. Schuster
#
# Biomedical Sequencing Facility (BSF), part of the genomics core facility of
# the Research Center for Molecular Medicine (CeMM) of the Austrian Academy of
# Sciences and the Medical University of Vienna (MUW).
#
#
# This file is part of BSF R.
#
# BSF R is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# BSF R is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with BSF R. If not, see <http://www.gnu.org/licenses/>.
# Description -------------------------------------------------------------
# BSF R script to annotate DESeq2 results with the Enrichr tool.
# Option Parsing ----------------------------------------------------------
suppressPackageStartupMessages(expr = library(package = "optparse"))
argument_list <-
optparse::parse_args(object = optparse::OptionParser(
option_list = list(
optparse::make_option(
opt_str = "--verbose",
action = "store_true",
default = TRUE,
help = "Print extra output [default]",
type = "logical"
),
optparse::make_option(
opt_str = "--quiet",
action = "store_false",
default = FALSE,
dest = "verbose",
help = "Print little output",
type = "logical"
),
optparse::make_option(
opt_str = "--design-name",
# default = "global",
dest = "design_name",
help = "Design name",
type = "character"
),
optparse::make_option(
opt_str = "--maximum-terms",
default = 20L,
dest = "maximum_terms",
help = "Maximum number of terms [20]",
type = "integer"
),
optparse::make_option(
opt_str = "--genome-directory",
default = ".",
dest = "genome_directory",
help = "Genome directory path [.]",
type = "character"
),
optparse::make_option(
opt_str = "--output-directory",
default = ".",
dest = "output_directory",
help = "Output directory path [.]",
type = "character"
),
optparse::make_option(
opt_str = "--plot-width",
default = 7.0,
dest = "plot_width",
help = "Plot width in inches [7.0]",
type = "double"
),
optparse::make_option(
opt_str = "--plot-height",
default = 7.0,
dest = "plot_height",
help = "Plot height in inches [7.0]",
type = "double"
)
)
))
if (is.null(x = argument_list$design_name)) {
stop("Missing --design-name option")
}
# Library Import ----------------------------------------------------------
# CRAN r-lib
suppressPackageStartupMessages(expr = library(package = "sessioninfo"))
# CRAN Tidyverse
suppressPackageStartupMessages(expr = library(package = "dplyr"))
suppressPackageStartupMessages(expr = library(package = "ggplot2"))
suppressPackageStartupMessages(expr = library(package = "readr"))
suppressPackageStartupMessages(expr = library(package = "tibble"))
# CRAN
suppressPackageStartupMessages(expr = library(package = "enrichR"))
suppressPackageStartupMessages(expr = library(package = "Nozzle.R1"))
# TODO: The list of Enrichr databases should be configurable.
enrichr_databases <- c(
"BioCarta_2016",
"GO_Biological_Process_2018",
"GO_Cellular_Component_2018",
"GO_Molecular_Function_2018",
"KEGG_2019_Human",
"KEGG_2019_Mouse",
"MGI_Mammalian_Phenotype_Level_4_2019",
"Mouse_Gene_Atlas",
"Reactome_2016",
"WikiPathways_2019_Human",
"WikiPathways_2019_Mouse"
)
# Save plots in the following formats.
graphics_formats <- c("pdf" = "pdf", "png" = "png")
prefix_deseq <-
bsfR::bsfrd_get_prefix_deseq(design_name = argument_list$design_name)
prefix_enrichr <-
bsfR::bsfrd_get_prefix_enrichr(design_name = argument_list$design_name)
output_directory <-
file.path(argument_list$output_directory, prefix_enrichr)
if (!file.exists(output_directory)) {
dir.create(path = output_directory,
showWarnings = TRUE,
recursive = FALSE)
}
# Design List -------------------------------------------------------------
design_list <- bsfR::bsfrd_read_design_list(
genome_directory = argument_list$genome_directory,
design_name = argument_list$design_name,
verbose = argument_list$verbose
)
#' Load a DESeq2 results tibble for a specific contrast.
#'
#' @param contrast_character A \code{character} scalar with the contrast.
#' @param verbose A \code{logical} scalar to emit messages.
#'
#' @return A \code{tbl_df} of DESeq2 results.
#' @seealso bsfrd_read_result_tibble
#' @noRd
#'
#' @examples
load_deseq_result_tibble <-
function(contrast_character,
verbose = FALSE) {
deseq_results_tibble <-
bsfR::bsfrd_read_result_tibble(
genome_directory = argument_list$genome_directory,
design_name = argument_list$design_name,
contrast_character = contrast_character,
verbose = verbose
)
if (is.null(x = deseq_results_tibble)) {
stop("No DESeqResults frame for contrast ", contrast_character)
}
# Filter for padj and l2fc criteria.
if (verbose) {
message("Total number of genes: ",
base::nrow(x = deseq_results_tibble))
}
# Filter for the adjusted p-value and the absolute log2-fold change.
# NOTE: The dplyr::filter() function automatically removes rows with NA
# values in the padj variable.
deseq_results_tibble <-
dplyr::filter(
.data = deseq_results_tibble,
.data$padj <= .env$design_list$padj_threshold,
abs(x = .data$log2FoldChange) >= .env$design_list$l2fc_threshold
)
if (verbose) {
message(
"Number of genes after applying filter criteria: ",
base::nrow(x = deseq_results_tibble)
)
}
return(deseq_results_tibble)
}
#' Load an Enrichr result tibble for a specific contrast and database.
#'
#' @param contrast_character A \code{character} scalar with the contrast.
#' @param enrichr_database A \code{character} scalar with the Enrichr database.
#' @param verbose A \code{logical} scalar to emit messages.
#'
#' @return A named \code{list} of Enrichr result \code{tbl_df} objects.
#' \describe{
#' \item{up}{A \code{tbl_df} with results of of up-regulated genes.}
#' \item{down}{A \code{tbl_df} with results of down-regulated genes.}
#' }
#' @noRd
#'
#' @examples
load_enrichr_results <-
function(contrast_character,
enrichr_database,
verbose = FALSE) {
result_list <- list()
directions <- c("up", "down")
file_paths <- file.path(output_directory,
paste(
paste(
prefix_enrichr,
contrast_character,
enrichr_database,
directions,
sep = "_"
),
"tsv",
sep = "."
))
if (all(file.exists(file_paths)) &&
all(file.info(file_paths)$size > 0L)) {
for (direction_index in seq_along(along.with = directions)) {
if (verbose) {
message("Loading: ",
contrast_character,
" ",
enrichr_database,
" ",
directions[direction_index])
}
result_list[[directions[direction_index]]] <-
readr::read_tsv(
file = file_paths[direction_index],
col_types = readr::cols(
"Term" = readr::col_character(),
"Overlap" = readr::col_character(),
"P.value" = readr::col_double(),
"Adjusted.P.value" = readr::col_double(),
"Old.P.value" = readr::col_double(),
"Old.Adjusted.P.value" = readr::col_double(),
"Odds.Ratio" = readr::col_double(),
"Combined.Score" = readr::col_double(),
"Genes" = readr::col_character()
)
)
}
rm(direction_index)
} else {
if (verbose) {
message("Loading: ",
contrast_character,
" ",
enrichr_database)
}
deseq_results_tibble <-
load_deseq_result_tibble(contrast_character = contrast_character,
verbose = verbose)
for (direction_index in seq_along(along.with = directions)) {
# Split the results into up- and down-regulated genes.
if (verbose) {
message("Processing: ",
contrast_character,
" ",
enrichr_database,
" ",
directions[direction_index])
}
enrichr_tibble <-
if (directions[direction_index] == "up") {
dplyr::filter(.data = deseq_results_tibble, .data$log2FoldChange > 0.0)
} else {
dplyr::filter(.data = deseq_results_tibble, .data$log2FoldChange < 0.0)
}
readr::write_tsv(x = enrichr_tibble,
file = file.path(output_directory,
paste(
paste(
prefix_enrichr,
contrast_character,
"genes",
directions[direction_index],
sep = "_"
),
"tsv",
sep = "."
)))
# Since Enrichr needs valid gene symbols in the gene_name variable,
# post-filter after writing the TSV file.
enrichr_result_list <- NULL
if ("gene_name" %in% base::names(x = enrichr_tibble)) {
enrichr_tibble <-
dplyr::filter(.data = enrichr_tibble, !is.na(x = .data$gene_name))
if (base::nrow(x = enrichr_tibble) > 0L) {
enrichr_result_list <-
enrichR::enrichr(genes = enrichr_tibble$gene_name,
databases = enrichr_databases)
}
}
# Save the Enrichr results for each database, so that it is
# automatically available for the next query.
for (edb in enrichr_databases) {
enrichr_result_tibble <-
if (is.null(x = enrichr_result_list) ||
base::nrow(x = enrichr_result_list[[edb]]) == 0L) {
# Initialise an empty Enrichr results tibble if no genes were filtered.
tibble::tibble(
Term = character(),
Overlap = character(),
P.value = double(),
Adjusted.P.value = double(),
Old.P.value = double(),
Old.Adjusted.P.value = double(),
Odds.Ratio = double(),
Combined.Score = double(),
Genes = character()
)
} else {
# Use readr::type_convert() since the Type variable is sometimes
# read as logical rather than character.
tibble::as_tibble(x = readr::type_convert(
df = enrichr_result_list[[edb]],
col_types = readr::cols(
"Term" = readr::col_character(),
"Overlap" = readr::col_character(),
"P.value" = readr::col_double(),
"Adjusted.P.value" = readr::col_double(),
"Old.P.value" = readr::col_double(),
"Old.Adjusted.P.value" = readr::col_double(),
"Odds.Ratio" = readr::col_double(),
"Combined.Score" = readr::col_double(),
"Genes" = readr::col_character()
)
))
}
readr::write_tsv(x = enrichr_result_tibble,
file = file.path(output_directory,
paste(
paste(
prefix_enrichr,
contrast_character,
edb,
directions[direction_index],
sep = "_"
),
"tsv",
sep = "."
)))
if (edb == enrichr_database) {
result_list[[directions[direction_index]]] <- enrichr_result_tibble
}
rm(enrichr_result_tibble)
}
rm(edb, enrichr_tibble, enrichr_result_list)
}
rm(direction_index)
}
rm(file_paths, directions)
return(result_list)
}
# Contrasts Tibble --------------------------------------------------------
# Read a contrast tibble with variables "Design", "Numerator", "Denominator" and
# "Label".
contrast_tibble <-
bsfR::bsfrd_read_contrast_tibble(
genome_directory = argument_list$genome_directory,
design_name = argument_list$design_name,
summary = TRUE,
verbose = argument_list$verbose
)
# Create a "Contrasts" report section
nozzle_section_contrasts <-
Nozzle.R1::newSection("Contrasts", class = Nozzle.R1::SECTION.CLASS.RESULTS)
nozzle_section_contrasts <-
Nozzle.R1::addTo(parent = nozzle_section_contrasts, Nozzle.R1::newTable(table = base::as.data.frame(x = contrast_tibble)))
nozzle_section_enrichr <-
Nozzle.R1::newSection("Enrichr Reports", class = Nozzle.R1::SECTION.CLASS.RESULTS)
for (contrast_index in seq_len(length.out = base::nrow(x = contrast_tibble))) {
contrast_character <-
bsfR::bsfrd_get_contrast_character(contrast_tibble = contrast_tibble, index = contrast_index)
nozzle_section_contrast <-
Nozzle.R1::newSubSection("Contrast ", contrast_tibble$Label[contrast_index])
nozzle_section_contrast <-
Nozzle.R1::addTo(
parent = nozzle_section_contrast,
Nozzle.R1::newParagraph(
"Lists of up-regulated (",
Nozzle.R1::asLink(url = paste(
paste(prefix_enrichr,
contrast_character,
"genes",
"up",
sep = "_"),
"tsv",
sep = "."
), "TSV"),
") and down-regulated (",
Nozzle.R1::asLink(url = paste(
paste(prefix_enrichr,
contrast_character,
"genes",
"down",
sep = "_"),
"tsv",
sep = "."
), "TSV"),
") genes."
)
)
for (enrichr_index in seq_len(length.out = length(x = enrichr_databases))) {
result_list <-
load_enrichr_results(
contrast_character = contrast_character,
enrichr_database = enrichr_databases[enrichr_index],
verbose = argument_list$verbose
)
result_tibble_up <- result_list$up
if (base::nrow(x = result_tibble_up) > 0L) {
result_tibble_up <-
result_tibble_up[order(result_tibble_up$Combined.Score, decreasing = TRUE), , drop = FALSE]
result_tibble_up <-
result_tibble_up[seq_len(length.out = min(
base::nrow(x = result_tibble_up),
argument_list$maximum_terms
)), , drop = FALSE]
result_tibble_up$Direction <- "up"
} else {
result_tibble_up$Direction <- character(length = 0L)
}
result_tibble_down <- result_list$down
if (base::nrow(x = result_tibble_down) > 0L) {
result_tibble_down <-
result_tibble_down[order(result_tibble_down$Combined.Score, decreasing = TRUE), , drop = FALSE]
result_tibble_down <-
result_tibble_down[seq_len(length.out = min(
base::nrow(x = result_tibble_down),
argument_list$maximum_terms
)), , drop = FALSE]
result_tibble_down$Direction <- "down"
} else {
result_tibble_down$Direction <- character(length = 0L)
}
result_tibble <-
dplyr::bind_rows(result_tibble_up, result_tibble_down)
rm(result_tibble_up, result_tibble_down)
if (base::nrow(x = result_tibble) == 0L) {
rm(result_tibble, result_list)
next()
}
# Order from lowest (down-regulated) to highest (up-regulated) combined
# score, which means that the up-regulated genes appear on top.
# FIXME: Keep the order of scores.
# result_tibble <- result_tibble[order(result_tibble$Combined.Score, decreasing = FALSE), , drop = FALSE]
result_tibble$Term <-
factor(x = result_tibble$Term,
levels = unique(x = result_tibble$Term))
ggplot_object <-
ggplot2::ggplot(data = result_tibble)
ggplot_object <- ggplot_object +
ggplot2::geom_bar(
mapping = ggplot2::aes(
x = .data$Term,
y = .data$Combined.Score,
fill = .data$Direction
),
stat = "identity",
width = 0.5
# position = "dodge"
)
ggplot_object <- ggplot_object +
ggplot2::labs(
x = "Term",
y = "Combined Enrichr Score",
fill = "Direction",
title = "Enrichr Analysis",
subtitle = enrichr_databases[enrichr_index]
)
# ggplot_object <- ggplot_object + ggplot2::facet_grid(rows = ggplot2::vars(Direction))
ggplot_object <- ggplot_object +
ggplot2::scale_fill_manual(
name = "Expression",
labels = c("Down regulated", "Up regulated"),
values = c("down" = "#00ba38", "up" = "#f8766d")
)
ggplot_object <- ggplot_object + ggplot2::coord_flip()
ggplot_object <-
ggplot_object + ggplot2::theme(axis.text.y = ggplot2::element_text(size = ggplot2::rel(x = 0.5)))
file_path_character <-
paste(
paste(
prefix_enrichr,
contrast_character,
enrichr_databases[enrichr_index],
sep = "_"
),
graphics_formats,
sep = "."
)
for (file_path in file_path_character) {
ggplot2::ggsave(
filename = file.path(output_directory, file_path),
plot = ggplot_object,
width = argument_list$plot_width,
height = argument_list$plot_height
)
}
nozzle_section_contrast <-
Nozzle.R1::addTo(
parent = nozzle_section_contrast,
Nozzle.R1::newFigure(
file = file_path_character[2L],
"Enrichr combined score ",
Nozzle.R1::asStrong(enrichr_databases[enrichr_index]),
" for contrast ",
Nozzle.R1::asStrong(contrast_tibble$Label[contrast_index]),
fileHighRes = file_path_character[1L]
)
)
nozzle_section_contrast <-
Nozzle.R1::addTo(
parent = nozzle_section_contrast,
Nozzle.R1::newParagraph(
"Full Enrichr results ",
Nozzle.R1::asStrong(enrichr_databases[enrichr_index]),
" for up-regulated (",
Nozzle.R1::asLink(url = paste(
paste(
prefix_enrichr,
contrast_character,
enrichr_databases[enrichr_index],
"up",
sep = "_"
),
"tsv",
sep = "."
), "TSV"),
") and down-regulated (",
Nozzle.R1::asLink(url = paste(
paste(
prefix_enrichr,
contrast_character,
enrichr_databases[enrichr_index],
"down",
sep = "_"
),
"tsv",
sep = "."
), "TSV"),
") genes."
)
)
rm(file_path,
file_path_character,
ggplot_object,
result_tibble,
result_list)
}
nozzle_section_enrichr <-
Nozzle.R1::addTo(parent = nozzle_section_enrichr, nozzle_section_contrast)
rm(enrichr_index, nozzle_section_contrast, contrast_character)
}
rm(contrast_index)
nozzle_report <-
Nozzle.R1::newCustomReport("Enrichr Report", version = 0)
nozzle_report <-
Nozzle.R1::addTo(parent = nozzle_report, nozzle_section_contrasts)
nozzle_report <-
Nozzle.R1::addTo(parent = nozzle_report, nozzle_section_enrichr)
Nozzle.R1::writeReport(report = nozzle_report,
filename = file.path(output_directory,
paste(prefix_enrichr,
"report",
sep = "_")))
rm(
nozzle_report,
nozzle_section_enrichr,
nozzle_section_contrasts,
contrast_tibble,
enrichr_databases,
output_directory,
design_list,
load_enrichr_results,
load_deseq_result_tibble,
prefix_enrichr,
prefix_deseq,
graphics_formats,
argument_list
)
message("All done")
# Finally, print all objects that have not been removed from the environment.
if (length(x = ls())) {
print(x = "Remaining objects:")
print(x = ls())
}
print(x = sessioninfo::session_info())
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.