#!/usr/bin/env Rscript
#
# Copyright 2013 - 2022 Michael K. Schuster
#
# Biomedical Sequencing Facility (BSF), part of the genomics core facility of
# the Research Center for Molecular Medicine (CeMM) of the Austrian Academy of
# Sciences and the Medical University of Vienna (MUW).
#
#
# This file is part of BSF R.
#
# BSF R is free software: you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# BSF R is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Lesser General Public License for more details.
#
# You should have received a copy of the GNU Lesser General Public License
# along with BSF R. If not, see <http://www.gnu.org/licenses/>.
# Description -------------------------------------------------------------
# BSF R script to run a DESeq2 analysis.
#
# Reads are counted on the basis of a reference (Ensembl) transcriptome supplied
# as a GenomicRanges::GRanges object. The exon GenomicRanges::GRanges are
# subsequently converted into a GenomicRanges::GRangesList object by (Ensembl)
# gene identifiers. A SummarizedExperiment::RangedSummarizedExperiment object is
# created by the GenomicAlignments::summarizeOverlaps() function. Reads in
# secondary alignments and reads failing vendor quality filtering are thereby
# dismissed.
#
# Sample Annotation DataFrame Description ---------------------------------
#
#
# See the bsfrd_read_sample_dframe() function for variable names.
#
# Design Annotation Tibble Description ------------------------------------
#
#
# See the bsfrd_read_design_tibble() function for variable names.
#
# Contrast Annotation Tibble Description ----------------------------------
#
#
# See the bsfrd_read_contrast_tibble() function for variable names.
#
# Option Parsing ----------------------------------------------------------
suppressPackageStartupMessages(expr = library(package = "optparse"))
argument_list <-
optparse::parse_args(object = optparse::OptionParser(
option_list = list(
optparse::make_option(
opt_str = "--verbose",
action = "store_true",
default = TRUE,
help = "Print extra output [default]",
type = "logical"
),
optparse::make_option(
opt_str = "--quiet",
action = "store_false",
default = FALSE,
dest = "verbose",
help = "Print little output",
type = "logical"
),
optparse::make_option(
opt_str = "--design-name",
# default = "global",
dest = "design_name",
help = "Design name",
type = "character"
),
optparse::make_option(
opt_str = "--transcriptome-path",
default = NULL,
dest = "transcriptome_path",
help = "RDS file path specifying a reference transcriptome GRanges object",
type = "character"
),
optparse::make_option(
# This option is required for PCA plots
opt_str = "--pca-dimensions",
default = 4L,
dest = "pca_dimensions",
help = "Principal components to plot [4]",
type = "integer"
),
optparse::make_option(
# This option is required for PCA plots
opt_str = "--pca-top-number",
default = 500L,
dest = "pca_top_number",
help = "Number of most variable features for PCA [500]",
type = "integer"
),
optparse::make_option(
opt_str = "--threads",
default = 1L,
dest = "threads",
help = "Number of parallel processing threads [1]",
type = "integer"
),
optparse::make_option(
opt_str = "--genome-directory",
default = ".",
dest = "genome_directory",
help = "Genome directory path [.]",
type = "character"
),
optparse::make_option(
opt_str = "--output-directory",
default = ".",
dest = "output_directory",
help = "Output directory path [.]",
type = "character"
),
# optparse::make_option(
# opt_str = "--plot-factor",
# default = 0.5,
# dest = "plot_factor",
# help = "Plot width increase per 24 samples [0.5]",
# type = "double"
# ),
optparse::make_option(
opt_str = "--plot-width",
default = 7.0,
dest = "plot_width",
help = "Plot width in inches [7.0]",
type = "double"
),
optparse::make_option(
opt_str = "--plot-height",
default = 7.0,
dest = "plot_height",
help = "Plot height in inches [7.0]",
type = "double"
)
)
))
if (is.null(x = argument_list$design_name)) {
stop("Missing --design-name option")
}
# Library Import ----------------------------------------------------------
# CRAN r-lib
suppressPackageStartupMessages(expr = library(package = "sessioninfo"))
# CRAN Tidyverse
suppressPackageStartupMessages(expr = library(package = "dplyr"))
suppressPackageStartupMessages(expr = library(package = "ggplot2"))
suppressPackageStartupMessages(expr = library(package = "purrr"))
suppressPackageStartupMessages(expr = library(package = "readr"))
suppressPackageStartupMessages(expr = library(package = "stringr"))
suppressPackageStartupMessages(expr = library(package = "tibble"))
suppressPackageStartupMessages(expr = library(package = "tidyr"))
# CRAN
suppressPackageStartupMessages(expr = library(package = "caret"))
suppressPackageStartupMessages(expr = library(package = "pheatmap"))
# Bioconductor
suppressPackageStartupMessages(expr = library(package = "BiocVersion"))
suppressPackageStartupMessages(expr = library(package = "DESeq2"))
suppressPackageStartupMessages(expr = library(package = "BiocParallel"))
suppressPackageStartupMessages(expr = library(package = "GenomicAlignments"))
suppressPackageStartupMessages(expr = library(package = "RColorBrewer"))
suppressPackageStartupMessages(expr = library(package = "Rsamtools"))
suppressPackageStartupMessages(expr = library(package = "genefilter"))
suppressPackageStartupMessages(expr = library(package = "rtracklayer"))
# BSF
suppressPackageStartupMessages(expr = library(package = "bsfR"))
# Global Variables --------------------------------------------------------
# Save plots in the following formats.
graphics_formats <- c("pdf" = "pdf", "png" = "png")
# Define a design-specific prefix.
prefix <-
bsfR::bsfrd_get_prefix_deseq(design_name = argument_list$design_name)
# Define a design-specific output directory.
output_directory <-
file.path(argument_list$output_directory, prefix)
if (!file.exists(output_directory)) {
dir.create(path = output_directory,
showWarnings = TRUE,
recursive = FALSE)
}
# Initialise a DESeqDataSet object ----------------------------------------
#' Fix a Model Matrix.
#'
#' Attempt to fix a model matrix by removing empty columns or by removing linear
#' combinations.
#'
#' @param model_matrix_local A model \code{matrix}.
#'
#' @return A model \code{matrix}.
#'
#' @examples
#' \dontrun{
#' model_matrix
#' <- fix_model_matrix(model_matrix_local = model_matrix)
#' }
#' @noRd
fix_model_matrix <- function(model_matrix_local) {
# Check, whether the model matrix is full rank.
# This is based on the DESeq2::checkFullRank() function.
full_rank <- TRUE
if (qr(x = model_matrix_local)$rank < base::ncol(x = model_matrix_local)) {
message("The model matrix is not full rank.")
full_rank <- FALSE
model_all_zero <-
apply(
X = model_matrix_local,
MARGIN = 2,
FUN = function(model_matrix_column) {
return(all(model_matrix_column == 0))
}
)
if (any(model_all_zero)) {
message(
"Levels or combinations of levels without any samples have resulted in\n",
"column(s) of zeros in the model matrix:\n ",
paste(base::colnames(x = model_matrix_local)[model_all_zero], collapse = "\n "),
"\n",
"Attempting to fix the model matrix by removing empty columns."
)
model_matrix_local <-
model_matrix_local[, -which(x = model_all_zero)]
} else {
linear_combinations_list <-
caret::findLinearCombos(x = model_matrix_local)
message_character <-
purrr::map_chr(
.x = linear_combinations_list$linearCombos,
.f = function(x) {
return(paste0(
" Linear combinations:\n ",
paste(base::colnames(x = model_matrix_local)[x], collapse = "\n "),
"\n"
))
}
)
message(
"One or more variables or interaction terms in the design formula are\n",
"linear combinations of the others.\n",
message_character,
"\n",
"Attempting to fix the model by removing linear combinations:\n ",
paste(base::colnames(x = model_matrix_local)[linear_combinations_list$remove], collapse = "\n ")
)
model_matrix_local <-
model_matrix_local[,-linear_combinations_list$remove]
}
rm(model_all_zero)
}
return(list("model_matrix" = model_matrix_local, "full_rank" = full_rank))
}
#' Check a Model Matrix.
#'
#' Check a model matrix for being full rank and attempt to fix it by removing
#' empty columns or by removing linear combinations.
#'
#' @param model_matrix A model \code{matrix}.
#'
#' @return A named \code{list}.
#' \describe{
#' \item{model_matrix}{A model \code{matrix}.}
#' \item{formula_full_rank}{A \code{logical} indicating that the original
#' formula was full rank.}
#' }
#'
#' @seealso fix_model_matrix
#' @examples
#' \dontrun{
#' result_list <-
#' check_model_matrix(model_matrix = model_matrix)
#' }
#' @noRd
check_model_matrix <- function(model_matrix) {
formula_full_rank <- NULL
matrix_full_rank <- FALSE
while (!matrix_full_rank) {
result_list <- fix_model_matrix(model_matrix_local = model_matrix)
model_matrix <- result_list$model_matrix
matrix_full_rank <- result_list$full_rank
if (is.null(x = formula_full_rank)) {
# Capture the intial state of the model matrix, which represents the formula.
formula_full_rank <- result_list$full_rank
}
}
# Return the model matrix and indicate whether the original formula was full
# rank and the current model matrix is.
return(
list(
"model_matrix" = model_matrix,
"formula_full_rank" = formula_full_rank,
"matrix_full_rank" = matrix_full_rank
)
)
}
#' Initialise a DESeqDataSet Object.
#'
#' Initialise or load a \code{DESeq2::DESeqDataSet} object.
#'
#' @param design_list A named \code{list} of design information.
#' @param transcriptome_granges A \code{GenomicRanges::GRanges} object
#' specifying the reference transcriptome. Only required, if the
#' \code{DESeq2::DESeqDataSet} object needs initialising.
#' @references argument_list
#' @references output_directory
#' @references prefix
#'
#' @return A \code{DESeq2::DESeqDataSet} object.
#'
#' @examples
#' \dontrun{
#' deseq_data_set <-
#' initialise_deseq_data_set(design_list = design_list)
#' }
#' @noRd
initialise_deseq_data_set <-
function(design_list, transcriptome_granges = NULL) {
deseq_data_set <- NULL
file_path <-
file.path(output_directory,
paste0(prefix, "_deseq_data_set.rds"))
if (file.exists(file_path) &&
file.info(file_path)$size > 0L) {
message("Loading a DESeqDataSet object")
deseq_data_set <-
readr::read_rds(file = file_path)
} else {
ranged_summarized_experiment <-
bsfR::bsfrd_initialise_rse(
genome_directory = argument_list$genome_directory,
design_list = design_list,
transcriptome_granges = transcriptome_granges,
verbose = argument_list$verbose
)
# Create a model matrix based on the model formula and column (sample
# annotation) data and check whether it is full rank.
model_matrix <- stats::model.matrix.default(
object = stats::as.formula(object = design_list$full_formula),
data = SummarizedExperiment::colData(x = ranged_summarized_experiment)
)
result_list <-
check_model_matrix(model_matrix = model_matrix)
if (argument_list$verbose) {
message("Writing initial model matrix")
utils::write.table(
x = base::as.data.frame(x = model_matrix),
file = file.path(
output_directory,
paste0(prefix, "_model_matrix_full_initial.tsv")
),
sep = "\t",
row.names = TRUE,
col.names = TRUE
)
message("Writing modified model matrix")
utils::write.table(
x = base::as.data.frame(x = result_list$model_matrix),
file = file.path(
output_directory,
paste0(prefix, "_model_matrix_full_modified.tsv")
),
sep = "\t",
row.names = TRUE,
col.names = TRUE
)
}
rm(model_matrix)
if (result_list$formula_full_rank) {
# The design formula *is* full rank, so set it as "design" option directly.
message("Creating a DESeqDataSet object with a model formula")
deseq_data_set <-
DESeq2::DESeqDataSet(
se = ranged_summarized_experiment,
design = stats::as.formula(object = design_list$full_formula)
)
# Start DESeq2 Wald testing.
#
# Set betaPrior = FALSE for consistent result names for designs,
# regardless of interaction terms. DESeq2 seems to set betaPrior = FALSE
# upon interaction terms, automatically.
#
# See: https://support.bioconductor.org/p/84366/
#
# betaPrior also has to be FALSE in case a user-supplied full model matrix
# is specified.
message("Started DESeq Wald testing with a model formula")
deseq_data_set <-
DESeq2::DESeq(
object = deseq_data_set,
test = "Wald",
betaPrior = FALSE,
parallel = TRUE
)
attr(x = deseq_data_set, which = "full_rank") <- TRUE
} else {
# The original design formula was not full rank. Unfortunately, to
# initialise the DESeqDataSet, a model matrix can apparently not be used
# directly. Thus, use the simplest possible design (i.e. ~ 1) for
# initialisation and perform Wald testing with the full model matrix.
message("Creating a DESeqDataSet object with design formula ~ 1")
deseq_data_set <-
DESeq2::DESeqDataSet(se = ranged_summarized_experiment,
design = ~ 1)
attr(x = deseq_data_set, which = "full_rank") <- FALSE
message("Started DESeq Wald testing with a model matrix")
deseq_data_set <-
DESeq2::DESeq(
object = deseq_data_set,
test = "Wald",
betaPrior = FALSE,
full = result_list$model_matrix,
parallel = TRUE
)
}
readr::write_rds(x = deseq_data_set,
file = file_path,
compress = "gz")
rm(result_list, ranged_summarized_experiment)
}
rm(file_path)
return(deseq_data_set)
}
# Initialise a DESeqTransform object --------------------------------------
#' Initialise a DESeqTransform Object.
#'
#' Initialise or load a \code{DESeq2::DESeqTransform} object.
#'
#' @param blind A \code{logical} scalar to create the
#' \code{DESeq2::DESeqTransform} object blindly or based on the model.
#' @references output_directory
#' @references prefix
#'
#' @return A \code{DESeq2::DESeqTransform} object.
#'
#' @examples
#' \dontrun{
#' deseq_transform <-
#' initialise_deseq_transform(deseq_data_set = deseq_data_set, blind = FALSE)
#' }
#' @noRd
initialise_deseq_transform <-
function(deseq_data_set, blind = FALSE) {
deseq_transform <- NULL
suffix <- if (blind)
"blind"
else
"model"
file_path <-
file.path(output_directory,
paste(
paste(prefix, "deseq", "transform", suffix, sep = "_"),
"rds",
sep = "."
))
if (file.exists(file_path) &&
file.info(file_path)$size > 0L) {
message("Loading a ", suffix, " DESeqTransform object")
deseq_transform <-
readr::read_rds(file = file_path)
} else {
message("Creating a ", suffix, " DESeqTransform object")
# Run variance stabilizing transformation (VST) to get homoscedastic data
# for PCA plots.
deseq_transform <-
DESeq2::varianceStabilizingTransformation(object = deseq_data_set, blind = blind)
readr::write_rds(x = deseq_transform,
file = file_path,
compress = "gz")
}
rm(file_path, suffix)
return(deseq_transform)
}
# Plot FPKM Values --------------------------------------------------------
#' Create an FPKM Density Plot.
#'
#' Create a density plot of log10(FPKM) values and save PDF and PNG documents.
#'
#' @param object A \code{matrix} object returned by \code{DESeq2::fpkm}.
#' @references argument_list
#' @references graphics_formats
#' @references output_directory
#' @references prefix
#'
#' @return \code{NULL}
#'
#' @examples
#' \dontrun{
#' plot_fpkm_values(object = DESeq2::fpkm(object = dds))
#' }
#' @noRd
plot_fpkm_values <- function(object) {
plot_paths <- file.path(output_directory,
paste(
paste(prefix,
"fpkm",
"density",
sep = "_"),
graphics_formats,
sep = "."
))
if (all(file.exists(plot_paths)) &&
all(file.info(plot_paths)$size > 0L)) {
message("Skipping a FPKM density plot")
} else {
message("Create a FPKM density plot")
# Pivot the tibble to get just a "name" and a "value" variable.
ggplot_object <-
ggplot2::ggplot(data = tidyr::pivot_longer(
data = tibble::as_tibble(x = object),
cols = tidyselect::everything()
))
ggplot_object <-
ggplot_object +
ggplot2::geom_density(
mapping = ggplot2::aes(
x = log10(x = .data$value),
y = ggplot2::after_stat(x = .data$density),
colour = .data$name
),
alpha = I(1 / 3),
na.rm = TRUE
)
ggplot_object <-
ggplot_object +
ggplot2::labs(
x = "log10(FPKM)",
y = "Density",
colour = "Sample",
title = "FPKM Density",
subtitle = paste("Design", argument_list$design_name)
)
# Increase the plot width per 24 samples.
# The number of samples is the number of columns of the matrix.
# Rather than argument_list$plot_factor, a fixed number of 0.25 is used here.
plot_width <-
argument_list$plot_width + (ceiling(x = base::ncol(x = object) / 24L) - 1L) * argument_list$plot_width * 0.25
for (plot_path in plot_paths) {
ggplot2::ggsave(
filename = plot_path,
plot = ggplot_object,
width = plot_width,
height = argument_list$plot_height,
limitsize = FALSE
)
}
rm(plot_path, ggplot_object)
}
rm(plot_paths)
}
# Plot Cook's Distances ---------------------------------------------------
#' Create a Cook's Distances Box Plot.
#'
#' Create a box plot of Cook's distances and save PDF and PNG documents.
#'
#' @param object A \code{DESeq2::DESeqDataSet} object.
#' @references argument_list
#' @references graphics_formats
#' @references output_directory
#' @references prefix
#'
#' @return \code{NULL}
#'
#' @examples
#' \dontrun{
#' }
#' @noRd
plot_cooks_distances <- function(object) {
plot_paths <-
file.path(output_directory, paste(
paste(prefix, "cooks", "distances", sep = "_"),
graphics_formats,
sep = "."
))
if (all(file.exists(plot_paths)) &&
all(file.info(plot_paths)$size > 0L)) {
message("Skipping a Cook's distances box plot")
} else {
message("Creating a Cook's distances box plot")
ggplot_object <-
ggplot2::ggplot(data = tidyr::pivot_longer(
data = tibble::as_tibble(x = SummarizedExperiment::assays(x = object)$cooks),
cols = tidyselect::everything()
))
ggplot_object <-
ggplot_object +
ggplot2::geom_boxplot(mapping = ggplot2::aes(x = .data$name,
y = .data$value),
na.rm = TRUE)
ggplot_object <-
ggplot_object +
ggplot2::labs(
x = "Sample",
y = "Cook's Distance",
title = "Cook's Distance per Sample",
subtitle = paste("Design", argument_list$design_name)
)
ggplot_object <-
ggplot_object +
ggplot2::theme(
axis.text.x = ggplot2::element_text(
size = ggplot2::rel(x = 0.8),
hjust = 0.0,
vjust = 0.5,
angle = 90.0
)
)
# Increase the plot width per 24 samples.
# The number of samples is the number of rows of the
# SummarizedExperiment::colData() S4Vectors::DataFrame. Rather than
# argument_list$plot_factor, a fixed number of 0.33 is used here.
plot_width <-
argument_list$plot_width + (ceiling(x = S4Vectors::nrow(x = SummarizedExperiment::colData(x = object)) / 24L) - 1L) * argument_list$plot_width * 0.33
for (plot_path in plot_paths) {
ggplot2::ggsave(
filename = plot_path,
plot = ggplot_object,
width = plot_width,
height = argument_list$plot_height,
limitsize = FALSE
)
}
rm(plot_path, plot_width, ggplot_object)
}
rm(plot_paths)
}
# Plot RIN Scores ---------------------------------------------------------
#' Create a RIN Score Density Plot.
#'
#' Create a density plot of the "RIN" variable in the sample annotation
#' DataFrame. Add vertical lines at 1.0, 4.0 and 7.0 in red, yellow and green,
#' respectively, to indicate quality tiers. Save PDF and PNG documents.
#'
#' @param object A \code{DESeq2::DESeqDataSet} object.
#' @references argument_list
#' @references graphics_formats
#' @references output_directory
#' @references prefix
#'
#' @return \code{NULL}
#'
#' @examples
#' \dontrun{
#' plot_rin_scores(object = dds)
#' }
#' @noRd
plot_rin_scores <- function(object) {
plot_paths <- file.path(output_directory,
paste(
paste(prefix,
"rin",
"density",
sep = "_"),
graphics_formats,
sep = "."
))
if (all(file.exists(plot_paths)) &&
all(file.info(plot_paths)$size > 0L)) {
message("Skipping a RIN score density plot")
} else {
if ("RIN" %in% S4Vectors::colnames(x = SummarizedExperiment::colData(x = object))) {
message("Creating a RIN score density plot")
ggplot_object <-
ggplot2::ggplot(data = S4Vectors::as.data.frame(x = SummarizedExperiment::colData(x = object)))
ggplot_object <-
ggplot_object +
ggplot2::xlim(RIN = c(0.0, 10.0))
ggplot_object <-
ggplot_object +
ggplot2::geom_vline(
xintercept = 1.0,
colour = "red",
linetype = 2L
)
ggplot_object <-
ggplot_object +
ggplot2::geom_vline(
xintercept = 4.0,
colour = "yellow",
linetype = 2L
)
ggplot_object <-
ggplot_object +
ggplot2::geom_vline(
xintercept = 7.0,
colour = "green",
linetype = 2L
)
ggplot_object <-
ggplot_object +
ggplot2::geom_density(mapping = ggplot2::aes(
x = .data$RIN,
y = ggplot2::after_stat(x = .data$density)
))
ggplot_object <-
ggplot_object +
ggplot2::labs(
x = "RIN score",
y = "density",
title = "RNA Integrity Number (RIN) Density Plot",
subtitle = paste("Design", argument_list$design_name)
)
for (plot_path in plot_paths) {
ggplot2::ggsave(
filename = plot_path,
plot = ggplot_object,
width = argument_list$plot_width,
height = argument_list$plot_height,
limitsize = FALSE
)
}
rm(plot_path, ggplot_object)
} else {
message("Skipping a RIN score density plot. No RIN variable in column annotation.")
}
}
rm(plot_paths)
}
# Plot Multi-Dimensional Scaling (MDS) ------------------------------------
#' Create a Multi-Dimensional Scaling (MDS) Plot.
#'
#' Create an MDS plot based on Classical (Metric) Multi-Dimensional Scaling of
#' Euclidean distances of transformed counts for each feature. Save PDF and PNG
#' plots.
#'
#' @param object A \code{DESeq2::DESeqTransform} object.
#' @param plot_list A \code{list} of \code{list} objects configuring plots and
#' their \code{ggplot2} aesthetic mappings.
#' @param blind A \code{logical} scalar to indicate a blind or model-based
#' \code{DESeq2::DESeqTransform} object.
#' @references argument_list
#' @references graphics_formats
#' @references output_directory
#' @references prefix
#'
#' @return \code{NULL}
#'
#' @examples
#' \dontrun{
#' plot_mds(object = dds, plot_list = plot_list, blind = FALSE)
#' }
#' @noRd
plot_mds <- function(object,
plot_list = list(),
blind = FALSE) {
suffix <- if (blind)
"blind"
else
"model"
message("Creating ", suffix, " MDS plots:")
dist_object <-
stats::dist(x = t(x = SummarizedExperiment::assay(x = object, i = 1L)))
dist_matrix <- as.matrix(x = dist_object)
# Convert the Multi-Dimensional Scaling matrix into a S4Vectors::DataFrame and
# bind its columns to the sample annotation S4Vectors::DataFrame.
mds_frame <-
base::cbind(
base::data.frame(stats::cmdscale(d = dist_matrix)),
S4Vectors::as.data.frame(x = SummarizedExperiment::colData(x = object))
)
purrr::walk(
.x = plot_list,
.f = function(geom_list) {
aes_character <-
bsfR::bsfrd_geometrics_list_to_character(geom_list = geom_list)
plot_paths <- file.path(output_directory,
paste(
paste(prefix,
"mds",
aes_character,
suffix,
sep = "_"),
graphics_formats,
sep = "."
))
if (all(file.exists(plot_paths)) &&
all(file.info(plot_paths)$size > 0L)) {
message(" Skipping MDS plot: ", aes_character)
} else {
message(" Creating MDS plot: ", aes_character)
ggplot_object <-
ggplot2::ggplot(data = mds_frame)
# geom_line
if (!is.null(x = geom_list$geom_line)) {
mapping_list <-
ggplot2::aes(x = .data$X1, y = .data$X2)
if (!is.null(x = geom_list$geom_line$colour)) {
mapping_list <-
utils::modifyList(x = mapping_list, val = ggplot2::aes(colour = .data[[geom_list$geom_line$colour]]))
}
if (!is.null(x = geom_list$geom_line$group)) {
mapping_list <-
utils::modifyList(x = mapping_list, val = ggplot2::aes(group = .data[[geom_list$geom_line$group]]))
}
if (!is.null(x = geom_list$geom_line$linetype)) {
mapping_list <-
utils::modifyList(x = mapping_list,
val = ggplot2::aes(linetype = .data[[geom_list$geom_line$linetype]]))
}
ggplot_object <-
ggplot_object +
ggplot2::geom_line(mapping = mapping_list,
alpha = I(1 / 3))
rm(mapping_list)
}
# geom_point
if (!is.null(x = geom_list$geom_point)) {
mapping_list <-
ggplot2::aes(x = .data$X1, y = .data$X2)
if (!is.null(x = geom_list$geom_point$colour)) {
mapping_list <-
utils::modifyList(x = mapping_list, val = ggplot2::aes(colour = .data[[geom_list$geom_point$colour]]))
}
if (!is.null(x = geom_list$geom_point$shape)) {
mapping_list <-
utils::modifyList(x = mapping_list, val = ggplot2::aes(shape = .data[[geom_list$geom_point$shape]]))
}
ggplot_object <-
ggplot_object +
ggplot2::geom_point(mapping = mapping_list,
size = 2.0,
alpha = I(1 / 3))
if (!is.null(x = geom_list$geom_point$shape)) {
# For more than six shapes (scale_shape()), a manual scale
# (scale_shape_manual()) needs setting up.
# https://ggplot2.tidyverse.org/reference/scale_shape.html
ggplot_object <-
ggplot_object +
ggplot2::scale_shape_manual(values = seq_len(length.out = nlevels(x = mds_frame[, geom_list$geom_point$shape])))
}
rm(mapping_list)
}
# geom_text
if (!is.null(x = geom_list$geom_text)) {
mapping_list <-
ggplot2::aes(x = .data$X1, y = .data$X2)
if (!is.null(x = geom_list$geom_text$label)) {
mapping_list <-
utils::modifyList(x = mapping_list, val = ggplot2::aes(label = .data[[geom_list$geom_text$label]]))
}
if (!is.null(x = geom_list$geom_text$colour)) {
mapping_list <-
utils::modifyList(x = mapping_list, val = ggplot2::aes(colour = .data[[geom_list$geom_text$colour]]))
}
ggplot_object <-
ggplot_object +
ggplot2::geom_text(mapping = mapping_list,
size = 2.0,
alpha = I(1 / 3))
rm(mapping_list)
}
# geom_path
if (!is.null(x = geom_list$geom_path)) {
mapping_list <-
ggplot2::aes(x = .data$X1, y = .data$X2)
if (!is.null(x = geom_list$geom_path$colour)) {
mapping_list <-
utils::modifyList(x = mapping_list, val = ggplot2::aes(colour = .data[[geom_list$geom_path$colour]]))
}
if (!is.null(x = geom_list$geom_path$group)) {
mapping_list <-
utils::modifyList(x = mapping_list, val = ggplot2::aes(group = .data[[geom_list$geom_path$group]]))
}
if (!is.null(x = geom_list$geom_path$linetype)) {
mapping_list <-
utils::modifyList(x = mapping_list,
val = ggplot2::aes(linetype = .data[[geom_list$geom_path$linetype]]))
}
ggplot_object <-
ggplot_object +
ggplot2::geom_path(
mapping = mapping_list,
arrow = if (is.null(x = geom_list$geom_path$arrow))
NULL
else
grid::arrow(
length = grid::unit(x = 0.08, units = "inches"),
type = "closed"
)
)
rm(mapping_list)
}
ggplot_object <-
ggplot_object +
ggplot2::theme_bw() +
ggplot2::coord_fixed()
for (plot_path in plot_paths) {
ggplot2::ggsave(
filename = plot_path,
plot = ggplot_object,
width = argument_list$plot_width,
height = argument_list$plot_height,
limitsize = FALSE
)
}
rm(plot_path, ggplot_object)
}
rm(plot_paths, aes_character)
}
)
rm(mds_frame,
dist_matrix,
dist_object,
suffix)
}
# Plot Heatmap ------------------------------------------------------------
#' Create a Heat Map Plot.
#'
#' Create a heat map plot based on hierarchical clustering of Euclidean
#' distances of transformed counts for each feature. Save PDF and PNG plots.
#'
#' @param object A \code{DESeq2::DESeqTransform} object.
#' @param plot_list A \code{list} of \code{list} objects configuring plots and
#' their \code{ggplot2} aesthetic mappings.
#' @param blind A \code{logical} scalar to indicate a blind or model-based
#' \code{DESeq2::DESeqTransform} object.
#' @references argument_list
#' @references graphics_formats
#' @references output_directory
#' @references prefix
#'
#' @return
#'
#' @examples
#' \dontrun{
#' plot_heatmap(object = dds, plot_list = plot_list, blind = FALSE)
#' }
#' @noRd
plot_heatmap <- function(object,
plot_list = list(),
blind = FALSE) {
suffix <- if (blind)
"blind"
else
"model"
message("Creating ", suffix, " Heatmap plots:")
# Transpose the counts table, since dist() works with columns and
# assign the sample names as column and row names to the resulting matrix.
dist_object <-
dist(x = t(x = SummarizedExperiment::assay(x = object, i = 1L)))
dist_matrix <- as.matrix(x = dist_object)
base::colnames(x = dist_matrix) <- object$sample
base::rownames(x = dist_matrix) <- object$sample
purrr::walk(
.x = plot_list,
.f = function(geom_list) {
aes_character <-
bsfR::bsfrd_geometrics_list_to_character(geom_list = geom_list)
message(" Creating heatmap plot: ", aes_character)
aes_factors <-
unique(x = unlist(x = geom_list, use.names = TRUE))
plotting_frame <-
S4Vectors::as.data.frame(x = SummarizedExperiment::colData(x = object))[, aes_factors, drop = FALSE]
pheatmap_object <- NULL
if (FALSE) {
# Add the aes_factors together to create a new grouping factor
group_factor <- if (length(x = aes_factors) > 1) {
factor(x = apply(
X = plotting_frame,
MARGIN = 1,
FUN = paste,
collapse = " : "
))
} else {
SummarizedExperiment::colData(x = object)[[aes_factors]]
}
# Assign the grouping factor to the distance matrix row names.
base::colnames(x = dist_matrix) <- NULL
base::rownames(x = dist_matrix) <-
paste(object$sample, group_factor, sep = " - ")
pheatmap_object <-
pheatmap::pheatmap(
mat = dist_matrix,
color = grDevices::colorRampPalette(colors = base::rev(x = RColorBrewer::brewer.pal(
n = 9, name = "Blues"
)))(255),
clustering_distance_rows = dist_object,
clustering_distance_cols = dist_object,
fontsize_row = 6,
silent = TRUE
)
rm(group_factor)
} else {
# Draw a heatmap with covariate column annotation.
pheatmap_object <-
pheatmap::pheatmap(
mat = dist_matrix,
color = grDevices::colorRampPalette(colors = base::rev(x = RColorBrewer::brewer.pal(
n = 9, name = "Blues"
)))(255),
clustering_distance_rows = dist_object,
clustering_distance_cols = dist_object,
annotation_col = plotting_frame,
show_rownames = TRUE,
show_colnames = FALSE,
fontsize_row = 6,
silent = TRUE
)
}
plot_paths <- file.path(output_directory,
paste(
paste(prefix,
"heatmap",
aes_character,
suffix,
sep = "_"),
graphics_formats,
sep = "."
))
base::names(x = plot_paths) <- names(x = graphics_formats)
# PDF output
grDevices::pdf(
file = plot_paths["pdf"],
width = argument_list$plot_width,
height = argument_list$plot_height
)
grid::grid.draw(pheatmap_object$gtable)
base::invisible(x = grDevices::dev.off())
# PNG output
grDevices::png(
filename = plot_paths["png"],
width = argument_list$plot_width,
height = argument_list$plot_height,
units = "in",
res = 300L
)
grid::grid.draw(pheatmap_object$gtable)
base::invisible(x = grDevices::dev.off())
rm(plot_paths,
pheatmap_object,
plotting_frame,
aes_factors,
aes_character)
}
)
rm(dist_matrix,
dist_object,
suffix)
}
# Plot Principal Component Analysis (PCA) ---------------------------------
#' Create a Principal Component Analysis (PCA) Plot.
#'
#' Create a principal component analysis plot based on the top-most variant rows
#' (i.e. features) calculated via \code{genefilter::rowVars()}. Save PDF and PNG
#' plots.
#'
#' @param object A \code{DESeq2::DESeqTransform} object.
#' @param plot_list A \code{list} of \code{list} objects configuring plots and
#' their \code{ggplot2} aesthetic mappings.
#' @param blind A \code{logical} scalar to indicate a blind or model-based
#' \code{DESeq2::DESeqTransform} object.
#' @references argument_list
#' @references graphics_formats
#' @references output_directory
#' @references prefix
#'
#' @return
#'
#' @examples
#' \dontrun{
#' }
#' @noRd
plot_pca <- function(object,
plot_list = list(),
blind = FALSE) {
suffix <- if (blind)
"blind"
else
"model"
message("Creating ", suffix, " PCA plots:")
# Calculate the variance for each row (i.e., feature).
row_variance <-
genefilter::rowVars(x = SummarizedExperiment::assay(x = object, i = 1L))
# Order by decreasing variance and select the top number of rows (i.e.,
# features).
selected_rows <-
order(row_variance, decreasing = TRUE)[seq_len(length.out = min(argument_list$pca_top_number, length(x = row_variance)))]
# Perform a PCA on the (count) matrix returned by
# SummarizedExperiment::assay() for the selected features.
pca_object <-
stats::prcomp(x = t(x = SummarizedExperiment::assay(x = object, i = 1L)[selected_rows, ]))
rm(selected_rows)
# Plot the variance for a maximum of 100 components.
plot_paths <- file.path(output_directory,
paste(
paste(prefix,
"pca",
"variance",
suffix,
sep = "_"),
graphics_formats,
sep = "."
))
plotting_tibble <-
tibble::tibble(
"component" = seq_along(along.with = pca_object$sdev),
"variance" = pca_object$sdev ^ 2 / sum(pca_object$sdev ^ 2)
)
ggplot_object <-
ggplot2::ggplot(data = plotting_tibble[seq_len(length.out = min(100L, length(x = pca_object$sdev))), , drop = FALSE])
ggplot_object <-
ggplot_object +
ggplot2::geom_point(mapping = ggplot2::aes(x = .data$component, y = .data$variance))
ggplot_object <-
ggplot_object +
ggplot2::labs(
x = "Principal Component",
y = "Variance",
title = "Variance by Principal Component",
subtitle = paste("Design", argument_list$design_name)
)
for (plot_path in plot_paths) {
ggplot2::ggsave(
filename = plot_path,
plot = ggplot_object,
width = argument_list$plot_width,
height = argument_list$plot_height,
limitsize = FALSE
)
}
rm(plot_path, ggplot_object, plotting_tibble, plot_paths)
# The pca_object$x matrix has as many columns and rows as there are samples.
pca_dimensions <-
min(argument_list$pca_dimensions, base::ncol(x = pca_object$x))
# Create combinations of all possible principal component pairs.
pca_pair_matrix <-
combn(x = seq_len(length.out = pca_dimensions), m = 2L)
# Calculate the contribution to the total variance for each principal
# component. Establish a label list with principal components and their
# respective percentage of the total variance.
label_list <-
as.list(x = sprintf(
fmt = "PC%i (%.3f%%)",
seq_along(along.with = pca_object$sdev),
100 * (pca_object$sdev ^ 2 / sum(pca_object$sdev ^ 2))
))
base::names(x = label_list) <-
paste0("PC", seq_along(along.with = pca_object$sdev))
label_function <- function(value) {
return(label_list[value])
}
purrr::walk(
.x = plot_list,
.f = function(geom_list) {
aes_character <-
bsfR::bsfrd_geometrics_list_to_character(geom_list = geom_list)
message(" Creating PCA plot: ", aes_character)
plot_paths <- file.path(output_directory,
paste(
paste(prefix,
"pca",
aes_character,
suffix,
sep = "_"),
graphics_formats,
sep = "."
))
# Assemble the data for the plot from the rotated data matrix.
pca_frame <- base::as.data.frame(x = pca_object$x)
plotting_frame <-
base::data.frame(
"component_1" = factor(levels = paste0(
"PC", seq_len(length.out = pca_dimensions)
)),
"component_2" = factor(levels = paste0(
"PC", seq_len(length.out = pca_dimensions)
)),
"x" = double(),
"y" = double(),
# Also initialise all variables of the column data, but do not
# include any data (i.e., 0L rows).
S4Vectors::as.data.frame(x = SummarizedExperiment::colData(x = object)[0L,])
)
for (column_number in seq_len(length.out = base::ncol(x = pca_pair_matrix))) {
pca_label_1 <-
paste0("PC", pca_pair_matrix[1L, column_number])
pca_label_2 <-
paste0("PC", pca_pair_matrix[2L, column_number])
plotting_frame <- base::rbind(
plotting_frame,
base::data.frame(
"component_1" = pca_label_1,
"component_2" = pca_label_2,
"x" = pca_frame[, pca_label_1],
"y" = pca_frame[, pca_label_2],
S4Vectors::as.data.frame(x = SummarizedExperiment::colData(x = object))
)
)
rm(pca_label_1, pca_label_2)
}
rm(column_number)
ggplot_object <- ggplot2::ggplot(data = plotting_frame)
# geom_line
if (!is.null(x = geom_list$geom_line)) {
mapping_list <-
ggplot2::aes(x = .data$x, y = .data$y)
if (!is.null(x = geom_list$geom_line$colour)) {
mapping_list <-
utils::modifyList(x = mapping_list, val = ggplot2::aes(colour = .data[[geom_list$geom_line$colour]]))
}
if (!is.null(x = geom_list$geom_line$group)) {
mapping_list <-
utils::modifyList(x = mapping_list, val = ggplot2::aes(group = .data[[geom_list$geom_line$group]]))
}
ggplot_object <-
ggplot_object +
ggplot2::geom_line(mapping = mapping_list,
alpha = I(1 / 3))
rm(mapping_list)
}
# geom_point
if (!is.null(x = geom_list$geom_point)) {
mapping_list <-
ggplot2::aes(x = .data$x, y = .data$y)
if (!is.null(x = geom_list$geom_point$colour)) {
mapping_list <-
utils::modifyList(x = mapping_list, val = ggplot2::aes(colour = .data[[geom_list$geom_point$colour]]))
}
if (!is.null(x = geom_list$geom_point$shape)) {
mapping_list <-
utils::modifyList(x = mapping_list, val = ggplot2::aes(shape = .data[[geom_list$geom_point$shape]]))
}
ggplot_object <-
ggplot_object +
ggplot2::geom_point(mapping = mapping_list,
size = 2.0,
alpha = I(1 / 3))
if (!is.null(x = geom_list$geom_point$shape)) {
# For more than six shapes (scale_shape()), a manual scale
# (scale_shape_manual()) needs setting up.
# https://ggplot2.tidyverse.org/reference/scale_shape.html
ggplot_object <-
ggplot_object +
ggplot2::scale_shape_manual(values = seq_len(length.out = nlevels(x = plotting_frame[, geom_list$geom_point$shape])))
}
rm(mapping_list)
}
# geom_text
if (!is.null(x = geom_list$geom_text)) {
mapping_list <-
ggplot2::aes(x = .data$x, y = .data$y)
if (!is.null(x = geom_list$geom_text$label)) {
mapping_list <-
utils::modifyList(x = mapping_list, val = ggplot2::aes(label = .data[[geom_list$geom_text$label]]))
}
if (!is.null(x = geom_list$geom_text$colour)) {
mapping_list <-
utils::modifyList(x = mapping_list, val = ggplot2::aes(colour = .data[[geom_list$geom_text$colour]]))
}
ggplot_object <-
ggplot_object +
ggplot2::geom_text(mapping = mapping_list,
size = 2.0,
alpha = I(1 / 3))
rm(mapping_list)
}
# geom_path
if (!is.null(x = geom_list$geom_path)) {
mapping_list <-
ggplot2::aes(x = .data$x, y = .data$y)
if (!is.null(x = geom_list$geom_path$colour)) {
mapping_list <-
utils::modifyList(x = mapping_list, val = ggplot2::aes(colour = .data[[geom_list$geom_path$colour]]))
}
if (!is.null(x = geom_list$geom_path$group)) {
mapping_list <-
utils::modifyList(x = mapping_list, val = ggplot2::aes(group = .data[[geom_list$geom_path$group]]))
}
if (!is.null(x = geom_list$geom_path$linetype)) {
mapping_list <-
utils::modifyList(x = mapping_list, val = ggplot2::aes(linetype = .data[[geom_list$geom_path$linetype]]))
}
ggplot_object <-
ggplot_object +
ggplot2::geom_path(
mapping = mapping_list,
arrow = grid::arrow(
length = grid::unit(x = 0.08, units = "inches"),
type = "closed"
)
)
rm(mapping_list)
}
ggplot_object <-
ggplot_object +
ggplot2::facet_grid(
rows = ggplot2::vars(.data$component_1),
cols = ggplot2::vars(.data$component_2),
labeller = ggplot2::labeller(component_1 = label_function, component_2 = label_function)
)
for (plot_path in plot_paths) {
ggplot2::ggsave(
filename = plot_path,
plot = ggplot_object,
width = argument_list$plot_width,
height = argument_list$plot_height,
limitsize = FALSE
)
}
rm(plot_path, ggplot_object)
if (argument_list$verbose) {
# Write the PCA plot data frame.
utils::write.table(
x = plotting_frame,
file = file.path(output_directory,
paste(
paste(prefix,
"pca",
aes_character,
suffix,
sep = "_"),
"tsv",
sep = "."
)),
sep = "\t",
row.names = FALSE,
col.names = TRUE
)
}
rm(pca_frame,
plotting_frame,
aes_character)
}
)
rm(
label_function,
label_list,
pca_pair_matrix,
pca_object,
row_variance,
pca_dimensions,
suffix
)
}
# Start of main script ----------------------------------------------------
message("Processing design '", argument_list$design_name, "'")
# Set the number of parallel threads in the MulticoreParam instance.
BiocParallel::register(BPPARAM = BiocParallel::MulticoreParam(workers = argument_list$threads))
# Read the serialised GenomicRanges::GRanges object specifying the reference
# transcriptome.
transcriptome_granges <-
readr::read_rds(file = argument_list$transcriptome_path)
global_design_list <- bsfR::bsfrd_read_design_list(
genome_directory = argument_list$genome_directory,
design_name = argument_list$design_name,
verbose = argument_list$verbose
)
feature_dframe <- bsfR::bsfrd_initialise_feature_dframe(
genome_directory = argument_list$genome_directory,
design_name = argument_list$design_name,
feature_types = "gene",
feature_granges = transcriptome_granges,
verbose = argument_list$verbose
)
deseq_data_set <-
initialise_deseq_data_set(design_list = global_design_list,
transcriptome_granges = transcriptome_granges)
rm(transcriptome_granges)
# TODO: Write the matrix of feature versus model coefficients as a data.frame to
# disk.
# Cooks Distances Plot ----------------------------------------------------
plot_cooks_distances(object = deseq_data_set)
# RIN Score Plot ----------------------------------------------------------
# If RIN scores are annotated in the sample frame, plot their distribution.
plot_rin_scores(object = deseq_data_set)
# Likelihood Ratio Test (LRT) ---------------------------------------------
#' Run an Likelihood Ratio Test (LRT) on a Reduced Formula.
#'
#' @param reduced_formula_character A \code{character} scalar of a reduced
#' formula with a "reduced_name" attribute.
#' @references output_directory
#' @references prefix
#' @references global_design_list
#'
#' @return A \code{list} of LRT summary information.
#' @noRd
#'
#' @examples
lrt_reduced_formula_test <-
function(reduced_formula_character) {
lrt_list <- NULL
# Skip NA or empty character vectors.
if (is.na(x = reduced_formula_character) ||
!base::nzchar(x = reduced_formula_character)) {
# Return NULL instead of a tibble, which can still be processed by
# dpylr::bind_rows().
return(lrt_list)
}
file_path_lrt <-
file.path(output_directory,
paste(paste(
prefix,
"lrt",
attr(x = reduced_formula_character, which = "reduced_name"),
sep = "_"
),
"rds",
sep = "."))
file_path_all <-
file.path(output_directory,
paste(paste(
prefix,
"lrt",
attr(x = reduced_formula_character, which = "reduced_name"),
"differential",
sep = "_"
),
"tsv",
sep = "."))
file_path_significant <-
file.path(output_directory,
paste(
paste(
prefix,
"lrt",
attr(x = reduced_formula_character, which = "reduced_name"),
"significant",
sep = "_"
),
"tsv",
sep = "."
))
if (file.exists(file_path_significant) &&
file.info(file_path_significant)$size > 0L) {
message(
"Skipping reduced formula: ",
attr(x = reduced_formula_character, which = "reduced_name")
)
# Read the existing table to count the number of significant features
# after LRT.
deseq_results_lrt_frame <-
utils::read.table(file = file_path_significant,
header = TRUE,
sep = "\t")
lrt_list <- base::list(
"design" = global_design_list$design,
"full_formula" = global_design_list$full_formula,
"reduced_name" = attr(x = reduced_formula_character, which = "reduced_name"),
"reduced_formula" = reduced_formula_character,
"significant" = base::sum(x = deseq_results_lrt_frame$significant),
"effective" = base::sum(deseq_results_lrt_tibble$effective)
)
rm(deseq_results_lrt_frame)
} else {
message(
"Processing reduced formula: ",
attr(x = reduced_formula_character, which = "reduced_name")
)
# DESeq LRT requires either two model formulas or two model matrices.
# Create a reduced model matrix and check whether it is full rank.
formula_full <-
stats::as.formula(object = global_design_list$full_formula)
result_list_full <-
check_model_matrix(
model_matrix = stats::model.matrix.default(
object = formula_full,
data = SummarizedExperiment::colData(x = deseq_data_set)
)
)
formula_reduced <-
stats::as.formula(object = reduced_formula_character)
model_matrix_reduced <-
stats::model.matrix.default(object = formula_reduced,
data = SummarizedExperiment::colData(x = deseq_data_set))
result_list_reduced <-
check_model_matrix(model_matrix = model_matrix_reduced)
if (argument_list$verbose) {
message("Writing initial reduced model matrix")
utils::write.table(
x = base::as.data.frame(x = model_matrix_reduced),
file = file.path(
output_directory,
paste0(
prefix,
"_model_matrix_",
attr(x = reduced_formula_character, which = "reduced_name"),
"_initial.tsv"
)
),
sep = "\t",
row.names = TRUE,
col.names = TRUE
)
message("Writing modified reduced model matrix")
utils::write.table(
x = base::as.data.frame(x = result_list_reduced$model_matrix),
file = file.path(
output_directory,
paste0(
prefix,
"_model_matrix_",
attr(x = reduced_formula_character, which = "reduced_name"),
"_modified.tsv"
)
),
sep = "\t",
row.names = TRUE,
col.names = TRUE
)
}
full_rank <-
result_list_full$formula_full_rank &
result_list_reduced$formula_full_rank
deseq_data_set_lrt <-
DESeq2::DESeq(
object = deseq_data_set,
test = "LRT",
full = if (full_rank)
formula_full
else
result_list_full$model_matrix,
reduced = if (full_rank)
formula_reduced
else
result_list_reduced$model_matrix
)
rm(
full_rank,
result_list_reduced,
result_list_full,
formula_reduced,
formula_full
)
# print(x = paste("DESeqDataSet LRT result names for",
# attr(x = reduced_formula_character, which = "reduced_name")))
# print(x = DESeq2::resultsNames(object = deseq_data_set_lrt))
deseq_results_lrt <-
DESeq2::results(
object = deseq_data_set_lrt,
alpha = global_design_list$padj_threshold,
parallel = TRUE
)
# Serialise the DESeqResults to disk.
readr::write_rds(x = deseq_results_lrt, file = file_path_lrt)
# Convert the DESeqResults object, which extends the S4Vectors::DataFrame
# class with additional meta data annotation into a tibble.
deseq_results_lrt_tibble <-
tibble::as_tibble(x = S4Vectors::as.data.frame(x = S4Vectors::combineCols(x = feature_dframe,
deseq_results_lrt)))
deseq_results_lrt_tibble <-
dplyr::mutate(
.data = deseq_results_lrt_tibble,
# Calculate a logical indicating significance.
"significant" = dplyr::if_else(
condition = .data$padj <= .env$global_design_list$padj_threshold,
true = TRUE,
false = FALSE,
missing = FALSE
),
# Calculate a logical indicating effectiveness.
"effective" = dplyr::if_else(
condition = base::abs(x = .data$log2FoldChange) >= .env$global_design_list$l2fc_threshold,
true = TRUE,
false = FALSE,
missing = FALSE
)
)
# Write all features.
readr::write_tsv(x = deseq_results_lrt_tibble,
file = file_path_all)
# Write only significant features.
readr::write_tsv(
x = dplyr::filter(
.data = deseq_results_lrt_tibble,
.data$padj <= .env$global_design_list$padj_threshold
),
file = file_path_significant
)
lrt_list <- base::list(
"design" = global_design_list$design,
"full_formula" = global_design_list$full_formula,
"reduced_name" = attr(x = reduced_formula_character, which = "reduced_name"),
"reduced_formula" = reduced_formula_character,
"significant" = base::sum(deseq_results_lrt_tibble$significant),
"effective" = base::sum(deseq_results_lrt_tibble$effective)
)
rm(deseq_results_lrt_tibble,
deseq_results_lrt,
deseq_data_set_lrt)
}
rm(file_path_lrt, file_path_all, file_path_significant)
return(lrt_list)
}
#' Convert a Reduced Formula Character Vector into a Scalar.
#'
#' @param reduced_formula_character A two component \code{character} vector
#' after splitting the reduced formula name [1L] and the reduced formula
#' [2L].
#'
#' @return A \code{character} scalar of the reduced formula with a
#' "reduced_name" attribute.
#' @noRd
#'
#' @examples
lrt_reduced_formula_scalar <-
function(reduced_formula_character) {
character_scalar <- reduced_formula_character[2L]
attr(x = character_scalar, which = "reduced_name") <-
reduced_formula_character[1L]
return(character_scalar)
}
# The "reduced_formulas" variable of the design list encodes one or more named
# reduced formulas for LRT (e.g., "name_1:~genotype + gender;name_2:~1"). Split
# reduced formulas on ";" characters, then select the formula [2L] and assign
# the name [1L] as "reduced_name" attribute. Finally run LRT tests on the
# reduced formulas and combine the resulting LRT tibbles into a LRT summary
# tibble.
lrt_summary_tibble <-
dplyr::bind_rows(purrr::map(
.x = purrr::map(
.x = stringr::str_split(
string = stringr::str_split(
string = global_design_list$reduced_formulas[1L],
pattern = stringr::fixed(pattern = ";")
)[[1L]],
pattern = stringr::fixed(pattern = ":")
),
.f = lrt_reduced_formula_scalar
),
.f = lrt_reduced_formula_test
))
# Write the LRT summary tibble to disk.
utils::write.table(
x = lrt_summary_tibble,
file = file.path(output_directory,
paste(
paste(prefix,
"lrt",
"summary",
sep = "_"),
"tsv",
sep = "."
)),
sep = "\t",
row.names = FALSE,
col.names = TRUE
)
rm(lrt_summary_tibble)
# Plot Aesthetics ---------------------------------------------------------
# The plot_aes variable of the design data frame supplies a pipe-separated list
# of plot definitions. Each plot definition contains a semi-colon-separated list
# of geometric objects and their associated aesthetics for each plot, which is a
# comma-separated list of aesthetics=variable mappings.
#
# geom_point:colour=test_1,shape=test_2;geom_line:colour=test_3,group=test_4|
# geom_point:colour=test_a,shape=test_b;geom_line:colour=test_c,group=test_d
#
# Convert into a list of list objects with variables and aesthetics as names.
plot_list <-
bsfR::bsfrd_plots_character_to_list(plots_character = global_design_list$plot_aes)
# DESeqDataSet Results ----------------------------------------------------
print(x = "DESeqDataSet result names:")
print(x = DESeq2::resultsNames(object = deseq_data_set))
# Export RAW counts -------------------------------------------------------
# Export the raw counts from the DESeqDataSet object.
readr::write_tsv(
x = S4Vectors::as.data.frame(x = S4Vectors::combineCols(
x = feature_dframe,
methods::as(
object = DESeq2::counts(object = deseq_data_set, normalized = FALSE),
Class = "DataFrame"
)
)),
file = file.path(output_directory,
paste(
paste(prefix,
"counts",
"raw",
sep = "_"),
"tsv",
sep = "."
))
)
# Export normalised counts ------------------------------------------------
# Export the normalised counts from the DESeq2::DESeqDataSet object.
readr::write_tsv(
x = S4Vectors::as.data.frame(
x = S4Vectors::combineCols(
x = SummarizedExperiment::rowData(x = deseq_data_set),
methods::as(
object = DESeq2::counts(object = deseq_data_set, normalized = TRUE),
Class = "DataFrame"
)
)
),
file = file.path(output_directory,
paste(
paste(prefix,
"counts",
"normalised",
sep = "_"),
"tsv",
sep = "."
))
)
# Export FPKM values ------------------------------------------------------
# Export FPKM values from the DESeq2::DESeqDataSet object.
readr::write_tsv(
x = S4Vectors::as.data.frame(x = S4Vectors::combineCols(
x = feature_dframe,
methods::as(
object = DESeq2::fpkm(object = deseq_data_set),
Class = "DataFrame"
)
)),
file = file.path(output_directory,
paste(
paste(prefix,
"fpkms",
sep = "_"),
"tsv",
sep = "."
))
)
# Plot FPKM values.
plot_fpkm_values(object = DESeq2::fpkm(object = deseq_data_set))
# DESeqTransform ----------------------------------------------------------
# MDS Plot ----------------------------------------------------------------
# PCA plot ----------------------------------------------------------------
# Heatmap Plot ------------------------------------------------------------
# Export VST counts -------------------------------------------------------
for (blind in c(FALSE, TRUE)) {
suffix <- if (blind)
"blind"
else
"model"
deseq_transform <-
initialise_deseq_transform(deseq_data_set = deseq_data_set, blind = blind)
plot_mds(object = deseq_transform,
plot_list = plot_list,
blind = blind)
plot_pca(object = deseq_transform,
plot_list = plot_list,
blind = blind)
plot_heatmap(object = deseq_transform,
plot_list = plot_list,
blind = blind)
# Export the vst counts from the DESeq2::DESeqTransform object.
readr::write_tsv(
x = S4Vectors::as.data.frame(x = S4Vectors::combineCols(
x = feature_dframe,
methods::as(
object = SummarizedExperiment::assay(x = deseq_transform, i = 1L),
Class = "DataFrame"
)
)),
file = file.path(output_directory,
paste(
paste(prefix,
"counts",
"vst",
suffix,
sep = "_"),
"tsv",
sep = "."
))
)
rm(deseq_transform, suffix)
}
rm(blind, plot_list)
rm(
feature_dframe,
deseq_data_set,
global_design_list,
output_directory,
prefix,
graphics_formats,
argument_list,
plot_pca,
plot_heatmap,
plot_mds,
plot_rin_scores,
plot_cooks_distances,
plot_fpkm_values,
lrt_reduced_formula_test,
lrt_reduced_formula_scalar,
initialise_deseq_transform,
initialise_deseq_data_set,
check_model_matrix,
fix_model_matrix
)
message("All done")
# Finally, print all objects that have not been removed from the environment.
if (length(x = ls())) {
print(x = "Remaining objects:")
print(x = ls())
}
print(x = sessioninfo::session_info())
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.