#' @title Plot evolutionary signatures across transcriptomes, for multiple expression sets
#' @description \emph{PlotSignatureMultiple} is used to compare multiple
#' transcriptomic index profiles (e.g. TAI) over a common process of interest
#' (e.g. a developmental process).
#' The main use case is visualising how removing different subsets of genes from
#' the expression set can perturb the transcriptome index signal.
#'
#' @param ExpressionSets a list of PhyloExpressionSet, DivergenceExpressionSet or PolymorphismsExpressionSet objects.
#' @param set.labels a character vector of labels, one for each given expression set
#' @param measure type of transcriptome index that shall be computed. E.g.
#' \itemize{
#' \item \code{measure = "TAI"} (Transcriptome Age Index)
#' \item \code{measure = "TDI"} (Transcriptome Divergence Index)
#' \item \code{measure = "TPI"} (Transcriptome Polymorphism Index)
#' }
#' @param TestStatistic a string defining the type of test statistics to be used to quantify the statistical significance the present phylotranscriptomics pattern.
#' Possible values can be:
#' \itemize{
#' \item \code{TestStatistic} = \code{"FlatLineTest"} : Statistical test for the deviation from a flat line
#' \item \code{TestStatistic} = \code{"ReductiveHourglassTest"} : Statistical test for the existence of a hourglass shape (high-low-high pattern)
#' \item \code{TestStatistic} = \code{"ReverseHourglassTest"} : Statistical test for the existence of a reverse hourglass pattern (low-high-low pattern)
#' \item \code{TestStatistic} = \code{"EarlyConservationTest"} : Statistical test for the existence of a early conservation pattern (low-high-high pattern)
#' \item \code{TestStatistic} = \code{"LateConservationTest"} : Statistical test for the existence of a late conservation pattern (high-high-low pattern)
#' }
#' @param modules a list storing three elements for the \code{\link{ReductiveHourglassTest}}, \code{\link{EarlyConservationTest}}, \code{\link{LateConservationTest}},
#' or \code{\link{ReverseHourglassTest}}: early, mid, and late.
#' Each element expects a numeric vector specifying the developmental stages
#' or experiments that correspond to each module. For example:
#' \itemize{
#' \item \code{modules} = \code{list(early = 1:2, mid = 3:5, late = 6:7)} divides a dataset storing seven developmental stages into 3 modules.
#' }
#' @param permutations a numeric value specifying the number of permutations to be performed for the \code{\link{FlatLineTest}}, \code{\link{EarlyConservationTest}}, \code{\link{LateConservationTest}}, \code{\link{ReductiveHourglassTest}} or \code{\link{ReverseHourglassTest}}.
#' @param p.value a boolean value specifying whether the p-value of the test statistic shall be printed within the legend, for each expression set.
#' @param shaded.area a boolean value specifying whether a shaded area shall
#' be drawn for the developmental stages defined to be the presumptive phylotypic period.
#' @param xlab label of x-axis.
#' @param ylab label of y-axis.
#' @param main figure title.
#' @param legend.title legend title.
#' @param lwd line width.
#' @param alpha transparency of the shaded area and error ribbon (between [0,1]). Default is \code{alpha = 0.1}.
#' @param y.ticks number of ticks on the y-axis. Default is \code{ticks = 3}.
#'
#' @return a ggplot object visualising the transcriptome index of each given
#' expression set, together with its standard deviation per stage,
#' obtained by permuting the gene ages.
#' The profiles are shown on the same axes, so that they can be readily compared.
#' Optionally, the p-value of each profile, with respect to the choice of statistic,
#' is shown.
#'
#' @author Stefan Manolache
#'
#' @export
#'
#' @examples
#' data(PhyloExpressionSetExample)
#'
#' # remove top 1% expressed genes
#' genes.top_expression <- TopExpressionGenes(PhyloExpressionSetExample, p=.99)
#' PhyloExpressionSetExample.top_removed <- subset(PhyloExpressionSetExample,
#' !(GeneID %in% genes.top_expression))
#'
#' expression_sets = list(PhyloExpressionSetExample, PhyloExpressionSetExample.top_removed)
#' set_labels = c("100%", "99%")
#'
#' # Flat line test
#' PlotSignatureMultiple(ExpressionSets = expression_sets,
#' set.labels = set_labels,
#' TestStatistic="FlatLineTest",
#' main = "A. thaliana embryogenesis",
#' legend.title = "Top Expressed Genes Quantile")
#'
#' # Reductive hourglass test
#' PlotSignatureMultiple(ExpressionSets = expression_sets,
#' set.labels = set_labels,
#' TestStatistic="ReductiveHourglassTest",
#' main = "A. thaliana embryogenesis",
#' legend.title = "Top Expressed Genes Quantile",
#' modules=list(early=1:2, mid=3:5, late=6:7),
#' shaded.area=TRUE)
PlotSignatureMultiple <-
function(ExpressionSets,
set.labels,
measure = "TAI",
TestStatistic = "FlatLineTest",
modules = NULL,
permutations = 1000,
p.value = TRUE,
shaded.area = FALSE,
xlab = "Ontogeny",
ylab = "Transcriptome Index",
main = "",
legend.title = "Expression Sets",
lwd = 4,
alpha = 0.1,
y.ticks = 3) {
# bind global variables locally: for passing CMD check
Group <- Stage <- StdDev <- NULL
## Parameter validation
TXI <- switch(
measure,
"TAI" = TAI,
"TDI" = TDI,
"TPI" = TPI,
stop("Measure '",measure, "' is not available for this function. Please specify a measure supporting by this function.",
call. = FALSE
)
)
Test <- switch(
TestStatistic,
"FlatLineTest" = FlatLineTest,
"ReductiveHourglassTest" = ReductiveHourglassTest,
"ReverseHourglassTest" = ReverseHourglassTest,
"EarlyConservationTest" = EarlyConservationTest,
"LateConservationTest" = LateConservationTest,
stop("Please select the available test: 'FlatLineTest', 'ReductiveHourglassTest', 'ReverseHourglassTest', 'EarlyConservationTest' or 'LateConservationTest' using the argument test = 'FlatLineTest'",
call. = FALSE
)
)
if (TestStatistic %in% c(
"ReductiveHourglassTest",
"ReverseHourglassTest",
"EarlyConservationTest",
"LateConservationTest"
) & is.null(modules))
stop(
"Please specify the three modules: early, mid, and late using the argument 'module = list(early = ..., mid = ..., late = ...)'.",
call. = FALSE
)
if (length(ExpressionSets) != length(set.labels))
stop("Please specify an equal number of set labels and expression sets.", call. = FALSE)
# The expression sets should have the same domains
same_domain <- ExpressionSets |>
lapply(function(x) names(x[, -c(1:2)])) |>
unique() |>
(\(x) length(x) == 1)()
if (!same_domain)
stop("Please ensure the expression sets have the same domain of expression stages.")
# [TODO] Usually the expression sets will share a common superset of genes
# Would it be worth it then to compute only one shared bootstrap matrix?
if (TestStatistic == "FlatLineTest")
test_results <- lapply(ExpressionSets, \(x) Test(x, permutations=permutations))
else
test_results <- lapply(ExpressionSets, \(x) Test(x, permutations=permutations, modules=modules))
TIs <- lapply(ExpressionSets, TXI)
plot <- ggplot2::ggplot()
p_label <- switch(
TestStatistic,
"FlatLineTest" = "p_flt",
"ReductiveHourglassTest" = "p_rht",
"ReverseHourglassTest" = "p_reverse_hourglass",
"EarlyConservationTest" = "p_ect",
"LateConservationTest" = "p_lct",
"p_test"
)
labels <- vector()
# Plot TXI curves and errors in a shared domain
for (i in 1:length(ExpressionSets)) {
p_value <- test_results[[i]]$p.value
std_dev <- test_results[[i]]$std.dev
label <- paste0("<br><span style='font-size:16pt'>**",
set.labels[[i]],
"**</span>"
)
if (p.value)
label <- paste0(label,
"<br><br>**",
p_label,
"**: ",
as.character(signif(p_value,digits=3)),
"<br>")
labels <- c(labels, label)
TI <- tibble::tibble(Stage = names(TIs[[i]]),
TI = TIs[[i]],
StdDev = std_dev,
Group = label
)
plot <- plot +
ggplot2::geom_line(
data=TI,
ggplot2::aes(
x=factor(Stage, levels=unique(Stage)),
y=TI,
group = Group,
color = Group
),
lwd = lwd
) +
ggplot2::geom_ribbon(
data=TI,
ggplot2::aes(
x = factor(Stage, levels=unique(Stage)),
ymin = TI - StdDev,
ymax = TI + StdDev,
group=Group,
fill=Group
),
alpha=alpha
)
}
# Plot shaded area
if (shaded.area)
plot <- plot + ggplot2::geom_rect(ggplot2::aes(
xmin = modules[[2]][1] - 0.3,
xmax = modules[[2]][length(modules[[2]])] + 0.3,
ymin = -Inf,
ymax = Inf), fill = "#4d004b", alpha = alpha)
# Labels and style
plot <- plot +
ggplot2::scale_y_continuous(
breaks = scales::pretty_breaks(n = y.ticks)
) +
ggplot2::scale_colour_discrete(breaks=labels) +
ggplot2::scale_fill_discrete(breaks=labels) +
ggplot2::labs(
x = xlab,
y = ylab,
title=main,
colour = legend.title,
fill = legend.title
) +
ggplot2::theme_minimal() +
ggplot2::theme(
title = ggplot2::element_text(face = "bold", size=18),
legend.title = ggplot2::element_text(face = "bold", size=18),
legend.text = ggtext::element_markdown(size=14),
axis.title = ggplot2::element_text(face = "bold", size=20),
axis.text.y = ggplot2::element_text(face = "bold", size=18),
axis.text.x = ggplot2::element_text(face = "bold", size=18),
panel.background = ggplot2::element_blank(),
strip.text.x = ggplot2::element_text(
colour = "black",
face = "bold",
size = 18
)
)
return(plot)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.