#' @importFrom grDevices colorRampPalette
#' @importFrom stats density dist hclust
#' @importFrom S4Vectors isEmpty
NULL
#' @title Get colors by bin.
#'
#' @description Get colors for elements according to their bin.
#' Colors are assigned to bins forming a gradient from \code{col1}
#' to \code{col2} in the order of \code{levels{b}}. \code{col0} is assigned
#' to the neutral bin (attribute \code{""}) if available.
#'
#' @param b A factor that groups elements into bins (typically the output of
#' \code{\link{bin}}).
#' @param col1 First color.
#' @param col2 Second color.
#' @param col0 Neutral color.
#'
#' @seealso \code{\link{bin}}.
#'
#' @return A character vector with colors for the elements in \code{b}.
#'
#' @examples
#' set.seed(1)
#' x <- rnorm(100)
#' b <- bin(x, "equalN", nElements = 10)
#' cols <- getColsByBin(b)
#'
#' @importFrom grDevices col2rgb
#'
#' @export
getColsByBin <- function(b,
col1 = c("#003C30", "#01665E", "#35978F",
"#80CDC1", "#C7EAE5"),
col2 = c("#F6E8C3", "#DFC27D", "#BF812D",
"#8C510A", "#543005"),
col0 = "#F5F5F5") {
if (!is.factor(b)) {
b <- factor(b, levels = unique(b))
b <- setZeroBin(b, NA)
}
if (!is.null(getZeroBin(b)) && !is.na(getZeroBin(b))) {
bin0 <- getZeroBin(b)
cols <- c(colorRampPalette(col1)(bin0 - 1L),
"#AAAAAA33",
colorRampPalette(col2)(nlevels(b) - bin0))
} else {
nh <- round(nlevels(b) / 2)
cols <- c(colorRampPalette(col1)(nh),
colorRampPalette(col2)(nlevels(b) - nh))
}
res <- cols[b]
names(cols) <- levels(b)
attr(res, "cols") <- cols
return(res)
}
#' @title Histogram of binned elements.
#'
#' @description Plot a histogram of binned elements with binning information.
#'
#' @param x A numerical vector with the values used for binning.
#' @param b A factor that groups elements of \code{x} into bins (typically
#' the output of \code{\link{bin}}).
#' @param breaks A \code{numeric} scalar controlling the histogram breaks
#' (passed to \code{geom_hist(..., bins = breaks)}).
#' @param xlab,ylab,main \code{character} scalars that set the x-axis label,
#' y-axis label and the main title. Use \code{""} to suppress the label.
#' @param legendPosition A \code{character} scalar.
#' If not \code{"none"}, draw a legend with binning information. The value
#' is used to control the legend position and will be passed to
#' \code{theme(legend.position = legendPosition)}.
#' @param legend Deprecated (ignored). Please use \code{legendPosition} to
#' control the drawing and position of the legend.
#' @param legend.cex Deprecated (ignored). You can use
#' \code{\link[ggplot2]{theme}} to set legend and other graphical
#' parameters.
#' @param ... Further arguments passed to \code{\link{getColsByBin}}.
#'
#' @seealso \code{\link{getColsByBin}}, \code{\link[ggplot2]{geom_histogram}}
#'
#' @return The generated histogram as a \code{ggplot} object.
#'
#' @examples
#' set.seed(1)
#' x <- rnorm(100)
#' b <- bin(x, "equalN", nElements = 10)
#' plotBinHist(x, b)
#'
#' @importFrom ggplot2 ggplot aes geom_histogram element_blank theme_classic theme
#' @importFrom cli cli_warn
#'
#' @export
plotBinHist <- function(x, b,
breaks = 6 * nlevels(b),
xlab = deparse(substitute(x, env = as.environment(-1))),
ylab = "Frequency",
main = "",
legendPosition = "right",
legend = NULL,
legend.cex = NULL, ...) {
.assertVector(x = x, type = "numeric")
.assertVector(x = b, type = "factor", len = length(x))
.assertScalar(x = breaks, type = "numeric", allowNULL = TRUE)
.assertScalar(x = xlab, type = "character")
.assertScalar(x = ylab, type = "character")
.assertScalar(x = main, type = "character")
if (!is.null(legend)) {
cli_warn(c("{.arg legend} is deprecated and ignored.",
"i" = "You can use {.arg legendPosition} to control legend drawing and position"))
}
if (!is.null(legend.cex)) {
cli_warn(c("{.arg legend.cex} is deprecated and ignored.",
"i" = "You can use {.fn theme} to control legend and other graphical parameters"))
}
cols <- getColsByBin(b, ...)
bincols <- attr(cols, "cols")
# add number of elements to bin names
bn <- unclass(table(b))
levels(b) <- names(bincols) <- paste0(levels(b), ": ", bn)
p <- ggplot(data = data.frame(x = x, b = b), mapping = aes(x = x, fill = b)) +
geom_histogram(bins = breaks, colour = "gray20") +
scale_fill_manual(values = bincols) +
labs(x = ifelse(xlab != "", xlab, element_blank()),
y = ifelse(ylab != "", ylab, element_blank()),
main = ifelse(main != "", main, element_blank()),
fill = "Bins") +
theme_classic() +
theme(legend.position = legendPosition)
return(p)
}
#' @title Density plot of binned elements.
#'
#' @description Plot the density of binned elements with binning information.
#'
#' @param x A numerical vector with the values used for binning.
#' @param b A factor that groups elements of \code{x} into bins (typically the
#' output of \code{\link{bin}}).
#' @param xlab,ylab,main \code{character} scalars that set the x-axis label,
#' y-axis label and the main title. Use \code{""} to suppress the label.
#' @param legendPosition A \code{character} scalar.
#' If not \code{"none"}, draw a legend with binning information. The value
#' is used to control the legend position and will be passed to
#' \code{theme(legend.position = legendPosition)}.
#' @param legend Deprecated (ignored). Please use \code{legendPosition} to
#' control the drawing and position of the legend.
#' @param legend.cex Deprecated (ignored). You can use
#' \code{\link[ggplot2]{theme}} to set legend and other graphical
#' parameters.
#' @param ... Further arguments passed to \code{\link{getColsByBin}}.
#'
#' @seealso \code{\link{getColsByBin}}, \code{\link[ggplot2]{geom_density}}
#'
#' @return The generated density plot as a \code{ggplot} object.
#'
#' @examples
#' set.seed(1)
#' x <- rnorm(100)
#' b <- bin(x, "equalN", nElements = 10)
#' plotBinDensity(x, b)
#'
#' @importFrom stats density
#' @importFrom ggplot2 ggplot aes geom_area geom_line geom_rug element_blank
#' theme_classic theme
#' @importFrom cli cli_warn
#' @importFrom rlang .data
#'
#' @export
plotBinDensity <- function(x, b,
xlab = deparse(
substitute(x, env = as.environment(-1))),
ylab = "Density",
main = "",
legendPosition = "right",
legend = NULL,
legend.cex = NULL,
...) {
.assertVector(x = x, type = "numeric")
.assertVector(x = b, type = "factor", len = length(x))
stopifnot("breaks" %in% names(attributes(b)))
.assertScalar(x = xlab, type = "character")
.assertScalar(x = ylab, type = "character")
.assertScalar(x = main, type = "character")
if (!is.null(legend)) {
cli_warn(c("{.arg legend} is deprecated and ignored.",
"i" = "You can use {.arg legendPosition} to control legend drawing and position"))
}
if (!is.null(legend.cex)) {
cli_warn(c("{.arg legend.cex} is deprecated and ignored.",
"i" = "You can use {.fn theme} to control legend and other graphical parameters"))
}
cols <- getColsByBin(b, ...)
binbreaks <- attr(b, "breaks")
bincols <- attr(cols, "cols")
# add number of elements to bin names
bn <- unclass(table(b))
levels(b) <- names(bincols) <- paste0(levels(b), ": ", bn)
# calculate density and assign bins
dens <- data.frame(density(x = x)[c("x", "y")])
binbreaks[c(1, length(binbreaks))] <- range(dens$x)
dens$b <- factor(x = levels(b)[cut(dens$x, breaks = binbreaks,
include.lowest = TRUE, labels = FALSE)],
levels = levels(b))
p <- ggplot(dens) +
geom_area(data = dens, mapping = aes(.data$x, .data$y,
colour = .data$b, fill = .data$b),
outline.type = "full") +
geom_line(data = dens, mapping = aes(.data$x, .data$y), colour = "gray20") +
geom_rug(data = data.frame(x = binbreaks),
mapping = aes(.data$x), colour = "gray20") +
scale_colour_manual(values = bincols) +
scale_fill_manual(values = bincols) +
labs(x = ifelse(xlab != "", xlab, element_blank()),
y = ifelse(ylab != "", ylab, element_blank()),
main = ifelse(main != "", main, element_blank()),
colour = "Bins",
fill = "Bins") +
theme_classic() +
theme(legend.position = legendPosition)
return(p)
}
#' @title Scatter plot (xy-plot) of binned elements.
#'
#' @description Plot a scatter (xy-plot) of binned elements with binning
#' information.
#'
#' @param x A numerical vector with x values.
#' @param y A numerical vector with y values (the values used for binning).
#' @param b A factor that groups elements of \code{x,y} into bins (typically
#' the output of \code{\link{bin}(y)}).
#' @param cols \code{NULL} or a color vector defining the colors of points.
#' If \code{NULL}, the colors will be computed based on \code{b} using
#' \code{\link{getColsByBin}(b)}).
#' @param xlab Label for x-axis.
#' @param ylab Label for y-axis.
#' @param main Main title.
#' @param legendPosition A \code{character} scalar.
#' If not \code{"none"}, draw a legend with binning information. The value
#' is used to control the legend position and will be passed to
#' \code{theme(legend.position = legendPosition)}.
#' @param legend Deprecated (ignored). Please use \code{legendPosition} to
#' control the drawing and position of the legend.
#' @param legend.cex Deprecated (ignored). You can use
#' \code{\link[ggplot2]{theme}} to set legend and other graphical
#' parameters.
#' @param ... Further arguments passed to \code{\link{getColsByBin}} (only used
#' if \code{cols} is \code{NULL}).
#'
#' @seealso \code{\link{bin}}, \code{\link{getColsByBin}},
#' \code{\link[ggplot2]{geom_point}}
#'
#' @return The generated scatter plot as a \code{ggplot} object.
#'
#' @examples
#' set.seed(1)
#' x <- rnorm(100)
#' y <- rnorm(100)
#' b <- bin(y, "equalN", nElements = 10)
#' plotBinScatter(x, y, b)
#'
#' @importFrom ggplot2 ggplot aes geom_point element_blank theme_classic theme
#' @importFrom cli cli_warn
#'
#' @export
plotBinScatter <- function(x, y, b,
cols = NULL,
xlab = deparse(
substitute(x, env = as.environment(-1))),
ylab = deparse(
substitute(y, env = as.environment(-1))),
main = "",
legendPosition = "right",
legend = NULL,
legend.cex = NULL,
...) {
.assertVector(x = x, type = "numeric")
.assertVector(x = y, type = "numeric", len = length(x))
.assertVector(x = b, len = length(x))
bincols <- NULL
if (is.null(cols)) {
cols <- getColsByBin(b, ...)
bincols <- attr(cols, "cols")
} else if (length(cols) == 1L) {
cols <- rep(cols, length(x))
}
.assertVector(x = cols, len = length(x))
.assertScalar(x = xlab, type = "character")
.assertScalar(x = ylab, type = "character")
.assertScalar(x = main, type = "character")
if (is.null(bincols) && !identical(legendPosition, "none")) {
cli_warn(paste0(
"Setting {.arg legendPosition} to 'none' - ",
"cannot use custom colors ({.arg cols}) with bin-based legend"))
}
if (!is.null(legend)) {
cli_warn(c("{.arg legend} is deprecated and ignored.",
"i" = "You can use {.arg legendPosition} to control legend drawing and position"))
}
if (!is.null(legend.cex)) {
cli_warn(c("{.arg legend.cex} is deprecated and ignored.",
"i" = "You can use {.fn theme} to control legend and other graphical parameters"))
}
# add number of elements to bin names
bn <- unclass(table(b))
levels(b) <- paste0(levels(b), ": ", bn)
p <- ggplot(data = data.frame(x = x, y = y, b = b, cols = cols),
mapping = aes(x, y)) +
labs(x = ifelse(xlab != "", xlab, element_blank()),
y = ifelse(ylab != "", ylab, element_blank()),
main = ifelse(main != "", main, element_blank()),
colour = "Bins",
fill = "Bins") +
theme_classic() +
theme(legend.position = legendPosition)
if (is.null(bincols)) {
p <- p + geom_point(colour = cols)
} else {
names(bincols) <- levels(b)
p <- p + geom_point(aes(colour = b)) +
scale_colour_manual(values = bincols)
}
return(p)
}
#' @title Heatmap of motif enrichments.
#'
#' @description Plot motif enrichments (e.g. significance or magnitude) as a
#' heatmap.
#'
#' @param x A \code{\link[SummarizedExperiment]{SummarizedExperiment}} with
#' numerical matrices (motifs-by-bins) in its \code{assays()}, typically
#' the return value of \code{\link{calcBinnedMotifEnrR}} or
#' \code{\link{calcBinnedMotifEnrHomer}}.
#' @param which.plots Selects which heatmaps to plot (one or several from
#' \code{"negLog10P"}, \code{"negLog10Padj"}, \code{"pearsonResid"} and
#' \code{"log2enr"}).
#' @param width The width (in inches) of each individual heatmap, without
#' legend.
#' @param col.enr Colors used for enrichment heatmap ("pearsonResid" and
#' "log2enr").
#' @param col.sig Colors used for significance hetmaps ("negLog10P" and
#' "negLog10Padj").
#' @param col.gc Colors used for motif GC content (for
#' \code{show_motif_GC = TRUE}).
#' @param maxEnr Cap color mapping at enrichment = \code{maxEnr}
#' (default: 99.5th percentile).
#' @param maxSig Cap color mapping at -log10 P value or -log10 FDR =
#' \code{maxSig} (default: 99.5th percentile).
#' @param highlight A logical vector indicating motifs to be highlighted.
#' @param cluster If \code{TRUE}, the order of transcription factors will be
#' determined by hierarchical clustering of the \code{"pearsonResid"}
#' component. Alternatively, an \code{hclust}-object can be supplied which
#' will determine the motif ordering.
#' No reordering is done for \code{cluster = FALSE}.
#' @param show_dendrogram If \code{cluster != FALSE}, controls whether to show
#' a row dendrogram for the clustering of motifs. Ignored for
#' \code{cluster = FALSE}.
#' @param show_motif_GC If \code{TRUE}, show a column with the percent G+C of
#' the motif as part of the heatmap.
#' @param show_seqlogo If \code{TRUE}, show a sequence logo next to each motif
#' label. This will likely only make sense for a heatmap with a low number
#' of motifs.
#' @param show_bin_legend If \code{TRUE}, show a legend for the bin labels.
#' If FALSE (default), the bin legend will be hidden.
#' @param width.seqlogo The width (in inches) for the longest sequence logo
#' (shorter logos are drawn to scale).
#' @param use_raster \code{TRUE} or \code{FALSE} (default). Passed to
#' \code{use_raster} of \code{\link[ComplexHeatmap]{Heatmap}}.
#' @param na_col "white" (default). Passed to \code{na_col} of
#' \code{\link[ComplexHeatmap]{Heatmap}}.
#' @param doPlot If \code{TRUE} (default), plot the generated heatmap(s)
#' using \code{Reduce(ComplexHeatmap::add_heatmap, heatmapList)}. If
#' \code{FALSE}, just return the list of heatmap(s) (\code{heatmapList}) in
#' example before), allowing to modify them further before plotting.
#' @param ... Further arguments passed to \code{\link[ComplexHeatmap]{Heatmap}}
#' when creating the main heatmaps selected by \code{which.plots}. For
#' example, the following will set the font size of the motif names:
#' \code{plotMotifHeatmaps(..., row_names_gp = gpar(fontsize = 12))}
#'
#' @details The heatmaps are created using the \pkg{ComplexHeatmap} package
#' and plotted side-by-side.
#'
#' Each heatmap will be \code{width} inches wide, so the total plot needs a
#' graphics device with a width of at least
#' \code{length(which.plots) * width} plus the space used for motif names
#' and legend. The height will be auto-adjusted to the graphics device.
#'
#' @seealso \code{\link{bin}}, \code{\link[ComplexHeatmap]{Heatmap}}
#'
#' @references Gu, Z. Complex heatmaps reveal patterns and correlations in
#' multidimensional genomic data. Bioinformatics 2016.
#'
#' @return A list of \code{ComplexHeatmap::Heatmap} objects.
#'
#' @examples
#' se <- readRDS(system.file("extdata",
#' "results.binned_motif_enrichment_LMRs.rds",
#' package = "monaLisa"))
#' i <- which(SummarizedExperiment::assay(se, "negLog10Padj")[, 8] > 4)
#' plotMotifHeatmaps(se[i, ], which.plots = "pearsonResid",
#' width = 2, show_seqlogo = TRUE)
#'
#' @importFrom methods is show
#' @importFrom stats hclust dist quantile
#' @importFrom TFBSTools Matrix
#' @importFrom grDevices colorRampPalette
#' @importFrom S4Vectors metadata
#' @importFrom SummarizedExperiment assayNames assay rowData
#' @importFrom ComplexHeatmap HeatmapAnnotation Heatmap add_heatmap
#' @importFrom grid unit
#' @importFrom circlize colorRamp2
#' @importFrom cli cli_abort
#'
#' @export
plotMotifHeatmaps <- function(x,
which.plots = c("negLog10P", "pearsonResid",
"negLog10Padj", "log2enr"),
width = 4,
col.enr = c("#053061", "#2166AC", "#4393C3",
"#92C5DE", "#D1E5F0", "#F7F7F7",
"#FDDBC7", "#F4A582", "#D6604D",
"#B2182B", "#67001F"),
col.sig = c("#F0F0F0", "#D9D9D9", "#BDBDBD",
"#969696", "#737373", "#525252",
"#252525", "#000000"),
col.gc = c("#F7FCF5", "#E5F5E0", "#C7E9C0",
"#A1D99B", "#74C476", "#41AB5D",
"#238B45", "#006D2C", "#00441B"),
maxEnr = NULL,
maxSig = NULL,
highlight = NULL,
cluster = FALSE,
show_dendrogram = FALSE,
show_motif_GC = FALSE,
show_seqlogo = FALSE,
show_bin_legend = FALSE,
width.seqlogo = 1.5,
use_raster = FALSE,
na_col = "white",
doPlot = TRUE,
...) {
stopifnot(exprs = {
is(x, "SummarizedExperiment")
all(which.plots %in% assayNames(x))
"bins" %in% names(metadata(x))
(!show_motif_GC || "motif.percentGC" %in% colnames(rowData(x)))
})
b <- metadata(x)$bins
.assertScalar(x = width, type = "numeric", rngExcl = c(0, Inf))
.assertScalar(x = show_dendrogram, type = "logical")
.assertScalar(x = show_motif_GC, type = "logical")
.assertScalar(x = show_seqlogo, type = "logical")
.assertScalar(x = show_bin_legend, type = "logical")
.assertScalar(x = width.seqlogo, type = "numeric", rngExcl = c(0, Inf))
.assertScalar(x = use_raster, type = "logical")
.assertScalar(x = na_col, type = "character")
.assertScalar(x = doPlot, type = "logical")
stopifnot(exprs = {
ncol(x) == nlevels(b)
all(which.plots %in% c("negLog10P", "negLog10Padj",
"pearsonResid", "log2enr"))
is.null(highlight) || (is.logical(highlight) &&
length(highlight) == nrow(x))
})
bincols <- attr(getColsByBin(b), "cols")
if (identical(cluster, TRUE)) {
clAssayName <- "pearsonResid"
clAssay <- assay(x, clAssayName)
allNA <- rowSums(is.na(clAssay)) == ncol(clAssay)
if (any(allNA)) {
warning("removing motifs without finite values in '",
clAssayName, "': ",
paste(rownames(clAssay)[allNA], collapse = ", "))
x <- x[!allNA, ]
clAssay <- clAssay[!allNA, ]
}
clres <- hclust(dist(clAssay))
} else if (identical(cluster, FALSE)) {
clres <- FALSE
} else if (is(cluster, "hclust")) {
clres <- cluster
} else {
cli_abort("{.arg cluster} must be either {.code TRUE}, {.code FALSE} or an {.cls hclust}-object.")
}
hmBin <- HeatmapAnnotation(
df = data.frame(bin = factor(colnames(x),
levels = colnames(x))),
name = "bin",
col = list(bin = bincols),
show_annotation_name = FALSE,
which = "column", width = unit(width,"inch"),
annotation_height = unit(width / 16, "inch"),
show_legend = show_bin_legend)
tmp <- matrix(if (!is.null(highlight)) {
as.character(highlight)
} else {
rep(NA, nrow(x))
},
ncol = 1, dimnames = list(unname(rowData(x)$motif.name), NULL))
hmSeqlogo <- NULL
if (show_seqlogo) {
pfms <- rowData(x)$motif.pfm
maxwidth <- max(vapply(TFBSTools::Matrix(pfms), ncol, 0L))
grobL <- lapply(pfms, seqLogoGrob, xmax = maxwidth, xjust = "center")
hmSeqlogo <- HeatmapAnnotation(
logo = annoSeqlogo(grobL = grobL, which = "row",
space = unit(0.5, "mm"),
width = unit(width.seqlogo, "inch")),
show_legend = FALSE, show_annotation_name = FALSE, which = "row")
}
hmMotifs <- Heatmap(
matrix = tmp, name = "names",
width = unit(if (!is.null(highlight)) .2 else 0, "inch"),
na_col = NA, col = c("TRUE" = "green3", "FALSE" = "white"),
cluster_rows = clres, show_row_dend = show_dendrogram,
cluster_columns = FALSE, show_row_names = TRUE,
row_names_side = "left", show_column_names = FALSE,
show_heatmap_legend = FALSE, left_annotation = hmSeqlogo,
...
)
assayNameMap1 <- c(negLog10P = "P value",
negLog10Padj = "adj. P value",
pearsonResid = "Pearson residual",
log2enr = "log2 enrichment")
assayNameMap2 <- c(negLog10P = "P value (-log10)",
negLog10Padj = "adj. P value (-log10)",
pearsonResid = "Pearson residual (o-e)/sqrt(e)",
log2enr = "enrichment (log2)")
L <- list(labels = hmMotifs)
if (show_motif_GC) {
tmp <- as.matrix(rowData(x)[, "motif.percentGC", drop = FALSE])
hmPercentGC <- Heatmap(
matrix = tmp, name = "Percent G+C",
width = unit(0.2, "inch"), na_col = NA,
col = colorRamp2(breaks = c(0, seq(20, 80, length.out = 254), 100),
colors = colorRampPalette(col.gc)(256)),
cluster_rows = FALSE, cluster_columns = FALSE,
show_row_names = FALSE, show_column_names = FALSE,
show_heatmap_legend = TRUE,
heatmap_legend_param = list(color_bar = "continuous"),
use_raster = use_raster,
...
)
L <- c(L, list("percentGC" = hmPercentGC))
}
ret <- c(L, lapply(which.plots, function(w) {
dat <- assay(x, w)
if ((w == "pearsonResid") | (w == "log2enr")) {
rng <- c(-1, 1) * if (is.null(maxEnr)) {
quantile(abs(dat), .995, na.rm = TRUE)
} else {
maxEnr
}
cols <- col.enr
} else {
rng <- c(0,
if (is.null(maxSig)) {
quantile(dat, .995, na.rm = TRUE)
} else {
maxSig
})
cols <- col.sig
}
Heatmap(
matrix = dat,
name = assayNameMap1[w],
width = unit(width,"inch"),
column_title = assayNameMap2[w],
col = colorRamp2(breaks = seq(rng[1], rng[2], length.out = 256),
colors = colorRampPalette(cols)(256)),
cluster_rows = FALSE, cluster_columns = FALSE,
show_row_names = FALSE, show_column_names = FALSE,
# column_names_side = "bottom",
# column_names_max_height = unit(1.5,"inch"),
top_annotation = hmBin, show_heatmap_legend = TRUE,
heatmap_legend_param = list(color_bar = "continuous"),
use_raster = use_raster,
na_col = na_col,
...
)
}))
names(ret)[seq(length(ret) - length(which.plots) + 1L, length(ret))] <-
which.plots
if (doPlot) {
show(Reduce(ComplexHeatmap::add_heatmap, ret))
}
invisible(ret)
}
#' @title Plot Stability Paths
#'
#' @description Plot the stability paths of each variable (predictor),
#' showing the selection probability as a function of the regularization step.
#'
#' @param se The \code{SummarizedExperiment} object resulting from stability
#' selection, by running \code{\link[monaLisa]{randLassoStabSel}}.
#' @param selProbMin A numerical scalar in [0,1]. Predictors with a selection
#' probability greater than \code{selProbMin} are shown as colored lines. The
#' color is defined by the \code{col} argument.
#' @param selColor Color for the selected predictors which have a selection
#' probability greater than \code{selProbMin}.
#' @param notSelColor Color for the rest of the (un-selected) predictors.
#' @param selProbCutoffColor Color for the line depicting the selection
#' probability cutoff.
#' @param linewidth Line width.
#' @param alpha Line transparency of the stability paths.
#' @param ylim Limits for y-axis.
#' @param labelPaths If \code{TRUE}, the predictor labels will be shown at the
#' end of the stability paths. The predictor labels given in \code{labels}
#' will be shown. If unspecified, the labels corresponding to the selected
#' predictors will be added. If predictors have the same y-value in the last
#' regularization step, the labels will be shown in a random order. One
#' needs to use \code{set.seed} to reproduce the plot in this case.
#' @param labels If \code{labelPaths = TRUE}, the predictors which should be
#' labelled. If \code{NULL}, the selected predictors greater than
#' \code{metadata(se)$stabsel.params.cutoff} will be shown.
#' @param labelNudgeX If \code{labelPaths = TRUE}, how much to nudge the labels
#' to the right of the x-axis.
#' @param labelSize If \code{labelPaths = TRUE}, the size of the labels.
#'
#' @return a \code{ggplot2} object.
#'
#' @examples
#' ## create data set
#' Y <- rnorm(n = 500, mean = 2, sd = 1)
#' X <- matrix(data = NA, nrow = length(Y), ncol = 50)
#' for (i in seq_len(ncol(X))) {
#' X[ ,i] <- runif(n = 500, min = 0, max = 3)
#' }
#' s_cols <- sample(x = seq_len(ncol(X)), size = 10,
#' replace = FALSE)
#' for (i in seq_along(s_cols)) {
#' X[ ,s_cols[i]] <- X[ ,s_cols[i]] + Y
#' }
#'
#' ## reproducible randLassoStabSel() with 1 core
#' set.seed(123)
#' ss <- randLassoStabSel(x = X, y = Y)
#' plotStabilityPaths(ss)
#'
#' @seealso \code{\link[stabs]{stabsel}}
#'
#' @importFrom SummarizedExperiment assay rowData colData
#' @importFrom ggplot2 ggplot aes geom_line scale_color_manual geom_segment labs
#' scale_y_continuous scale_x_continuous guides guide_legend
#' theme_classic theme
#' @importFrom tidyr pivot_longer starts_with
#' @importFrom cli cli_abort
#' @importFrom rlang .data
#'
#' @export
plotStabilityPaths <- function(se,
selProbMin = metadata(se)$stabsel.params.cutoff,
selColor = "cadetblue",
notSelColor = "grey",
selProbCutoffColor = "firebrick",
linewidth = 0.5,
alpha = 1,
ylim = c(0, 1),
labelPaths = FALSE,
labels = NULL,
labelNudgeX = 8,
labelSize = 3) {
# checks
if (!is(se, "SummarizedExperiment")) {
cli_abort("{.arg se} must be a {.cls SummarizedExperiment}")
}
.assertScalar(x = selProbMin, type = "numeric", rngIncl = c(0, 1))
.assertColor(x = selColor, len = 1)
.assertColor(x = notSelColor, len = 1)
.assertColor(x = selProbCutoffColor, len = 1)
.assertScalar(x = linewidth, type = "numeric")
.assertScalar(x = alpha, type = "numeric", rngIncl = c(0, 1))
.assertVector(x = ylim, type = "numeric", len = 2)
.assertVector(x = colnames(se), type = "character", len = ncol(se))
.assertVector(x = rownames(se), type = "character", len = nrow(se))
.assertVector(x = colnames(colData(se)), type = "character")
if (!any(grepl(pattern = "^regStep", x = colnames(colData(se))))) {
cli_abort("the columns in {.code colData(se)} containing the selection
probabilities for each regularization step {.code i} must have
column names starting with {.emph regStep}. See
{.fn randLassoStabSel} for more details.")
}
# prepare dataframe to plot
df <- as.data.frame(colData(se))
df$predictor <- rownames(df)
df$selected <- df$selProb > selProbMin
df <- tidyr::pivot_longer(df,
cols = tidyr::starts_with(match = "regStep"),
names_to = "regStep",
values_to = "selectionProbability")
df$regStep <- as.integer(gsub(pattern = "regStep",
replacement = "",
x = df$regStep))
df$yintercept <- selProbMin
df$cutoff <- paste0("selProbMin = ", selProbMin)
xAxisTicks <- round(seq(from = min(df$regStep),
to = max(df$regStep),
length.out = 5))
# plot stability paths (returns ggplot object)
gg <- ggplot(data = df,
mapping = aes(x = .data$regStep, y = .data$selectionProbability)) +
geom_line(mapping = aes(group = .data$predictor, color = .data$selected),
linewidth = linewidth,
alpha = alpha) +
scale_color_manual(values = c("TRUE" = selColor,
"FALSE" = notSelColor)) +
geom_segment(mapping = aes(x = min(.data$regStep),
xend = max(.data$regStep),
y = .data$yintercept,
yend = .data$yintercept,
linetype = .data$cutoff),
color = selProbCutoffColor,
linewidth = linewidth) +
labs(x = "Regularization Step",
y = "Selection Probability") +
scale_y_continuous(expand = c(0, 0), limits = ylim) +
scale_x_continuous(expand = c(0, 0),
breaks = xAxisTicks,
labels = xAxisTicks) +
guides(colour = guide_legend(override.aes = list(alpha = 1))) +
theme_classic() +
geom_segment(data = data.frame(x = min(df$regStep),
xend = max(df$regStep), y = 0),
mapping = aes(x = .data$x, xend = .data$xend,
y = .data$y, yend = .data$y)) +
geom_segment(data = data.frame(x = min(df$regStep), y = ylim[1],
yend = ylim[2]),
mapping = aes(x = .data$x, xend = .data$x,
y = .data$y, yend = .data$yend)) +
theme(axis.line = element_blank())
if (labelPaths) {
.assertPackagesAvailable("ggrepel")
.assertScalar(x = labelNudgeX, type = "numeric")
.assertScalar(x = labelSize, type = "numeric")
if (is.null(labels)) {
labels <- unique(df$predictor[df$selected])
} else {
# check that the labels are correct and exist
.assertVector(x = labels, type = "character",
validValues = unique(df$predictor))
}
gg <- gg +
ggrepel::geom_text_repel(data = df[(df$predictor %in% labels) &
(df$regStep == max(df$regStep)), ],
aes(label = .data$predictor,
color = .data$selected),
max.overlaps = Inf,
nudge_x = labelNudgeX, na.rm = TRUE,
size = labelSize, direction = "y",
hjust = 0, segment.linetype = "dotted",
segment.size = 0.7, segment.curvature = -0.1,
segment.angle = 20, box.padding = 0.4,
segment.alpha = 0.5, vjust = 0,
show.legend = FALSE)
}
gg
}
#' @title Plot selection probabilities of predictors
#'
#' @description This function plots the selection probabilities of predictors
#' (for example the selected motifs), optionally multiplied with either +1 or
#' -1 to give a sense of both the strength and the directionality of the
#' associated effects. The directionality is estimated from the sign of the
#' correlation coefficient between each predictor and the response vector.
#'
#' @param se The \code{SummarizedExperiment} object with the results from
#' stability selection (typically returned by \code{\link{randLassoStabSel}}).
#' @param directional A logical scalar. If \code{TRUE}, selection probabilities
#' are plotted with the sign of the marginal correlation between a predictor
#' and the response.
#' @param selProbMin A numerical scalar in [0,1]. Predictors with a selection
#' probability greater than \code{selProbMin} are considered selected and
#' colored by the input from \code{selColor}. By default, \code{selProbMin} is
#' extracted from the parameters stored in \code{se}.
#' @param selProbMinPlot A numerical scalar in [0,1] less than
#' \code{selProbMin}. Predictors with a selection probability greater
#' than \code{selProbMinPlot} but less than \code{selProbMin} are shown as
#' bars with color defined in \code{notSelColor}. \code{selProbMinPlot} is
#' useful in order to include additional predictors in the barplot, that were
#' not selected according to \code{selProbMin} but may be close to that cutoff
#' or are simply nice to visualize alongside the selected predictors. Setting
#' \code{selProbMinPlot = 0} will include all predictors.
#' @param showSelProbMin A logical scalar. If \code{TRUE}, the value of
#' \code{selProbMin} is shown by a horizontal line with the color defined
#' by \code{selProbCutoffColor}.
#' @param selColor Color for the selected predictors which have a selection
#' probability greater than \code{selProbMin}.
#' @param notSelColor Color for the rest of the (unselected) predictors which
#' will be show in the barplot.
#' @param selProbCutoffColor Color for the line depicting the selection
#' probability cutoff.
#' @param method A character scalar with the correlation method to use in the
#' calculation of predictor-response marginal correlations. One of "pearson",
#' "kendall" or "spearman" (see \code{\link[stats]{cor}}).
#' @param ylimext A numeric scalar defining how much the y axis limits should be
#' expanded beyond the plotted probabilities to allow for space for the
#' bar labels. This value can be increased if the predictor names above the
#' bars are too long and not showing in the plot.
#'
#' @details This function creates a bar plot with \code{ggplot}.
#' Each bar corresponds to a predictor (motif) and the colors correspond to
#' whether or not it was selected. The y-axis shows the selection
#' probabilities (\code{directional=FALSE}) or selection probabilities with
#' the sign of the marginal correlation to the response
#' (\code{directional=TRUE}).
#'
#' @return a \code{ggplot2} object.
#'
#' @examples
#' ## create data set
#' set.seed(321)
#' Y <- rnorm(n = 500, mean = 2, sd = 1)
#' X <- matrix(data = NA, nrow = length(Y), ncol = 50)
#' for (i in seq_len(ncol(X))) {
#' X[ ,i] <- runif(n = 500, min = 0, max = 3)
#' }
#' s_cols <- sample(x = seq_len(ncol(X)), size = 10,
#' replace = FALSE)
#' for (s in s_cols) {
#' X[ ,s] <- X[, s] + (Y + rnorm(500, 0, 4)) * ifelse(s %% 2, -1, 1)
#' }
#'
#' ## reproducible randLassoStabSel() with 1 core
#' set.seed(123)
#' ss <- randLassoStabSel(x = X, y = Y)
#' plotSelectionProb(ss)
#'
#' @importFrom SummarizedExperiment rowData assay
#' @importFrom S4Vectors metadata
#' @importFrom stats cor reorder
#' @importFrom ggplot2 ggplot aes geom_col scale_fill_manual geom_hline labs
#' geom_text ylim theme theme_classic
#' @importFrom rlang .data
#'
#' @export
plotSelectionProb <- function(se,
directional = TRUE,
selProbMin = metadata(se)$stabsel.params.cutoff,
selProbMinPlot = 0.4,
showSelProbMin = TRUE,
selColor = "cadetblue",
notSelColor = "grey",
selProbCutoffColor = "firebrick",
method = c("pearson", "kendall", "spearman"),
ylimext = 0.2) {
# checks
.assertScalar(x = directional, type = "logical")
.assertScalar(x = selProbMin, type = "numeric", rngIncl = c(0, 1))
.assertScalar(x = selProbMinPlot, type = "numeric", rngIncl = c(0, 1))
.assertScalar(x = showSelProbMin, type = "logical")
stopifnot(exprs = {
is(se, "SummarizedExperiment")
selProbMin >= selProbMinPlot
})
.assertColor(x = selColor, len = 1)
.assertColor(x = notSelColor, len = 1)
.assertColor(x = selProbCutoffColor, len = 1)
method <- match.arg(method)
.assertScalar(x = ylimext, type = "numeric", rngIncl = c(0, Inf))
# prepare dataframe to plot
df <- data.frame(
corcoef = as.vector(cor(x = SummarizedExperiment::rowData(se)$y,
y = SummarizedExperiment::assay(se, "x"),
method = method)),
probs = se$selProb,
predNames = colnames(se),
selected = se$selProb > selProbMin
)
df$yintercept <- selProbMin
df$cutoff <- paste0("selProbMin = ", selProbMin)
# ... multiply prob by the sign of corcoef if directional=TRUE
if(directional){
df$probs <- df$probs * sign(df$corcoef)
}
# ... only keep abs(probs) >= selProbMinPlot
df <- df[abs(df$probs) >= selProbMinPlot, ]
ymin <- min(min(df$probs), 0) - ylimext*max(df$probs)
ymax <- max(df$probs) + ylimext*max(df$probs)
# plot (returns ggplot object)
gg <- ggplot(data = df,
mapping = aes(x = stats::reorder(.data$predNames,
-.data$probs),
y = .data$probs)) +
geom_col(mapping = aes(fill = .data$selected)) +
scale_fill_manual(values = c("TRUE" = selColor, "FALSE" = notSelColor)) +
labs(x = element_blank(),
y = ifelse(
directional,
"Directional selection probability",
"Selection probability"
)) +
ylim(c(ymin, ymax)) +
geom_text(mapping = aes(label = .data$predNames,
hjust = ifelse(.data$probs >= 0, -0.1, 1.1)),
angle = 90) +
theme_classic() +
theme(axis.text.x = element_blank(),
axis.line.x = element_blank(),
axis.ticks.x = element_blank(),
axis.ticks.length.y = unit(0.2, "cm"))
if(showSelProbMin){
gg <- gg +
geom_hline(mapping = aes(yintercept = .data$yintercept,
linetype = .data$cutoff),
color = selProbCutoffColor, linewidth = 1)
if(directional & min(df$probs) <= -selProbMin){
gg <- gg +
geom_hline(yintercept = -selProbMin,
color = selProbCutoffColor,
linewidth = 1)
}
}
gg
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.