#' Construct an (interactive) scatterplot
#'
#' This function provides a convenience wrapper for constructing a scatter
#' plot of two columns in a data.frame, and color the points by whether
#' the values in a third column are below a provided value.
#' Users will interact with the function via the higher-level wrappers
#' \code{plotVolcano()} and \code{plotMeanDiff()}, which both call this
#' function internally.
#'
#' @param res A \code{data.frame} with values to plot.
#' @param xCol,yCol Character scalars defining the columns to plot on the
#' x- and y-axis, respectively.
#' @param xLabel,yLabel Character scalars providing the x- and y-axis
#' titles, respectively.
#' @param colorCol Character scalar defining the column to use to color
#' the points (typically the adjusted p-value).
#' @param colorThreshold Numeric scalar. If the value in \code{colorCol}
#' is lower than colorThreshold, the point will be indicated in red.
#' @param labelCol Character scalar defining the column to use to label
#' individual points in the plot.
#' @param labelValues Character vector defining the points to highlight with
#' labels. Must correspond to values in the \code{labelCol} column.
#' @param centerAxis Character vector, a subset of c("x", "y") or \code{NULL}.
#' Indicates the axis that should be centered (i.e., where the axis
#' limits should be extended to put 0 in the center).
#' @param pointSize Character scalar, either \code{"small"} or \code{"large"},
#' defining the style to use to plot the points.
#' @param interactivePlot Logical scalar, whether to return an interactive
#' plot.
#'
#' @return If \code{interactivePlot} is \code{TRUE}, a \code{plotly}
#' object. If \code{interactivePlot} is \code{FALSE}, a \code{ggplot2}
#' object.
#'
#' @author Charlotte Soneson
#' @keywords internal
#' @noRd
#'
#' @importFrom ggplot2 ggplot theme_minimal coord_cartesian theme labs
#' element_text geom_point aes
#' @importFrom rlang .data
#' @importFrom ggrepel geom_text_repel
.plotScatter <- function(res, xCol, yCol, xLabel = xCol, yLabel = yCol,
                         colorCol, colorThreshold,
                         labelCol = NULL, labelValues = c(), centerAxis = "x",
                         pointSize = "small", interactivePlot = FALSE) {
    ## -------------------------------------------------------------------------
    ## Validate arguments
    ## -------------------------------------------------------------------------
    .assertVector(x = res, type = "data.frame")
    .assertScalar(x = xCol, type = "character", validValues = colnames(res))
    .assertScalar(x = yCol, type = "character", validValues = colnames(res))
    .assertScalar(x = xLabel, type = "character")
    .assertScalar(x = yLabel, type = "character")
    .assertScalar(x = colorCol, type = "character",
                  validValues = colnames(res))
    .assertScalar(x = colorThreshold, type = "numeric")
    .assertScalar(x = labelCol, type = "character",
                  validValues = colnames(res), allowNULL = TRUE)
    .assertVector(x = labelValues, type = "character", allowNULL = TRUE)
    .assertVector(x = centerAxis, type = "character",
                  validValues = c("x", "y"), allowNULL = TRUE)
    .assertScalar(x = pointSize, type = "character",
                  validValues = c("small", "large"))
    .assertScalar(x = interactivePlot, type = "logical")

    ## plotly is an optional dependency, only needed for interactive plots
    if (interactivePlot && !requireNamespace("plotly", quietly = TRUE)) {
        stop("The 'plotly' package is required to make interactive plots.")
    }

    ## -------------------------------------------------------------------------
    ## Axis limits
    ## -------------------------------------------------------------------------
    ## finite = TRUE makes range() skip +/-Inf in addition to NA, so that
    ## e.g. -log10(0) = Inf does not give infinite axis limits
    xr <- range(res[[xCol]], na.rm = TRUE, finite = TRUE)
    yr <- range(res[[yCol]], na.rm = TRUE, finite = TRUE)
    ## Make the limits symmetric around 0 for the axes that should be centered
    if ("x" %in% centerAxis) {
        xr <- c(-max(abs(xr), na.rm = TRUE), max(abs(xr), na.rm = TRUE))
    }
    if ("y" %in% centerAxis) {
        yr <- c(-max(abs(yr), na.rm = TRUE), max(abs(yr), na.rm = TRUE))
    }

    ## -------------------------------------------------------------------------
    ## Base plot
    ## -------------------------------------------------------------------------
    gg <- ggplot2::ggplot(res, ggplot2::aes(x = .data[[xCol]],
                                            y = .data[[yCol]])) +
        ggplot2::theme_minimal() +
        ggplot2::coord_cartesian(xlim = xr, ylim = yr) +
        ggplot2::theme(axis.text = ggplot2::element_text(size = 12),
                       axis.title = ggplot2::element_text(size = 14),
                       title = ggplot2::element_text(size = 14)) +
        ggplot2::labs(x = xLabel, y = yLabel)

    ## The label aesthetic is added globally so that it is available both to
    ## the text layer added below and to the plotly conversion
    if (!is.null(labelCol)) {
        gg <- gg +
            ggplot2::aes(label = .data[[labelCol]])
    }

    ## Background layer with all points
    if (pointSize == "large") {
        gg <- gg +
            ggplot2::geom_point(fill = "lightgrey", color = "grey",
                                pch = 21, size = 1.5)
    } else {
        gg <- gg +
            ggplot2::geom_point(color = "black", size = 0.25, alpha = 0.5)
    }

    ## -------------------------------------------------------------------------
    ## Highlight points below the threshold
    ## -------------------------------------------------------------------------
    ## which() drops comparisons that evaluate to NA. Using the logical vector
    ## directly would make `if (any(...))` fail when the column contains NA
    ## and no value is below the threshold, and would otherwise insert
    ## all-NA rows into the highlighted subset.
    sigRows <- which(res[[colorCol]] < colorThreshold)
    if (length(sigRows) > 0) {
        sigRes <- res[sigRows, , drop = FALSE]
        gg <- gg +
            ggplot2::labs(subtitle = paste0("Points are indicated in red if ",
                                            colorCol, "<", colorThreshold))
        if (pointSize == "large") {
            gg <- gg +
                ggplot2::geom_point(
                    data = sigRes,
                    fill = "red", color = "grey", pch = 21, size = 1.5
                )
        } else {
            gg <- gg +
                ggplot2::geom_point(
                    data = sigRes,
                    color = "red", size = 0.25
                )
        }
    }

    ## -------------------------------------------------------------------------
    ## Return value
    ## -------------------------------------------------------------------------
    if (interactivePlot) {
        ## No text labels are added to the interactive plot
        plotly::ggplotly(gg)
    } else {
        if (!is.null(labelCol) && length(labelValues) > 0) {
            ## All requested labels must be present in the label column
            .assertVector(x = labelValues, type = "character",
                          validValues = res[[labelCol]])
            gg <- gg +
                ggrepel::geom_text_repel(
                    data = res[res[[labelCol]] %in% labelValues, ,
                               drop = FALSE],
                    max.overlaps = Inf, size = 4, min.segment.length = 0.1)
        }
        gg
    }
}
#' Find the column of a data.frame to use for a given plot aspect
#'
#' @param res A \code{data.frame}.
#' @param validValues Character vector with the accepted column names.
#' @param aspect Character scalar describing what the column will be used
#'     for. Only used to construct the error and warning messages.
#'
#' @return A character scalar, the name of the first column in \code{res}
#'     (in column order) that is identical to one of \code{validValues}.
#'     An error is raised if there is no such column, and a warning is
#'     emitted if there is more than one.
#'
#' @keywords internal
#' @noRd
#'
.getColName <- function(res, validValues, aspect) {
    .assertVector(x = res, type = "data.frame")
    ## Match the names exactly rather than via a regular expression built
    ## from validValues: names such as "P.Value" contain regex
    ## metacharacters ("." matches any character), which would let e.g. a
    ## column named "PXValue" be picked up by mistake.
    allCols <- colnames(res)
    colName <- allCols[allCols %in% validValues]
    if (length(colName) == 0) {
        stop("No suitable column found for ", aspect, ", one of the ",
             "following expected: ", paste(validValues, collapse = ", "))
    }
    if (length(colName) > 1) {
        warning("Multiple suitable columns found for ", aspect,
                "; choosing the first: ", colName[1])
        colName <- colName[1]
    }
    colName
}
#' Construct an MA (mean-difference) plot
#'
#' @param res \code{data.frame} (typically output from
#' \code{calculateRelativeFC()}) with columns corresponding to the
#' average abundance (\code{logCPM} or \code{AveExpr}), log-fold
#' change (\code{logFC}) and significance (\code{FDR} or
#' \code{adj.P.Val}).
#' @param meanCol,logFCCol,pvalCol,padjCol Character scalars indicating the
#' columns from \code{res} that will be used to represent the
#' mean value (x-axis), logFC (y-axis), nominal p-value (used to find
#' the top features to label) and adjusted p-value (used
#' for coloring). If \code{NULL} (default), pre-specified values
#' will be used depending on the available columns
#' (\code{"logCPM"} or \code{"AveExpr"}, \code{"logFC"},
#' \code{"PValue"} or \code{"P.Value"}, and
#' \code{"FDR"} or \code{"adj.P.Val"}, respectively).
#' @param padjThreshold Numeric scalar indicating the adjusted p-value
#' threshold to use for coloring the points. All features with
#' adjusted p-value below the threshold will be shown in red.
#' @param pointSize Either \code{"small"} or \code{"large"}, indicating
#' which of the two available plot styles that will be used.
#' @param interactivePlot Logical scalar, indicating whether an
#' interactive plot should be returned, in which one can hover
#' over the individual points and obtain further information.
#' @param nTopToLabel Numeric scalar, indicating the number of points that
#' should be labeled in the plot. The points will be ranked by the
#' \code{pvalCol} column, and the top \code{nTopToLabel} values will
#' be labeled by the corresponding row names. Only used if
#' \code{interactivePlot} is \code{FALSE}.
#'
#' @return If \code{interactivePlot} is \code{TRUE}, a \code{plotly}
#' object. If \code{interactivePlot} is \code{FALSE}, a \code{ggplot2}
#' object.
#'
#' @author Charlotte Soneson
#' @export
#'
#' @examples
#' se <- readRDS(system.file("extdata", "GSE102901_cis_se.rds",
#' package = "mutscan"))[1:200, ]
#' design <- model.matrix(~ Replicate + Condition,
#' data = SummarizedExperiment::colData(se))
#' res <- calculateRelativeFC(se, design, coef = "Conditioncis_output")
#' plotMeanDiff(res, pointSize = "large", nTopToLabel = 3)
#'
plotMeanDiff <- function(res, meanCol = NULL, logFCCol = NULL, pvalCol = NULL,
                         padjCol = NULL, padjThreshold = 0.05,
                         pointSize = "small",
                         interactivePlot = FALSE, nTopToLabel = 0) {
    .assertScalar(x = nTopToLabel, type = "numeric", rngIncl = c(0, Inf))
    .assertVector(x = res, type = "data.frame")

    ## Get the columns to use. Columns that are not explicitly specified are
    ## looked up among a set of pre-defined candidate names.
    if (is.null(meanCol)) {
        meanCol <- .getColName(res, validValues = c("logCPM", "AveExpr"),
                               aspect = "x-axis")
    }
    if (is.null(logFCCol)) {
        logFCCol <- .getColName(res, validValues = c("logFC"),
                                aspect = "y-axis")
    }
    if (is.null(pvalCol)) {
        pvalCol <- .getColName(res, validValues = c("PValue", "P.Value"),
                               aspect = "labeling")
    }
    if (is.null(padjCol)) {
        padjCol <- .getColName(res, validValues = c("FDR", "adj.P.Val"),
                               aspect = "highlighting")
    }
    ## pvalCol is used directly below; the remaining columns are passed on
    ## to .plotScatter()
    .assertScalar(pvalCol, type = "character", validValues = colnames(res))

    ## Use the row names as point labels
    res$feature <- rownames(res)

    ## Find the features to label (the ones with the smallest values in
    ## pvalCol). The number of labels is capped at the number of rows, since
    ## indexing beyond the end would give NA labels (and seq_len(Inf) is an
    ## error, although Inf passes the range check above).
    if (nTopToLabel > 0) {
        nLabels <- min(nTopToLabel, nrow(res))
        labelValues <- res[["feature"]][order(res[[pvalCol]])][seq_len(nLabels)]
    } else {
        labelValues <- c()
    }

    .plotScatter(res = res, xCol = meanCol, yCol = logFCCol,
                 xLabel = meanCol, yLabel = logFCCol,
                 colorCol = padjCol,
                 colorThreshold = padjThreshold,
                 labelCol = "feature", labelValues = labelValues,
                 centerAxis = "y", pointSize = pointSize,
                 interactivePlot = interactivePlot)
}
#' Construct a volcano plot
#'
#' @param res \code{data.frame} (typically output from
#' \code{calculateRelativeFC()}) with columns corresponding to the
#' log-fold change (\code{logFC}), p-value (\code{PValue} or
#' \code{P.Value}) and significance (\code{FDR} or \code{adj.P.Val}).
#' @param logFCCol,pvalCol,padjCol Character scalars indicating the
#' columns from \code{res} that will be used to represent the
#' logFC (x-axis), p-value (y-axis) and adjusted p-value (used
#' for coloring). If \code{NULL} (default), pre-specified values
#' will be used depending on the available columns
#' (\code{"logFC"}, \code{"PValue"} or \code{"P.Value"}, and
#' \code{"FDR"} or \code{"adj.P.Val"}, respectively).
#' @param padjThreshold Numeric scalar indicating the adjusted p-value
#' threshold to use for coloring the points. All features with
#' adjusted p-value below the threshold will be shown in red.
#' @param pointSize Either \code{"small"} or \code{"large"}, indicating
#' which of the two available plot styles that will be used.
#' @param interactivePlot Logical scalar, indicating whether an
#' interactive plot should be returned, in which one can hover
#' over the individual points and obtain further information.
#' @param nTopToLabel Numeric scalar, indicating the number of points that
#' should be labeled in the plot. The points will be ranked by the
#' \code{pvalCol} column, and the top \code{nTopToLabel} values will
#' be labeled by the corresponding row names. Only used if
#' \code{interactivePlot} is \code{FALSE}.
#'
#' @return If \code{interactivePlot} is \code{TRUE}, a \code{plotly}
#' object. If \code{interactivePlot} is \code{FALSE}, a \code{ggplot2}
#' object.
#'
#' @author Charlotte Soneson
#' @export
#'
#' @examples
#' se <- readRDS(system.file("extdata", "GSE102901_cis_se.rds",
#' package = "mutscan"))[1:200, ]
#' design <- model.matrix(~ Replicate + Condition,
#' data = SummarizedExperiment::colData(se))
#' res <- calculateRelativeFC(se, design, coef = "Conditioncis_output")
#' plotVolcano(res, pointSize = "large", nTopToLabel = 3)
#'
plotVolcano <- function(res, logFCCol = NULL, pvalCol = NULL, padjCol = NULL,
                        padjThreshold = 0.05, pointSize = "small",
                        interactivePlot = FALSE, nTopToLabel = 0) {
    .assertScalar(x = nTopToLabel, type = "numeric", rngIncl = c(0, Inf))
    .assertVector(x = res, type = "data.frame")

    ## Get the columns to use. Columns that are not explicitly specified are
    ## looked up among a set of pre-defined candidate names.
    if (is.null(logFCCol)) {
        logFCCol <- .getColName(res, validValues = c("logFC"),
                                aspect = "x-axis")
    }
    if (is.null(pvalCol)) {
        pvalCol <- .getColName(res, validValues = c("PValue", "P.Value"),
                               aspect = "y-axis")
    }
    if (is.null(padjCol)) {
        padjCol <- .getColName(res, validValues = c("FDR", "adj.P.Val"),
                               aspect = "highlighting")
    }
    ## pvalCol is used directly below; the remaining columns are passed on
    ## to .plotScatter()
    .assertScalar(pvalCol, type = "character", validValues = colnames(res))

    ## y-axis values and point labels (row names)
    res$negLog10P <- -log10(res[[pvalCol]])
    res$feature <- rownames(res)

    ## Find the features to label (the ones with the smallest values in
    ## pvalCol). The number of labels is capped at the number of rows, since
    ## indexing beyond the end would give NA labels (and seq_len(Inf) is an
    ## error, although Inf passes the range check above).
    if (nTopToLabel > 0) {
        nLabels <- min(nTopToLabel, nrow(res))
        labelValues <- res[["feature"]][order(res[[pvalCol]])][seq_len(nLabels)]
    } else {
        labelValues <- c()
    }

    .plotScatter(res = res, xCol = logFCCol, yCol = "negLog10P",
                 xLabel = logFCCol, yLabel = paste0("-log10(", pvalCol, ")"),
                 colorCol = padjCol,
                 colorThreshold = padjThreshold,
                 labelCol = "feature", labelValues = labelValues,
                 centerAxis = "x", pointSize = pointSize,
                 interactivePlot = interactivePlot)
}
## Add the following code to your website.
## For more information on customizing the embed code, read Embedding Snippets.