#' Plot inferential replicates for a gene or transcript
#'
#' For datasets with inferential replicates, boxplots are
#' drawn for the two groups and potentially grouped by
#' covariates. For datasets with only mean and variance,
#' points and intervals (95% intervals using Normal
#' approximation) are drawn. Additionally, for numeric
#' \code{x} values, points and intervals will be drawn
#' and \code{\link{computeInfRV}} should be run first
#' in order to add the mean and variance statistics.
#'
#' @param y a SummarizedExperiment (see \code{swish})
#' @param idx the name or row number of the gene or transcript
#' @param x the name of the condition variable for splitting
#' and coloring the samples or cells. Also can be a numeric,
#' e.g. pseudotime, in which case, \code{cov} can be used
#' to designate groups for coloring
#' @param cov the name of the covariate for adjustment
#' @param colsDrk dark colors for the lines of the boxes
#' @param colsLgt light colors for the inside of the boxes
#' @param xaxis logical, whether to label the sample numbers.
#' default is \code{TRUE} if there are less than 30 samples
#' @param xlab the x-axis label
#' @param ylim y limits
#' @param main title
#' @param mainCol name of metadata column to use for title
#' (instead of rowname)
#' @param legend logical, show simple legend (default FALSE)
#' @param legendPos character, position of the legend (default "topleft")
#' @param legendTitle logical, whether to add the name of the
#' grouping variable as a title on the legend (default FALSE)
#' @param legendCex numeric, size of the legend (default 1)
#' @param useMean logical, when inferential replicates
#' are not present or when \code{x} is continuous,
#' whether to use the \code{mean} assay or the
#' \code{counts} assay for plotting
#' @param q numeric, the quantile to use when plotting
#' the intervals when inferential replicates are not
#' present or when \code{x} is continuous. Default
#' is \code{qnorm(.975) ~= 1.96} corresponding to 95%
#' intervals
#' @param applySF logical, when inferential replicates are
#' not present, should \code{y$sizeFactor} be divided out
#' from the mean and interval plots (default FALSE)
#' @param reorder logical, should points within a group
#' defined by condition and covariate be re-ordered by
#' their count value (default is FALSE, except for alevin data)
#' @param thin integer, should the mean and interval lines
#' be drawn thin (the default switches from 0 [not thin]
#' to 1 [thinner] at n=150 cells, and from 1 [thinner]
#' to 2 [thinnest] at n=400 cells)
#' @param shiftX when \code{x} is continuous and \code{cov}
#' is provided, the amount to shift the values on the x-axis
#' to improve visibility of the point and line ranges
#' (will be subtracted from the first level of \code{cov}
#' and added to the second level of \code{cov})
#'
#' @return nothing, a plot is displayed
#'
#' @examples
#'
#' y <- makeSimSwishData()
#' plotInfReps(y, 3, "condition")
#'
#' y <- makeSimSwishData(n=40)
#' y$batch <- factor(rep(c(1,2,3,1,2,3),c(5,10,5,5,10,5)))
#' plotInfReps(y, 3, "condition", "batch")
#'
#' @export
plotInfReps <- function(y, idx, x, cov=NULL,
colsDrk=c("dodgerblue","goldenrod4","royalblue4",
"red3","purple4","darkgreen"),
colsLgt=c("lightblue1","goldenrod1","royalblue1",
"salmon1","orchid1","limegreen"),
xaxis, xlab, ylim,
main, mainCol,
legend=FALSE,
legendPos="topleft",
legendTitle=FALSE,
legendCex=1,
useMean=TRUE,
q=qnorm(.975),
applySF=FALSE,
reorder,
thin,
shiftX) {
# define key variables
stopifnot(x %in% names(colData(y)))
condition <- colData(y)[[x]]
# whether x is a factor variable or numeric (e.g. pseudotime)
xfac <- is(condition, "factor")
if (!xfac) stopifnot(is(condition, "numeric"))
# boxplot depends on whether there are inf reps (and other factors)
hasInfReps <- any(grepl("infRep", assayNames(y)))
# boxplot if inf reps are present and x factorial
drawBoxplot <- hasInfReps & xfac
stopifnot(length(colsDrk) == length(colsLgt))
if (xfac) {
# define colors for boxplot
ncond <- nlevels(condition)
stopifnot(ncond <= length(colsDrk))
colsDrk <- colsDrk[seq_len(ncond)]
colsLgt <- colsLgt[seq_len(ncond)]
}
if (!drawBoxplot) {
if (useMean & !("mean" %in% assayNames(y))) {
message("using 'counts' assay, as 'mean' is missing, see argument 'useMean'")
useMean <- FALSE
}
# make sure variance assay is present
if (!"variance" %in% assayNames(y))
stop("'variance' assay is missing, use computeInfRV() first")
}
if (missing(xaxis)) {
if (xfac) {
xaxis <- ncol(y) < 30
} else {
xaxis <- TRUE
}
}
if (missing(thin)) {
thin <- if (ncol(y) >= 400) 2 else if (ncol(y) >= 150) 1 else 0
} else {
stopifnot(thin >= 0 & thin <= 2)
}
if (!is.null(cov)) {
stopifnot(cov %in% names(colData(y)))
covariate <- factor(colData(y)[[cov]])
stopifnot(is(covariate, "factor"))
ngrp <- nlevels(covariate)
}
infRepsScaled <- FALSE
if (!is.null(metadata(y)$infRepsScaled)) {
infRepsScaled <- metadata(y)$infRepsScaled
}
# single cell?
sc <- FALSE
if (!is.null(metadata(y)$tximetaInfo$type)) {
if (metadata(y)$tximetaInfo$type == "alevin") {
sc <- TRUE
}
}
if (missing(xlab)) {
if (xfac) {
xlab <- if (sc) "cells" else "samples"
} else {
xlab <- x
}
}
if (missing(reorder)) {
if (xfac) {
reorder <- sc
} else {
reorder <- FALSE
}
} else {
if (!xfac & reorder) stop("reorder not used when 'x' is numeric")
}
ylab <- if (infRepsScaled) "scaled counts" else "counts"
# this is a dummy variable used when making the plot()
# if we don't put x-axis ticks, then we will move
# the label up higher using title()
xlabel <- if (xaxis) xlab else ""
if (missing(main)) {
if (missing(mainCol)) {
if (is.character(idx)) {
main <- idx
} else {
main <- rownames(y)[idx]
}
} else {
stopifnot(mainCol %in% names(mcols(y)))
main <- mcols(y)[idx,mainCol]
}
}
if (reorder) {
if (drawBoxplot) {
infReps <- assays(y[idx,])[grep("infRep",assayNames(y))]
value <- colMeans(unlist(infReps))
} else {
which.assay <- if (useMean) "mean" else "counts"
value <- assays(y)[[which.assay]][idx,]
if (applySF & !is.null(y$sizeFactor)) {
value <- value/y$sizeFactor
}
}
if (is.null(cov)) {
o <- order(condition, value)
} else {
o <- order(covariate, condition, value)
}
# not reordering:
} else {
if (xfac) {
if (is.null(cov)) {
o <- order(condition)
} else {
o <- order(covariate, condition)
}
# numeric 'x', don't reorder
} else {
o <- seq_along(condition)
}
}
# col - dark color for boxplot border, points
# col.hglt - highlight color for inside boxplot, lines when n >= 400
if (xfac) {
if (is.null(cov)) {
samp.nums <- unlist(lapply(table(condition), seq_len))
col <- rep(colsDrk, table(condition))
col.hglt <- rep(colsLgt, table(condition))
} else {
vec.tab <- as.vector(table(condition, covariate))
samp.nums <- unlist(lapply(vec.tab, seq_len))
col <- rep(rep(colsDrk, ngrp), vec.tab)
col.hglt <- rep(rep(colsLgt, ngrp), vec.tab)
}
} else {
if (is.null(cov)) {
col <- colsDrk[1]
col.hglt <- colsLgt[1]
} else {
col <- colsDrk[covariate]
col.hglt <- colsLgt[covariate]
}
}
#############
## boxplot ##
#############
if (drawBoxplot) {
infReps <- assays(y[idx,])[grep("infRep",assayNames(y))]
cts <- unlist(infReps)[,o]
ymax <- max(cts)
ymin <- if (is.null(cov)) 0 else -0.02 * ymax
if (missing(ylim)) {
ylim <- c(ymin,ymax)
} else {
stopifnot(length(ylim) == 2)
}
boxplot2(cts, col=col, col.hglt=col.hglt, ylim=ylim,
xlab=xlabel, ylab=ylab, main=main)
################################################
## point and line plot by factor or numeric x ##
################################################
} else {
which.assay <- if (useMean) "mean" else "counts"
cts <- assays(y)[[which.assay]][idx,o]
sds <- sqrt(assays(y)[["variance"]][idx,o])
if (applySF & !is.null(y$sizeFactor)) {
cts <- cts / y$sizeFactor[o]
sds <- sds / y$sizeFactor[o]
ylab <- "scaled counts"
}
# shifting x values
if (!missing(shiftX)) {
stopifnot(!is.null(cov))
covLvl1 <- covariate == levels(covariate)[1]
condition[covLvl1] <- condition[covLvl1] - shiftX
condition[!covLvl1] <- condition[!covLvl1] + shiftX
}
ymax <- max(cts + q*sds)
ymin <- 0
if (xfac & !is.null(cov)) {
ymin <- -0.02 * ymax
}
if (missing(ylim)) {
ylim <- c(ymin, ymax)
} else {
stopifnot(length(ylim) == 2)
}
if (xfac) {
plot(cts, type="n", main=main,
xaxt="n", ylim=ylim,
xlab=xlabel, ylab=ylab)
} else {
plot(condition, cts, type="n", main=main,
xaxt="n", ylim=ylim,
xlab=xlabel, ylab=ylab)
}
seg.lwd <- if (thin == 0) 2 else if (thin == 1) 1 else 3
seg.col <- if (thin < 2) col else col.hglt
if (xfac) {
segments(seq_along(cts), pmax(cts - q*sds, 0),
seq_along(cts), cts + q*sds,
col=seg.col, lwd=seg.lwd)
} else {
segments(condition, pmax(cts - q*sds, 0),
condition, cts + q*sds,
col=seg.col, lwd=seg.lwd)
}
pts.pch <- if (thin == 0) 22 else 15
pts.lwd <- if (thin == 0) 1. else 1
pts.cex <- if (thin == 0) 1 else 0.5
if (xfac) {
points(cts,
col=col, pch=pts.pch, bg=col.hglt,
cex=pts.cex, lwd=pts.lwd)
} else {
points(condition, cts,
col=col, pch=pts.pch, bg=col.hglt,
cex=pts.cex, lwd=pts.lwd)
}
}
if (xaxis) {
if (xfac && !all(samp.nums == 1)) {
axis(1, seq_along(condition), samp.nums)
} else {
axis(1)
}
}
if (!xaxis) {
title(xlab=xlab, mgp=c(1,1,0))
}
# make alternating black/grey lines on bottom denoting pairs/batches
if (xfac & !is.null(cov)) {
cuts <- cumsum(table(covariate))
segments(c(1,cuts[-ngrp]+1),ymin,cuts,ymin,lwd=3,
col=rep(c("black","grey60"),length=ngrp))
}
if (legend & (xfac | !is.null(cov))) {
group <- if (xfac) condition else covariate
group.name <- if (xfac) x else cov
ltitle <- if (legendTitle) group.name else NULL
# here we reverse the R default order,
# so the reference level will be on the bottom, not top
legend(legendPos, legend=rev(levels(group)), title=ltitle,
col=rev(head(colsDrk, nlevels(group))),
pt.bg=rev(head(colsLgt, nlevels(group))),
pch=22, cex=legendCex, bg="white", box.col=NA, inset=.01)
}
}
boxplot2 <- function(x, w=.4, ylim, col, col.hglt, xlab="", ylab="", main="") {
qs <- rowQuantiles(t(x), probs=0:4/4)
if (missing(ylim)) {
ylim <- c(min(x),max(x))
}
plot(qs[,3], type="n", xlim=c(0.5,ncol(x)+.5), xaxt="n",
xlab=xlab, ylab=ylab, main=main, ylim=ylim)
s <- seq_len(ncol(x))
rect(s-w,qs[,2],s+w,qs[,4], col=col.hglt, border=col)
segments(s-w, qs[,3], s+w, qs[,3], col=col, lwd=3, lend=1)
segments(s, qs[,2], s, qs[,1], col=col, lend=1)
segments(s, qs[,4], s, qs[,5], col=col, lend=1)
segments(s-w/2, qs[,1], s+w/2, qs[,1], col=col)
segments(s-w/2, qs[,5], s+w/2, qs[,5], col=col)
}
#' MA plot - log fold change over average counts
#'
#' @param y a SummarizedExperiment (see \code{swish})
#' @param alpha the FDR threshold for coloring points
#' @param sigcolor the color for the significant points
#' @param ... passed to plot
#'
#' @return nothing, a plot is displayed
#'
#' @examples
#'
#' y <- makeSimSwishData()
#' y <- scaleInfReps(y)
#' y <- labelKeep(y)
#' y <- swish(y, x="condition")
#' plotMASwish(y)
#'
#' @export
plotMASwish <- function(y, alpha=.05, sigcolor="blue", ...) {
dat <- data.frame(log10mean=mcols(y)$log10mean,
log2FC=mcols(y)$log2FC,
sig=mcols(y)$qvalue < alpha)
dat <- dat[order(dat$sig),]
with(dat, plot(log10mean, log2FC, pch=20, cex=.4,
col=ifelse(sig, sigcolor, "grey60"), ...))
abline(h=0, col="grey40")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.