R/plotScoreDistribution.R

Defines functions .pretty_violins .plot_score_distribution plotScoreDistribution

Documented in plotScoreDistribution

#' Plot score distributions 
#'
#' Plot the distribution of assignment scores across all cells assigned to each reference label.
#'
#' @param results A \linkS4class{DataFrame} containing the output from \code{\link{SingleR}}, 
#' \code{\link{classifySingleR}}, \code{\link{combineCommonResults}}, or \code{\link{combineRecomputedResults}}.
#' @param show Deprecated, use \code{\link{plotDeltaDistribution}} instead for \code{show!="scores"}.
#' @param labels.use Character vector specifying the labels to show in the plot facets.
#' Defaults to all labels in \code{results}.
#' @param references Integer scalar or vector specifying the references to visualize.
#' This is only relevant for combined results, see Details.
#' @param scores.use Deprecated, see \code{references}.
#' @param calls.use Deprecated and ignored.
#' @param pruned.use Deprecated and ignored.
#' @param dots.on.top Logical scalar specifying whether cell dots should be plotted on top of the violin plots.
#' @param this.color String specifying the color for cells that were assigned to the label.
#' @param pruned.color String specifying the color for cells that were assigned to the label but pruned.
#' @param other.color String specifying the color for other cells not assigned to the label.
#' @param size Numeric scalar to set the size of the dots.
#' @param ncol Integer scalar to set the number of labels to display per row.
#' @param show.nmads,show.min.diff Deprecated, use \code{\link{plotDeltaDistribution}} instead.
#' @param grid.vars Named list of extra variables to pass to \code{\link[gridExtra]{grid.arrange}},
#' used to arrange the multiple plots generated when \code{references} is of length greater than 1.
#'
#' @return
#' If \code{references} specifies a single set of scores,
#' a \link[ggplot2]{ggplot} object is returned showing the scores in violin plots.
#'
#' If \code{references} specifies multiple scores for a combined result,
#' multiple ggplot objects are generated in a grid on the current graphics device.
#' 
#' If \code{references} specifies multiple scores and \code{grid.vars=NULL},
#' a list is returned containing the ggplot objects for manual display.
#'
#' @details
#' This function creates jitter and violin plots showing assignment scores for all cells across one or more labels.
#' Each facet represents a label in \code{labels.use} and contains three violin plots:
#' \itemize{
#' \item \dQuote{Assigned}, containing scores for all cells assigned to that label.
#' Colored according to \code{this.color}.
#' \dQuote{Pruned}, containing scores for all cells assigned to that label but pruned out, e.g., by \code{\link{pruneScores}}.
#' Colored according to \code{pruned.color}, and can be omitted by setting \code{pruned.color=NA}.
#' \item \dQuote{Other}, containing the scores for all cells assigned to other labels.
#' Colored according to \code{other.color}.
#' }
#' The expectation is that the former is higher than the latter,
#' though the deltas generated by \code{\link{plotDeltaDistribution}} are often more informative in this regard.
#'
#' For combined results (see \code{?\link{combineRecomputedResults}}),
#' this function can show both the combined and individual scores.
#' This is done using the \code{references} argument,
#' entries of which refer to columns of \code{results$orig.results} if positive or to the combined results if zero.
#' For example:
#' \itemize{
#' \item If we set \code{references=2}, we will plot the scores from the second individual reference.
#' \item If we set \code{references=1:2}, 
#' we will plot the scores from first and second references (in separate plots) faceted by their corresponding labels.
#' \item By default, the function will create a separate plot for the combined scores and each individual reference,
#' equivalent to \code{references=0:N} for \code{N} individual references.
#' }
#'
#' @seealso
#' \code{\link{pruneScores}}, to remove low-quality labels based on the scores.
#'
#' \code{\link{plotDeltaDistribution}} and \code{\link{plotScoreHeatmap}}, for alternative diagnostic plots.
#'
#' @author Daniel Bunis and Aaron Lun
#' @examples
#' example(SingleR, echo=FALSE)
#'
#' # To show the distribution of scores grouped by label:
#' plotScoreDistribution(results = pred)
#'
#' # We can display a particular label using the label
#' plotScoreDistribution(results = pred,
#'     labels.use = "B")
#'
#' # For multiple references, default output will contain separate plots for
#' # each original reference as well as for the the combined scores.
#' example(combineRecomputedResults, echo = FALSE)
#' plotScoreDistribution(results = combined)
#'
#' # 'references' specifies which original results to plot distributions for.
#' plotScoreDistribution(results = combined, references = 0)
#' plotScoreDistribution(results = combined, references = 1:2)
#'
#' # Tweaking the grid arrangement:
#' plotScoreDistribution(combined, grid.vars = list(ncol = 2))
#'
#' @export
plotScoreDistribution <- function(
    results,
    show = NULL,
    labels.use = colnames(results$scores),
    references = NULL,
    scores.use = NULL,
    calls.use = 0,
    pruned.use = NULL,
    size = 0.5,
    ncol = 5,
    dots.on.top = TRUE,
    this.color = "#F0E442",
    pruned.color = "#E69F00",
    other.color = "gray60",
    show.nmads = 3,
    show.min.diff = NULL,
    grid.vars = list())
{
    if (!is.null(show)) {
        show <- match.arg(show, c("scores", "delta.med", "delta.next"))
        if (show!="scores") {
            .Deprecated(new="plotDeltaDistrbiution")
            return(plotDeltaDistribution(results, show=show, labels.use=labels.use,
                references=scores.use, size=size, ncol=ncol, dots.on.top=dots.on.top,
                this.color=this.color, pruned.color=pruned.color, grid.vars=grid.vars))
        } else {
            .Deprecated(old="show=\"scores\"")
        }
    }

    results <- .ensure_named(results)
    is.combined <- !is.null(results$orig.results)
    ref.names <- colnames(results$orig.results)

    if (!is.null(scores.use)) {
        references <- scores.use
        .Deprecated(old="scores.use", new="references")
    }
    if (is.null(references)) {
        references <- c(0L, seq_along(results$orig.results)) 
    }

    plots <- vector("list", length(references))
    for (i in seq_along(plots)) {

        # Pulling out the scores to use in this iteration.
        chosen <- references[i]
        if (chosen==0L) {
            current.results <- results
        } else {
            current.results <- results$orig.results[[chosen]]
        }

        scores <- current.results$scores
        scores.title <- .values_title(is.combined, chosen, ref.names, show)

        # Pulling out the labels to use in this iteration.
        labels <- current.results$labels
        labels.title <- .values_title(is.combined, chosen, ref.names, "Labels")

        # Pulling out the pruning calls to use in this iteration.
        prune.calls <- NULL
        if (!is.na(pruned.color)) {
            prune.calls <- current.results$pruned.labels
        }

        # Actually creating the plot
        plots[[i]] <- .plot_score_distribution(
            scores=scores, labels=labels, prune.calls=prune.calls, labels.use=labels.use, 
            labels.title=labels.title, scores.title=scores.title, 
            this.color=this.color, pruned.color=pruned.color, other.color=other.color, 
            size=size, ncol=ncol, dots.on.top=dots.on.top)
    }

    if (length(plots)==1L) {
        # Doing this to be consistent with raw ggplot output.
        plots[[1]]
    } else {
        if (!is.null(grid.vars) && length(references) > 1L) {
            do.call(gridExtra::grid.arrange, c(plots, grid.vars))
        } else {
            plots
        }
    }
}

.plot_score_distribution <- function(
    scores, labels, prune.calls, labels.use,
    labels.title, scores.title, 
    this.color, pruned.color, other.color, size, ncol, dots.on.top)
{
    # Create a dataframe with separate rows for each score in values.
    df <- data.frame(
        label = rep(colnames(scores), nrow(scores)),
        values = as.numeric(t(scores)),
        stringsAsFactors = FALSE)

    # Add whether this label is the final label given to each cell.
    df$cell.calls <- rep("other", nrow(df)) # rep() protects when nrow(df)=0.
    is.called <- df$label == rep(labels, each=ncol(scores))
    df$cell.calls[is.called] <- "assigned"

    # Replace cell.call if cell was pruned.
    if (!is.null(prune.calls)) {
        is.pruned <- rep(is.na(prune.calls), each=ncol(scores))
        df$cell.calls[is.pruned & is.called] <- "pruned"
    }

    # Trim dataframe by labels
    keep <- df$label %in% labels.use
    if (any(keep)) {
        df <- df[keep,]
    } else {
        warning("ignoring 'labels.use' as it has no values in ", scores.title)
    }

    # Making the violin plots.
    p <- ggplot2::ggplot(data = df,
            ggplot2::aes_string(x = "cell.calls", y = "values", fill = "cell.calls")) +
        ggplot2::scale_fill_manual(
            name = labels.title,
            breaks = c("assigned", "pruned", "other"),
            values = c(this.color, pruned.color, other.color))
    
    jit <- ggplot2::geom_jitter(height = 0, width = 0.3, color = "black",
        shape = 16, size = size, na.rm = TRUE)

    .pretty_violins(p, df=df, ncol=ncol, scores.title=scores.title, 
        size=size, dots.on.top=dots.on.top, jitter=jit)
}

.pretty_violins <- function(p, df, ncol, scores.title, size, dots.on.top, jitter, ...) {
    p <- p + ggplot2::theme_classic() +
        ggplot2::facet_wrap(facets = ~label, ncol = ncol) +
        ggplot2::ylab(scores.title)
    
    if (nlevels(as.factor(df$label)) == 1) {
        p <- p + ggplot2::scale_x_discrete(name = NULL, labels = NULL)
    } else {
        p <- p + ggplot2::scale_x_discrete(name = "Labels", labels = NULL)
    }

    if (!dots.on.top) {
        p <- p + jitter
    }

    p <- p + ggplot2::geom_violin(na.rm=TRUE, ...)

    if (dots.on.top) {
        p <- p + jitter
    }

    p
}

Try the SingleR package in your browser

Any scripts or data that you put into this service are public.

SingleR documentation built on Feb. 4, 2021, 2:01 a.m.