R/plotting.R

Defines functions .get_palette .resolve_plot_colours .choose_vis_values plotSCESet plotPCASCESet .makePairs plotDiffusionMapSCESet plotMDSSCESet plotReducedDim.default plotReducedDim.SCESet plotPlatePosition plotExpressionSCESet plotExpressionDefault plotMetadata plotPhenoData plotFeatureData multiplot plotExprsVsTxLength

Documented in multiplot plotDiffusionMapSCESet plotExpressionDefault plotExpressionSCESet plotExprsVsTxLength plotFeatureData plotMDSSCESet plotMetadata plotPCASCESet plotPhenoData plotPlatePosition plotReducedDim.default plotReducedDim.SCESet plotSCESet

## Suite of plotting functions

################################################################################
## define colour palettes
.get_palette <- function(palette_name) {
    switch(palette_name,
           tableau20 = c("#1F77B4", "#AEC7E8", "#FF7F0E", "#FFBB78", "#2CA02C",
                         "#98DF8A", "#D62728", "#FF9896", "#9467BD", "#C5B0D5",
                         "#8C564B", "#C49C94", "#E377C2", "#F7B6D2", "#7F7F7F",
                         "#C7C7C7", "#BCBD22", "#DBDB8D", "#17BECF", "#9EDAE5"),
           tableau10medium = c("#729ECE", "#FF9E4A", "#67BF5C", "#ED665D",
                               "#AD8BC9", "#A8786E", "#ED97CA", "#A2A2A2",
                               "#CDCC5D", "#6DCCDA"),
           colorblind10 = c("#006BA4", "#FF800E", "#ABABAB", "#595959",
                            "#5F9ED1", "#C85200", "#898989", "#A2C8EC",
                            "#FFBC79", "#CFCFCF"),
           trafficlight = c("#B10318", "#DBA13A", "#309343", "#D82526",
                            "#FFC156", "#69B764", "#F26C64", "#FFDD71",
                            "#9FCD99"),
           purplegray12 = c("#7B66D2", "#A699E8", "#DC5FBD", "#FFC0DA",
                            "#5F5A41", "#B4B19B", "#995688", "#D898BA",
                            "#AB6AD5", "#D098EE", "#8B7C6E", "#DBD4C5"),
           bluered12 = c("#2C69B0", "#B5C8E2", "#F02720", "#FFB6B0", "#AC613C",
                         "#E9C39B", "#6BA3D6", "#B5DFFD", "#AC8763", "#DDC9B4",
                         "#BD0A36", "#F4737A"),
           greenorange12 = c("#32A251", "#ACD98D", "#FF7F0F", "#FFB977",
                             "#3CB7CC", "#98D9E4", "#B85A0D", "#FFD94A",
                             "#39737C", "#86B4A9", "#82853B", "#CCC94D"),
           cyclic = c("#1F83B4", "#1696AC", "#18A188", "#29A03C", "#54A338",
                      "#82A93F", "#ADB828", "#D8BD35", "#FFBD4C", "#FFB022",
                      "#FF9C0E", "#FF810E", "#E75727", "#D23E4E", "#C94D8C",
                      "#C04AA7", "#B446B3", "#9658B1", "#8061B4", "#6F63BB")
    )
}

# Get nice plotting colour schemes for very general colour variables
.resolve_plot_colours <- function(plot_out, colour_by, colour_by_name,
                                  fill = FALSE) {
    ## if the colour_by object is NULL, return the plot_out object unchanged
    if ( is.null(colour_by) )
        return(plot_out)
    ## Otherwise, set a sensible colour scheme and return the plot_out object
    if ( fill ) {
        if ( is.numeric(colour_by) ) {
            plot_out <- plot_out +
                viridis::scale_fill_viridis(name = colour_by_name)
        } else {
            nlevs_colour_by <- nlevels(as.factor(colour_by))
            if (nlevs_colour_by <= 10) {
                plot_out <- plot_out + scale_fill_manual(
                    values = .get_palette("tableau10medium"),
                    name = colour_by_name)
            } else {
                if (nlevs_colour_by > 10 && nlevs_colour_by <= 20) {
                    plot_out <- plot_out + scale_fill_manual(
                        values = .get_palette("tableau20"),
                        name = colour_by_name)
                } else {
                    plot_out <- plot_out +
                        viridis::scale_fill_viridis(
                            name = colour_by_name, discrete = TRUE)
                }
            }
        }
    } else {
        if ( is.numeric(colour_by) ) {
            plot_out <- plot_out +
                viridis::scale_color_viridis(name = colour_by_name)
        } else {
            nlevs_colour_by <- nlevels(as.factor(colour_by))
            if (nlevs_colour_by <= 10) {
                plot_out <- plot_out + scale_colour_manual(
                    values = .get_palette("tableau10medium"),
                    name = colour_by_name)
            } else {
                if (nlevs_colour_by > 10 && nlevs_colour_by <= 20) {
                    plot_out <- plot_out + scale_colour_manual(
                        values = .get_palette("tableau20"),
                        name = colour_by_name)
                } else {
                    plot_out <- plot_out +
                        viridis::scale_color_viridis(
                            name = colour_by_name, discrete = TRUE)
                }
            }
        }
    }
    plot_out
}

.choose_vis_values <- function(x, by, check_pdata = TRUE,
                               cell_control_default = FALSE,
                               check_features = FALSE,
                               exprs_values = "exprs",
                               coerce_factor = FALSE, level_limit = NA) {
    ## This function looks through the visualization data and returns the
    ## values to be visualized. Either 'by' itself, or a column of pData,
    ## or a column of fData, or the expression values of a feature.

    if (is.character(by)) {
        if (length(by) != 1L) {
            stop("'by' should be a character vector of length 1")
        }

        ## checking if it refers to a field in pData/fData.
        vals <- NULL
        if (check_pdata) {
            if (by %in% varLabels(x)) {
                vals <- x[[by]]
            }
            unfound <- "colnames(pData(x))"
        } else {
            if (by %in% Biobase::fvarLabels(x)) {
                vals <- fData(x)[[by]]
            }
            unfound <- "colnames(fData(x))"
        }

        ## checking if it refers to a feature
        if (check_features) {
            if (is.null(vals) && by %in% featureNames(x)) {
                vals <- get_exprs(x, exprs_values)[by,]
            }
            unfound <- append(unfound, "featureNames(x)")
        }

        ## throwing an error if still unfound
        if (is.null(vals)) {
            stop(sprintf("'%s' not found %s", by,
                         paste(sprintf("in '%s'", unfound), collapse = " or ")))
        }

    } else if (is.data.frame(by)) {
        if (ncol(by) != 1L) {
            stop("'by' should be a data frame with one column")
        } else if (nrow(by) != ncol(x)) {
            stop("'nrow(by)' should be equal to number of columns in 'x'")
        }

        ## Allow arbitrary values to be specified.
        vals <- by[,1]
        by <- colnames(by)

    } else {
        if (!is.null(by)) stop("invalid value of 'by' supplied")
        vals <- NULL

        ##  Switching to cell controls if desired.
        if (cell_control_default) {
            if ( "is_cell_control" %in% varLabels(x) ) {
                by <- "is_cell_control"
                vals <- x[[by]]
            }
        }
    }

    # Checking the level limit.
    if (coerce_factor && !is.null(vals)) {
        vals <- factor(vals)
        if (level_limit < nlevels(vals))
            stop(sprintf("number of unique levels exceeds %i", level_limit))
    }

    return(list(name = by, val = vals))
}


################################################################################
### Generic plot function for SCESet

#' Plot an overview of expression for each cell
#'
#' Plot the relative proportion of the library accounted for by the most highly
#' expressed features for each cell for an \code{SCESet} dataset.
#'
#' @param x an \code{SCESet} object
#' @param block1 character string defining the column of \code{pData(object)} to
#' be used as a factor by which to separate the cells into blocks (separate
#' panels) in the plot. Default is \code{NULL}, in which case there is no
#' blocking.
#' @param block2 character string defining the column of \code{pData(object)} to
#' be used as a factor by which to separate the cells into blocks (separate
#' panels) in the plot. Default is \code{NULL}, in which case there is no
#' blocking.
#' @param colour_by character string defining the column of \code{pData(object)} to
#' be used as a factor by which to colour the points in the plot. Alternatively,
#' a data frame with one column containing a value for each cell, which will be
#' mapped to a corresponding colour.
#' @param nfeatures numeric scalar indicating the number of features to include
#' in the plot.
#' @param exprs_values character string indicating which values should be used
#' as the expression values for this plot. Valid arguments are \code{"tpm"}
#' (transcripts per million), \code{"counts"} (raw counts), \code{"cpm"}
#' (counts per million), \code{"fpkm"} (FPKM values), or \code{"exprs"} (default;
#' which are assumed to be on the log2 scale and are un-logged prior to plotting). If
#' not specified, the function will search for values in the order given above.
#' @param linewidth numeric scalar giving the "size" parameter (in ggplot2
#' parlance) for the lines plotted. Default is 1.5.
#' @param y optional argument for generic \code{plot} functions, not used for
#' plotting an \code{SCESet} object
#' @param ... arguments passed to \code{plotSCESet}
#' @param ncol number of columns to use for \code{facet_wrap} if only one block is
#' defined.
#' @param theme_size numeric scalar giving font size to use for the plotting
#' theme
#'
#' @details Plots produced by this function are intended to provide an overview
#' of large-scale differences between cells. For each cell, the features are
#' ordered from most-expressed to least-expressed and the cumulative proportion
#' of the total expression for the cell is computed across the top
#' \code{nfeatures} features. These plots can flag cells with a very high
#' proportion of the library coming from a small number of features; such cells
#' are likely to be problematic for analyses. Using the colour and blocking
#' arguments can flag overall differences in cells under different experimental
#' conditions or affected by different batch and other variables.
#'
#' @return a ggplot plot object
#'
#' @name plot
#' @aliases plot plot,SCESet-method plot,SCESet,ANY-method
#' @export
#' @method plot plot,SCESet-method
#'
#' @examples
#' ## Set up an example SCESet
#' data("sc_example_counts")
#' data("sc_example_cell_info")
#' pd <- new("AnnotatedDataFrame", data = sc_example_cell_info)
#' example_sceset <- newSCESet(countData = sc_example_counts, phenoData = pd)
#'
#' plot(example_sceset, exprs_values = "exprs")
#' plot(example_sceset, exprs_values = "exprs", colour_by = "Cell_Cycle")
#' plot(example_sceset, exprs_values = "exprs", block1 = "Treatment",
#' colour_by = "Cell_Cycle")
#' plot(example_sceset, exprs_values = "exprs", block1 = "Treatment",
#' block2 = "Mutation_Status", colour_by = "Cell_Cycle")
#' # What happens if chosen expression values are not available?
#' plot(example_sceset, block1 = "Treatment", colour_by = "Cell_Cycle")
#'
setMethod("plot", signature("SCESet"),
          function(x, ...) {
              plotSCESet(x, ...)
          })

#' @rdname plot
#' @aliases plot
#' @importFrom dplyr mutate
#' @importFrom plyr aaply
#' @importFrom reshape2 melt
#' @export
plotSCESet <- function(x, block1 = NULL, block2 = NULL, colour_by = NULL,
                       nfeatures = 500, exprs_values = NULL, ncol = 3,
                       linewidth = 1.5, theme_size = 10) {
    object <- x
    if ( !is(object, "SCESet") )
        stop("Object must be an SCESet")
    if ( !is.null(block1) ) {
        if ( !(block1 %in% colnames(pData(object))) )
            stop("The block1 argument must either be NULL or a column of pData(object).")
    }
    if ( !is.null(block2) ) {
        if ( !(block2 %in% colnames(pData(object))) )
            stop("The block2 argument must either be NULL or a column of pData(object).")
    }

    ## Setting values to colour by.
    colour_by_out <- .choose_vis_values(object, colour_by)
    colour_by <- colour_by_out$name
    colour_by_vals <- colour_by_out$val

    ## Define an expression matrix depending on which values we're using
    exprs_values <- .exprs_hunter(x, exprs_values)
    exprs_mat <- get_exprs(x, exprs_values)
    if ( exprs_values == "exprs" )
        exprs_mat <- 2 ^ exprs_mat - object@logExprsOffset

    ## Use plyr to get the sequencing real estate accounted for by features
    nfeatures_total <- nrow(exprs_mat)
    seq_real_estate <- t(plyr::aaply(exprs_mat, 2, .fun = function(x) {
        cumsum(sort(x, decreasing = TRUE))
    }))
    rownames(seq_real_estate) <- seq_len(nfeatures_total)
    nfeatures_to_plot <- nfeatures
    to_plot <- seq_len(nfeatures_to_plot)
    seq_real_estate_long <- reshape2::melt(seq_real_estate[to_plot, ],
                                           value.name = "exprs")

    ## Get the proportion of the library accounted for by the top features
    prop_library <- reshape2::melt(t(t(seq_real_estate[to_plot, ]) /
                                         colSums(exprs_mat)),
                                   value.name = "prop_library")
    colnames(seq_real_estate_long) <- c("Feature", "Cell", "exprs")
    seq_real_estate_long$Proportion_Library <- prop_library$prop_library

    ## Add block and colour_by information if provided
    if ( !is.null(block1) )
        seq_real_estate_long <- dplyr::mutate(
            seq_real_estate_long, block1 = as.factor(rep(object[[block1]],
                                                       each = nfeatures_to_plot)))
    if ( !is.null(block2) )
        seq_real_estate_long <- dplyr::mutate(
            seq_real_estate_long, block2 = as.factor(rep(object[[block2]],
                                                       each = nfeatures_to_plot)))
    if ( !is.null(colour_by) )
        seq_real_estate_long <- dplyr::mutate(
            seq_real_estate_long, colour_by = rep(colour_by_vals,
                                             each = nfeatures_to_plot))

    ## Set up plot
    if ( is.null(colour_by) ) {
        plot_out <- ggplot(seq_real_estate_long,
                           aes_string(x = "Feature", y = "Proportion_Library",
                                      group = "Cell")) +
            geom_line(linetype = "solid", alpha = 0.3, size = linewidth)
    } else {
        plot_out <- ggplot(seq_real_estate_long,
                           aes_string(x = "Feature", y = "Proportion_Library",
                                      group = "Cell", colour = "colour_by")) +
            geom_line(linetype = "solid", alpha = 0.3, size = linewidth)
    }
    ## Deal with blocks for grid
    if ( !(is.null(block1) | is.null(block2)) )
        plot_out <- plot_out + facet_grid(block2 ~ block1)
    else {
        if ( !is.null(block1) && is.null(block2) ) {
            plot_out <- plot_out +
                facet_wrap(~block1, ncol = ncol)
        }
        if ( is.null(block1) && !is.null(block2) ) {
            plot_out <- plot_out +
                facet_wrap(~block2, ncol = ncol)
        }
    }
    ## Add extra plot theme and details
    if ( !is.null(seq_real_estate_long$colour_by) ) {
       plot_out <- .resolve_plot_colours(plot_out,
                                         seq_real_estate_long$colour_by,
                                         colour_by)
    }

    plot_out <- plot_out +
        xlab("Number of features") + ylab("Cumulative proportion of library")

    if ( requireNamespace("cowplot", quietly = TRUE) )
        plot_out <- plot_out + cowplot::theme_cowplot(theme_size)
    else
        plot_out <- plot_out + theme_bw(theme_size)
    ## Return plot
    plot_out
}


################################################################################

#' Plot PCA for an SCESet object
#'
#' Produce a principal components analysis (PCA) plot of two or more principal
#' components for an \code{SCESet} dataset.
#'
#' @param object an \code{SCESet} object
#' @param ntop numeric scalar indicating the number of most variable features to
#' use for the PCA. Default is \code{500}, but any \code{ntop} argument is
#' overrided if the \code{feature_set} argument is non-NULL.
#' @param ncomponents numeric scalar indicating the number of principal
#' components to plot, starting from the first principal component. Default is
#' 2. If \code{ncomponents} is 2, then a scatterplot of PC2 vs PC1 is produced.
#' If \code{ncomponents} is greater than 2, a pairs plots for the top components
#' is produced.
#' @param exprs_values character string indicating which values should be used
#' as the expression values for this plot. Valid arguments are \code{"tpm"}
#' (transcripts per million), \code{"norm_tpm"} (normalised TPM
#' values), \code{"fpkm"} (FPKM values), \code{"norm_fpkm"} (normalised FPKM
#' values), \code{"counts"} (counts for each feature), \code{"norm_counts"},
#' \code{"cpm"} (counts-per-million), \code{"norm_cpm"} (normalised
#' counts-per-million), \code{"exprs"} (whatever is in the \code{'exprs'} slot
#' of the \code{SCESet} object; default), \code{"norm_exprs"} (normalised
#' expression values) or \code{"stand_exprs"} (standardised expression values)
#' or any other named element of the \code{assayData} slot of the \code{SCESet}
#' object that can be accessed with the \code{get_exprs} function.
#' @param colour_by character string defining the column of \code{pData(object)} to
#' be used as a factor by which to colour the points in the plot. Alternatively,
#' a data frame with one column, containing values to map to colours for all cells.
#' @param shape_by character string defining the column of \code{pData(object)} to
#' be used as a factor by which to define the shape of the points in the plot.
#' Alternatively, a data frame with one column containing values to map to shapes.
#' @param size_by character string defining the column of \code{pData(object)} to
#' be used as a factor by which to define the size of points in the plot.
#' Alternatively, a data frame with one column containing values to map to sizes.
#' @param feature_set character, numeric or logical vector indicating a set of
#' features to use for the PCA. If character, entries must all be in
#' \code{featureNames(object)}. If numeric, values are taken to be indices for
#' features. If logical, vector is used to index features and should have length
#' equal to \code{nrow(object)}.
#' @param return_SCESet logical, should the function return an \code{SCESet}
#' object with principal component values for cells in the
#' \code{reducedDimension} slot. Default is \code{FALSE}, in which case a
#' \code{ggplot} object is returned.
#' @param scale_features logical, should the expression values be standardised
#' so that each feature has unit variance? Default is \code{TRUE}.
#' @param draw_plot logical, should the plot be drawn on the current graphics
#' device? Only used if \code{return_SCESet} is \code{TRUE}, otherwise the plot
#' is always produced.
#' @param pca_data_input character argument defining which data should be used
#' as input for the PCA. Possible options are \code{"exprs"} (default), which
#' uses expression data to produce a PCA at the cell level; \code{"pdata"} which
#' uses numeric variables from \code{pData(object)} to do PCA at the cell level;
#' and \code{"fdata"} which uses numeric variables from \code{fData(object)} to
#' do PCA at the feature level.
#' @param selected_variables character vector indicating which variables in
#' \code{pData(object)} to use for the phenotype-data based PCA. Ignored if
#' the argument \code{pca_data_input} is anything other than \code{"pdata"}.
#' @param detect_outliers logical, should outliers be detected in the PC plot?
#' Only an option when \code{pca_data_input} argument is \code{"pdata"}. Default
#' is \code{FALSE}.
#' @param theme_size numeric scalar giving default font size for plotting theme
#' (default is 10).
#' @param legend character, specifying how the legend(s) be shown? Default is
#' \code{"auto"}, which hides legends that have only one level and shows others.
#' Alternatives are "all" (show all legends) or "none" (hide all legends).
#'
#' @details The function \code{\link{prcomp}} is used internally to do the PCA.
#' The function checks whether the \code{object} has standardised
#' expression values (by looking at \code{stand_exprs(object)}). If yes, the
#' existing standardised expression values are used for the PCA. If not, then
#' standardised expression values are computed using \code{\link{scale}} (with
#' feature-wise unit variances or not according to the \code{scale_features}
#' argument), added to the object and PCA is done using these new standardised
#' expression values.
#'
#' If the arguments \code{detect_outliers} and \code{return_SCESet} are both
#' \code{TRUE}, then the element \code{$outlier} is added to the pData
#' (phenotype data) slot of the \code{SCESet} object. This element contains
#' indicator values about whether or not each cell has been designated as an
#' outlier based on the PCA. These values can be accessed for filtering
#' low quality cells with, foe example, \code{example_sceset$outlier}.
#'
#' @return either a ggplot plot object or an SCESet object
#'
#' @name plotPCA
#' @aliases plotPCA plotPCA,SCESet-method
#' @importFrom BiocGenerics plotPCA
#' @importFrom matrixStats rowVars
#' @importFrom matrixStats colVars
#' @export
#'
#' @examples
#' ## Set up an example SCESet
#' data("sc_example_counts")
#' data("sc_example_cell_info")
#' pd <- new("AnnotatedDataFrame", data = sc_example_cell_info)
#' example_sceset <- newSCESet(countData = sc_example_counts, phenoData = pd)
#' drop_genes <- apply(exprs(example_sceset), 1, function(x) {var(x) == 0})
#' example_sceset <- example_sceset[!drop_genes, ]
#'
#' ## Examples plotting PC1 and PC2
#' plotPCA(example_sceset)
#' plotPCA(example_sceset, colour_by = "Cell_Cycle")
#' plotPCA(example_sceset, colour_by = "Cell_Cycle", shape_by = "Treatment")
#' plotPCA(example_sceset, colour_by = "Cell_Cycle", shape_by = "Treatment",
#' size_by = "Mutation_Status")
#' plotPCA(example_sceset, shape_by = "Treatment", size_by = "Mutation_Status")
#' plotPCA(example_sceset, feature_set = 1:100, colour_by = "Treatment",
#' shape_by = "Mutation_Status")
#'
#' ## experiment with legend
#' example_subset <- example_sceset[, example_sceset$Treatment == "treat1"]
#' plotPCA(example_subset, colour_by = "Cell_Cycle", shape_by = "Treatment", legend = "all")
#'
#' plotPCA(example_sceset, shape_by = "Treatment", return_SCESet = TRUE)
#'
#' ## Examples plotting more than 2 PCs
#' plotPCA(example_sceset, ncomponents = 8)
#' plotPCA(example_sceset, ncomponents = 4, colour_by = "Treatment",
#' shape_by = "Mutation_Status")
#'
plotPCASCESet <- function(object, ntop=500, ncomponents=2,
                          exprs_values = "exprs", colour_by = NULL,
                          shape_by = NULL, size_by = NULL, feature_set = NULL,
                          return_SCESet = FALSE, scale_features = TRUE,
                          draw_plot = TRUE, pca_data_input = "exprs",
                          selected_variables = NULL, detect_outliers = FALSE,
                          theme_size = 10, legend = "auto") {
    ## check legend argument
    legend <- match.arg(legend, c("auto", "none", "all"))

    ## Check arguments are valid
    colour_by_out <- .choose_vis_values(object, colour_by, cell_control_default = TRUE,
                                        check_features = TRUE, exprs_values = exprs_values)
    colour_by <- colour_by_out$name
    colour_by_vals <- colour_by_out$val

    shape_by_out <- .choose_vis_values(object, shape_by, cell_control_default = TRUE,
                                       coerce_factor = TRUE, level_limit = 10)
    shape_by <- shape_by_out$name
    shape_by_vals <- shape_by_out$val

    size_by_out <- .choose_vis_values(object, size_by, check_features = TRUE, exprs_values = exprs_values)
    size_by <- size_by_out$name
    size_by_vals <- size_by_out$val

    if ( is.character(feature_set) ) {
        if ( !(all(feature_set %in% featureNames(object))) )
            stop("when the argument 'feature_set' is of type character, all features must be in featureNames(object)")
    }

    ## Define an expression matrix depending on which values we're using
#     exprs_values <- match.arg(
#         exprs_values, c("exprs", "tpm", "fpkm", "counts", "cpm", "norm_exprs",
#                         "stand_exprs"))
    exprs_mat <- get_exprs(object, exprs_values, warning=FALSE)
    if ( is.null(exprs_mat) )
        stop(sprintf("object does not contain '%s'", exprs_values))

    ## Define features to use: either ntop, or if a set of features is defined,
    ## then those
    if ( is.null(feature_set) ) {
        rv <- matrixStats::rowVars(exprs_mat)
        feature_set <-
            order(rv, decreasing = TRUE)[seq_len(min(ntop, length(rv)))]
    }

    if ( pca_data_input == "pdata" ) {
        #use_variable <- sapply(pData(object), is.double)
        ## select pData features to use
        if ( is.null(selected_variables) ) {
            selected_variables <- c("pct_counts_top_100_features",
                                    "total_features",
                                    "pct_counts_feature_controls",
                                    "n_detected_feature_controls",
                                    "log10_counts_endogenous_features",
                                    "log10_counts_feature_controls")
        }
        use_variable <- varLabels(object) %in% selected_variables
        vars_not_found <- !(selected_variables %in% varLabels(object))
        if ( any(vars_not_found) ) {
            message(paste("The following selected_variables were not found in pData(object):", selected_variables[vars_not_found]))
            message("Other variables from pData(object) can be used by specifying a vector of variable names as the selected_variables argument.")
            message("PCA is being conducted using the following variables:",
                    selected_variables[!vars_not_found])
        }
        ## scale double variables
        exprs_to_plot <- scale(pData(object)[, use_variable],
                               scale = scale_features)
    } else if ( pca_data_input == "fdata" ) {
        use_variable <- sapply(fData(object), is.double)
        ## scale double variables
        exprs_to_plot <- scale(fData(object)[, use_variable],
                               scale = scale_features)
    } else {
        # Subsetting to the desired features (do NOT move below 'scale()')
        exprs_to_plot <- exprs_mat[feature_set,, drop = FALSE]
        ## Standardise expression if scale_features argument is TRUE
        exprs_to_plot <- scale(t(exprs_to_plot), scale = scale_features)
    }

    ## Drop any features with zero variance
    keep_feature <- (matrixStats::colVars(exprs_to_plot) > 0.001)
    keep_feature[is.na(keep_feature)] <- FALSE
    exprs_to_plot <- exprs_to_plot[, keep_feature]

    ## Compute PCA
    pca <- prcomp(exprs_to_plot)
    percentVar <- pca$sdev ^ 2 / sum(pca$sdev ^ 2)

    ## Define data.frame for plotting
    if ( pca_data_input == "fdata") {
        df_to_plot <- data.frame(pca$x[, 1:ncomponents],
                                 row.names = featureNames(object))
    } else {
        df_to_plot <- data.frame(pca$x[, 1:ncomponents],
                                 row.names = sampleNames(object))
        df_to_plot$colour_by <- colour_by_vals
        df_to_plot$shape_by <- shape_by_vals
        df_to_plot$size_by <- size_by_vals
    }

    ## conduct outlier detection
    if ( detect_outliers ) {
        if ( requireNamespace("mvoutlier", quietly = TRUE) ) {
            if ( !(pca_data_input == "pdata") ) {
                warning("outlier detection will only be done if pca_data_input
                        argument is 'pdata' (operating on QC metrics)")
            } else {
                outliers <- mvoutlier::pcout(exprs_to_plot, makeplot = FALSE,
                                             explvar = 0.5, crit.M1 = 0.9,
                                             crit.c1 = 5, crit.M2 = 0.9,
                                             crit.c2 = 0.99, cs = 0.25,
                                             outbound = 0.05)
                outlier <- !as.logical(outliers$wfinal01)
                object$outlier <- outlier
                df_to_plot$colour_by <- outlier
                colour_by <- "outlier"
                cat("The following cells/samples are detected as outliers:\n")
                cat(paste(sampleNames(object)[outlier], collapse = "\n"))
                cat("\nVariables with highest loadings for PC1 and PC2:")
                oo <- order(pca$rotation[, 1], decreasing = TRUE)
                if ( requireNamespace("knitr", quietly = TRUE))
                    print(knitr::kable(pca$rotation[oo, 1:2]))
                else
                    print(pca$rotation[oo, 1:2])
            }
        } else {
            warning("The package mvoutlier must be installed to do outlier
                    detection")
        }
    }

    ## Make reduced-dimension plot
    if ( pca_data_input == "fdata" )
        plot_out <- plotReducedDim.default(df_to_plot, ncomponents,
                                           percentVar = percentVar,
                                           legend = legend)
    else
        plot_out <- plotReducedDim.default(df_to_plot, ncomponents, colour_by,
                                           shape_by, size_by, percentVar,
                                           legend = legend)

    ## Define plotting theme
    if ( requireNamespace("cowplot", quietly = TRUE) )
        plot_out <- plot_out + cowplot::theme_cowplot(theme_size)
    else
        plot_out <- plot_out + theme_bw(theme_size)
    ## remove legend if so desired
    if ( legend == "none" )
        plot_out <- plot_out + theme(legend.position = "none")

    ## Plot PCA and return appropriate object
    if (return_SCESet) {
        ncomp_out <- max(ncomponents, 10)
        ncomp_out <- min(ncomp_out, ncol(pca$x))
        df_out <- pca$x[, 1:ncomp_out]
        rownames(df_out) <- sampleNames(object)
        attr(df_out, "percentVar") <- percentVar[1:ncomp_out]
        reducedDimension(object) <- df_out
        if ( draw_plot )
            print(plot_out)
        return(object)
    } else {
        ## Return PCA plot
        return(plot_out)
    }
}


#' @rdname plotPCA
#' @param ... further arguments passed to \code{\link{plotPCASCESet}}
#' @aliases plotPCA
#' @export
setMethod("plotPCA", signature("SCESet"),
          function(object, ntop = 500, ncomponents = 2, exprs_values = "exprs",
                   colour_by = NULL, shape_by = NULL, size_by = NULL,
                   feature_set = NULL, return_SCESet = FALSE,
                   scale_features = TRUE, draw_plot = TRUE,
                   pca_data_input = "exprs", selected_variables = NULL,
                   detect_outliers = FALSE, theme_size = 10, legend = "auto") {
              plotPCASCESet(object, ntop, ncomponents, exprs_values, colour_by,
                            shape_by, size_by, feature_set, return_SCESet,
                            scale_features, draw_plot, pca_data_input,
                            selected_variables, detect_outliers, theme_size,
                            legend)
          })




.makePairs <- function(data_matrix) {
    ## with thanks to Gaston Sanchez, who posted this code online
    ## https://gastonsanchez.wordpress.com/2012/08/27/scatterplot-matrices-with-ggplot/
    if ( is.null(names(data_matrix)) )
        names(data_matrix) <- paste0("row", 1:nrow(data_matrix))
    exp_grid <- expand.grid(x = 1:ncol(data_matrix), y = 1:ncol(data_matrix))
    exp_grid <- exp_grid[exp_grid$x != exp_grid$y,]
    all_panels <- do.call("rbind", lapply(1:nrow(exp_grid), function(i) {
        xcol <- exp_grid[i, "x"]
        ycol <- exp_grid[i, "y"]
        data.frame(xvar = names(data_matrix)[ycol], yvar = names(data_matrix)[xcol],
                   x = data_matrix[, xcol], y = data_matrix[, ycol], data_matrix)
    }))
    all_panels$xvar <- factor(all_panels$xvar, levels = names(data_matrix))
    all_panels$yvar <- factor(all_panels$yvar, levels = names(data_matrix))
    densities <- do.call("rbind", lapply(1:ncol(data_matrix), function(i) {
        data.frame(xvar = names(data_matrix)[i], yvar = names(data_matrix)[i],
                   x = data_matrix[, i])
    }))
    list(all = all_panels, densities = densities)
}


################################################################################
### plotTSNE

#' Plot t-SNE for an SCESet object
#'
#' Produce a t-distributed stochastic neighbour embedding (t-SNE) plot of two
#' components for an \code{SCESet} dataset.
#'
#' @param object an \code{SCESet} object
#' @param ntop numeric scalar indicating the number of most variable features to
#' use for the t-SNE Default is \code{500}, but any \code{ntop} argument is
#' overrided if the \code{feature_set} argument is non-NULL.
#' @param ncomponents numeric scalar indicating the number of t-SNE
#' components to plot, starting from the first t-SNE component. Default is
#' 2. If \code{ncomponents} is 2, then a scatterplot of component 1 vs component
#' 2 is produced. If \code{ncomponents} is greater than 2, a pairs plots for the
#' top components is produced. NB: computing more than two components for t-SNE
#' can become very time consuming.
#' @param exprs_values character string indicating which values should be used
#' as the expression values for this plot. Valid arguments are \code{"tpm"}
#' (transcripts per million), \code{"norm_tpm"} (normalised TPM
#' values), \code{"fpkm"} (FPKM values), \code{"norm_fpkm"} (normalised FPKM
#' values), \code{"counts"} (counts for each feature), \code{"norm_counts"},
#' \code{"cpm"} (counts-per-million), \code{"norm_cpm"} (normalised
#' counts-per-million), \code{"exprs"} (whatever is in the \code{'exprs'} slot
#' of the \code{SCESet} object; default), \code{"norm_exprs"} (normalised
#' expression values) or \code{"stand_exprs"} (standardised expression values),
#' or any other named element of the \code{assayData} slot of the \code{SCESet}
#' object that can be accessed with the \code{get_exprs} function.
#' @param colour_by character string defining the column of \code{pData(object)} to
#' be used as a factor by which to colour the points in the plot. Alternatively,
#' a data frame with one column containing values to map to colours for all cells.
#' @param shape_by character string defining the column of \code{pData(object)} to
#' be used as a factor by which to define the shape of the points in the plot.
#' Alternatively, a data frame with one column containing values to map to shapes.
#' @param size_by character string defining the column of \code{pData(object)} to
#' be used as a factor by which to define the size of points in the plot.
#' Alternatively, a data frame with one column containing values to map to sizes.
#' @param feature_set character, numeric or logical vector indicating a set of
#' features to use for the t-SNE calculation. If character, entries must all be
#' in \code{featureNames(object)}. If numeric, values are taken to be indices for
#' features. If logical, vector is used to index features and should have length
#' equal to \code{nrow(object)}.
#' @param return_SCESet logical, should the function return an \code{SCESet}
#' object with principal component values for cells in the
#' \code{reducedDimension} slot. Default is \code{FALSE}, in which case a
#' \code{ggplot} object is returned.
#' @param scale_features logical, should the expression values be standardised
#' so that each feature has unit variance? Default is \code{TRUE}.
#' @param draw_plot logical, should the plot be drawn on the current graphics
#' device? Only used if \code{return_SCESet} is \code{TRUE}, otherwise the plot
#' is always produced.
#' @param theme_size numeric scalar giving default font size for plotting theme
#' (default is 10).
#' @param rand_seed (optional) numeric scalar that can be passed to
#' \code{set.seed} to make plots reproducible.
#' @param perplexity numeric scalar value defining the "perplexity parameter"
#' for the t-SNE plot. Passed to \code{\link[Rtsne]{Rtsne}} - see documentation
#' for that package for more details.
#' @param legend character, specifying how the legend(s) be shown? Default is
#' \code{"auto"}, which hides legends that have only one level and shows others.
#' Alternatives are "all" (show all legends) or "none" (hide all legends).
#' @param ... further arguments passed to \code{\link[Rtsne]{Rtsne}}
#'
#' @details The function \code{\link[Rtsne]{Rtsne}} is used internally to
#' compute the t-SNE.
#'
#' @return If \code{return_SCESet} is \code{TRUE}, then the function returns an
#' \code{SCESet} object, otherwise it returns a \code{ggplot} object.
#' @name plotTSNE
#' @aliases plotTSNE plotTSNE,SCESet-method
#'
#' @export
#' @seealso
#' \code{\link[Rtsne]{Rtsne}}
#' @references
#' L.J.P. van der Maaten. Barnes-Hut-SNE. In Proceedings of the International
#' Conference on Learning Representations, 2013.
#'
#' @examples
#' ## Set up an example SCESet
#' data("sc_example_counts")
#' data("sc_example_cell_info")
#' pd <- new("AnnotatedDataFrame", data = sc_example_cell_info)
#' example_sceset <- newSCESet(countData = sc_example_counts, phenoData = pd)
#' drop_genes <- apply(exprs(example_sceset), 1, function(x) {var(x) == 0})
#' example_sceset <- example_sceset[!drop_genes, ]
#'
#' ## Examples plotting PC1 and PC2
#' plotTSNE(example_sceset, perplexity = 10)
#' plotTSNE(example_sceset, colour_by = "Cell_Cycle", perplexity = 10)
#' plotTSNE(example_sceset, colour_by = "Cell_Cycle", shape_by = "Treatment",
#' perplexity = 10)
#' plotTSNE(example_sceset, colour_by = "Cell_Cycle", shape_by = "Treatment",
#' size_by = "Mutation_Status", perplexity = 10)
#' plotTSNE(example_sceset, shape_by = "Treatment", size_by = "Mutation_Status",
#' perplexity = 5)
#' plotTSNE(example_sceset, feature_set = 1:100, colour_by = "Treatment",
#' shape_by = "Mutation_Status", perplexity = 5)
#'
#' plotTSNE(example_sceset, shape_by = "Treatment", return_SCESet = TRUE,
#' perplexity = 10)
#'
#'
setMethod("plotTSNE", signature("SCESet"),
          function(object, ntop = 500, ncomponents = 2, exprs_values = "exprs",
                   colour_by = NULL, shape_by = NULL, size_by = NULL,
                   feature_set = NULL, return_SCESet = FALSE,
                   scale_features = TRUE, draw_plot = TRUE, theme_size = 10,
                   rand_seed = NULL, perplexity = floor(ncol(object) / 5),
                   legend = "auto", ...) {
              ##
              if ( !requireNamespace("Rtsne", quietly = TRUE) )
                  stop("This function requires the 'Rtsne' package.
                       Try: install.packages('Rtsne').")
              ## check legend argument
              legend <- match.arg(legend, c("auto", "none", "all"))

              ## Check arguments are valid
              colour_by_out <- .choose_vis_values(object, colour_by, cell_control_default = TRUE,
                                                  check_features = TRUE, exprs_values = exprs_values)
              colour_by <- colour_by_out$name
              colour_by_vals <- colour_by_out$val

              shape_by_out <- .choose_vis_values(object, shape_by, cell_control_default = TRUE,
                                                 coerce_factor = TRUE, level_limit = 10)
              shape_by <- shape_by_out$name
              shape_by_vals <- shape_by_out$val

              size_by_out <- .choose_vis_values(object, size_by, check_features = TRUE, exprs_values = exprs_values)
              size_by <- size_by_out$name
              size_by_vals <- size_by_out$val

              if ( !is.null(feature_set) && typeof(feature_set) == "character" ) {
                  if ( !(all(feature_set %in% featureNames(object))) )
                      stop("when the argument 'feature_set' is of type character, all features must be in featureNames(object)")
              }

              ## Define an expression matrix depending on which values we're
              ## using
              exprs_mat <- get_exprs(object, exprs_values, warning = FALSE)
              if ( is.null(exprs_mat) )
                  stop(sprintf("object does not contain '%s'", exprs_values))

              ## Define features to use: either ntop, or if a set of features is
              ## defined, then those
              if ( is.null(feature_set) ) {
                  rv <- matrixStats::rowVars(exprs_mat)
                  ntop <- min(ntop, length(rv))
                  feature_set <- order(rv, decreasing = TRUE)[seq_len(ntop)]
              }

              ## Drop any features with zero variance
              exprs_to_plot <- exprs_mat[feature_set,]
              keep_feature <- (matrixStats::rowVars(exprs_to_plot) > 0.001)
              keep_feature[is.na(keep_feature)] <- FALSE
              exprs_to_plot <- exprs_to_plot[keep_feature, ]

              ## Standardise expression if stand_exprs(object) is null
              exprs_to_plot <- t(scale(t(exprs_to_plot), scale = scale_features))

              ## Compute t-SNE
              if ( !is.null(rand_seed) )
                  set.seed(rand_seed)
              tsne_out <- Rtsne::Rtsne(t(exprs_to_plot),
                                       initial_dims = max(50, ncol(object)),
                                       perplexity = perplexity,
                                       dims = ncomponents,...)

              ## Define data.frame for plotting
              df_to_plot <- data.frame(tsne_out$Y[, 1:ncomponents],
                                       row.names = sampleNames(object))
              df_to_plot$colour_by <- colour_by_vals
              df_to_plot$shape_by <- shape_by_vals
              df_to_plot$size_by <- size_by_vals

              ## Make reduced-dimension plot
              plot_out <- plotReducedDim.default(df_to_plot, ncomponents,
                                                 colour_by, shape_by, size_by,
                                                 legend = legend)

              ## Define plotting theme
              if ( requireNamespace("cowplot", quietly = TRUE) )
                  plot_out <- plot_out + cowplot::theme_cowplot(theme_size)
              else
                  plot_out <- plot_out + theme_bw(theme_size)
              ## remove legend if so desired
              if ( legend == "none" )
                  plot_out <- plot_out + theme(legend.position = "none")

              ## Plot t-SNE and return appropriate object
              if (return_SCESet) {
                  df_out <- tsne_out$Y[, 1:ncomponents]
                  rownames(df_out) <- sampleNames(object)
                  reducedDimension(object) <- df_out
                  if ( draw_plot )
                      print(plot_out)
                  return(object)
              } else {
                  ## Return t-SNE plot
                  return(plot_out)
              }
          })

################################################################################
### plotDiffusionMap

#' Plot a diffusion map for an SCESet object
#'
#' Produce a diffusion map plot of two components for an \code{SCESet} dataset.
#'
#' @param object an \code{SCESet} object
#' @param ntop numeric scalar indicating the number of most variable features to
#' use for the diffusion map. Default is \code{500}, but any \code{ntop}
#' argument is overrided if the \code{feature_set} argument is non-NULL.
#' @param ncomponents numeric scalar indicating the number of principal
#' components to plot, starting from the first diffusion map component. Default
#' is 2. If \code{ncomponents} is 2, then a scatterplot of component 1 vs
#' component 2 is produced. If \code{ncomponents} is greater than 2, a pairs
#' plots for the top components is produced. NB: computing many components for
#' the diffusion map can become time consuming.
#' @param exprs_values character string indicating which values should be used
#' as the expression values for this plot. Valid arguments are \code{"tpm"}
#' (transcripts per million), \code{"norm_tpm"} (normalised TPM
#' values), \code{"fpkm"} (FPKM values), \code{"norm_fpkm"} (normalised FPKM
#' values), \code{"counts"} (counts for each feature), \code{"norm_counts"},
#' \code{"cpm"} (counts-per-million), \code{"norm_cpm"} (normalised
#' counts-per-million), \code{"exprs"} (whatever is in the \code{'exprs'} slot
#' of the \code{SCESet} object; default), \code{"norm_exprs"} (normalised
#' expression values) or \code{"stand_exprs"} (standardised expression values)
#' or any other named element of the \code{assayData} slot of the \code{SCESet}
#' object that can be accessed with the \code{get_exprs} function.
#' @param colour_by character string defining the column of \code{pData(object)} to
#' be used as a factor by which to colour the points in the plot. Alternatively, a
#' data frame with one column containing values to map to colours for all cells.
#' @param shape_by character string defining the column of \code{pData(object)} to
#' be used as a factor by which to define the shape of the points in the plot.
#' @param size_by character string defining the column of \code{pData(object)} to
#' be used as a factor by which to define the size of points in the plot.
#' @param feature_set character, numeric or logical vector indicating a set of
#' features to use for the diffusion map. If character, entries must all be in
#' \code{featureNames(object)}. If numeric, values are taken to be indices for
#' features. If logical, vector is used to index features and should have length
#' equal to \code{nrow(object)}.
#' @param return_SCESet logical, should the function return an \code{SCESet}
#' object with principal component values for cells in the
#' \code{reducedDimension} slot. Default is \code{FALSE}, in which case a
#' \code{ggplot} object is returned.
#' @param scale_features logical, should the expression values be standardised
#' so that each feature has unit variance? Default is \code{TRUE}.
#' @param draw_plot logical, should the plot be drawn on the current graphics
#' device? Only used if \code{return_SCESet} is \code{TRUE}, otherwise the plot
#' is always produced.
#' @param theme_size numeric scalar giving default font size for plotting theme
#' (default is 10).
#' @param rand_seed (optional) numeric scalar that can be passed to
#' \code{set.seed} to make plots reproducible.
#' @param sigma argument passed to \code{\link[destiny]{DiffusionMap}}
#' @param distance argument passed to \code{\link[destiny]{DiffusionMap}}
#' @param legend character, specifying how the legend(s) be shown? Default is
#' \code{"auto"}, which hides legends that have only one level and shows others.
#' Alternatives are "all" (show all legends) or "none" (hide all legends).
#' @param ... further arguments passed to \code{\link[destiny]{DiffusionMap}}
#'
#' @details The function \code{\link[destiny]{DiffusionMap}} is used internally
#' to compute the diffusion map.
#'
#' @return If \code{return_SCESet} is \code{TRUE}, then the function returns an
#' \code{SCESet} object, otherwise it returns a \code{ggplot} object.
#' @name plotDiffusionMap
#' @aliases plotDiffusionMap plotDiffusionMap,SCESet-method
#'
#' @export
#' @seealso
#' \code{\link[destiny]{destiny}}
#' @references
#' Haghverdi L, Buettner F, Theis FJ. Diffusion maps for high-dimensional single-cell analysis of differentiation data. Bioinformatics. 2015; doi:10.1093/bioinformatics/btv325
#'
#' @examples
#' ## Set up an example SCESet
#' data("sc_example_counts")
#' data("sc_example_cell_info")
#' pd <- new("AnnotatedDataFrame", data = sc_example_cell_info)
#' example_sceset <- newSCESet(countData = sc_example_counts, phenoData = pd)
#' drop_genes <- apply(exprs(example_sceset), 1, function(x) {var(x) == 0})
#' example_sceset <- example_sceset[!drop_genes, ]
#'
#' ## Examples plotting diffusion maps
#' plotDiffusionMap(example_sceset)
#' plotDiffusionMap(example_sceset, colour_by = "Cell_Cycle")
#' plotDiffusionMap(example_sceset, colour_by = "Cell_Cycle",
#' shape_by = "Treatment")
#' plotDiffusionMap(example_sceset, colour_by = "Cell_Cycle",
#' shape_by = "Treatment", size_by = "Mutation_Status")
#' plotDiffusionMap(example_sceset, shape_by = "Treatment",
#' size_by = "Mutation_Status")
#' plotDiffusionMap(example_sceset, feature_set = 1:100, colour_by = "Treatment",
#' shape_by = "Mutation_Status")
#'
#' plotDiffusionMap(example_sceset, shape_by = "Treatment",
#' return_SCESet = TRUE)
#'
#'
plotDiffusionMapSCESet <- function(object, ntop = 500, ncomponents = 2,
                                   exprs_values = "exprs", colour_by = NULL,
                                   shape_by = NULL, size_by = NULL,
                                   feature_set = NULL, return_SCESet = FALSE,
                                   scale_features = TRUE, draw_plot = TRUE,
                                   theme_size = 10, rand_seed = NULL,
                                   sigma = NULL, distance = "euclidean",
                                   legend = "auto", ...) {
    ##
    if ( !requireNamespace("destiny", quietly = TRUE) )
        stop("This function requires the 'destiny' package.
                       Try from Bioconductor with:
                       source('https://bioconductor.org/biocLite.R')
                       biocLite('destiny').")
    ## check legend argument
    legend <- match.arg(legend, c("auto", "none", "all"))

    ## Check arguments are valid
    colour_by_out <- .choose_vis_values(object, colour_by, cell_control_default = TRUE,
                                        check_features = TRUE, exprs_values = exprs_values)
    colour_by <- colour_by_out$name
    colour_by_vals <- colour_by_out$val

    shape_by_out <- .choose_vis_values(object, shape_by, cell_control_default = TRUE,
                                       coerce_factor = TRUE, level_limit = 10)
    shape_by <- shape_by_out$name
    shape_by_vals <- shape_by_out$val

    size_by_out <- .choose_vis_values(object, size_by, check_features = TRUE, exprs_values = exprs_values)
    size_by <- size_by_out$name
    size_by_vals <- size_by_out$val

    if ( !is.null(feature_set) && typeof(feature_set) == "character" ) {
        if ( !(all(feature_set %in% featureNames(object))) )
            stop("when the argument 'feature_set' is of type character, all features must be in featureNames(object)")
    }

    ## Define an expression matrix depending on which values we're
    ## using
    exprs_mat <- get_exprs(object, exprs_values, warning = FALSE)
    if ( is.null(exprs_mat) )
        stop(sprintf("object does not contain '%s'", exprs_values))

    ## Define features to use: either ntop, or if a set of features is
    ## defined, then those
    if ( is.null(feature_set) ) {
        rv <- matrixStats::rowVars(exprs_mat)
        feature_set <-
            order(rv, decreasing = TRUE)[seq_len(min(ntop, length(rv)))]
    }

    ## Drop any features with zero variance
    exprs_to_plot <- exprs_mat
    exprs_to_plot <- exprs_to_plot[feature_set,]
    keep_feature <- (matrixStats::rowVars(exprs_to_plot) > 0.001)
    keep_feature[is.na(keep_feature)] <- FALSE
    exprs_to_plot <- exprs_to_plot[keep_feature, ]

    ## Standardise expression if indicated by scale_features argument
    exprs_to_plot <- t(scale(t(exprs_to_plot), scale = scale_features))

    ## Compute DiffusionMap
    if ( !is.null(rand_seed) )
        set.seed(rand_seed)
    difmap_out <- destiny::DiffusionMap(
        t(exprs_to_plot), sigma = sigma, distance = distance, ...)


    ## Define data.frame for plotting
    df_to_plot <- data.frame(difmap_out@eigenvectors[, 1:ncomponents],
                             row.names = sampleNames(object))
    df_to_plot$colour_by <- colour_by_vals
    df_to_plot$shape_by <- shape_by_vals
    df_to_plot$size_by <- size_by_vals

    ## Make reduced-dimension plot
    plot_out <- plotReducedDim.default(df_to_plot, ncomponents,
                                       colour_by, shape_by, size_by,
                                       legend = legend)

    ## Define plotting theme
    if ( requireNamespace("cowplot", quietly = TRUE) )
        plot_out <- plot_out + cowplot::theme_cowplot(theme_size)
    else
        plot_out <- plot_out + theme_bw(theme_size)
    ## remove legend if so desired
    if ( legend == "none" )
        plot_out <- plot_out + theme(legend.position = "none")

    ## Plot PCA and return appropriate object
    if (return_SCESet) {
        df_out <- difmap_out@eigenvectors[, 1:ncomponents]
        rownames(df_out) <- sampleNames(object)
        reducedDimension(object) <- df_out
        if ( draw_plot )
            print(plot_out)
        return(object)
    } else {
        ## Return PCA plot
        return(plot_out)
    }
}

#' @rdname plotDiffusionMap
#' @aliases plotDiffusionMap
#' @export
setMethod("plotDiffusionMap", signature("SCESet"),
          function(object, ntop = 500, ncomponents = 2, exprs_values = "exprs",
                   colour_by = NULL, shape_by = NULL, size_by = NULL,
                   feature_set = NULL, return_SCESet = FALSE,
                   scale_features = FALSE, draw_plot = TRUE, theme_size = 10,
                   rand_seed = NULL, sigma = NULL, distance = "euclidean",
                   legend = "auto", ...) {
              plotDiffusionMapSCESet(object, ntop, ncomponents, exprs_values,
                                     colour_by, shape_by, size_by,
                                     feature_set, return_SCESet,
                                     scale_features, draw_plot, theme_size,
                                     rand_seed, sigma, distance, legend, ...)
          })


################################################################################
### plotMDS

#' Produce a multidimensional scaling plot for an SCESet object
#'
#' #' Produce an MDS plot from the cell pairwise distance data in an
#' \code{SCESet} dataset.
#'
#' @param object an \code{SCESet} object
#' @param ncomponents numeric scalar indicating the number of principal
#' components to plot, starting from the first principal component. Default is
#' 2. If \code{ncomponents} is 2, then a scatterplot of PC2 vs PC1 is produced.
#' If \code{ncomponents} is greater than 2, a pairs plots for the top components
#' is produced. NB: computing more than two components for t-SNE can become very
#' time consuming.
#' @param colour_by character string defining the column of \code{pData(object)} to
#' be used as a factor by which to colour the points in the plot. Alternatively, a
#' data frame with one column containing values to map to colours for all cells.
#' @param shape_by character string defining the column of \code{pData(object)} to
#' be used as a factor by which to define the shape of the points in the plot.
#' @param size_by character string defining the column of \code{pData(object)} to
#' be used as a factor by which to define the size of points in the plot.
#' @param return_SCESet logical, should the function return an \code{SCESet}
#' object with principal component values for cells in the
#' \code{reducedDimension} slot. Default is \code{FALSE}, in which case a
#' \code{ggplot} object is returned.
#' @param draw_plot logical, should the plot be drawn on the current graphics
#' device? Only used if \code{return_SCESet} is \code{TRUE}, otherwise the plot
#' is always produced.
#' @param exprs_values a string specifying the expression values to use for
#' colouring the points, if \code{colour_by} or \code{size_by} are set as feature names.
#' @param theme_size numeric scalar giving default font size for plotting theme
#' (default is 10).
#' @param legend character, specifying how the legend(s) be shown? Default is
#' \code{"auto"}, which hides legends that have only one level and shows others.
#' Alternatives are "all" (show all legends) or "none" (hide all legends).
#' @param ... arguments passed to S4 plotMDS method
#'
#' @details The function \code{\link{cmdscale}} is used internally to
#' compute the multidimensional scaling components to plot.
#'
#' @return If \code{return_SCESet} is \code{TRUE}, then the function returns an
#' \code{SCESet} object, otherwise it returns a \code{ggplot} object.
#' @name plotMDS
#' @aliases plotMDS plotMDS,SCESet-method
#'
#' @export
#'
#' @examples
#' ## Set up an example SCESet
#' data("sc_example_counts")
#' data("sc_example_cell_info")
#' pd <- new("AnnotatedDataFrame", data = sc_example_cell_info)
#' example_sceset <- newSCESet(countData = sc_example_counts, phenoData = pd)
#' drop_genes <- apply(exprs(example_sceset), 1, function(x) {var(x) == 0})
#' example_sceset <- example_sceset[!drop_genes, ]
#' example_sceset <- calculateQCMetrics(example_sceset)
#'
#' ## define cell-cell distances
#' cellDist(example_sceset) <- as.matrix(dist(t(exprs(example_sceset))))
#'
#' ## Examples plotting
#' plotMDS(example_sceset)
#' plotMDS(example_sceset, colour_by = "Cell_Cycle")
#' plotMDS(example_sceset, colour_by = "Cell_Cycle",
#' shape_by = "Treatment")
#'
#' ## define cell-cell distances differently
#' cellDist(example_sceset) <- as.matrix(dist(t(counts(example_sceset)),
#' method = "canberra"))
#' plotMDS(example_sceset, colour_by = "Cell_Cycle",
#' shape_by = "Treatment", size_by = "Mutation_Status")
#'
plotMDSSCESet <- function(object, ncomponents = 2, colour_by = NULL,
                          shape_by = NULL, size_by = NULL,
                          return_SCESet = FALSE, draw_plot = TRUE,
                          exprs_values = "exprs", theme_size = 10, legend = "auto") {
    ## check legend argument
    legend <- match.arg(legend, c("auto", "none", "all"))
    ##
    cell_dist <- cellDist(object)
    ncells <- ncol(object)
    if ( !(dim(cellDist(object))[1] == ncells &&
           dim(cellDist(object))[2] == ncells) )
        stop("cellDist(object) is not of the correct dimensions. Please define cell pairwise distances and try again:
             e.g. cellDist(object) <- as.matrix(dist(t(exprs(object))))")

    ## Check arguments are valid
    colour_by_out <- .choose_vis_values(object, colour_by, cell_control_default = TRUE,
                                        check_features = TRUE, exprs_values = exprs_values)
    colour_by <- colour_by_out$name
    colour_by_vals <- colour_by_out$val

    shape_by_out <- .choose_vis_values(object, shape_by, cell_control_default = TRUE,
                                       coerce_factor = TRUE, level_limit = 10)
    shape_by <- shape_by_out$name
    shape_by_vals <- shape_by_out$val

    size_by_out <- .choose_vis_values(object, size_by, check_features = TRUE, exprs_values = exprs_values)
    size_by <- size_by_out$name
    size_by_vals <- size_by_out$val

    ## Compute multidimentional scaling
    mds_out <- cmdscale(cell_dist, k = ncomponents)

    ## Define data.frame for plotting
    df_to_plot <- data.frame(mds_out[, 1:ncomponents],
                             row.names = sampleNames(object))
    colnames(df_to_plot) <- paste0("Component_", 1:ncomponents)
    df_to_plot$colour_by <- colour_by_vals
    df_to_plot$shape_by <- shape_by_vals
    df_to_plot$size_by <- size_by_vals

    ## Make reduced-dimension plot
    plot_out <- plotReducedDim.default(df_to_plot, ncomponents,
                                       colour_by, shape_by, size_by,
                                       legend = legend)

    ## Define plotting theme
    if ( requireNamespace("cowplot", quietly = TRUE) )
        plot_out <- plot_out + cowplot::theme_cowplot(theme_size)
    else
        plot_out <- plot_out + theme_bw(theme_size)
    ## remove legend if so desired
    if ( legend == "none" )
        plot_out <- plot_out + theme(legend.position = "none")

    ## Plot PCA and return appropriate object
    if (return_SCESet) {
        df_out <- mds_out[, 1:ncomponents]
        rownames(df_out) <- sampleNames(object)
        reducedDimension(object) <- df_out
        if ( draw_plot )
            print(plot_out)
        return(object)
    } else {
        ## Return PCA plot
        return(plot_out)
    }
}

#' @rdname plotMDS
#' @aliases plotMDS
#' @export
setMethod("plotMDS", signature("SCESet"),
          function(object, ncomponents = 2, colour_by = NULL, shape_by = NULL,
                   size_by = NULL, return_SCESet = FALSE, draw_plot = TRUE,
                   exprs_values = "exprs", theme_size = 10, legend = "auto") {
              plotMDSSCESet(object, ncomponents, colour_by, shape_by, size_by,
                            return_SCESet, draw_plot, exprs_values, theme_size, legend)
          })



################################################################################
### plotReducedDim

#' Plot reduced dimension representation of cells
#'
#' @param object an \code{SCESet} object with a non-NULL \code{reducedDimension}
#' slot.
#' @param df_to_plot data.frame containing a reduced dimension represenation of
#' cells and optional metadata for the plot.
#' @param ncomponents numeric scalar indicating the number of principal
#' components to plot, starting from the first principal component. Default is
#' 2. If \code{ncomponents} is 2, then a scatterplot of Dimension 2 vs Dimension
#' 1 is produced. If \code{ncomponents} is greater than 2, a pairs plots for the
#'  top dimensions is produced.
#' @param colour_by character string defining the column of \code{pData(object)} to
#' be used as a factor by which to colour the points in the plot. Alternatively, a
#' data frame with one column containing values to map to colours for all cells.
#' @param shape_by character string defining the column of \code{pData(object)} to
#' be used as a factor by which to define the shape of the points in the plot.
#' @param size_by character string defining the column of \code{pData(object)} to
#' be used as a factor by which to define the size of points in the plot.
#' @param percentVar numeric vector giving the proportion of variance in
#' expression explained by each reduced dimension. Only expected to be used
#' internally in the \code{\link[scaterlegacy]{plotPCA}} function.
#' @param exprs_values a string specifying the expression values to use for
#' colouring the points, if \code{colour_by} or \code{size_by} are set as feature names.
#' @param theme_size numeric scalar giving default font size for plotting theme
#' (default is 10).
#' @param legend character, specifying how the legend(s) be shown? Default is
#' \code{"auto"}, which hides legends that have only one level and shows others.
#' Alternatives are "all" (show all legends) or "none" (hide all legends).
#' @param ... optional arguments (from those listed above) passed to
#' \code{plotReducedDim.SCESet} or \code{plotReducedDim.default}
#'
#' @details The function \code{plotReducedDim.default} assumes that the first
#' \code{ncomponents} columns of \code{df_to_plot} contain the reduced dimension
#'  components to plot, and that any subsequent columns define factors for
#'  \code{colour_by}, \code{shape_by} and \code{size_by} in the plot.
#'
#' @return a ggplot plot object
#'
#' @name plotReducedDim
#' @aliases plotReducedDim plotReducedDim,SCESet-method plotReducedDim,data.frame-method
#' @import viridis
#' @export
#'
#' @examples
#' data("sc_example_counts")
#' data("sc_example_cell_info")
#' pd <- new("AnnotatedDataFrame", data = sc_example_cell_info)
#' example_sceset <- newSCESet(countData = sc_example_counts, phenoData = pd)
#' drop_genes <- apply(exprs(example_sceset), 1, function(x) {var(x) == 0})
#' example_sceset <- example_sceset[!drop_genes, ]
#'
#' reducedDimension(example_sceset) <- prcomp(t(exprs(example_sceset)), scale. = TRUE)$x
#' plotReducedDim(example_sceset)
#' plotReducedDim(example_sceset, colour_by="Cell_Cycle")
#' plotReducedDim(example_sceset, colour_by="Cell_Cycle", shape_by="Treatment")
#' plotReducedDim(example_sceset, colour_by="Cell_Cycle", size_by="Treatment")
#' plotReducedDim(example_sceset, ncomponents=5)
#' plotReducedDim(example_sceset, ncomponents=5, colour_by="Cell_Cycle", shape_by="Treatment")
#' plotReducedDim(example_sceset, colour_by="Gene_0001")
#'
plotReducedDim.default <- function(df_to_plot, ncomponents=2, colour_by=NULL,
                           shape_by=NULL, size_by=NULL, percentVar=NULL,
                           theme_size = 10, legend = "auto") {
    ## check legend argument
    legend <- match.arg(legend, c("auto", "none", "all"), several.ok = FALSE)
    ## Define plot
    if ( ncomponents > 2 ) {
        ## expanding numeric columns for pairs plot
        df_to_expand <- df_to_plot[, 1:ncomponents]
        if ( is.null(percentVar) ) {
            colnames(df_to_expand) <- colnames(df_to_plot)[1:ncomponents]
        } else {
            colnames(df_to_expand) <- paste0(
                colnames(df_to_plot)[1:ncomponents], ": ",
                round(percentVar[1:ncomponents] * 100), "% variance")
        }
        gg1 <- .makePairs(df_to_expand)
        ## new data frame
        df_to_plot_big <- data.frame(gg1$all, df_to_plot[, -c(1:ncomponents)])
        colnames(df_to_plot_big)[-c(1:4)] <- colnames(df_to_plot)
        ## pairs plot
        plot_out <- ggplot(df_to_plot_big, aes_string(x = "x", y = "y")) +
            facet_grid(xvar ~ yvar, scales = "free") +
            stat_density(aes_string(x = "x",
                                    y = "(..scaled.. * diff(range(x)) + min(x))"),
                         data = gg1$densities, position = "identity",
                         colour = "grey20", geom = "line") +
            xlab("") +
            ylab("") +
            theme_bw(theme_size)
    } else {
        comps <- colnames(df_to_plot)[1:2]
        if ( is.null(percentVar) ) {
            x_lab <- "Dimension 1"
            y_lab <- "Dimension 2"
        } else {
            x_lab <- paste0("Component 1: ", round(percentVar[1] * 100),
                            "% variance")
            y_lab <- paste0("Component 2: ", round(percentVar[2] * 100),
                            "% variance")
        }
        plot_out <- ggplot(df_to_plot, aes_string(x = comps[1], y = comps[2])) +
            xlab(x_lab) +
            ylab(y_lab) +
            geom_rug(colour = "gray20", alpha = 0.65) +
            theme_bw(theme_size)
    }

    ## if only one level for the variable, set to NULL
    if ( legend == "auto" ) {
        if ( !is.null(colour_by) && length(unique(df_to_plot$colour_by)) == 1)
            colour_by <- NULL
        if ( !is.null(shape_by) && length(unique(df_to_plot$shape_by)) == 1)
            shape_by  <- NULL
        if ( !is.null(size_by) && length(unique(df_to_plot$size_by)) == 1)
            size_by <- NULL
    }

    ## Apply colour_by, shape_by and size_by variables if defined
    if ( !is.null(colour_by) && !is.null(shape_by) && !is.null(size_by) ) {
        plot_out <- plot_out +
            geom_point(aes_string(colour = "colour_by", shape = "shape_by",
                                  size = "size_by"), alpha = 0.65) +
            guides(size = guide_legend(title = size_by),
                   shape = guide_legend(title = shape_by))
        plot_out <- .resolve_plot_colours(plot_out, df_to_plot$colour_by,
                                          colour_by)
    } else {
        if  ( sum(is.null(colour_by) + is.null(shape_by) + is.null(size_by)) == 1 ) {
            if ( !is.null(colour_by) && !is.null(shape_by) ) {
                plot_out <- plot_out +
                    geom_point(aes_string(colour = "colour_by",
                                          shape = "shape_by"),
                               alpha = 0.65) +
                    guides(shape = guide_legend(title = shape_by))
                plot_out <- .resolve_plot_colours(plot_out,
                                                  df_to_plot$colour_by,
                                                  colour_by)
            }
            if ( !is.null(colour_by) && !is.null(size_by) ) {
                plot_out <- plot_out +
                    geom_point(aes_string(fill = "colour_by", size = "size_by"),
                               shape = 21, colour = "gray70", alpha = 0.65) +
                    guides(size = guide_legend(title = size_by))
                plot_out <- .resolve_plot_colours(plot_out,
                                                  df_to_plot$colour_by,
                                                  colour_by, fill = TRUE)
            }
            if ( !is.null(shape_by) && !is.null(size_by) ) {
                plot_out <- plot_out +
                    geom_point(aes_string(shape = "shape_by", size = "size_by"),
                               fill = "gray20", colour = "gray20",
                               alpha = 0.65) +
                    guides(size = guide_legend(title = size_by),
                           shape = guide_legend(title = shape_by))
            }
        } else {
            if ( sum(is.null(colour_by) + is.null(shape_by) +
                     is.null(size_by)) == 2 ) {
                if ( !is.null(colour_by) ) {
                    plot_out <- plot_out +
                        geom_point(aes_string(fill = "colour_by"),
                                   shape = 21, colour = "gray70", alpha = 0.65)
                    plot_out <- .resolve_plot_colours(plot_out,
                                                      df_to_plot$colour_by,
                                                      colour_by, fill = TRUE)
                }
                if ( !is.null(shape_by) ) {
                    plot_out <- plot_out +
                        geom_point(aes_string(shape = "shape_by"),
                                   colour = "gray20", alpha = 0.65) +
                        guides(shape = guide_legend(title = shape_by))
                }
                if ( !is.null(size_by) ) {
                    plot_out <- plot_out +
                        geom_point(aes_string(size = "size_by"),
                                   fill = "gray20", shape = 21,
                                   colour = "gray70", alpha = 0.65) +
                        guides(size = guide_legend(title = size_by))
                }
            } else {
                plot_out <- plot_out +
                    geom_point(fill = "gray20", shape = 21,
                               colour = "gray70", alpha = 0.65)
            }
        }
    }

    ## Return plot
    plot_out
}

#' @rdname plotReducedDim
#' @aliases plotReducedDim
#' @export
plotReducedDim.SCESet <- function(object, ncomponents=2, colour_by=NULL,
                                  shape_by=NULL, size_by=NULL,
                                  exprs_values = "exprs", theme_size = 10,
                                  legend = "auto") {
    ## check legend argument
    legend <- match.arg(legend, c("auto", "none", "all"))

    ## Check arguments are valid
    colour_by_out <- .choose_vis_values(
        object, colour_by, cell_control_default = TRUE, check_features = TRUE,
        exprs_values = exprs_values)
    colour_by <- colour_by_out$name
    colour_by_vals <- colour_by_out$val

    shape_by_out <- .choose_vis_values(
        object, shape_by, cell_control_default = TRUE, coerce_factor = TRUE,
        level_limit = 10)
    shape_by <- shape_by_out$name
    shape_by_vals <- shape_by_out$val

    size_by_out <- .choose_vis_values(
        object, size_by, check_features = TRUE, exprs_values = exprs_values)
    size_by <- size_by_out$name
    size_by_vals <- size_by_out$val

    ## Extract reduced dimension representation of cells
    if ( is.null(reducedDimension(object)) )
        stop("reducedDimension slot of object is NULL. Need non null reducedDimension to plot.")
    red_dim <- redDim(object)
    if ( ncomponents > ncol(red_dim) )
        stop("ncomponents to plot is larger than number of columns of reducedDimension(object)")

    ## Define data.frame for plotting
    df_to_plot <- data.frame(red_dim[, 1:ncomponents])
    df_to_plot$colour_by <- colour_by_vals
    df_to_plot$shape_by <- shape_by_vals
    df_to_plot$size_by <- size_by_vals

    ## Call default method to make the plot
    plot_out <- plotReducedDim.default(df_to_plot, ncomponents, colour_by,
                                       shape_by, size_by, percentVar = NULL,
                                       legend = legend)

    ## Define plotting theme
    if ( requireNamespace("cowplot", quietly = TRUE) )
        plot_out <- plot_out + cowplot::theme_cowplot(theme_size)
    else
        plot_out <- plot_out + theme_bw(theme_size)
    ## remove legend if so desired
    if ( legend == "none" )
        plot_out <- plot_out + theme(legend.position = "none")

    ## return plot
    plot_out

}

#' @rdname plotReducedDim
#' @aliases plotReducedDIm
#' @export
setMethod("plotReducedDim", signature("SCESet"),
          function(object, ncomponents=2, colour_by=NULL, shape_by=NULL,
                   size_by=NULL, exprs_values = "exprs",
                   theme_size = 10, legend="auto") {
              plotReducedDim.SCESet(object, ncomponents, colour_by, shape_by,
                                    size_by, exprs_values, theme_size, legend)
          })

#' @rdname plotReducedDim
#' @aliases plotReducedDim
#' @export
setMethod("plotReducedDim", signature("data.frame"),
          function(object, ncomponents=2, colour_by=NULL, shape_by=NULL,
                   size_by=NULL, percentVar=NULL, legend="auto") {
              plotReducedDim.default(object, ncomponents, colour_by, shape_by,
                                     size_by, percentVar, legend)
          })




################################################################################
### Plot cells in plate positions

#' Plot cells in plate positions
#'
#' Plots cells in their position on a plate, coloured by phenotype data or
#' feature expression.
#'
#' @param object an \code{SCESet} object. If \code{object$plate_position} is not
#' \code{NULL}, then this will be used to define each cell's position on the
#' plate, unless the \code{plate_position} argument is specified.
#' @param plate_position optional character vector providing a position on the
#' plate for each cell (e.g. A01, B12, etc, where letter indicates row and
#' number indicates column). Specifying this argument overrides any plate
#' position information extracted from the SCESet object.
#' @param colour_by character string defining the column of \code{pData(object)} to
#' be used as a factor by which to colour the points in the plot. Alternatively, a
#' data frame with one column containing values to map to colours for all cells.
#' @param x_position numeric vector providing x-axis positions for the cells
#' (ignored if \code{plate_position} is not \code{NULL})
#' @param y_position numeric vector providing y-axis positions for the cells
#' (ignored if \code{plate_position} is not \code{NULL})
#' @param exprs_values a string specifying the expression values to use for
#' colouring the points, if \code{colour_by} is set as a feature name.
#' @param theme_size numeric scalar giving default font size for plotting theme
#' (default is 10).
#' @param legend character, specifying how the legend(s) be shown? Default is
#' \code{"auto"}, which hides legends that have only one level and shows others.
#' Alternatives are "all" (show all legends) or "none" (hide all legends).
#'
#' @details This function expects plate positions to be given in a charcter
#' format where a letter indicates the row on the plate and a numeric value
#' indicates the column. So each cell has a plate position such as "A01", "B12",
#' "K24" and so on. From these plate positions, the row is extracted as the
#' letter, and the column as the numeric part. If \code{object$plate_position}
#' or the \code{plate_position} argument are used to define plate positions,
#' then positions should be provided in this format. Alternatively, numeric
#' values to be used as x- and y-coordinates by supplying both the
#' \code{x_position} and \code{y_position} arguments to the function.
#'
#' @return
#' A ggplot object.
#'
#' @export
#'
#' @examples
#' ## prepare data
#' data("sc_example_counts")
#' data("sc_example_cell_info")
#' pd <- new("AnnotatedDataFrame", data = sc_example_cell_info)
#' example_sceset <- newSCESet(countData = sc_example_counts, phenoData = pd)
#' example_sceset <- calculateQCMetrics(example_sceset)
#'
#' ## define plate positions
#' example_sceset$plate_position <- paste0(
#' rep(LETTERS[1:5], each = 8), rep(formatC(1:8, width = 2, flag = "0"), 5))
#'
#' ## plot plate positions
#' plotPlatePosition(example_sceset, colour_by = "Mutation_Status")
#'
#' plotPlatePosition(example_sceset, colour_by = "Gene_0004")
#'
plotPlatePosition <- function(object, plate_position = NULL,
                              colour_by = NULL,
                              x_position = NULL, y_position = NULL,
                              exprs_values = "exprs", theme_size = 24, legend = "auto") {
    ## check object is SCESet object
    if ( !is(object, "SCESet") )
        stop("Object must be of class SCESet.")

    ## check legend argument
    legend <- match.arg(legend, c("auto", "none", "all"))
    ## Checking colour validity
    colour_by_out <- .choose_vis_values(object, colour_by, cell_control_default = TRUE,
                                        check_features = TRUE, exprs_values = exprs_values)
    colour_by <- colour_by_out$name
    colour_by_vals <- colour_by_out$val

    ## obtain well positions
    if ( !is.null(plate_position) ) {
        if ( length(plate_position) != ncol(object) )
            stop("Supplied plate_position argument must have same length as number of columns of SCESet object.")
        plate_position_char <- plate_position

    } else
        plate_position_char <- object$plate_position

    if ( is.null(plate_position_char) ) {
        if ( is.null(x_position) || is.null(y_position) )
            stop("If plate_position is NULL then both x_position and y_position must be supplied.")
        plate_position_x <- x_position
        plate_position_y <- y_position
    } else {
        plate_position_y <- gsub("[0-9]*", "", plate_position_char)
        plate_position_y <- factor(plate_position_y,
                                   rev(sort(unique(plate_position_y))))
        plate_position_x <- gsub("[A-Z]*", "", plate_position_char)
        plate_position_x <- ordered(as.integer(plate_position_x))
    }

    ## Define data.frame for plotting
    df_to_plot <- data.frame(plate_position_x, plate_position_y)
    if ( !is.null(plate_position_char) )
        df_to_plot[["plate_position_char"]] <- plate_position_char
    df_to_plot$colour_by <- colour_by_vals

    ## make the plot
    aesth <- aes(x = plate_position_x, y = plate_position_y, fill = colour_by)
    if ( !is.null(plate_position_char) )
        aesth$label <- as.symbol("plate_position_char")

    plot_out <- ggplot(df_to_plot, aesth) +
        geom_point(shape = 21, size = theme_size, colour = "gray50")
    if ( !is.null(plate_position_char) )
        plot_out <- plot_out + geom_text(colour = "gray90")
    ## make sure colours are nice
    plot_out <- .resolve_plot_colours(plot_out,
                                      df_to_plot$colour_by,
                                      colour_by, fill = TRUE)

    ## Define plotting theme
    plot_out <- plot_out + theme_bw(theme_size) +
        theme(axis.title = element_blank(), axis.ticks = element_blank(),
              legend.text = element_text(size = theme_size / 2),
              legend.title = element_text(size = theme_size / 2)) +
        guides(fill = guide_legend(override.aes = list(size = theme_size / 2)))
    ## remove legend if so desired
    if ( legend == "none" )
        plot_out <- plot_out + theme(legend.position = "none")

    ## return plot
    plot_out
}


################################################################################

#' Plot expression values for a set of features (e.g. genes or transcripts)
#'
#' @param object an SCESet object containing expression values and
#' experimental information. Must have been appropriately prepared. For the
#' \code{plotExpressionDefault} method, the \code{object} argument is a
#' data.frame in 'long' format providing expression values for a set of features
#'  to plot, plus metadata used in the \code{aesth} argument, but this is not
#'  meant to be a user-level operation.
#' @param features a character vector of feature names or Boolean
#' vector or numeric vector of indices indicating which features should have
#' their expression values plotted
#' @param x character string providing a column name of \code{pData(object)} or
#' a feature name (i.e. gene or transcript) to plot on the x-axis in the
#' expression plot(s). If a feature name, then expression values for the feature
#' will be plotted on the x-axis for each subplot.
#' @param exprs_values character string indicating which values should be used
#' as the expression values for this plot. Valid arguments are \code{"tpm"}
#' (transcripts per million), \code{"norm_tpm"} (normalised TPM
#' values), \code{"fpkm"} (FPKM values), \code{"norm_fpkm"} (normalised FPKM
#' values), \code{"counts"} (counts for each feature), \code{"norm_counts"},
#' \code{"cpm"} (counts-per-million), \code{"norm_cpm"} (normalised
#' counts-per-million), \code{"exprs"} (whatever is in the \code{'exprs'} slot
#' of the \code{SCESet} object; default), \code{"norm_exprs"} (normalised
#' expression values) or \code{"stand_exprs"} (standardised expression values)
#' or any other slots that have been added to the \code{"assayData"} slot by
#' the user.
#' @param colour_by optional character string supplying name of a column of
#' \code{pData(object)} which will be used as a variable by which to colour
#' expression values on the plot. Alternatively, a data frame with one column,
#' containing a value for each cell that will be mapped to a colour.
#' @param shape_by optional character string supplying name of a column of
#' \code{pData(object)} which will be used as a variable to define the shape of
#' points for expression values on the plot. Alternatively, a data frame with
#' one column containing values to map to shapes.
#' @param size_by optional character string supplying name of a column of
#' \code{pData(object)} which will be used as a variable to define the size of
#' points for expression values on the plot. Alternatively, a data frame with
#' one column containing values to map to sizes.
#' @param ncol number of columns to be used for the panels of the plot
#' @param xlab label for x-axis; if \code{NULL} (default), then \code{x} will be
#' used as the x-axis label
#' @param show_median logical, show the median for each group on the plot
#' @param show_violin logical, show a violin plot for the distribution
#' for each group on the plot
#' @param show_smooth logical, show a smoothed fit through the expression values
#'  on the plot
#' @param alpha numeric value between 0 (completely transparent) and 1 (completely
#' solid) defining how transparent plotted points (cells) should be.
#' Points are jittered horizontally if the x-axis value is categorical rather
#' than numeric to avoid overplotting.
#' @param theme_size numeric scalar giving default font size for plotting theme
#' (default is 10)
#' @param log2_values should the expression values be transformed to the
#' log2-scale for plotting (with an offset of 1 to avoid logging zeroes)?
#' @param size numeric scalar optionally providing size for points if
#' \code{size_by} argument is not given. Default is \code{NULL}, in which case
#' \pkg{ggplot2} default is used.
#' @param scales character scalar, should scales be fixed ("fixed"),
#' free ("free"), or free in one dimension ("free_x"; "free_y", the default).
#' Passed to the \code{scales} argument in the \code{\link[ggplot2]{facet_wrap}} function
#' from the \code{ggplot2} package.
#' @param se logical, should standard errors be shown (default \code{TRUE}) for
#' the smoothed fit through the cells. (Ignored if \code{show_smooth} is \code{FALSE}).
#' @param jitter character scalar to define whether points are to be jittered
#' (\code{"jitter"}) or presented in a "beeswarm" style (if \code{"swarm"}; default).
#' "Beeswarm" style usually looks more attractive, but for datasets with a large
#' number of cells, or for dense plots, the jitter option may work better.
#' @param ... optional arguments (from those listed above) passed to
#' \code{plotExpressionSCESet} or \code{plotExpressionDefault}
#'
#' @details Plot expression values (default log2(transcripts-per-million +
#' 1), if available) for a set of features.
#' @return a ggplot plot object
#'
#' @name plotExpression
#' @aliases plotExpression plotExpression,SCESet-method plotExpression,data.frame-method
#' @import ggplot2
#' @importFrom Biobase varLabels
#' @importFrom Biobase featureNames
#' @importFrom ggbeeswarm geom_quasirandom
#' @export
#'
#' @examples
#' ## prepare data
#' data("sc_example_counts")
#' data("sc_example_cell_info")
#' pd <- new("AnnotatedDataFrame", data = sc_example_cell_info)
#' example_sceset <- newSCESet(countData = sc_example_counts, phenoData = pd)
#' example_sceset <- calculateQCMetrics(example_sceset)
#'
#' ## default plot
#' plotExpression(example_sceset, 1:15)
#' plotExpression(example_sceset, 1:15, jitter = "jitter")
#'
#' ## plot expression against an x-axis value
#' plotExpression(example_sceset, 1:6, "Mutation_Status")
#'
#' ## explore options
#' plotExpression(example_sceset, 1:6, x="Mutation_Status", exprs_values="exprs",
#' colour_by="Cell_Cycle", show_violin=TRUE, show_median=TRUE)
#' plotExpression(example_sceset, 1:6, x="Mutation_Status", exprs_values="counts",
#' colour_by="Cell_Cycle", show_violin=TRUE, show_median=TRUE)
#'
#' ## plot expression against expression values for Gene_0004
#' plotExpression(example_sceset, 1:4, "Gene_0004")
#' plotExpression(example_sceset, 1:4, "Gene_0004", show_smooth = TRUE)
#' plotExpression(example_sceset, 1:4, "Gene_0004", show_smooth = TRUE, se = FALSE)
#'
plotExpressionSCESet <- function(object, features, x = NULL, exprs_values = "exprs",
                                 colour_by = NULL, shape_by = NULL,
                                 size_by = NULL, ncol = 2, xlab = NULL,
                                 show_median = FALSE, show_violin = TRUE,
                                 show_smooth = FALSE, alpha = 0.6,
                                 theme_size = 10, log2_values = FALSE, size = NULL,
                                 scales = "fixed", se = TRUE, jitter = "swarm") {
    ## Check object is an SCESet object
    if ( !is(object, "SCESet") )
        stop("object must be an SCESet")

    ## Define number of features to plot
    if (is.logical(features))
        nfeatures <- sum(features)
    else
        nfeatures <- length(features)

    ## Picking the expression values
    if ( typeof(features) == "character" ) {
        if ( !(all(features %in% featureNames(object))) )
            stop("when the argument 'features' is of type character, all features must be in featureNames(object)")
    }

    exprs_mat <- get_exprs(object, exprs_values)
    if ( log2_values ) {
        exprs_mat <- log2(exprs_mat + 1)
        ylab <- paste0("Expression (", exprs_values, "; log2-scale)")
    } else
        ylab <- paste0("Expression (", exprs_values, ")")
    to_melt <- as.matrix(exprs_mat[features, , drop = FALSE])

    ## check x-coordinates are valid
    xcoord <- NULL
    if ( !is.null(x) ) {
        if ( x %in% featureNames(object) ) {
            xcoord <- exprs_mat[x,]
            show_violin <- FALSE
            show_median <- FALSE
        } else if (x %in% varLabels(object)) {
            xcoord <- pData(object)[,x]
        } else {
            stop("'x' should be a column of pData(object) or in featureNames(object)")
        }
    }

    ## checking visualization arguments
    colour_by_out <- .choose_vis_values(object, colour_by)
    colour_by <- colour_by_out$name
    colour_by_vals <- colour_by_out$val

    shape_by_out <- .choose_vis_values(object, shape_by, coerce_factor = TRUE,
                                       level_limit = 10)
    shape_by <- shape_by_out$name
    shape_by_vals <- shape_by_out$val

    size_by_out <- .choose_vis_values(object, size_by)
    size_by <- size_by_out$name
    size_by_vals <- size_by_out$val

       ## Melt the expression data and metadata into a convenient form
    evals_long <- reshape2::melt(to_melt, value.name = "evals")
    colnames(evals_long) <- c("Feature", "Cell", "evals")

    ## Prepare the samples information
    samps <- data.frame(row.names = colnames(object))
    if ( !is.null(xcoord) ) samps[[x]] <- xcoord

    ## Construct a ggplot2 aesthetic for the plot
    aesth <- aes()
    if ( is.null(x) ) {
        aesth$x <- as.symbol("Feature")
        one_facet <- TRUE
    } else {
        aesth$x <- as.symbol(x)
        one_facet <- FALSE
    }
    aesth$y <- as.symbol("evals")
    ## Define sensible x-axis label if NULL
    if ( is.null(xlab) )
        xlab <- x

    if ( !is.null(shape_by) ) {
        aesth$shape <- as.symbol(shape_by)
        samps[[shape_by]] <- shape_by_vals
    }
    if ( !is.null(size_by) ) {
        aesth$size <- as.symbol(size_by)
        samps[[size_by]] <- size_by_vals
    }
    if ( !is.null(colour_by) ) {
        aesth$colour <- as.symbol(colour_by)
        samps[[colour_by]] <- colour_by_vals
    }
    ## Extend the sample information, combine with the expression values
    samples_long <- samps[rep(seq_len(ncol(object)), each = nfeatures), , drop = FALSE]
    ## Colour by is_exprs if we can (i.e. is_exprs is not NULL)
    if ( is.null(colour_by) && !is.null(is_exprs(object)) ) {
        colour_by <- "Is_Expressed"
        aesth$colour <- as.symbol(colour_by)
        colour_by_vals <- reshape2::melt(is_exprs(object)[features,],
                                         value.name = "is_exprs")
        samples_long[[colour_by]] <- colour_by_vals[["is_exprs"]]
    }

    ## create plotting object
    object <- cbind(evals_long, samples_long)

    ## Make the plot
    plot_out <- plotExpressionDefault(object, aesth, ncol, xlab, ylab,
                                      show_median, show_violin, show_smooth,
                                      alpha, size, scales, one_facet, se, jitter)

    ## Define plotting theme
    if ( requireNamespace("cowplot", quietly = TRUE) )
        plot_out <- plot_out + cowplot::theme_cowplot(theme_size)
    else
        plot_out <- plot_out + theme_bw(theme_size)
    if ( is.null(x) ) { ## in this case, do not show x-axis ticks or labels
        plot_out <- plot_out + theme(
            axis.text.x = element_text(angle = 60, vjust = 1, hjust = 1),
            axis.ticks.x = element_blank(),
            plot.margin = unit(c(.03, .02, .05, .02), "npc"))
        if (is.null(colour_by))
            plot_out <- plot_out + guides(fill = "none", colour = "none")
    }
    plot_out
}


#' @param aesth an \code{aes} object to use in the call to \code{\link[ggplot2]{ggplot}}.
#' @param ylab character string defining a label for the y-axis (y-axes) of the
#' plot.
#' @param one_facet logical, should expression values for features be plotted in one facet
#' instead of mutiple facets, one per feature? Default if \code{x = NULL}.
#' @rdname plotExpression
#' @aliases plotExpression
#' @export
plotExpressionDefault <- function(object, aesth, ncol = 2, xlab = NULL,
                                  ylab = NULL, show_median = FALSE,
                                  show_violin = TRUE, show_smooth = FALSE,
                                  alpha = 0.6, size = NULL, scales = "fixed",
                                  one_facet = FALSE, se = TRUE, jitter = "swarm") {
    if ( !("Feature" %in% names(object)) )
        stop("object needs a column named 'Feature' to define the feature(s) by which to plot expression.")

    ## use x as group for violin plot if discrete
    group_by_x <- (show_violin &&
        (!is.numeric(object[[as.character(aesth$x)]]) ||
             nlevels(as.factor(object[[as.character(aesth$x)]])) <= 5))
    if (group_by_x)
        aesth$group <- aesth$x
    else
        aesth$group <- 1

    ## Define the plot
    if (one_facet) {
        if (is.null(aesth$colour))
            aesth$colour <- as.symbol("Feature")
        plot_out <- ggplot(object, aesth) + xlab(xlab) + ylab(ylab)
    } else {
        plot_out <- ggplot(object, aesth) +
            facet_wrap(~Feature, ncol = ncol, scales = scales) +
            xlab(xlab) +
            ylab(ylab)
    }

    ## if colour aesthetic is defined, then choose sensible colour palette
    if ( !is.null(aesth$colour) )
        plot_out <- .resolve_plot_colours(plot_out,
                                          object[[as.character(aesth$colour)]],
                                          as.character(aesth$colour))

    ## if x axis variable is not numeric, then jitter points horizontally
    if ( is.numeric(aesth$x) ) {
        if ( is.null(aesth$size) & !is.null(size) )
            plot_out <- plot_out + geom_point(size = size, alpha = alpha)
        else
            plot_out <- plot_out + geom_point(alpha = alpha)
    } else {
        if ( is.null(aesth$size) & !is.null(size) ) {
            if (jitter == "swarm")
                plot_out <- plot_out + ggbeeswarm::geom_quasirandom(
                    alpha = alpha, size = size)
            else
                plot_out <- plot_out + geom_jitter(
                    alpha = alpha, size = size, position = position_jitter(height = 0))
        }
        else {
            if (jitter == "swarm")
                plot_out <- plot_out + ggbeeswarm::geom_quasirandom(
                    alpha = alpha)
            else
                plot_out <- plot_out + geom_jitter(
                    alpha = alpha, position = position_jitter(height = 0))
        }
    }

    ## show optional decorations on plot if desired
    if (show_violin) {
        if (one_facet && (aesth$colour == as.symbol("Feature"))) {
            plot_out <- plot_out +
                geom_violin(aes_string(fill = "Feature"), colour = "gray60",
                            alpha = 0.2, scale = "width")
            plot_out <- .resolve_plot_colours(
                plot_out, object[[as.character(aesth$colour)]],
                as.character(aesth$colour), fill = TRUE)
        }
        else
            plot_out <- plot_out + geom_violin(colour = "gray60", alpha = 0.3,
                                               fill = "gray80", scale = "width")
    }
    if (show_median) {
        plot_out <- plot_out +
            stat_summary(fun.y = median, fun.ymin = median, fun.ymax = median,
                         geom = "crossbar", width = 0.3, alpha = 0.8)
    }
    if (show_smooth) {
        plot_out <- plot_out + stat_smooth(colour = "firebrick", linetype = 2, se = se)
    }
    plot_out
}


#' @rdname plotExpression
#' @aliases plotExpression
#' @export
setMethod("plotExpression", signature(object = "SCESet"),
          function(object, ...) {
              plotExpressionSCESet(object, ...)
          })


#' @rdname plotExpression
#' @aliases plotExpression
#' @export
setMethod("plotExpression", signature("data.frame"),
          function(object, ...) {
              plotExpressionDefault(object, ...)
          })

################################################################################

#' Plot metadata for cells or features
#'
#' @param object a data.frame (or object that can be coerced to such) object
#' containing metadata in columns to plot.
#' @param aesth aesthetics function call to pass to ggplot. This function
#' expects at least x and y variables to be supplied. The default is to plot
#' total_features against log10(total_counts).
#' @param shape numeric scalar to define the plotting shape. Ignored if shape is
#' included in the \code{aesth} argument.
#' @param alpha numeric scalar (in the interval 0 to 1) to define the alpha
#' level (transparency) of plotted points. Ignored if alpha is included in the
#' \code{aesth} argument.
#' @param size numeric scalar to define the plotting size. Ignored if size is
#' included in the \code{aesth} argument.
#' @param theme_size numeric scalar giving default font size for plotting theme
#' (default is 10)
#'
#' @details Plot cell or feature metadata from an SCESet object. If one variable
#'  is supplied then a density plot will be returned. If both variables are
#' continuous (numeric) then a scatter plot will be returned. If one variable is
#' discrete and one continuous then a violin plot with jittered points overlaid
#' will be returned. If both variables are discrete then a jitter plot will be
#' produced. The object returned is a ggplot object, so further layers and
#' plotting options (titles, facets, themes etc) can be added.
#'
#' @return a ggplot plot object
#'
#' @import viridis
#' @export
#'
#' @examples
#' data("sc_example_counts")
#' data("sc_example_cell_info")
#' pd <- new("AnnotatedDataFrame", data = sc_example_cell_info)
#' example_sceset <- newSCESet(countData = sc_example_counts, phenoData = pd)
#' example_sceset <- calculateQCMetrics(example_sceset)
#' plotMetadata(pData(example_sceset))
#'
plotMetadata <- function(object,
                         aesth=aes_string(x = "log10(total_counts)",
                                          y = "total_features"),
                         shape = NULL, alpha = NULL, size = NULL,
                         theme_size = 10) {
    ## Must have at least an x variable in the aesthetics
    if (is.null(aesth$x))
        stop("No x variable defined. Must have at least an x variable defined
             in the aesth argument.")

    ## Determine type of plot to make
    ### Define plot type if there is only an x variable but no y
    if (is.null(aesth$y)) {
        typeof_x <- typeof(aesth[[1]])
        plot_type <- "density"
        if (typeof_x == "symbol") {
            var_name <- as.character(aesth[[1]])
            x <- typeof(object[, var_name])
            if (is.character(x) | is.factor(x))
                plot_type <- "bar"
        }
    } else {
        ### Define plot type if both x and y variables are provided
        typeof_x <- typeof(aesth$x)
        typeof_y <- typeof(aesth$y)
        var_type_x <- var_type_y <- "continuous"
        if (typeof_x == "symbol") {
            var_name <- as.character(aesth$x)
            x <- object[, var_name]
            if (is.character(x) | is.factor(x))
                var_type_x <- "discrete"
        }
        if (typeof_y == "symbol") {
            var_name <- as.character(aesth$y)
            y <- object[, var_name]
            if (is.character(y) | is.factor(y))
                var_type_x <- "discrete"
        }
        if ( var_type_x == "continuous" && var_type_y == "continuous" )
            plot_type <- "scatter"
        else {
            if ( var_type_x == "discrete" && var_type_y == "discrete" )
                plot_type <- "jitter"
            else
                plot_type <- "violin"
        }
    }

    ## Setup plot
    show_size_guide <- show_alpha_guide <- show_shape_guide <- TRUE
    if ( is.null(aesth$size) ) {
        show_size_guide <- FALSE
        if ( is.null(size) )
            size <- 1
    }
    if ( is.null(aesth$alpha) ) {
        show_alpha_guide <- FALSE
        if ( is.null(alpha) )
            alpha <- 0.7
    }
    if ( is.null(aesth$shape) ) {
        show_shape_guide <- FALSE
        if ( is.null(shape) )
            shape <- 16
    }

    ## Set up basics of plot
    plot_out <- ggplot(object, aesth)

    ## Density plot
    if (plot_type == "bar") {
        plot_out <- plot_out + geom_bar(stat = "identity")
    } else if (plot_type == "density") {
        plot_out <- plot_out + geom_density(kernel = "rectangular", size = 2) +
            geom_rug(alpha = 0.5, size = 1)
    } else {
        if (!show_shape_guide && !show_size_guide && !show_alpha_guide)
            plot_out <- plot_out + ggbeeswarm::geom_quasirandom(
                shape = shape, size = size, alpha = alpha)
        else {
            if (!show_shape_guide && !show_size_guide) {
                plot_out <- plot_out + ggbeeswarm::geom_quasirandom(
                    shape = shape, size = size)
            } else if (!show_size_guide && !show_alpha_guide) {
                plot_out <- plot_out + ggbeeswarm::geom_quasirandom(
                    size = size, alpha = alpha)
            } else if (!show_shape_guide && !show_alpha_guide) {
                plot_out <- plot_out + ggbeeswarm::geom_quasirandom(
                    shape = shape, alpha = alpha)
            } else {
                if (!show_shape_guide) {
                    plot_out <- plot_out + ggbeeswarm::geom_quasirandom(
                        shape = shape)
                } else if (!show_size_guide) {
                    plot_out <- plot_out + ggbeeswarm::geom_quasirandom(
                        size = size)
                } else if (!show_alpha_guide) {
                    plot_out <- plot_out + ggbeeswarm::geom_quasirandom(
                        alpha = alpha)
                } else {
                    plot_out <- plot_out +  ggbeeswarm::geom_quasirandom()
                }
            }
        }
        if (plot_type == "scatter")
            plot_out <- plot_out + geom_rug(alpha = 0.5, size = 1)
        if (plot_type == "violin")
            plot_out <- plot_out + geom_violin(size = 1, scale = "width")
    }

    ## Define plotting theme
    if ( requireNamespace("cowplot", quietly = TRUE) )
        plot_out <- plot_out + cowplot::theme_cowplot(theme_size)
    else
        plot_out <- plot_out + theme_bw(theme_size)

    ## Define plot colours
    if ( "colour" %in% names(aesth) || "color" %in% names(aesth) ) {
        if ( is.null(aesth$colour) )
            colvar <- as.character(aesth$color)
        else
            colvar <- as.character(aesth$colour)
        plot_out <- .resolve_plot_colours(plot_out, object[, colvar],
                                          as.character(colvar))
    }
    if ( !is.null(aesth$fill) ) {
        plot_out <- .resolve_plot_colours(plot_out, object[, aesth$fill],
                                          as.character(aesth$fill), fill = TRUE)
    }

    ## Tweak plot guides
    if ( !show_alpha_guide )
        plot_out <- plot_out + guides(alpha = FALSE)
    if ( !show_shape_guide )
        plot_out <- plot_out + guides(shape = FALSE)
    if ( !show_size_guide )
        plot_out <- plot_out + guides(size = FALSE)

    ## Return plot object
    plot_out
}


################################################################################

#' Plot phenotype data from an SCESet object
#'
#' @param object an SCESet object containing expression values and
#' experimental information. Must have been appropriately prepared.
#' @param aesth aesthetics function call to pass to ggplot. This function
#' expects at least x and y variables to be supplied. The default is to plot
#' total_features against log10(total_counts).
#' @param ... arguments passed to \code{\link{plotMetadata}}, e.g.
#' \code{theme_size}, \code{size}, \code{alpha}, \code{shape}.
#'
#' @details Plot phenotype data from an SCESet object. If one variable is
#' supplied then a density plot will be returned. If both variables are
#' continuous (numeric) then a scatter plot will be returned. If one variable is
#' discrete and one continuous then a violin plot with jittered points overlaid
#' will be returned. If both variables are discrete then a jitter plot will be
#' produced. The object returned is a ggplot object, so further layers and
#' plotting options (titles, facets, themes etc) can be added.
#'
#' @return a ggplot plot object
#'
#' @export
#' @examples
#' data("sc_example_counts")
#' data("sc_example_cell_info")
#' pd <- new("AnnotatedDataFrame", data = sc_example_cell_info)
#' example_sceset <- newSCESet(countData = sc_example_counts, phenoData = pd)
#' example_sceset <- calculateQCMetrics(example_sceset)
#' plotPhenoData(example_sceset, aesth = aes_string(x = "log10(total_counts)",
#' y = "total_features", colour = "Mutation_Status"))
#'
plotPhenoData <- function(object, aesth=aes_string(x = "log10(total_counts)",
                                                   y = "total_features"), ...) {
    ## We must have an SCESet object
    if (!is(object, "SCESet"))
        stop("object must be an SCESet object.")

    ## Define dataframe to pass to plotMetadata
    df_to_plot <- pData(object)

    ## Check that aesthetics make sense for feature names if used
    for (item in unlist(aesth)) {
        item <- as.character(item)
        if ( !(item %in% varLabels(object)) &&
             (item %in% featureNames(object)) ) {
            df_to_plot <- data.frame(df_to_plot, exprs(object)[item,])
            colnames(df_to_plot)[ncol(df_to_plot)] <- item
        }
    }

    ## Pass pData(object) to plotMetadata
    plot_out <- plotMetadata(df_to_plot, aesth, ...)

    plot_out
}


################################################################################

#' Plot feature (gene) data from an SCESet object
#'
#' @param object an SCESet object containing expression values and
#' experimental information. Must have been appropriately prepared.
#' @param aesth aesthetics function call to pass to ggplot. This function
#' expects at least x and y variables to be supplied. The default is to produce
#' a density plot of number of cells expressing the feature (requires
#' \code{calculateQCMetrics} to have been run on the SCESet object prior).
#' @param ... arguments passed to \code{\link{plotMetadata}}, e.g.
#' \code{theme_size}, \code{size}, \code{alpha}, \code{shape}.
#'
#' @details Plot feature (gene) data from an SCESet object. If one variable is
#' supplied then a density plot will be returned. If both variables are
#' continuous (numeric) then a scatter plot will be returned. If one variable is
#' discrete and one continuous then a violin plot with jittered points overlaid
#' will be returned. If both variables are discrete then a jitter plot will be
#' produced. The object returned is a ggplot object, so further layers and
#' plotting options (titles, facets, themes etc) can be added.
#'
#' @return a ggplot plot object
#'
#' @export
#' @examples
#' data("sc_example_counts")
#' data("sc_example_cell_info")
#' pd <- new("AnnotatedDataFrame", data = sc_example_cell_info)
#' example_sceset <- newSCESet(countData = sc_example_counts, phenoData = pd)
#' example_sceset <- calculateQCMetrics(example_sceset)
#' plotFeatureData(example_sceset, aesth=aes(x=n_cells_exprs, y=pct_total_counts))
#'
plotFeatureData <- function(object,
                            aesth = aes_string(x = "n_cells_exprs",
                                               y = "prop_total_counts"), ...) {
    ## We must have an SCESet object
    if (!is(object, "SCESet"))
        stop("object must be an SCESet object.")

    ## Pass pData(object) to plotMetadata
    plot_out <- plotMetadata(fData(object), aesth, ...)

    plot_out
}


################################################################################
### Multiplot function for ggplot2 plots

#' Multiple plot function for ggplot2 plots
#'
#' Place multiple \code{\link[ggplot2]{ggplot}} plots on one page.
#'
#' @param ...,plotlist ggplot objects can be passed in ..., or to plotlist (as
#' a list of ggplot objects)
#' @param cols numeric scalar giving the number of columns in the layout
#' @param layout a matrix specifying the layout. If present, \code{cols} is
#' ignored.
#'
#' @details If the layout is something like
#' \code{matrix(c(1,2,3,3), nrow=2, byrow=TRUE)}, then plot 1 will go in the
#' upper left, 2 will go in the upper right, and 3 will go all the way across
#' the bottom. There is no way to tweak the relative heights or widths of the
#' plots with this simple function. It was adapted from
#' \url{http://www.cookbook-r.com/Graphs/Multiple_graphs_on_one_page_(ggplot2)/}
#'
#' @return a \code{ggplot} plot object
#'
#' @importFrom grid grid.newpage
#' @importFrom grid pushViewport
#' @importFrom grid viewport
#' @importFrom grid grid.layout
#' @export
#' @examples
#' library(ggplot2)
#' ## This example uses the ChickWeight dataset, which comes with ggplot2
#' ## First plot
#' p1 <- ggplot(ChickWeight, aes(x = Time, y = weight, colour = Diet, group = Chick)) +
#'    geom_line() +
#'    ggtitle("Growth curve for individual chicks")
#' ## Second plot
#' p2 <- ggplot(ChickWeight, aes(x = Time, y = weight, colour = Diet)) +
#'    geom_point(alpha = .3) +
#'    geom_smooth(alpha = .2, size = 1) +
#'    ggtitle("Fitted growth curve per diet")
#' ## Third plot
#' p3 <- ggplot(subset(ChickWeight, Time == 21), aes(x = weight, colour = Diet)) +
#'    geom_density() +
#'    ggtitle("Final weight, by diet")
#' ## Fourth plot
#' p4 <- ggplot(subset(ChickWeight, Time == 21), aes(x = weight, fill = Diet)) +
#'     geom_histogram(colour = "black", binwidth = 50) +
#'    facet_grid(Diet ~ .) +
#'    ggtitle("Final weight, by diet") +
#'    theme(legend.position = "none")        # No legend (redundant in this graph)
#' ## Combine plots and display
#' multiplot(p1, p2, p3, p4, cols = 2)
#'
multiplot <- function(..., plotlist = NULL, cols = 1, layout = NULL) {
    ## Make a list from the ... arguments and plotlist
    plots <- c(list(...), plotlist)

    num_plots <- length(plots)

    ## If layout is NULL, then use 'cols' to determine layout
    if (is.null(layout)) {
        ## Make the panel
        ## ncol: Number of columns of plots
        ## nrow: Number of rows needed, calculated from # of cols
        layout <- matrix(seq(1, cols * ceiling(num_plots / cols)),
                         ncol = cols, nrow = ceiling(num_plots / cols))
    }

    if (num_plots == 1) {
        print(plots[[1]])
    } else {
        ## Set up the page
        grid::grid.newpage()
        grid::pushViewport(grid::viewport(
            layout = grid::grid.layout(nrow(layout), ncol(layout))))

        # Make each plot, in the correct location
        for (i in 1:num_plots) {
            # Get i,j matrix positions of the regions that contain this subplot
            matchidx <- as.data.frame(which(layout == i, arr.ind = TRUE))
            print(plots[[i]], vp = grid::viewport(layout.pos.row = matchidx$row,
                                            layout.pos.col = matchidx$col))
        }
    }
}


################################################################################
### Plot expression against transcript length


#' Plot expression against transcript length
#'
#' Plot expression values from an SCESet object against transcript length values
#' defined in the SCESet object or supplied as an argument.
#'
#' @param object an \code{\link{SCESet}} object
#' @param tx_length transcript lengths to plot on the x-axis. Can be one of: (1)
#' the name of a column of \code{fData(object)} containing the transcript length
#' values, or (2) the name of an element of \code{assayData(object)} containing
#' a matrix of transcript length values, or (3) a numeric vector of length equal
#' to the number of rows of \code{object} (number of features).
#' @param exprs_values character string indicating which values should be used
#' as the expression values for this plot. Valid arguments are \code{"tpm"}
#' (transcripts per million), \code{"norm_tpm"} (normalised TPM
#' values), \code{"fpkm"} (FPKM values), \code{"norm_fpkm"} (normalised FPKM
#' values), \code{"counts"} (counts for each feature), \code{"norm_counts"},
#' \code{"cpm"} (counts-per-million), \code{"norm_cpm"} (normalised
#' counts-per-million), \code{"exprs"} (whatever is in the \code{'exprs'} slot
#' of the \code{SCESet} object; default), \code{"norm_exprs"} (normalised
#' expression values) or \code{"stand_exprs"} (standardised expression values)
#' or any other slots that have been added to the \code{"assayData"} slot by
#' the user.
#' @param colour_by optional character string supplying name of a column of
#' \code{fData(object)} which will be used as a variable by which to colour
#' expression values on the plot. Alternatively, a data frame with
#' one column, containing a value for each feature to map to a colour.
#' @param shape_by optional character string supplying name of a column of
#' \code{fData(object)} which will be used as a variable to define the shape of
#' points for expression values on the plot. Alternatively, a data frame
#' with one column containing values to map to shapes.
#' @param size_by optional character string supplying name of a column of
#' \code{fData(object)} which will be used as a variable to define the size of
#' points for expression values on the plot. Alternatively, a data frame
#' with one column containing values to map to sizes.
#' @param xlab label for x-axis; if \code{NULL} (default), then \code{x} will be
#' used as the x-axis label
#' @param show_exprs_sd logical, show the standard deviation of expression
#' values for each feature on the plot
#' @param show_smooth logical, show a smoothed fit through the expression values
#'  on the plot
#' @param alpha numeric value between 0 (completely transparent) and 1 (completely
#' solid) defining how transparent plotted points (cells) should be.
#' Points are jittered horizontally if the x-axis value is categorical rather
#' than numeric to avoid overplotting.
#' @param theme_size numeric scalar giving default font size for plotting theme
#' (default is 10)
#' @param log2_values should the expression values be transformed to the
#' log2-scale for plotting (with an offset of 1 to avoid logging zeroes)?
#' @param size numeric scalar optionally providing size for points if
#' \code{size_by} argument is not given. Default is \code{NULL}, in which case
#' \pkg{ggplot2} default is used.
#' @param se logical, should standard errors be shown (default \code{TRUE}) for
#' the smoothed fit through the cells. (Ignored if \code{show_smooth} is \code{FALSE}).
#'
#' @return a ggplot object
#' @export
#'
#' @examples
#' data("sc_example_counts")
#' data("sc_example_cell_info")
#' pd <- new("AnnotatedDataFrame", data = sc_example_cell_info)
#' fd <- new("AnnotatedDataFrame", data =
#' data.frame(gene_id = rownames(sc_example_counts),
#'         feature_id = paste("feature", rep(1:500, each = 4), sep = "_"),
#'      median_tx_length = rnorm(2000, mean = 5000, sd = 500)))
#' rownames(fd) <- rownames(sc_example_counts)
#' example_sceset <- newSCESet(countData = sc_example_counts, phenoData = pd,
#' featureData = fd)
#'
#' plotExprsVsTxLength(example_sceset, "median_tx_length")
#' plotExprsVsTxLength(example_sceset, "median_tx_length", show_smooth = TRUE)
#' plotExprsVsTxLength(example_sceset, "median_tx_length", show_smooth = TRUE,
#' show_exprs_sd = TRUE)
#'
#' ## using matrix of tx length values in assayData(object)
#' mat <- matrix(rnorm(ncol(example_sceset) * nrow(example_sceset), mean = 5000,
#'  sd = 500), nrow = nrow(example_sceset))
#' dimnames(mat) <- dimnames(example_sceset)
#' set_exprs(example_sceset, "tx_len") <- mat
#'
#' plotExprsVsTxLength(example_sceset, "tx_len", show_smooth = TRUE,
#' show_exprs_sd = TRUE)
#'
#' ## using a vector of tx length values
#' plotExprsVsTxLength(example_sceset, rnorm(2000, mean = 5000, sd = 500))
#'
plotExprsVsTxLength <- function(object, tx_length = "median_feat_eff_len",
                                exprs_values = "exprs",
                                colour_by = NULL, shape_by = NULL,
                                size_by = NULL, xlab = NULL,
                                show_exprs_sd = FALSE,
                                show_smooth = FALSE, alpha = 0.6,
                                theme_size = 10, log2_values = FALSE, size = NULL,
                                se = TRUE) {
    ## Check object is an SCESet object
    if ( !is(object, "SCESet") )
        stop("object must be an SCESet")

    tx_length_values <- rep(NA, nrow(object))
    ## Check arguments are valid
    if ( length(tx_length) == 1 ) {
        if ( tx_length %in% Biobase::fvarLabels(object) )
            tx_length_values <- fData(object)[[tx_length]]
        else {
            if ( tx_length %in% names(Biobase::assayData(object)) ) {
                tx_length_mat <- Biobase::assayData(object)[[tx_length]]
                tx_length_values <- matrixStats::rowMedians(tx_length_mat)
            } else
                stop("the argument 'tx_length' should specify a column of fData(object) [see Biobase::fvarLabels(object)] or an element of assayData(object) [see names(assayData(object))")
        }
    } else {
        if ( length(tx_length) != nrow(object) )
            stop("If tx_length is a vector it must have length equal to nrow(object).")
        else {
            if ( !is.numeric(tx_length) )
                stop("If a vector, tx_length must contain numeric values.")
            tx_length_values <- tx_length
        }
    }

    exprs_mat <- get_exprs(object, exprs_values)
    if ( log2_values ) {
        exprs_mat <- log2(exprs_mat + 1)
        ylab <- paste0("Expression (", exprs_values, "; log2-scale)")
    } else
        ylab <- paste0("Expression (", exprs_values, ")")

    ## compute mean expression and sd of expression values
    exprs_mean <- rowMeans(exprs_mat)
    exprs_sd <- matrixStats::rowSds(exprs_mat)

    df_to_plot <- data.frame(tx_length_values, exprs_mean, exprs_sd,
                             ymin = exprs_mean - exprs_sd,
                             ymax = exprs_mean + exprs_sd)

    ## check colour, size, shape arguments
    colour_by_out <- .choose_vis_values(object, colour_by, check_pdata = FALSE)
    colour_by <- colour_by_out$name
    if (!is.null(colour_by)) df_to_plot[[colour_by]] <- colour_by_out$val

    shape_by_out <- .choose_vis_values(object, shape_by, check_pdata = FALSE,
                                       coerce_factor = TRUE, level_limit = 10)
    shape_by <- shape_by_out$name
    if (!is.null(shape_by)) df_to_plot[[shape_by]] <- shape_by_out$val

    size_by_out <- .choose_vis_values(object, size_by, check_pdata = FALSE)
    size_by <- size_by_out$name
    if (!is.null(size_by)) df_to_plot[[size_by]] <- size_by_out$val

    ## Construct a ggplot2 aesthetic for the plot
    aesth <- aes()
    aesth$x <- as.symbol("tx_length_values")
    aesth$y <- as.symbol("exprs_mean")
    aesth$ymin <- as.symbol("ymin")
    aesth$ymax <- as.symbol("ymax")

    if ( !is.null(colour_by) )
        aesth$colour <- as.symbol(colour_by)
    if ( !is.null(shape_by) )
        aesth$shape <- as.symbol(shape_by)
    if ( !is.null(size_by) )
        aesth$size <- as.symbol(size_by)

    ## Define sensible x-axis label if NULL
    if ( is.null(xlab) )
        xlab <- "Median transcript length"

    ## Make the plot
    plot_out <- ggplot2::ggplot(df_to_plot, aesth) + xlab(xlab) + ylab(ylab)

    ## if colour aesthetic is defined, then choose sensible colour palette
    if ( !is.null(aesth$colour) )
        plot_out <- .resolve_plot_colours(plot_out,
                                          df_to_plot[[as.character(aesth$colour)]],
                                          as.character(aesth$colour))

    if ( is.null(aesth$size) & !is.null(size) ) {
        ## add SDs
        if ( show_exprs_sd )
            plot_out <- plot_out + geom_pointrange(size = size, alpha = 0.9 * alpha)
        ## add points to plot
        plot_out <- plot_out + geom_point(size = size, alpha = alpha)
    }  else {
        ## add SDs
        if ( show_exprs_sd )
            plot_out <- plot_out + geom_pointrange(alpha = 0.9 * alpha)
        ## add points to plot
        plot_out <- plot_out + geom_point(alpha = alpha)
    }

    ## show optional decorations on plot if desired
    if (show_smooth) {
        plot_out <- plot_out + stat_smooth(colour = "firebrick", linetype = 2,
                                           se = se)
    }

    ## Define plotting theme
    if ( requireNamespace("cowplot", quietly = TRUE) )
        plot_out <- plot_out + cowplot::theme_cowplot(theme_size)
    else
        plot_out <- plot_out + theme_bw(theme_size)
    plot_out
}
dynverse/scaterlegacy documentation built on Feb. 17, 2020, 5:07 a.m.