R/plot_PCAs.R

Defines functions plot_PCAs

Documented in plot_PCAs

#' Plot samples on pairwise sets of PCA axes, optionally plotting variables by shape and/or color
#' 
#' This function plots samples on all pairwise sets of specified PCA axes. Points can have their shape
#' and/or color set by variables provided in a design object.
#' @param scores_design_pca a matrix or data frame containing the principal component scores and (optionally) any other sample annotation to be used in plotting. Typically generated by running \code{prcomp}, then using \code{cbind} or similar to append sample annotation.
#' @param PCs a numeric vector of principal component axes to include in plotting. Defaults to 1:3, which will cause the function to plot PC1 vs. PC2, PC1 vs. PC3, and PC2 vs. PC3.
#' @param pvars.labs (optional) character vector containing axis labels. Can be generated by running PCA with \code{calc_PCAs}. Defaults to "PC#", which yields axis labels such as "PC1", "PC2", etc. Set to NULL to suppress axis labels.
#' @param color_by_var (optional) character string or integer identifying the column in \code{scores_design_pca} to color points by. If not provided, points are plotted in black.
#' @param color_by_var_levels (optional) character vector defining the order of elements in the variable used for coloring points; this order is used for the plot legend and to match the order of colors (if provided). If not provided, levels of the variable are ordered by order of appearance in \code{scores_design_pca}.
#' @param color_var_lab (optional) string to be used as the title for the color legend.
#' @param my_cols (optional) vector of colors to use for plotting. If \code{color_by_var} is numeric, should have two elements, providing the start and end points for a continuous color scale (generated by \code{scale_color_gradient}). If color_by_var is not numeric, should be a vector with one color for each level of \code{color_by_var}; if the number of values supplied is less than the numer of levels in color_by_var, additional values are interpolated using colorRampPalette. By default, uses a range from blue to red.
#' @param na_col color to use for NA values of \code{color_by_var}.
#' @param pch_by_var (optional) character string or integer identifying the column in \code{scores_design_pca} to vary point shapes by. If not provided, points are plotted as dots.
#' @param pch_by_var_levels (optional) character vector defining the order of elements in the variable used for point shapes; this order is used for the plot legend and to match the order of shapes (if provided). If not provided, levels of the variable are ordered by order of appearance in \code{scores_design_pca}.
#' @param pch_var_lab (optional) string to be used as the title for the point shape legend.
#' @param my_pch vector of shapes to use for plotting. Required if plotting points by shape; if not provided, all points will be plotted as dots. Must contain at least as many elements as the number of unique elements in \code{pch_by_var}.
#' @param text_by_var (optional) character string or integer identifying the column in \code{scores_design_pca} to plot as text.
#' @param plot_text_and_pch boolean, whether to plot both text labels and points. If \code{FALSE}, text labels are plotted and points are not.
#' @param text_by_var_size numeric, the scaling factor for the text labels; passed as \code{size} to \code{geom_text}.
#' @param add_legend boolean, whether to include legend(s) on the plot for points plotted by variables.
#' @param file_prefix a character string. If provided, the function outputs a pdf of the plot, named "{file_prefix}{other_stuff}.pdf", where {other_stuff} includes the PCs being plotted and variables for shape or color plotting. If \code{NULL}, plots are output to the current plotting device. Defaults to NULL.
#' @param plotdims a numeric vector, the size (in inches) of the plotting object. Applies only if \code{file_prefix} is not NULL.
#' @param point_order character string, specifying how to order the points. Currently accepted values are "random", which randomizes the order of the points, and "input", which sends the points to ggplot as they are in the input data frame. Defaults to "random".
#' @import ggplot2
#' @importFrom rlang sym
#' @export
plot_PCAs <-
  function(scores_design_pca, PCs=1:3, pvars.labs="PC#",
           color_by_var=NULL, color_by_var_levels=NULL, color_var_lab=NULL,
           my_cols=c("blue","red"), na_col="grey50",
           pch_by_var=NULL, pch_by_var_levels=NULL, pch_var_lab=NULL, my_pch=NULL,
           text_by_var=NULL, plot_text_and_pch=FALSE, text_by_var_size=1,
           add_legend=TRUE,
           file_prefix=NULL, plotdims=c(9,9),
           point_order="random") {
    file_suffix <- "pdf"
    
    scores_design_pca <- as.data.frame(scores_design_pca)
    plot_color_by_var <- !is.null(color_by_var)
    color_scale <- NULL; color_labs <- NULL
    if (plot_color_by_var) {
      if (!((is.character(color_by_var) & (color_by_var %in% colnames(scores_design_pca))) |
            (is.numeric(color_by_var) & color_by_var %in% 1:ncol(scores_design_pca))))
        stop(paste("Column", color_by_var, "not found in the included design object."))
      if (!is.numeric(scores_design_pca[,color_by_var, drop=TRUE])) {
        if (!is.factor(scores_design_pca[,color_by_var, drop=TRUE]) | !is.null(color_by_var_levels)) {
          if (is.null(color_by_var_levels))
            color_by_var_levels <- as.character(unique(scores_design_pca[,color_by_var, drop=TRUE]))
          scores_design_pca[,color_by_var] <-
            factor(scores_design_pca[,color_by_var, drop=TRUE], levels=color_by_var_levels)
        }
        if (length(my_cols) < length(color_by_var_levels))
          my_cols <- colorRampPalette(colors=my_cols)(length(color_by_var_levels))
        
        color_scale <- scale_color_manual(values=my_cols)
      } else color_scale <- scale_color_gradient(low=my_cols[1], high=my_cols[2], na.value=my_cols[3])
      
      file_suffix <- paste("color_by_", color_by_var, ".", file_suffix, sep="")
      color_labs <- if (!is.null(color_var_lab)) labs(color=color_var_lab) else labs(color=color_by_var)
    }
    
    plot_pch_by_var <- !is.null(pch_by_var)
    pch_scale <- NULL; pch_labs <- NULL
    if (plot_pch_by_var) {
      if (!((is.character(pch_by_var) & (pch_by_var %in% colnames(scores_design_pca))) |
            (is.numeric(pch_by_var) & pch_by_var %in% 1:ncol(scores_design_pca))))
        stop(paste("Column", pch_by_var, "not found in the included design object."))
      if (!is.factor(scores_design_pca[,pch_by_var, drop=TRUE]) | !is.null(pch_by_var_levels)) {
        if (is.null(pch_by_var_levels))
          pch_by_var_levels <- as.character(unique(scores_design_pca[,pch_by_var, drop=TRUE]))
        scores_design_pca[,pch_by_var] <-
          factor(scores_design_pca[,pch_by_var, drop=TRUE], levels=pch_by_var_levels)
      }
      file_suffix <- paste("pch_by_", pch_by_var, ".", file_suffix, sep="")
      
      if (!is.null(my_pch)) pch_scale <- scale_shape_manual(values=my_pch)
      pch_labs <- if (!is.null(pch_var_lab)) labs(shape=pch_var_lab) else labs(shape=pch_by_var)
    }
    
    if (plot_color_by_var & plot_pch_by_var) {
      plot_points <- geom_point(aes(color=!!sym(color_by_var), shape=!!sym(pch_by_var)), size=3)
    } else if (plot_color_by_var & !plot_pch_by_var) {
      plot_points <- geom_point(aes(color=!!sym(color_by_var)), size=3)
    } else if (!plot_color_by_var & plot_pch_by_var) {
      plot_points <- geom_point(aes(shape=!!sym(pch_by_var)), size=3)
    }
    
    if (!exists("plot_points") & (is.null(text_by_var) | plot_text_and_pch))
      plot_points <- geom_point(size=3)
    
    if (!is.null(text_by_var)) {
      if (!((is.character(text_by_var) & (text_by_var %in% colnames(scores_design_pca))) |
            (is.numeric(text_by_var) & text_by_var %in% 1:ncol(scores_design_pca))))
        stop(paste("Column", text_by_var, "not found in the included design object."))
      if (exists("plot_points")) {
        plot_points <-
          plot_points +
          geom_text(aes(label=!!sym(text_by_var)), size=text_by_var_size)
      } else plot_points <- geom_text(aes(label=!!sym(text_by_var)), size=text_by_var_size)
      file_suffix <- paste("text_by_", text_by_var, ".", file_suffix, sep="")
    }
    
    if (!is.null(pvars.labs)) {
      if (pvars.labs=="PC#") {
        pvars.labs <- paste0("PC", PCs)
      }
    }
    
    scores_design_pca <- miscHelpers::order_points(scores_design_pca, method=point_order)
    
    pc_names <- paste("PC", PCs, sep="")
    for (i in 1:(length(PCs)-1)) {
      for (j in (i+1):length(PCs)) {
        pca_plot <-
          ggplot(scores_design_pca, aes(x=!!sym(pc_names[i]), y=!!sym(pc_names[j]))) +
          labs(x = pvars.labs[PCs[i]], y = pvars.labs[PCs[j]]) +
          color_scale + color_labs +
          pch_scale + pch_labs +
          plot_points
        
        if (!add_legend) pca_plot <- pca_plot + theme(legend.position="none")
        
        if (!is.null(file_prefix)) {
          pdf(
            file=paste(
              file_prefix, paste("PC", PCs[i], "_vs_PC", PCs[j], sep=""),
              file_suffix, sep="."),
            w=plotdims[1], h=plotdims[2])
          on.exit(while ("pdf" %in% names(dev.list())) dev.off()) # close plotting device on exit (mostly important for errors that could leave pdf output open)
        }
        print(pca_plot)
      }
    }
    while (names(dev.cur()) == "pdf") dev.off()  # shut down PDF device (not sure why on.exit isn't doing it)
  }
mjdufort/RNAseQC documentation built on April 19, 2024, 3:13 p.m.