R/pca2D.R

#' Generate a two-dimensional PCA plot.
#' 
#' Takes ExpressionSet object, performs PCA on the transposed expression matrix,
#' then plots the projections of the sample scores on the two specified PCs.
#' 
#' @param cellData ExpressionSet object created with readCells (and preferably 
#'   transformed with prepCells).  It is also helpful to first run 
#'   reduceGenes_var.
#' @param center Boolean specifying whether to the center the data prior to PCA.
#'   This is generally recommended.
#' @param scale Boolean specifying whether the data should be scaled prior to 
#'   PCA.  This is generally not recommended unless samples have different units
#'   (e.g. some samples are counts and some are TPMs).
#' @param PCs Vector of two integers specifying the PCs to plot.  E.g. c(1,3) 
#'   plots PC1 vs PC3.
#' @param groups Character string specifying the title of the column in pData 
#'   that contains the names of the groups to which each sample belongs.  The 
#'   dots representing each sample in the plot will be colored by group. The 
#'   column length should be the same length as the number of samples.
#' @param values Character string specifying the title of the column in pData 
#'   that contains a vector of numeric values to be plotted as a color gradient 
#'   on the dots in the plot.  The column length should be the same length as 
#'   the number of samples.  Gene and values cannot be specified simultaneously.
#'   If groups and values are both specified, gene will be used for coloring. 
#'   Set bubble to TRUE to display group information as well.
#' @param gene Character string specifying a gene whose expression values will 
#'   be plotted a color gradient on the dots in the plot.  One gene name should 
#'   be specified, and the gene must be present within the expression table. 
#'   Gene and values cannot be specified simultaneously.  If groups and gene are
#'   both specified, gene will be used for coloring.  Set bubble to TRUE to 
#'   display group information as well.
#' @param colors Vector of character strings of length 2 specifying the color range for values or gene.  If
#'   not specified, a default black-to-yellow color gradient will be used.
#' @param logNumeric Boolean specifying whether the numbers in values should be 
#'   transformed to log2 space.
#' @param alpha Numeric specifying the transparency (from 0 to 1) level of the 
#'   dot colors.
#' @param dotsize Numeric specifying the size of the dots in the plot.
#' @param bubble Boolean specifying whether dots of different sizes should be 
#'   plotted to display information about another variable on the plot.
#' @param bubbleSizes Vector of numerics specifying the dot sizes for each group
#'   within groups.  Length of bubbleSizes should be the same length as groups. 
#'   If bubbleSizes is not specified, default sizes will be generated according 
#'   to the levels in the groups column.  Useful for displaying group 
#'   information along with values or gene.  Additionally, can be used to 
#'   indicate additional information about groups.  E.g. if the levels in groups
#'   are 'A1', 'B1', 'C2', and 'D2', bubbleSizes could be c(3,3,6,6) to indicate
#'   the '1' and '2' components of the groups with different dot sizes.
#' @param save Boolean specifying whether to save the resultant plot as a .tiff 
#'   file.
#' @return PCA plot for the two specified PCs.
#' @export


pca2D <- function(cellData, center = TRUE, scale = FALSE, PCs = c(1, 2), ICA=FALSE, groups, values, gene, colors, logNumeric = TRUE, CR = FALSE, alpha = 0.7, dotsize = 5, 
    bubble = FALSE, bubbleSizes, save = FALSE) {
    
    if (.Platform$OS.type == "windows") {
        quartz <- function() windows()
    }
    
  if (cellData@logData$prepCells[1] == "No") {
    warning("It would be wise to run prepCells prior to pca2D.", call. = FALSE)
  }
    
    if (!missing(groups) && !all((groups %in% colnames(pData(cellData))))) {
        stop("The column name specified for groups is not found in phenoData. If absent, add a column to phenoData containing the group for each cell, even if it only consists of 1 group.", 
            call. = FALSE)
    }
    
    if (!missing(values) && !all((values %in% colnames(pData(cellData))))) {
        stop("The column name specified for values is not found in phenoData.", 
            call. = FALSE)
    }
    
  if (!missing(groups) && !all((names(cData(cellData)) %in% colnames(pData(cellData))))) {
    stop("Some or all of the column names in colorData do not have matching column names in phenoData.", 
         call. = FALSE)
  }
    
    samples <- pData(cellData)
    exprs_table <- exprs(cellData)
    
    # Obtain cell colors
    
    
    if (ICA==FALSE){
      pca1 <- prcomp(t(exprs_table), center = center, scale. = scale)
      comp1 <- data.frame(pca1$x[, PCs])
      comp.cell.type1 <- comp1
    }
    
    
    if (ICA==TRUE){
      ica1 <- fastICA::fastICA(t(exprs_table), max(PCs))
      comp1 <- data.frame(ica1$S[, PCs])
      comp1["y"] <- seq(0, 0, length.out = length(samples[, groups]))
      comp.cell.type1 <- comp1
    }
    
    if (missing(groups)) {
      cell_colors <- "blue"
      comp.cell.type1["Cell_Type"] <- rep("All", nrow(pData(cellData)))
    }
    
    if (!missing(groups)) {
      cell_colors <- data.frame(cellData@colorData[[groups]], stringsAsFactors = FALSE)
      cell_colors <- as.character(cell_colors[!apply(is.na(cell_colors) | cell_colors == "", 1, all), ])
      groupz <- data.frame(pData(cellData)[, groups], stringsAsFactors = FALSE)
      comp.cell.type1["Cell_Type"] <- factor(groupz[, colnames(groupz)], levels = unique(groupz[, colnames(groupz)]), ordered = FALSE)
    }
    
    
    if (bubble == TRUE) {
        
        if (missing(bubbleSizes)) {
            sizes <- 1.4^((4:(4 + length(levels(comp.cell.type1$Cell_Type)))))
        }
        
        if (!missing(bubbleSizes)) {
            sizes <- bubbleSizes
        }
        
    }
    
    if (bubble == FALSE) {
        sizes <- vector(length = length(cell_colors))
        sizes[1:length(cell_colors)] <- dotsize
    }
    
    if (missing(values) && missing(gene)) {
        
        if (bubble == TRUE) {
            warning("Bubble sizes currently reflect the number of levels in groups and are not adding additional information to the plot.", call. = FALSE)
        }
        
        if (CR == TRUE) {
            samp_PCA <- ggplot(comp.cell.type1) + geom_point(aes_string(x = colnames(comp1)[1], y = colnames(comp1)[2], fill = "factor(Cell_Type,levels=unique(Cell_Type),ordered=FALSE)", 
                size = "factor(Cell_Type,levels=unique(Cell_Type),ordered=FALSE)"), shape = 21, color = "black", alpha = alpha) + scale_fill_manual(name = "Groups", 
                values = cell_colors) + scale_size_manual(name = "Groups", values = sizes) + guides(fill = guide_legend("Groups"), color = guide_legend("Groups"), 
                size = guide_legend("Groups")) + theme_bw() + theme(legend.key = element_rect(colour = F), legend.background = element_rect(fill = NA)) + theme(axis.text.x = element_text(size = 15), 
                axis.text.y = element_text(size = 15), axis.title.x = element_text(size = 25, vjust = -0.8), axis.title.y = element_text(size = 25, vjust = 1.4), 
                legend.text = element_text(size = 13), legend.title = element_text(size = 15, face = "bold"), plot.margin = grid::unit(c(2.1, 1.1, 1.1, 1.1), 
                  "cm")) + stat_ellipse(aes_string(x = colnames(comp1)[1], y = colnames(comp1)[2], fill = "factor(Cell_Type)"), geom = "polygon", level = 0.95, 
                alpha = 0.3, show_guide = F)
            quartz()
            print(samp_PCA)
            if (save == TRUE) {
                ggsave(samp_PCA, file = paste("PCA Plot.tiff"), width = 7, height = 6)
            }
        }
        
        if (CR == FALSE) {
            samp_PCA <- ggplot(comp.cell.type1) + geom_point(aes_string(x = colnames(comp1)[1], y = colnames(comp1)[2], fill = "factor(Cell_Type,levels=unique(Cell_Type),ordered=FALSE)", 
                size = "factor(Cell_Type,levels=unique(Cell_Type),ordered=FALSE)"), shape = 21, color = "black", alpha = alpha) + scale_fill_manual(name = "Groups", 
                values = cell_colors) + scale_size_manual(name = "Groups", values = sizes) + guides(fill = guide_legend("Groups"), color = guide_legend("Groups"), 
                size = guide_legend("Groups")) + theme_bw() + theme(legend.key = element_rect(colour = F), legend.background = element_rect(fill = NA)) + theme(axis.text.x = element_text(size = 15), 
                axis.text.y = element_text(size = 15), axis.title.x = element_text(size = 25, vjust = -0.8), axis.title.y = element_text(size = 25, vjust = 1.4), 
                legend.text = element_text(size = 13), legend.title = element_text(size = 15, face = "bold"), plot.margin = grid::unit(c(2.1, 1.1, 1.1, 1.1), 
                  "cm")) + scale_color_gradient2()
            quartz()
            print(samp_PCA)
            if (save == TRUE) {
                ggsave(samp_PCA, file = paste("PCA Plot.tiff"), width = 7, height = 6)
            }
        }
    }
    
    if (!missing(values)) {
        
        if (!missing(gene)) {
            stop("Cannot specify both values and gene.", call. = FALSE)
        }
        
        if (CR == TRUE) {
            warning("Cannot have a confidence region with values or genes turned on.", call. = FALSE)
        }
        
        if (!missing(colors)){
          cell_colors <- colors
        }
      
        if (length(cell_colors) != 2) {
            warning("Specify a vector of two colors to control color, using default colors instead.", call. = FALSE)
            cell_colors[1:2] <- c("black", "yellow")
        }
        
        valuez <- data.frame(pData(cellData)[, values], stringsAsFactors = FALSE)
        if (logNumeric == TRUE) {
            valuez <- log2(valuez)
        }
        comp.cell.type1["Values"] <- valuez
        
        if (bubble == TRUE) {
            
            samp_PCA <- ggplot(comp.cell.type1) + geom_point(aes_string(x = colnames(comp1)[1], y = colnames(comp1)[2], fill = "Values", size = "factor(Cell_Type,levels=unique(Cell_Type),ordered=FALSE)"), 
                shape = 21, color = "black", alpha = alpha) + scale_size_manual(name = "Groups", values = sizes) + guides(color = guide_legend("Values"), size = guide_legend("Groups")) + 
                theme_bw() + theme(legend.key = element_rect(colour = F), legend.background = element_rect(fill = NA)) + theme(axis.text.x = element_text(size = 15), 
                axis.text.y = element_text(size = 15), axis.title.x = element_text(size = 25, vjust = -0.8), axis.title.y = element_text(size = 25, vjust = 1.4), 
                legend.text = element_text(size = 13), legend.title = element_text(size = 15, face = "bold"), plot.margin = grid::unit(c(2.1, 1.1, 1.1, 1.1), 
                  "cm")) + scale_fill_gradient(low = cell_colors[1], high = cell_colors[2])
            quartz()
            print(samp_PCA)
            if (save == TRUE) {
                ggsave(samp_PCA, file = paste("PCA Plot.tiff"), width = 7, height = 6)
            }
        }
        
        if (bubble == FALSE) {
            
            samp_PCA <- ggplot(comp.cell.type1) + geom_point(aes_string(x = colnames(comp1)[1], y = colnames(comp1)[2], fill = "Values"), size = dotsize, shape = 21, 
                color = "black", alpha = alpha) + guides(color = guide_legend(title = "Values")) + theme_bw() + theme(legend.key = element_rect(colour = F), 
                legend.background = element_rect(fill = NA)) + theme(axis.text.x = element_text(size = 15), axis.text.y = element_text(size = 15), axis.title.x = element_text(size = 25, 
                vjust = -0.8), axis.title.y = element_text(size = 25, vjust = 1.4), legend.text = element_text(size = 13), legend.title = element_text(size = 15, 
                face = "bold"), plot.margin = grid::unit(c(2.1, 1.1, 1.1, 1.1), "cm")) + scale_fill_gradient(low = cell_colors[1], high = cell_colors[2])
            quartz()
            print(samp_PCA)
            if (save == TRUE) {
                ggsave(samp_PCA, file = paste("PCA Plot.tiff"), width = 7, height = 6)
            }
        }
    }
    
    if (!missing(gene)) {
        
        if (!missing(values)) {
            stop("Cannot specify both values and gene.", call. = FALSE)
        }
        
        if (CR == TRUE) {
            warning("Cannot have a confidence region with values or genes turned on.", call. = FALSE)
        }
        
        if (!missing(gene) && !gene %in% row.names(exprs_table)) {
            stop("Specified gene is not present in expression table", call. = FALSE)
        }
      
        if (!missing(colors)){
          cell_colors <- colors
        }
      
        if (length(cell_colors) != 2) {
            warning("Specify a vector of two colors to control color, using default colors instead", call. = FALSE)
            cell_colors[1:2] <- c("black", "yellow")
        }
        
        genez <- data.frame(exprs_table[gene, ], stringsAsFactors = FALSE)[, 1]
        comp.cell.type1[gene] <- genez
        
        if (bubble == TRUE) {
            
            samp_PCA <- ggplot(comp.cell.type1) + geom_point(aes_string(x = colnames(comp1)[1], y = colnames(comp1)[2], fill = gene, size = "factor(Cell_Type,levels=unique(Cell_Type),ordered=FALSE)"), 
                shape = 21, color = "black", alpha = alpha) + scale_size_manual(name = "Groups", values = sizes) + guides(color = guide_legend(title = gene), 
                size = guide_legend("Groups")) + theme_bw() + theme(legend.key = element_rect(colour = F), legend.background = element_rect(fill = NA)) + theme(axis.text.x = element_text(size = 15), 
                axis.text.y = element_text(size = 15), axis.title.x = element_text(size = 25, vjust = -0.8), axis.title.y = element_text(size = 25, vjust = 1.4), 
                legend.text = element_text(size = 13), legend.title = element_text(size = 15, face = "bold"), plot.margin = grid::unit(c(2.1, 1.1, 1.1, 1.1), 
                  "cm")) + scale_fill_gradient(low = cell_colors[1], high = cell_colors[2])
            quartz()
            print(samp_PCA)
            if (save == TRUE) {
                ggsave(samp_PCA, file = paste("PCA Plot.tiff"), width = 7, height = 6)
            }
        }
        
        if (bubble == FALSE) {
            
            samp_PCA <- ggplot(comp.cell.type1) + geom_point(aes_string(x = colnames(comp1)[1], y = colnames(comp1)[2], fill = gene), size = dotsize, shape = 21, 
                color = "black", alpha = alpha) + guides(color = guide_legend(title = gene)) + theme_bw() + theme(legend.key = element_rect(colour = F), legend.background = element_rect(fill = NA)) + 
                theme(axis.text.x = element_text(size = 15), axis.text.y = element_text(size = 15), axis.title.x = element_text(size = 25, vjust = -0.8), axis.title.y = element_text(size = 25, 
                  vjust = 1.4), legend.text = element_text(size = 13), legend.title = element_text(size = 15, face = "bold"), plot.margin = grid::unit(c(2.1, 
                  1.1, 1.1, 1.1), "cm")) + scale_fill_gradient(low = cell_colors[1], high = cell_colors[2])
            quartz()
            print(samp_PCA)
            if (save == TRUE) {
                ggsave(samp_PCA, file = paste("PCA Plot.tiff"), width = 7, height = 6)
            }
        }
    }
    samp_PCA
} 
joeburns06/hocuspocus documentation built on May 19, 2019, 2:59 p.m.