R/makePCAplot.8.R

Defines functions makePCAplot.8

Documented in makePCAplot.8

#' Plot PCA. Version 8. 
#' 
#' Plot 2D PCA graph given a data frame. Tailor graphing parameters and save figure in working directory. Also adds percent of variance explained.
#' 
#' This function, in contrast to previous versions relies on prcomp() function rather than svd() function in computing the eigenvectors.
#' 
#' @param df A data frame on which PCA plot is based.
#' @param colorVec A vector of colors corresponding to column names of df. Coerced to characted. Defaults to "black" (see http://www.stat.columbia.edu/~tzheng/files/Rcolor.pdf).
#' @param label A boolean indicating whether to add labels to PCA dots. Defaults to FALSE.
#' @param label_dist A numeric of the vertical distance between a dot and dot label. Defaults to 0.05.
#' @param main A character of plot title. Defaults to "PCA".
#' @param cex Either a single numeric of label size or a vector of point size. Defaults to a single numeric 1.
#' @param pch A numeric specifying the type of points in the scatter. Defults to 16 which yields filled circles. If many types of points needed please supply a redundant vector of plotting symbol codes (see http://www.statmethods.net/advgraphs/parameters.html) of lenght equal to column names. E.g. pch=c(16,13)[c(rep(1,15),rep(2,125))]
#' @param addLegend A boolean indicating whether to add the legend of colors used in the plot.
#' @param legendPosition A character indicating position of legend. One of "bottomright", "bottom", "bottomleft", "left", "topleft", "top", "topright", "right" and "center. Defults to "topright".
#' @param legendNames A character vector of legend names. Defults to NA.
#' @param legendColors A character vector of colors corresponding to legendNames. Must be in the same left-to-right order (see http://www.stat.columbia.edu/~tzheng/files/Rcolor.pdf).
#' @param plotOut A boolean indicating whether to save a plot figure in the working directory.
#' @param width A numeric of plot's figure width (applicable only when plotOut==TRUE).
#' @param height A numeric of plot's figure height (applicable only when plotOut==TRUE).
#' @param plotOutName A character of plot figure file name (applicable only when plotOut==TRUE).
#' @param horizontal A numeric indicating where the draw a horizontal line. Defults to NA.
#' @param vertical A numeric indicating where to draw a vertical line. Defults to NA.
#' @return A PCA plot.
#' @export

makePCAplot.8 <- function(df,spitOut_PCA_dims = TRUE, colorVec,label=FALSE,label_dist=0.05, main="PCA", cex=1, pch = 16, addLegend=FALSE, plotOut=FALSE, width=13, height=8, plotOutName="PCA.pdf", legendPosition="topright", legendNames=NA, legendColors=NA, horizontal=NA, vertical=NA){
  if(!(length(colorVec)==ncol(df))){
    stop("Lenght of 'colorVec' does not equal ncol of of 'df'. Exiting!")
  }
  PCA <- prcomp(t(df))
  PC1_varExp <- as.character(round(summary(PCA)$importance[,1][2]*100,0))
  PC2_varExp <- as.character(round(summary(PCA)$importance[,2][2]*100,0))
  if(plotOut == TRUE){
    pdf(plotOutName, width = width, height = height)
    plot(PCA$x[,1], PCA$x[,2], xlab = paste("PC1 (",PC1_varExp,"% of variance)",sep = ""), ylab = paste("PC2 (", PC2_varExp,"% of variance)",sep = ""), col=as.character(colorVec), main = main, type = "p", pch=pch,cex=cex,asp = 1)
    if(label==TRUE){
      text(PCA$x[,1], PCA$x[,2]+label_dist, colnames(df), col = as.character(colorVec), cex = cex)
    }
    if(addLegend==TRUE){
      if(anyNA(legendNames)|anyNA(legendColors)){
        stop("'legendNames' or 'legendColors' are/have NA. Exiting!")
      }
      legend(legendPosition, legendNames, fill=legendColors, bty = "n")
    }
    if(class(horizontal)=="numeric"){
      abline(h = horizontal)
    }
    if(class(vertical)=="numeric"){
      abline(v= vertical)
    }
    dev.off()
  }
  else{
    plot(PCA$x[,1], PCA$x[,2], xlab = paste("PC1 (",PC1_varExp,"% of variance)",sep = ""), ylab = paste("PC2 (", PC2_varExp,"% of variance)",sep = ""), col=as.character(colorVec), main = main, type = "p", pch=pch,cex=cex,asp=1)
    if(label==TRUE){
      text(PCA$x[,1], PCA$x[,2]+label_dist, colnames(df), col = as.character(colorVec), cex = cex)
    }
    if(addLegend==TRUE){
      if(anyNA(legendNames)|anyNA(legendColors)){
        stop("'legendNames' or 'legendColors' are/have NA. Exiting!")
      }
      legend(legendPosition, legendNames, fill=legendColors, bty = "n")
    }
    if(class(horizontal)=="numeric"){
      abline(h = horizontal)
    }
    if(class(vertical)=="numeric"){
      abline(v= vertical)
    }
    if(spitOut_PCA_dims==TRUE){
      print("Outputting PCA dims")
      PCAdims <- PCA$x
      row.names(PCAdims) <- colnames(df)
      columnNames <- paste("PC", 1:ncol(PCAdims))
      colnames(PCAdims) <- columnNames
      return(PCAdims)
    }
    else{
      print("DONE")
    }
  }
}
msxakk89/dat documentation built on Aug. 3, 2020, 6:39 p.m.