R/pca_func.R

Defines functions pca_func

Documented in pca_func

#' @title A function for PCA plotting with highlighting groups using ellipses
#' @description Beautiful PCA plot
#' @param data a sample-by-measures numeric matrix. Required
#' @param groups a character vector of sample group assignment. Required
#' @param title a title for the graph. Default: none
#' @param print_ellipse highlight groups by ellipses, Default: TRUE
#' @param center center the data for PCA, see ?prcomp. Default: TRUE
#' @param scale scale the data for PCA, see ?prcomp. Default: TRUE
#' @return ggplot2 object
#' @export
#' @examples
#' \dontrun{
#' library(ellipse)
#' library(ggplot2)
#' pca_func(iris[, 1:3], groups = iris$Species, title = "Iris")
#' }
#' @note Source: \url{https://shiring.github.io/machine_learning/2017/01/15/rfe_ga_post}
##

pca_func <- function(data, groups, title = "", print_ellipse = TRUE, center = TRUE, scale = TRUE) {

  # perform pca and extract scores
  # pcaOutput <- pca(data, printDropped = FALSE, scale = scale, center = center)
  pcaOutput <- prcomp(data, center = center, scale. = scale)
  pcaOutput2 <- as.data.frame(pcaOutput$x)

  # define groups for plotting
  pcaOutput2$groups <- groups

  # when plotting samples calculate ellipses for plotting (when plotting features, there are no replicates)
  if (print_ellipse) {

    centroids <- aggregate(cbind(PC1, PC2) ~ groups, pcaOutput2, mean)
    conf.rgn  <- do.call(rbind, lapply(unique(pcaOutput2$groups), function(t)
      data.frame(groups = as.character(t),
                 ellipse::ellipse(cov(pcaOutput2[pcaOutput2$groups == t, 1:2]),
                                  centre = as.matrix(centroids[centroids$groups == t, 2:3]),
                                  level = 0.95),
                 stringsAsFactors = FALSE)))

    plot <- ggplot(data = pcaOutput2, aes(x = PC1, y = PC2, group = groups, color = groups)) +
      geom_polygon(data = conf.rgn, aes(fill = groups), alpha = 0.2) +
      geom_point(size = 2, alpha = 0.6) +
      scale_color_brewer(palette = "Set1") +
      labs(title = title,
           color = "",
           fill = "",
           x = paste0("PC1: ", round(summary(pcaOutput)$importance[2, 1], digits = 2) * 100, "% variance"),
           y = paste0("PC2: ", round(summary(pcaOutput)$importance[2, 2], digits = 2) * 100, "% variance"))

  } else {

    # if there are fewer than 10 groups (e.g. the predictor classes) I want to have colors from RColorBrewer
    if (length(unique(pcaOutput2$groups)) <= 10) {

      plot <- ggplot(data = pcaOutput2, aes(x = PC1, y = PC2, group = groups, color = groups)) +
        geom_point(size = 3, alpha = 1) +
        scale_color_brewer(palette = "Set1") +
        labs(title = title,
             color = "",
             fill = "",
             x = paste0("PC1: ", round(summary(pcaOutput)$importance[2, 1], digits = 2) * 100, "% variance"),
             y = paste0("PC2: ", round(summary(pcaOutput)$importance[2, 2], digits = 2) * 100, "% variance"))

    } else {

      # otherwise use the default rainbow colors
      plot <- ggplot(data = pcaOutput2, aes(x = PC1, y = PC2, group = groups, color = groups)) +
        geom_point(size = 2, alpha = 0.6) +
        labs(title = title,
             color = "",
             fill = "",
             x = paste0("PC1: ", round(summary(pcaOutput)$importance[2, 1], digits = 2) * 100, "% variance"),
             y = paste0("PC2: ", round(summary(pcaOutput)$importance[2, 2], digits = 2) * 100, "% variance"))

    }
  }

  return(plot)

}
mdozmorov/MDmisc documentation built on Aug. 24, 2022, 9:18 a.m.