#' @title A function for PCA plotting with highlighting groups using ellipses
#' @description Beautiful PCA plot
#' @param data a sample-by-measures numeric matrix. Required
#' @param groups a character vector of sample group assignment. Required
#' @param title a title for the graph. Default: none
#' @param print_ellipse highlight groups by ellipses, Default: TRUE
#' @param center center the data for PCA, see ?prcomp. Default: TRUE
#' @param scale scale the data for PCA, see ?prcomp. Default: TRUE
#' @return ggplot2 object
#' @export
#' @examples
#' \dontrun{
#' library(ellipse)
#' library(ggplot2)
#' pca_func(iris[, 1:3], groups = iris$Species, title = "Iris")
#' }
#' @note Source: \url{https://shiring.github.io/machine_learning/2017/01/15/rfe_ga_post}
##
pca_func <- function(data, groups, title = "", print_ellipse = TRUE, center = TRUE, scale = TRUE) {
# perform pca and extract scores
# pcaOutput <- pca(data, printDropped = FALSE, scale = scale, center = center)
pcaOutput <- prcomp(data, center = center, scale. = scale)
pcaOutput2 <- as.data.frame(pcaOutput$x)
# define groups for plotting
pcaOutput2$groups <- groups
# when plotting samples calculate ellipses for plotting (when plotting features, there are no replicates)
if (print_ellipse) {
centroids <- aggregate(cbind(PC1, PC2) ~ groups, pcaOutput2, mean)
conf.rgn <- do.call(rbind, lapply(unique(pcaOutput2$groups), function(t)
data.frame(groups = as.character(t),
ellipse::ellipse(cov(pcaOutput2[pcaOutput2$groups == t, 1:2]),
centre = as.matrix(centroids[centroids$groups == t, 2:3]),
level = 0.95),
stringsAsFactors = FALSE)))
plot <- ggplot(data = pcaOutput2, aes(x = PC1, y = PC2, group = groups, color = groups)) +
geom_polygon(data = conf.rgn, aes(fill = groups), alpha = 0.2) +
geom_point(size = 2, alpha = 0.6) +
scale_color_brewer(palette = "Set1") +
labs(title = title,
color = "",
fill = "",
x = paste0("PC1: ", round(summary(pcaOutput)$importance[2, 1], digits = 2) * 100, "% variance"),
y = paste0("PC2: ", round(summary(pcaOutput)$importance[2, 2], digits = 2) * 100, "% variance"))
} else {
# if there are fewer than 10 groups (e.g. the predictor classes) I want to have colors from RColorBrewer
if (length(unique(pcaOutput2$groups)) <= 10) {
plot <- ggplot(data = pcaOutput2, aes(x = PC1, y = PC2, group = groups, color = groups)) +
geom_point(size = 3, alpha = 1) +
scale_color_brewer(palette = "Set1") +
labs(title = title,
color = "",
fill = "",
x = paste0("PC1: ", round(summary(pcaOutput)$importance[2, 1], digits = 2) * 100, "% variance"),
y = paste0("PC2: ", round(summary(pcaOutput)$importance[2, 2], digits = 2) * 100, "% variance"))
} else {
# otherwise use the default rainbow colors
plot <- ggplot(data = pcaOutput2, aes(x = PC1, y = PC2, group = groups, color = groups)) +
geom_point(size = 2, alpha = 0.6) +
labs(title = title,
color = "",
fill = "",
x = paste0("PC1: ", round(summary(pcaOutput)$importance[2, 1], digits = 2) * 100, "% variance"),
y = paste0("PC2: ", round(summary(pcaOutput)$importance[2, 2], digits = 2) * 100, "% variance"))
}
}
return(plot)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.