#devtools::use_package('ggplot2', type = 'Suggests')
#library('ggplot2')
#' Biplot for \code{\link[rsvd]{rpca}} using \code{\link[ggplot2]{ggplot}}.
#'
#' @description Creates a pretty biplot which is showing the individual factor map overlayed by the
#' variables factor map, i.e, plotting both the principal component scores and directions.
#'
#' @param rpcaObj Object returned by the \code{\link[rsvd]{rpca}} function.
#'
#' @param pcs Array_like. \cr
#' An array with two values indicating the two PCs which should be used for plotting.
#' By default the first two PCs are used, e.g., \eqn{c(1,2)}.
#'
#' @param loadings Bool (\eqn{TRUE}, \eqn{FALSE}), optional. \cr
#' If \eqn{TRUE}, the eigenvectors
#' are unit scaled by the square root of the eigenvalues \eqn{W = W * diag(sqrt(eigvals))}.
#'
#' @param groups Factor, optional. \cr
#' Factor indicating groups.
#'
#' @param alpha Scalar, optional. \cr
#' Alpha transparency for scatter plot.
#'
#' @param ellipse Bool (\eqn{TRUE}, \eqn{FALSE}), optional. \cr
#' Draw a 1sd data ellipse for each group, if \eqn{TRUE}.
#'
#' @param alpha.ellipse Scalar, optional. \cr
#' Alpha transparency for ellipse.
#'
#' @param var_labels Bool (\eqn{TRUE}, \eqn{FALSE}), optional. \cr
#' Plot variable names, if \eqn{TRUE}.
#'
#' @param var_labels.names Array_like, optional. \cr
#' User specific labels for the individuals.
#'
#' @param ind_labels Bool (\eqn{TRUE}, \eqn{FALSE}), optional. \cr
#' Plot data point names, if \eqn{TRUE}.
#'
#' @param ind_labels.names Array_like, optional. \cr
#' User specific labels for data points.
#'
#' @seealso \code{\link[rsvd]{rpca}}, \code{\link[ggplot2]{ggplot}}
#'
#' @author N. Benjamin Erichson, \email{erichson@berkeley.edu}
#'
#' @examples
#' #See ?rpca
#' @export
ggbiplot <- function( rpcaObj, pcs = c(1,2), loadings=TRUE, groups = NULL, alpha = 0.6,
ellipse = TRUE, alpha.ellipse=0.2,
var_labels=TRUE, var_labels.names=NULL,
ind_labels=TRUE, ind_labels.names=NULL
)
{
if (!requireNamespace('ggplot2', quietly = TRUE)) {
stop("The package 'ggplot2' is needed for this function to work. Please install it.",
call. = FALSE)
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Check if retx is provided
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
if(is.null(rpcaObj$x)) stop("ggbiplot requires the rotated variables, i.e., set rpca(..., retx = TRUE).")
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Check selected pcs
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
stopifnot(length(pcs) == 2)
if(max(pcs) > ncol(rpcaObj$rotation)) stop("Selected PC is not valid.")
if(min(pcs) < 1) stop("Selected PC is not valid.")
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Dimensions
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
n = nrow(rpcaObj$x)
p = nrow(rpcaObj$rotation)
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Label PCs
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
variance = rpcaObj$sdev**2
explained_variance_ratio = round(variance / rpcaObj$var,3) * 100
PC1 = paste("PC", pcs[1], "(", explained_variance_ratio[pcs[1]] , "% explained var.)", sep="")
PC2 = paste("PC", pcs[2], "(", explained_variance_ratio[pcs[2]] , "% explained var.)", sep="")
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Scale principal component scores
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Z1 = rpcaObj$x[, pcs[1]] #* (rpcaObj$eigvals[pcs[1]]**0.5)
Z2 = rpcaObj$x[, pcs[2]] #* (rpcaObj$eigvals[pcs[2]]**0.5)
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Create data frame for variables scores
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
dfscores <- data.frame(scores=cbind(Z1,Z2), row.names = 1:n)
colnames(dfscores) <- c( 'a', 'b')
if(is.null(rownames(rpcaObj$x))) {
dfscores$"indName" <- as.character(1:n)
} else {
dfscores$"indName" <- rownames(rpcaObj$x)
}
if(!is.null(ind_labels.names)) dfscores$"indName" <- ind_labels.names
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Create data frame for variables
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
if(loadings==FALSE) rotation = rpcaObj$rotation[,pcs]
if(loadings==TRUE) rotation = t(t(rpcaObj$rotation[,pcs]) * rpcaObj$eigvals[pcs]**0.5)
dfvariables <- data.frame(rotation=rotation, row.names = 1:p)
colnames(dfvariables) <- c( 'a', 'b')
if(is.null(rownames(rpcaObj$rotation))) {
dfvariables$"varName" <- as.character(1:p)
} else {
dfvariables$"varName" <- rownames(rpcaObj$rotation)
}
if(!is.null(var_labels.names)) dfvariables$"varName" <- var_labels.names
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
#Workaround for CRAN: Nulling
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
x <- NULL # Setting the variables to NULL first
y <- NULL # Setting the variables to NULL first
a <- NULL # Setting the variables to NULL first
b <- NULL # Setting the variables to NULL first
class <- NULL # Setting the variables to NULL first
indName <- NULL # Setting the variables to NULL first
varName <- NULL # Setting the variables to NULL first
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Scores
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
if(is.null(groups)) {
dfscores$"class" <- 'subjects'
} else{
dfscores$"class" <- groups
}
g <- ggplot2::ggplot(data=dfscores, ggplot2::aes(x = a, y = b, colour = class )) +
ggplot2::geom_point(size = 2, alpha = alpha) +
ggplot2::theme(legend.position = "none")
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Stat ellipse
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
if(is.null(groups) == 0 && ellipse==TRUE){
g <- g + ggplot2::stat_ellipse( geom = "polygon", alpha = alpha.ellipse,
ggplot2::aes(fill = class))
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Label data points
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
if(ind_labels==TRUE) {
g <- g + ggplot2::geom_text(data = dfscores,
ggplot2::aes(label = indName, x = a, y = b,
angle = 0, hjust = 0.1, vjust = 0.1),
color = 'black', size = 4)
}
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Variable factor map
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
g <- g + ggplot2::geom_segment(data = dfvariables,
ggplot2::aes(x = 0, y = 0, xend = a, yend = b ),
arrow = grid::arrow(length = grid::unit(0.5, 'picas')),
color = '#de2d26' , size = 0.5)
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
# Label the variable axes
#~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
if(var_labels==TRUE) {
g <- g + ggplot2::geom_text(data = dfvariables,
ggplot2::aes(label = varName, x = a, y = b,
angle = 0, hjust = 0.1, vjust = 0.1),
color = '#de2d26', size = 4)
}
g <- g + ggplot2::ggtitle('Individuals factor map (PCA)')
g <- g + ggplot2::xlab(PC1) + ggplot2::ylab(PC2)
g <- g + ggplot2::guides(colour=FALSE)
g <- g + ggplot2::geom_vline(xintercept=0, linetype="dashed", color = "black")
g <- g + ggplot2::geom_hline(yintercept=0, linetype="dashed", color = "black")
g <- g + ggplot2::theme_bw()
g <- g + ggplot2::theme(panel.grid.major = ggplot2::element_blank(),
panel.grid.minor = ggplot2::element_blank())
return(g)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.