### Principal component analysis
#' Scree plot with factoextra
#'
#' @description `scree_factoextra` takes a prcomp element and diplays the percentage of variance explained by the principal components in a barplot.
#' @details This function takes a prcom element (i.e. an element generated by the base::prcomp function), and displays the percentage of variance explained by each principal component in a barplot. Additionally there is also a dot- and line graph.
#' @param prcomp_element object of class prcomp or pca. Produced by base::prcomp or FactoMineR::PCA.
#' @param out_path string. Path to save plot to png. If out_path is empty, the plot is printed to the device.
#' @param title string. Main title of plot. Default: "Scree plot"
#' @export
#' @examples
#' {pca_iris <- prcomp(dplyr::select_if(iris, is.numeric))
#' scree_factoextra(pca_iris, title = "Scree plot iris")}
scree_factoextra <- function(prcomp_element,
title = "Scree plot",
out_path = "none"){
scree <- factoextra::fviz_eig(prcomp_element,
barfill = viridis::viridis(n = 1, begin = 0.3),
barcolor = viridis::viridis(n = 1, begin = 0.3),
# linecolor = "grey40",
addlabels = TRUE) +
ggplot2::labs(title = title) +
ggplot2::theme_minimal() +
ggplot2::theme(plot.title = ggplot2::element_text(size=12, hjust = 0.5),
axis.text.x = ggplot2::element_text(size = 8),
axis.title.x = ggplot2::element_text(size = 10, hjust = 0.5),
axis.title.y = ggplot2::element_text(size = 10, hjust = 0.5),
legend.text = ggplot2::element_text(size = 8),
legend.title = ggplot2::element_text(size = 10))
if(out_path != "none"){
print(paste("Saving plot to ", out_path, "_screePlot.png", sep = ""))
ggplot2::ggsave(paste(out_path, "_screePlot.png", sep = ""),
plot = scree)
}
else{
scree
}
}
#' Plot contribution of principal component to variance
#'
#' @description `plot_pc_variance` prints a barplot with a line of the contribution of principal components to the data variance
#' @param data_frame data frame. Contains eigenvalues, contribution to data variance and cummulative contribution to data variance for a PCA model.
#' @param x column of data_frame. Column with principal component names.
#' @param y column of data_frame. Column with eigenvalues/contribution to variance/ cummulative contribution to variance.
#' @param xlab string. X-axis label. Default: "dimensions"
#' @param ylab string. X-axis label. Default: "variance [\%]"
#' @param title string. Main title. Default: "Scree plot"
#' @param fill string. Color of the barplot. Default: "#35608DFF"
#' @param hjust numeric. Position of labels relative to the points in the graph. Default: -0.2
#' @export
#' @examples
#' {iris_pca <- FactoMineR::PCA(dplyr::select_if(iris, is.numeric))
#' iris_eigen <- as.data.frame(factoextra::get_eigenvalue(iris_pca))
#' plot_pc_variance(iris_eigen, x = rownames(iris_eigen), y = iris_eigen$cumulative.variance.percent)}
plot_pc_variance <- function(data_frame,
x,
y,
xlab = "dimensions",
ylab = "variance [%]",
title = "Scree plot",
fill = "#35608DFF",
hjust = -0.2){
variance_barchart <- ggplot2::ggplot(data=data_frame, ggplot2::aes(x=x, y=y, group = 1)) +
ggplot2::geom_bar(stat = "identity", fill = fill) +
ggplot2::geom_line() +
ggplot2::geom_point() +
ggplot2::labs(title = title, y = ylab, x = xlab) +
ggplot2::scale_x_discrete(limits = x) +
ggplot2::geom_text(ggplot2::aes(label = round(y, 1)), vjust = -0.2, hjust = hjust, color = "grey40")
variance_barchart
}
#' Biplot with factorextra
#'
#' @description `biplot_factoextra` prints a biplot from a prcomp element.
#' @details This function prints a biplot from a prcomp element.
#' The points can be colored by group. Loadings are optionally displayed.
#' @param prcomp_element object of class prcomp. The base::prcomp function performs a principal component analysis.
#' This is the object it returns.
#' @param groups vector. Groups to color by. Default = "none"
#' @param ellipse bool. Draw confidence ellipse arount the clusters of groups. Default = FALSE.
#' @param loadings string / vector of strings. Draw loadings arrows and labels. c("arrow", "text"), "text", "none" (default).
#' @param out_path string. Path to save plot to png.
#' If out_path is empty, the plot is printed to the device.
#' @export
#' @examples
#' {pca_iris <- prcomp(dplyr::select_if(iris, is.numeric))
#' iris_groups <- iris$Species
#' biplot_factoextra(pca_iris)
#' biplot_factoextra(pca_iris,
#' groups = iris_groups,
#' ellipse = TRUE,
#' loadings = c("arrow", "text"))}
biplot_factoextra <- function(prcomp_element,
groups = "none",
ellipse = FALSE,
loadings = "none",
out_path = "none"){
biplot <- factoextra::fviz_pca_biplot(prcomp_element,
habillage = groups, # a vector of groups by whicht to color
label = "var",
labelsize = 2,
repel = TRUE, # labels do not overlap
col.var = "grey40",
## ellipses
addEllipses = ellipse,
ellipse.type = "norm",
## legend
legend.title = "Groups",
geom.var = loadings) +
viridis::scale_color_viridis(discrete = TRUE) +
viridis::scale_fill_viridis(discrete = TRUE) +
ggplot2::theme_minimal() +
ggplot2::theme(plot.title = ggplot2::element_text(size=12, hjust = 0.5),
axis.text.x = ggplot2::element_text(size = 8),
axis.title.x = ggplot2::element_text(size = 10, hjust = 0.5),
axis.title.y = ggplot2::element_text(size = 10, hjust = 0.5),
legend.text = ggplot2::element_text(size = 8),
legend.title = ggplot2::element_text(size = 10))
if(out_path != "none"){
print(paste("Saving to ", out_path, "_biplot.png", sep = ""))
ggplot2::ggsave(paste(out_path, "_biplot.png", sep = ""),
plot = biplot)
}
else{
biplot
}
}
#' Scree plot with Rbase
#'
#' @description `scree_base` takes a prcomp element and diplays the percentage and the cummulative percentage of variance of variance explained by the principal components in a barplot.
#' @details This function takes a prcomp element (i.e. an element generated by the base::prcomp function), and displays the percentage of variance and the cummulative percentage of variance explained by each principal component in a dot- and line graph.
#' @param input_df data frame or matrix.
#' @param scale bool. Apply scaling when erforming PCA. Default = FALSE.
#' @param out_path string. Path to save plot to png. If out_path is empty, the plot is printed to the device.
#' @param title vector of strings. Main titles for both plots. Default: c("Scree plot", "Cummulative scree plot")
#' @export
#' @examples
#' scree_base(mtcars)
scree_base <- function(input_df,
title = c("Scree plot", "Cummulative scree plot"),
scale = FALSE,
out_path = "none"){
out_name <- paste(out_path, "_screePlot", ".png", sep = "")
prcomp_element <- stats::prcomp(input_df)
# variance explained by each pc
var_pca <- prcomp_element$sdev ^ 2
prop_of_variance <- round(var_pca / sum(var_pca), 5) * 100
cummulative_prop_of_variance <- cumsum(prop_of_variance)
proportion_of_variance_table <- data.frame(Proportion_of_variance = prop_of_variance,
Cummulative_proportion_of_variance = cummulative_prop_of_variance)
func <- function(){
graphics::par(mfrow=c(1,2), mar = c(2, 2, 2, 2))
graphics::plot(prop_of_variance, # scree plot to decide which PCs are used
main = "", xlab ="", ylab ="",
ylim=c(0 ,100),
type = "b")
graphics::title( main = title[1], cex.main = 0.9, font.main = 1,
cex.lab = 1,
xlab =" Principal Component ",
ylab =" Proportion of Variance Explained [%]")
graphics::plot(cummulative_prop_of_variance, # scree plot to decide which PCs are used
main = "", xlab ="", ylab ="",
ylim=c(0 ,100),
type="b")
graphics::title( main = title[2], cex.main = 0.9, font.main = 1,
cex.lab = 0.8,
xlab ="Principal Component",
ylab ="Cumulative Proportion of Variance Explained [%]")
}
if(out_path != "none"){
print(paste("Saving to ", out_name))
grDevices::png(filename = out_name)
func()
grDevices::dev.off()
}
else{
func()
}
}
#' Biplot with ggplot2
#'
#' @description `biplot_ggplot2` prints a biplot from a prcomp element.
#' @details This function prints a biplot from a prcomp element. The points can be colored by group. Loadings are optionally displayed.
#' @param input_df a data frame with at least one factor column.
#' @param xdim integer. Default: 1
#' @param ydim integer. Default: 2
#' @param scale logical. Should data be scaled to unit variance for PCA? Default = FALSE
#' @param title string. Main title of the plot. Default: "PCA - Biplot"
#' @param groups vector. Groups to color by. Default = "none"
#' @param ellipse logical. Draw confidence ellipse arount the clusters of groups. Default = FALSE.
#' @param loadings logical. Draw loadings arrows and labels. Default = FALSE
#' @param out_path string. Path to save plot to png. If out_path is empty, the plot is printed to the device.
#' @export
#' @examples
#' \dontrun{
#' biplot_ggplot2(iris, groups = "Species")
#' biplot_ggplot2(iris, groups = "Species", xdim = 1, ydim = 3 )
#' biplot_ggplot2(iris, "Species", ellipse = TRUE, loadings = TRUE)}
biplot_ggplot2 <- function(input_df,
xdim = 1,
ydim = 2,
groups = "none",
scale = FALSE,
loadings = FALSE,
ellipse = FALSE,
title = "PCA - Biplot",
out_path = "none"){
if(groups == "none"){
group_by <- NULL
}
else {
group_by <- groups
}
prcomp_element <- stats::prcomp(dplyr::select_if(input_df, is.numeric), scale. = scale)
proportion <- summary(prcomp_element)$importance
x_prop <- round(proportion[2, xdim] * 100, 1)
y_prop <- round(proportion[2, ydim] * 100, 1)
biplot <- ggplot2::autoplot(prcomp_element, data = input_df,
x = xdim,
y = ydim,
colour = group_by,
shape = group_by,
loadings = loadings,
loadings.colour = "black",
loadings.label = loadings,
loadings.label.size = 2,
loadings.label.colour = "black",
frame = ellipse,
frame.type = "norm") +
ggplot2::labs(title = title,
x = paste("PC", xdim, " (", x_prop, "%)", sep = ""),
y = paste("PC", ydim, " (", y_prop, "%)", sep = "")) +
ggplot2::theme_minimal() +
ggplot2::theme(plot.title = ggplot2::element_text(size=12, hjust = 0.5),
axis.text.x = ggplot2::element_text(size = 8),
axis.title.x = ggplot2::element_text(size = 10, hjust = 0.5),
axis.title.y = ggplot2::element_text(size = 10, hjust = 0.5),
legend.text = ggplot2::element_text(size = 8),
legend.title = ggplot2::element_text(size = 10))
if(groups != "none"){
group_color <- as.vector(viridis::viridis(n = length(levels(input_df[[group_by]])), alpha = 1))
biplot <- biplot +
ggplot2::scale_fill_manual(values = group_color) +
ggplot2::scale_color_manual(values = group_color)
}
if(out_path != "none"){
print(paste("Saving plot to ", out_path, "_biplot.png", sep = ""))
ggplot2::ggsave(paste(out_path, "_biplot.png", sep = ""),
plot = biplot)
} else{
biplot
}
}
#' Plot contribution to pca
#'
#' `plot_contrib_to_pc` plots the contributions of each variable to each principal component in a grid.
#' @param pca_element Object produced by FactoMineR::PCA.
#' @param title string. Main title of the plot. Default = "Contribution of variables to the principal components"
#' @param out_path string. Path to save plot as png. If "none" (default), plot is printed to device.
#' @export
#' @example
#' cars_pca <- factoMineR::PCAdplyr::select_if(mtcars, is.numeric), graph = FALSE)
#' plot_contrib_to_pc(cars_pca)
plot_contrib_to_pc <- function(pca_element,
title = "Contribution of variables to the principal components",
out_path = "none"){
out_name = paste(out_path, "pca_contribution")
pca_variables <- factoextra::get_pca_var(pca_element)
pca_contrib <- as.data.frame(pca_variables$contrib)
pca_contrib_ordered <- pca_contrib[order(pca_contrib$Dim.1, decreasing = TRUE),]
func <- function(){
graphics::par(mar = c(1, 1, 10, 1), mfrow = c(1, 1), family = "AvantGarde", col.main = "grey40", cex.main = 0.8, col.axis = "grey40")
mycolors <- as.vector(viridis::viridis(n = 10, direction = -1))
corrplot::corrplot(t(pca_contrib_ordered),
method = "circle",
is.corr = FALSE,
pch.col = "green",
tl.srt = 45,
tl.cex = 0.5,
tl.col = "grey40",
col= grDevices::colorRampPalette(mycolors)(10),
cl.cex = 0.5,
cl.align.text = "l"
)
graphics::title(main = title)
}
if(out_path != "none"){
print(paste("Saving to ", out_name))
grDevices::png(filename = out_name, width = 15*96, height = 5*96, units = "px")
func()
grDevices::dev.off()
}
else{
func()
}
graphics::par(mar = c(1, 1, 1, 1))
}
#' Loadings plot
#'
#' `plot_loadings` plots the loadings between the first two principal components.
#' @param pca_object Object produced by FactoMineR::PCA.
#' @param title string. Main title of the plot. Default = "Loadings plot"
#' @param colour logical. TRUE ... color scale by PC1. FALSE (default) ... all black.
#' @param top_loadings integer. Number of loadings to label (the top n loadings of both PCs are labelled).
#' @param xlab string. Title of x-axis. Default: "Dim.1"
#' @param ylab string. Title of y-axis. Default: "Dim.2"
#' @export
#' @example
#' cars_pca <- FactoMineR::PCA(dplyr::select_if(mtcars, is.numeric), graph = FALSE)
#' plot_loadings(cars_pca)
plot_loadings <- function(pca_object,
colour = FALSE,
top_loadings = 10,
title = "Loadings plot",
xlab = "Dim.1",
ylab = "Dim.2"){
x <- pca_object$var
loadings <- as.data.frame(x$coord)
loadings <- data.frame(loadings, x$contrib)
loadings$mylabel <- rep("", nrow(loadings))
loadings <- loadings[order(loadings$Dim.1.1, decreasing = TRUE),]
loadings$mylabel[1:10] <- rownames(loadings)[1:10]
loadings <- loadings[order(loadings$Dim.2.1, decreasing = TRUE),]
loadings$mylabel[1:top_loadings] <- rownames(loadings)[1:top_loadings]
if(top_loadings == 0){
loadings$mylabel <- rep("", nrow(loadings))
}
pc1_max <- max(loadings$Dim.1)
pc2_max <- max(loadings$Dim.2)
if(colour == TRUE){
loadings_plot <- ggplot2::ggplot(loadings, ggplot2::aes(x=Dim.1,
y=Dim.2,
colour = Dim.1.1)) }
else{ loadings_plot <- ggplot2::ggplot(loadings, ggplot2::aes(x=Dim.1,
y=Dim.2)) }
loadings_plot <- loadings_plot +
ggplot2::geom_point(size=1) +
ggplot2::geom_hline(yintercept = 0,
linetype = "dashed",
colour = "grey60") +
ggplot2::geom_vline(xintercept = 0,
linetype = "dashed",
colour = "grey60") +
ggrepel::geom_text_repel(ggplot2::aes(x = Dim.1,
y = Dim.2,
label = mylabel),
colour = "black",
size = 3) +
ggplot2::labs(title = title, x = xlab, y = ylab) +
ggplot2::scale_color_viridis_c(direction = -1, end = 0.9) +
ggplot2::xlim(-pc1_max, pc1_max) +
ggplot2::ylim(-pc2_max, pc2_max) +
ggplot2::theme_minimal() +
ggplot2::theme(plot.title = ggplot2::element_text(size=12, hjust = 0.5, family="AvantGarde"),
plot.subtitle = ggplot2::element_text(size = 8, hjust = 0.5, family = "AvantGarde", colour = "grey40"),
axis.text.x = ggplot2::element_text(size = 8, colour = "grey40", family="AvantGarde"),
axis.text.y = ggplot2::element_text(size = 8, colour = "grey40", family="AvantGarde"),
axis.title.x = ggplot2::element_text(size = 10, hjust = 0.5, colour = "grey40", family="AvantGarde"),
axis.title.y = ggplot2::element_text(size = 10, hjust = 0.5, colour = "grey40", family="AvantGarde"),
legend.text = ggplot2::element_blank(),
legend.title = ggplot2::element_blank(),
legend.position = "none")
loadings_plot
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.