tidy_pca <- function(data, scale = TRUE, show_plot = TRUE) {
if ("FALSE" %in% purrr::map_lgl(data, is.numeric))
stop("All columns must be numeric for PCA")
if (scale == TRUE) {
pca <- scale(data) %>% princomp()
} else {
pca <- princomp(data)
}
eigs <- pca$sdev^2
var_explained <- eigs / sum(eigs)
pca_summary <- tibble::tibble(component = seq_along(eigs),
eigen_values = eigs,
var_explained,
total_explained = cumsum(var_explained))
pca_loadings <- pca$loadings %>%
as.numeric() %>%
matrix(., ncol = ncol(data)) %>%
tibble::as_tibble() %>%
setNames(., nm = paste0("Comp.", 1:ncol(data))) %>%
mutate(variable = colnames(data)) %>%
select(variable, dplyr::everything())
pca_output <- list(
pca_summary = pca_summary,
pca_loadings = pca_loadings,
pca_scores = pca$scores %>% tibble::as_tibble()
)
if (show_plot == TRUE) {
eigen_plot <- ggplot2::ggplot(pca_output$pca_summary,
aes(x = component, y = eigen_values)) +
ggplot2::geom_point() +
ggplot2::geom_line() +
ggplot2::labs(title = "Eigenvalues",
x = "Components",
y = "") +
ggplot2::scale_y_continuous(breaks = scales::pretty_breaks(n = 10))
cumvar_plot <- ggplot2::ggplot(pca_output$pca_summary,
aes(x = component, y = total_explained)) +
ggplot2::geom_point() +
ggplot2::geom_line() +
ggplot2::labs(title = "Cumulative Variance Explained",
x = "Components",
y = "") +
ggplot2::scale_y_continuous(breaks = scales::pretty_breaks(n = 10))
gridExtra::grid.arrange(eigen_plot, cumvar_plot, ncol=2)
}
return(pca_output)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.