Nothing
#' Principal Component Analysis (PCA) Plotting
#'
#' @title uplot_pca
#' @name uplot_pca
#' @family plots
#' @description
#' This function performs Principal Component Analysis (PCA) on a dataset, and visualizes the results in various ways, including a scatter plot of the first two principal components (PC1 vs PC2) and a Van Krevelen plot projected using PC1 values. The PCA is performed on the molecular formula data, aggregated by a grouping variable, and handles cases where columns exhibit zero variance (which cannot be included in PCA).
#' @inheritParams main_docu
#' @import data.table stats ggplot2
# @importFrom ggplot2 scale_color_identity scale_color_viridis_d geom_hline ggplot geom_bar geom_histogram coord_cartesian geom_point scale_color_gradientn labs theme_minimal theme annotation_custom
#' @importFrom graphics text
#'
#' @return A list containing:
#' \item{pca}{The PCA model object (class `prcomp`).}
#' \item{t_score}{A data table of PCA scores (principal component values for each sample).}
#' \item{fig_vk}{A Van Krevelen plot projected with PC1 values.}
#' \item{fig_pca}{A scatter plot of the first two principal components (PC1 vs PC2).}
#' \item{mfd}{The input data table, augmented with principal component values.}
#'
# @examples
# # Example usage assuming mfd is a data table with appropriate columns:
# pca_results <- uplot_pca(mfd = mf_data_demo, grp = "file_id", int_col = "norm_int")
#'
#' @note The function uses `prcomp` for PCA and `uplot_vk` for the Van Krevelen plot.
#'
#' @seealso \code{\link{uplot_vk}} for the Van Krevelen plot function.
#' @export
uplot_pca <- function(mfd, grp, int_col = "norm_int",
palname = "viridis",
col_bar = TRUE,
...) {
files <- PC1 <- NULL
# --- Input checks ---------------------------------------------------------
if (!"mf" %in% names(mfd)) {
stop("Missing required column: 'mf' (molecular formula).")
}
if (!int_col %in% names(mfd)) {
stop("Column not found: ", int_col)
}
if (!grp %in% names(mfd)) {
stop("Grouping column not found: ", grp)
}
# --- Pivot table: samples × molecular formulas ----------------------------
df_pivot <- dcast(
mfd,
formula = get(grp) ~ mf,
value.var = int_col,
fun.aggregate = mean,
fill = 0
)
df_pivot <- data.frame(df_pivot)
rownames(df_pivot) <- df_pivot[[1]]
df_pivot[[1]] <- NULL
# --- Remove zero-variance columns -----------------------------------------
zero_var <- sapply(df_pivot, function(x) var(x) == 0)
if (any(zero_var)) {
df_pivot <- df_pivot[, !zero_var, drop = FALSE]
}
# --- PCA -------------------------------------------------------------------
pca <- stats::prcomp(df_pivot, scale. = TRUE)
t_score <- as.data.table(pca$x)
t_score[, files := rownames(pca$x)]
# --- Prepare axis labels ---------------------------------------------------
pc1_var <- pca$sdev[1]^2 / sum(pca$sdev^2) * 100
pc2_var <- pca$sdev[2]^2 / sum(pca$sdev^2) * 100
xlab <- sprintf("PC1 (%.3g%%)", pc1_var)
ylab <- sprintf("PC2 (%.3g%%)", pc2_var)
# --- Plotly PCA score plot -------------------------------------------------
x_min <- min(t_score$PC1)
x_max <- max(t_score$PC1) * 1.5
fig_pca <- plotly::plot_ly(
data = t_score,
x = ~PC1,
y = ~PC2,
type = "scatter",
mode = "markers+text",
text = ~files,
textposition = "right",
textfont = list(color = "black"),
marker = list(size = 8)
) |>
layout(
xaxis = list(title = xlab, range = c(x_min, x_max)),
yaxis = list(title = ylab),
title = ""
)
# --- Prepare PCA loadings for VK plot -------------------------------------
loadings <- as.data.table(pca$rotation)
loadings[, mf := rownames(pca$rotation)]
# Merge PC1 loading onto original mfd
mfd2 <- merge(mfd, loadings[, .(mf, PC1)], by = "mf", all.x = TRUE)
mfd2 <- mfd2[!is.na(PC1)]
# --- Van Krevelen plot projected by PC1 -----------------------------------
fig_vk <- uplot_vk(
mfd = mfd2,
z_var = "PC1",
palname = palname,
col_bar = col_bar,
main = "Van Krevelen Diagram (colored by PC1)"
)
# --- Return results --------------------------------------------------------
list(
pca = pca,
t_score = t_score,
fig_vk = fig_vk,
fig_pca = fig_pca,
mfd = mfd2
)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.