Nothing
#' The function pathway_errorbar() is used to visualize the results of functional pathway differential abundance analysis as error bar plots.
#'
#' @name pathway_errorbar
#' @param abundance A data frame with row names representing pathways and column names representing samples. Each element represents the relative abundance of the corresponding pathway in the corresponding sample.
#' @param daa_results_df A data frame containing the results of the differential abundance analysis of the pathways, generated by the pathway_daa function. x_lab should be a column name of daa_results_df.
#' @param Group A data frame or a vector that assigns each sample to a group. The groups are used to color the samples in the figure.
#' @param ko_to_kegg A logical parameter indicating whether there was a convertion that convert ko abundance to kegg abundance.
#' @param p_values_threshold A numeric parameter specifying the threshold for statistical significance of differential abundance. Pathways with p-values below this threshold will be considered significant.
#' @param order A parameter controlling the ordering of the rows in the figure. The options are: "p_values" (order by p-values), "name" (order by pathway name), "group" (order by the group with the highest mean relative abundance), or "pathway_class" (order by the pathway category).
#' @param select A vector of pathway names to be included in the figure. This can be used to limit the number of pathways displayed. If NULL, all pathways will be displayed.
#' @param p_value_bar A logical parameter indicating whether to display a bar showing the p-value threshold for significance. If TRUE, the bar will be displayed.
#' @param colors A vector of colors to be used to represent the groups in the figure. Each color corresponds to a group. If NULL, colors will be selected based on the color_theme.
#' @param x_lab A character string to be used as the x-axis label in the figure. The default value is "description" for KOs'descriptions and "pathway_name" for KEGG pathway names.
#' @param log2_fold_change_color A character string specifying the color for log2 fold change bars. Default is "#87ceeb" (light blue). Can also be "auto" to use theme-based colors.
#' @param max_features A numeric parameter specifying the maximum number of features to display before issuing a warning. Default is 30. Set to a higher value to display more features, or Inf to disable the limit entirely.
#' @param color_theme A character string specifying the color theme to use. Options include: "default", "nature", "science", "cell", "nejm", "lancet", "colorblind_friendly", "viridis", "plasma", "minimal", "high_contrast", "pastel", "bold". Default is "default".
#' @param pathway_class_colors A vector of colors for pathway class annotations. If NULL, colors will be selected from the theme.
#' @param smart_colors A logical parameter indicating whether to use intelligent color selection based on data characteristics. Default is FALSE.
#' @param accessibility_mode A logical parameter indicating whether to use accessibility-friendly colors. Default is FALSE.
#' @param legend_position A character string specifying legend position. Options: "top", "bottom", "left", "right", "none". Default is "top".
#' @param legend_direction A character string specifying legend direction. Options: "horizontal", "vertical". Default is "horizontal".
#' @param legend_title A character string for legend title. If NULL, no title is displayed.
#' @param legend_title_size A numeric value specifying legend title font size. Default is 12.
#' @param legend_text_size A numeric value specifying legend text font size. Default is 10.
#' @param legend_key_size A numeric value specifying legend key size in cm. Default is 0.8.
#' @param legend_ncol A numeric value specifying number of columns in legend. If NULL, automatic layout is used.
#' @param legend_nrow A numeric value specifying number of rows in legend. If NULL, automatic layout is used.
#' @param pvalue_format A character string specifying p-value format. Options: "numeric", "scientific", "smart", "stars_only", "combined". Default is "smart".
#' @param pvalue_stars A logical parameter indicating whether to display significance stars. Default is TRUE.
#' @param pvalue_colors A logical parameter indicating whether to use color coding for significance levels. Default is FALSE.
#' @param pvalue_size A numeric value or "auto" for p-value text size. Default is "auto".
#' @param pvalue_angle A numeric value specifying p-value text angle in degrees. Default is 0.
#' @param pvalue_thresholds A numeric vector of significance thresholds. Default is c(0.001, 0.01, 0.05).
#' @param pvalue_star_symbols A character vector of star symbols for significance levels. Default is c("***", "**", "*").
#' @param pathway_class_text_size A numeric value or "auto" for pathway class text size. Default is "auto".
#' @param pathway_class_text_color A character string for pathway class text color. Use "auto" for theme-based color. Default is "black".
#' @param pathway_class_text_face A character string for pathway class text face. Options: "plain", "bold", "italic". Default is "bold".
#' @param pathway_class_text_angle A numeric value specifying pathway class text angle in degrees. Default is 0.
#' @param pathway_class_position A character string specifying pathway class position. Options: "left", "right", "none". Default is "left".
#' @param pathway_names_text_size A numeric value or "auto" for pathway names (y-axis labels) text size. Default is "auto".
#' @importFrom stats sd
#' @return A ggplot2 (patchwork) plot showing the error bar plot of the differential abundance analysis results for the functional pathways.
#' @export
#'
#' @examples
#' \dontrun{
#' # Example 1: Analyzing KEGG pathway abundance
#' metadata <- read_delim(
#' "path/to/your/metadata.txt",
#' delim = "\t",
#' escape_double = FALSE,
#' trim_ws = TRUE
#' )
#'
#' # data(metadata)
#'
#' kegg_abundance <- ko2kegg_abundance(
#' "path/to/your/pred_metagenome_unstrat.tsv"
#' )
#'
#' # data(kegg_abundance)
#'
#' # Please change group to "your_group_column" if you are not using example dataset
#' group <- "Environment"
#'
#' daa_results_df <- pathway_daa(
#' abundance = kegg_abundance,
#' metadata = metadata,
#' group = group,
#' daa_method = "ALDEx2",
#' select = NULL,
#' reference = NULL
#' )
#'
#' # Please check the unique(daa_results_df$method) and choose one
#' daa_sub_method_results_df <- daa_results_df[daa_results_df$method
#' == "ALDEx2_Welch's t test", ]
#'
#' daa_annotated_sub_method_results_df <- pathway_annotation(
#' pathway = "KO",
#' daa_results_df = daa_sub_method_results_df,
#' ko_to_kegg = TRUE
#' )
#'
#' # Please change Group to metadata$your_group_column if you are not using example dataset
#' Group <- metadata$Environment
#'
#' p <- pathway_errorbar(
#' abundance = kegg_abundance,
#' daa_results_df = daa_annotated_sub_method_results_df,
#' Group = Group,
#' p_values_threshold = 0.05,
#' order = "pathway_class",
#' select = daa_annotated_sub_method_results_df %>%
#' arrange(p_adjust) %>%
#' slice(1:20) %>%
#' select("feature") %>% pull(),
#' ko_to_kegg = TRUE,
#' p_value_bar = TRUE,
#' colors = NULL,
#' x_lab = "pathway_name",
#' log2_fold_change_color = "#FF5733" # Custom color for log2 fold change bars
#' )
#'
#' # Example 2: Analyzing EC, MetaCyc, KO without conversions
#' metadata <- read_delim(
#' "path/to/your/metadata.txt",
#' delim = "\t",
#' escape_double = FALSE,
#' trim_ws = TRUE
#' )
#' # data(metadata)
#'
#' metacyc_abundance <- read.delim("path/to/your/metacyc_abundance.tsv")
#'
#' # data(metacyc_abundance)
#'
#' group <- "Environment"
#'
#' daa_results_df <- pathway_daa(
#' abundance = metacyc_abundance %>% column_to_rownames("pathway"),
#' metadata = metadata,
#' group = group,
#' daa_method = "LinDA",
#' select = NULL,
#' reference = NULL
#' )
#'
#'
#' daa_annotated_results_df <- pathway_annotation(
#' pathway = "MetaCyc",
#' daa_results_df = daa_results_df,
#' ko_to_kegg = FALSE
#' )
#'
#' Group <- metadata$Environment
#'
#' p <- pathway_errorbar(
#' abundance = metacyc_abundance %>% column_to_rownames("pathway"),
#' daa_results_df = daa_annotated_results_df,
#' Group = Group,
#' p_values_threshold = 0.05,
#' order = "group",
#' select = NULL,
#' ko_to_kegg = FALSE,
#' p_value_bar = TRUE,
#' colors = NULL,
#' x_lab = "description",
#' log2_fold_change_color = "#006400" # Dark green for log2 fold change bars
#' )
#' }
utils::globalVariables(c("group", "name", "value", "feature", "negative_log10_p", "pathway_class", "p_adjust", "log2_fold_change", "transform_sample_counts", "column_to_rownames", "txtProgressBar", "setTxtProgressBar", "utils"))
pathway_errorbar <-
function(abundance,
daa_results_df,
Group,
ko_to_kegg = FALSE,
p_values_threshold = 0.05,
order = "group",
select = NULL,
p_value_bar = TRUE,
colors = NULL,
x_lab = NULL,
log2_fold_change_color = "#87ceeb",
max_features = 30,
color_theme = "default",
pathway_class_colors = NULL,
smart_colors = FALSE,
accessibility_mode = FALSE,
# New legend control parameters
legend_position = "top",
legend_direction = "horizontal",
legend_title = NULL,
legend_title_size = 12,
legend_text_size = 10,
legend_key_size = 0.8,
legend_ncol = NULL,
legend_nrow = NULL,
# P-value display parameters
pvalue_format = "numeric",
pvalue_stars = TRUE,
pvalue_colors = FALSE,
pvalue_size = "auto",
pvalue_angle = 0,
pvalue_thresholds = c(0.001, 0.01, 0.05),
pvalue_star_symbols = c("***", "**", "*"),
# Pathway class annotation parameters
pathway_class_text_size = "auto",
pathway_class_text_color = "black",
pathway_class_text_face = "bold",
pathway_class_text_angle = 0,
pathway_class_position = "right",
# Pathway names text size parameter
pathway_names_text_size = "auto") {
ko_to_kegg <- normalize_logical_flag(ko_to_kegg, "ko_to_kegg")
p_value_bar <- normalize_logical_flag(p_value_bar, "p_value_bar")
smart_colors <- normalize_logical_flag(smart_colors, "smart_colors")
accessibility_mode <- normalize_logical_flag(accessibility_mode, "accessibility_mode")
pvalue_stars <- normalize_logical_flag(pvalue_stars, "pvalue_stars")
pvalue_colors <- normalize_logical_flag(pvalue_colors, "pvalue_colors")
# Input validation using unified functions
validate_abundance(abundance)
validate_dataframe(daa_results_df,
required_cols = c("feature", "method", "group1", "group2", "p_adjust"),
param_name = "daa_results_df")
if (length(Group) != ncol(abundance)) {
stop("Length of Group must match number of columns in abundance matrix")
}
# Check the number of significant features
# Calculate number of significant features (for reference)
# sig_features <- sum(daa_results_df$p_adjust < 0.05)
# Identify pathways with missing annotation
missing_pathways <- daa_results_df[is.na(daa_results_df$pathway_name), "feature"]
# Inform the user about the missing annotations
if(length(missing_pathways) > 0) {
message(sprintf("Excluded %d pathways with missing annotations. Use 'pathway_annotation' to add them.",
length(missing_pathways)))
}
# Get the names of all columns in the data frame
column_names <- colnames(daa_results_df)
# Check if there are more than two 'group' columns
group_columns <- grepl("^group", column_names)
if (sum(group_columns) > 2) {
p_value_bar <- FALSE
message(
"There are more than two 'group' columns in the 'daa_results_df' data frame. As a result, it is not possible to compute the log2 fold values. The 'p_value_bar' has been automatically set to FALSE."
)
}
# Set x_lab if not provided
if (is.null(x_lab)){
if (ko_to_kegg) {
x_lab <- "pathway_name"
}else{
x_lab <- "description"
}
if (is.null(daa_results_df$pathway_name) && is.null(daa_results_df$description)){
stop("No annotation columns found. Use 'pathway_annotation' to annotate daa_results_df first.")
}
}
if (!(x_lab %in% colnames(daa_results_df))){
stop(sprintf("Column '%s' not found in daa_results_df. Use 'pathway_annotation' to add annotations.", x_lab))
}
# Exclude rows with missing pathway annotation (after x_lab is set)
original_rows <- nrow(daa_results_df)
daa_results_df <- daa_results_df[!is.na(daa_results_df[,x_lab]),]
filtered_rows <- nrow(daa_results_df)
# Check if all rows were filtered out due to missing annotations
if (filtered_rows == 0) {
warning(
sprintf("All %d rows were excluded due to missing '%s' annotations. ", original_rows, x_lab),
"This typically occurs when no statistically significant pathways were found. ",
"Cannot create error bar plot with empty data. Returning NULL.",
call. = FALSE
)
return(NULL)
}
if (original_rows > filtered_rows) {
message(sprintf("Excluded %d rows with missing '%s' annotations.", original_rows - filtered_rows, x_lab))
}
validate_daa_results(daa_results_df)
# Enhanced color selection using the new color theme system
n_groups <- nlevels(as.factor(Group))
# Use smart color selection if requested
if (smart_colors || accessibility_mode) {
smart_selection <- smart_color_selection(
n_groups = n_groups,
has_pathway_class = ko_to_kegg,
data_type = "abundance",
accessibility_mode = accessibility_mode
)
color_theme <- smart_selection$theme_name
if (smart_colors) {
message("Smart color selection: ", smart_selection$reason)
}
}
# Get the color theme
theme_colors <- get_color_theme(color_theme, n_groups)
# Set group colors
if (is.null(colors)) {
colors <- theme_colors$group_colors[1:n_groups]
}
# Set pathway class colors if not provided
if (is.null(pathway_class_colors)) {
pathway_class_colors <- theme_colors$pathway_class_colors
}
# Set log2 fold change color
if (log2_fold_change_color == "auto") {
log2_fold_change_color <- theme_colors$fold_change_single
}
errorbar_abundance_mat <- as.matrix(abundance)
daa_results_filtered_df <-
daa_results_df[which(daa_results_df$p_adjust < p_values_threshold),]
if (!is.null(select)) {
daa_results_filtered_sub_df <-
daa_results_filtered_df[daa_results_filtered_df$feature %in% select, ]
} else {
daa_results_filtered_sub_df <- daa_results_filtered_df
}
if (nrow(daa_results_filtered_sub_df) > max_features) {
warning(
sprintf("Found %d significant features (max: %d). Use 'select' to filter or set max_features=Inf.",
nrow(daa_results_filtered_sub_df), max_features),
call. = FALSE
)
}
if (nrow(daa_results_filtered_sub_df) == 0){
stop(
"Visualization with 'pathway_errorbar' cannot be performed because there are no features with statistical significance. ",
"For possible solutions, please check the FAQ section of the tutorial."
)
}
# Convert to relative abundance via the shared helper. This keeps
# the normalization identical to calculate_abundance_stats() and
# rejects zero-sum sample columns up front, instead of silently
# propagating NaN into the plotted error bars.
relative_abundance_mat <- compute_relative_abundance(
errorbar_abundance_mat,
context = "pathway_errorbar()"
)
# Subset to only include the features present in daa_results_filtered_sub_df$feature
sub_relative_abundance_mat <- relative_abundance_mat[rownames(relative_abundance_mat) %in% daa_results_filtered_sub_df$feature, , drop = FALSE]
# Create a mapping from sample names to groups.
# Prefer names(Group) when available to avoid order-dependent mismatches.
if (!is.null(names(Group)) &&
length(names(Group)) == length(Group) &&
all(!is.na(names(Group)))) {
sample_to_group_map <- stats::setNames(as.character(Group), names(Group))
} else {
sample_to_group_map <- stats::setNames(as.character(Group), colnames(abundance))
}
# Get groups for the current abundance columns.
correct_groups <- sample_to_group_map[colnames(sub_relative_abundance_mat)]
if (any(is.na(correct_groups))) {
missing_samples <- colnames(sub_relative_abundance_mat)[is.na(correct_groups)]
stop(
"Could not map Group values to abundance samples. ",
"Missing sample name(s): ",
paste(missing_samples, collapse = ", "),
". Ensure Group is in the same order as abundance columns ",
"or provide names(Group) matching sample IDs."
)
}
# Per-feature, per-group mean and sd. Route through the shared helper
# instead of the older pivot_longer + group_by(...) + summarise(...)
# path that used to live here. That hand-rolled aggregation diverged
# from pathway_errorbar_table()'s path on NA handling (it did not set
# `na.rm = TRUE`), so the same abundance matrix could yield different
# bar heights in the plot vs the table. Using summarize_abundance_by_group()
# makes the mean/sd a single source of truth across the package.
Group_levels <- unique(as.character(Group))
error_bar_pivot_longer_tibble_summarised <-
summarize_abundance_by_group(sub_relative_abundance_mat, correct_groups)
error_bar_pivot_longer_tibble_summarised$name <-
factor(error_bar_pivot_longer_tibble_summarised$name,
levels = rownames(sub_relative_abundance_mat))
error_bar_pivot_longer_tibble_summarised$group <-
factor(error_bar_pivot_longer_tibble_summarised$group, levels = Group_levels)
# When ko_to_kegg = TRUE, validate pathway_class column exists
# This is required for proper alignment of pathway class annotations
if (ko_to_kegg && !"pathway_class" %in% colnames(daa_results_filtered_sub_df)) {
stop(
"The 'pathway_class' column is missing but ko_to_kegg = TRUE. ",
"Please use pathway_annotation(..., ko_to_kegg = TRUE) to annotate the data, ",
"or set ko_to_kegg = FALSE if you don't need pathway class annotations."
)
}
switch(
order,
"p_values" = {
if (ko_to_kegg) {
# Nested sorting: first by pathway_class, then by p_adjust within each class
# This ensures pathway class color blocks align correctly
order <- order(
daa_results_filtered_sub_df$pathway_class,
daa_results_filtered_sub_df$p_adjust
)
} else {
order <- order(daa_results_filtered_sub_df$p_adjust)
}
},
"name" = {
if (ko_to_kegg) {
# Nested sorting: first by pathway_class, then by feature name within each class
order <- order(
daa_results_filtered_sub_df$pathway_class,
daa_results_filtered_sub_df$feature
)
} else {
order <- order(daa_results_filtered_sub_df$feature)
}
},
"group" = {
# Track the group with the highest mean abundance per feature.
# Ties are resolved by the original group-level order so ordering is
# deterministic and assigns exactly one group per feature.
daa_results_filtered_sub_df$pro <- NA_character_
for (i in levels(error_bar_pivot_longer_tibble_summarised$name)) {
# Get subset for current feature
error_bar_pivot_longer_tibble_summarised_sub <-
error_bar_pivot_longer_tibble_summarised[error_bar_pivot_longer_tibble_summarised$name == i,]
# Find group with maximum mean abundance
group_means <- stats::setNames(
error_bar_pivot_longer_tibble_summarised_sub$mean,
as.character(error_bar_pivot_longer_tibble_summarised_sub$group)
)
tied_groups <- names(group_means)[group_means == max(group_means, na.rm = TRUE)]
pro_group <- Group_levels[Group_levels %in% tied_groups][1]
# Find indices of rows matching the current feature, excluding NA values
idx <- which(daa_results_filtered_sub_df$feature == i & !is.na(daa_results_filtered_sub_df$feature))
# Only assign values if valid indices exist
if (length(idx) > 0) {
daa_results_filtered_sub_df$pro[idx] <- pro_group
}
}
if (ko_to_kegg) {
# Nested sorting: first by pathway_class, then by group and p_adjust within each class
order <- order(
daa_results_filtered_sub_df$pathway_class,
factor(daa_results_filtered_sub_df$pro, levels = Group_levels),
daa_results_filtered_sub_df$p_adjust
)
} else {
# Order by group and p-value
order <-
order(factor(daa_results_filtered_sub_df$pro, levels = Group_levels),
daa_results_filtered_sub_df$p_adjust)
}
},
"pathway_class" = {
if (!"pathway_class" %in% colnames(daa_results_filtered_sub_df)) {
stop(
"The 'pathway_class' column is missing in the 'daa_results_filtered_sub_df' data frame. ",
"Please use the 'pathway_annotation' function to annotate the 'pathway_daa' results."
)
}
order <- order(
daa_results_filtered_sub_df$pathway_class,
daa_results_filtered_sub_df$p_adjust
)
},
{
order <- order
}
)
daa_results_filtered_sub_df <-
daa_results_filtered_sub_df[order,]
# Reorder data to match daa_results_filtered_sub_df$feature order
# Using match() instead of loop rbind() for O(n) vs O(n²) performance
error_bar_pivot_longer_tibble_summarised$feature_order <- match(
error_bar_pivot_longer_tibble_summarised$name,
daa_results_filtered_sub_df$feature
)
error_bar_pivot_longer_tibble_summarised_ordered <-
error_bar_pivot_longer_tibble_summarised[order(error_bar_pivot_longer_tibble_summarised$feature_order), ]
error_bar_pivot_longer_tibble_summarised_ordered$feature_order <- NULL
if (!ko_to_kegg) {
# Match by feature name using the match function
matched_indices <- match(
error_bar_pivot_longer_tibble_summarised_ordered$name,
daa_results_filtered_sub_df$feature
)
error_bar_pivot_longer_tibble_summarised_ordered[, x_lab] <-
daa_results_filtered_sub_df[matched_indices, x_lab]
}
if (ko_to_kegg) {
error_bar_pivot_longer_tibble_summarised_ordered$pathway_class <-
rep(daa_results_filtered_sub_df$pathway_class,
each = length(levels(
factor(error_bar_pivot_longer_tibble_summarised_ordered$group)
)))
}
error_bar_pivot_longer_tibble_summarised_ordered$name <- factor(error_bar_pivot_longer_tibble_summarised_ordered$name, levels = rev(daa_results_filtered_sub_df$feature))
# Calculate smart text size for pathway names (y-axis labels)
pathway_names_final_text_size <- if (pathway_names_text_size == "auto") {
calculate_smart_text_size(nrow(daa_results_filtered_sub_df),
base_size = 10, min_size = 8, max_size = 14)
} else {
pathway_names_text_size
}
# Calculate smart text size for pathway class annotations
pathway_class_final_text_size <- if (pathway_class_text_size == "auto") {
calculate_smart_text_size(nrow(daa_results_filtered_sub_df),
base_size = 10, min_size = 3, max_size = 4)
} else {
as.numeric(pathway_class_text_size) # Ensure it's numeric
}
# Set pathway class text color based on theme if "auto"
pathway_class_final_text_color <- if (pathway_class_text_color == "auto") {
# Get theme colors for pathway class
current_theme <- get_color_theme(color_theme)
current_theme$pathway_class_colors[1] # Use first theme color
} else {
pathway_class_text_color
}
bar_errorbar <-
ggplot2::ggplot(error_bar_pivot_longer_tibble_summarised_ordered,
ggplot2::aes(mean, name, fill = group)) +
ggplot2::geom_errorbar(
ggplot2::aes(xmax = mean + sd, xmin = 0),
position = ggplot2::position_dodge(width = 0.8),
width = 0.5,
linewidth = 0.5,
color = "black"
) +
ggplot2::geom_bar(stat = "identity",
position = ggplot2::position_dodge(width = 0.8),
width = 0.8) +
GGally::geom_stripped_cols(width = 10) +
ggplot2::scale_fill_manual(values = colors) +
ggplot2::scale_color_manual(values = colors) +
ggprism::theme_prism(base_size = 12) +
ggplot2::scale_x_continuous(expand = c(0, 0)) +
ggplot2::scale_y_discrete(labels = rev(daa_results_filtered_sub_df[, x_lab])) +
ggplot2::labs(x = "Relative Abundance", y = NULL) +
ggplot2::theme(
axis.ticks.y = ggplot2::element_blank(),
axis.line.y = ggplot2::element_blank(),
axis.line.x = ggplot2::element_line(linewidth = 0.5),
axis.ticks.x = ggplot2::element_line(linewidth = 0.5),
panel.grid.major.y = ggplot2::element_blank(),
panel.grid.major.x = ggplot2::element_blank(),
axis.text = ggplot2::element_text(size = 10, color = "black"),
axis.text.x = ggplot2::element_text(margin = ggplot2::margin(r = 0)),
axis.text.y = ggplot2::element_text(
size = pathway_names_final_text_size,
color = "black",
margin = ggplot2::margin(b = 6)
),
axis.title.x = ggplot2::element_text(
size = 10,
color = "black",
hjust = 0.5
),
legend.position = legend_position,
legend.key.size = ggplot2::unit(legend_key_size, "cm"),
legend.direction = legend_direction,
legend.justification = "center",
legend.text = ggplot2::element_text(size = legend_text_size, face = "bold"),
legend.box.just = "center",
legend.title = if (!is.null(legend_title)) {
ggplot2::element_text(size = legend_title_size, face = "bold")
} else {
ggplot2::element_blank()
},
plot.margin = ggplot2::margin(0, 0.5, 0.5, 0, unit = "cm")
) +
# Add guide specifications for multi-column/row layouts
{if (!is.null(legend_ncol) || !is.null(legend_nrow)) {
ggplot2::guides(fill = ggplot2::guide_legend(
ncol = legend_ncol,
nrow = legend_nrow,
byrow = TRUE
))
} else {
ggplot2::guides()
}} +
ggplot2::coord_cartesian(clip = "off") +
ggplot2::theme(
axis.text.x = ggplot2::element_text(angle = 45, hjust = 1),
plot.margin = ggplot2::unit(c(1, 1, 1, 1), "cm")
)
if (ko_to_kegg) {
# Convert table to matrix to preserve names as rownames
pathway_class_table <- table(daa_results_filtered_sub_df$pathway_class)
pathway_class_group_mat <- as.data.frame(as.matrix(pathway_class_table))
colnames(pathway_class_group_mat) <- "Freq"
# Create the pathway_class_group data frame
pathway_class_group <- data.frame(
. = unique(daa_results_filtered_sub_df$pathway_class),
Freq = pathway_class_group_mat[unique(daa_results_filtered_sub_df$pathway_class), "Freq"]
)
start <-
c(1, rev(pathway_class_group$Freq)[1:(length(pathway_class_group$Freq) - 1)]) %>%
cumsum()
end <- cumsum(rev(pathway_class_group$Freq))
ymin <- start - 1 / 2
ymax <- end + 1 / 2
nPoints <- length(start)
pCol <- pathway_class_colors[1:nPoints]
pFill <- pCol
for (i in 1:nPoints) {
bar_errorbar <- bar_errorbar +
ggplot2::annotation_custom(
grob = grid::rectGrob(
gp = grid::gpar(
col = pCol[i],
fill = pFill[i],
lty = NULL,
lwd = NULL,
alpha = 0.2
)
),
xmin = -2,
xmax = 0,
ymin = ymin[i],
ymax = ymax[i]
)
}
}
# Add necessary columns
if (any(daa_results_filtered_sub_df$p_adjust < 0, na.rm = TRUE)) {
stop("p_adjust values must be non-negative.", call. = FALSE)
}
daa_results_filtered_sub_df$negative_log10_p <-
-log10(pmax(daa_results_filtered_sub_df$p_adjust, .Machine$double.xmin))
# Compute the log2 fold change displayed in the side panel.
#
# When the DAA method already supplied a `log2_fold_change` column --
# DESeq2, edgeR, limma voom, LinDA, Maaslin2, metagenomeSeq, and
# ALDEx2 with `include_effect_size = TRUE` -- we MUST preserve it.
# The effect size printed as a bar here and the p_adjust driving the
# significance panel next to it are two outputs of the same model
# fit; replacing the bar with a relative-abundance mean ratio while
# the p-value still comes from the model produces a figure where the
# two panels tell different stories about the same feature. The
# previous implementation added the column defensively as NA only
# when missing, then unconditionally overwrote every row with a
# mean-ratio calculation -- so e.g. `log2_fold_change = 99` went in
# and a number computed from relative abundances came out, silently.
#
# The mean-ratio pathway is still the right fallback for methods
# that do not estimate effect sizes (ALDEx2 with
# `include_effect_size = FALSE`, Lefser, or user-supplied DAA
# results without a log2_fold_change column). We only run it in that
# case.
if (!"log2_fold_change" %in% colnames(daa_results_filtered_sub_df)) {
daa_results_filtered_sub_df$log2_fold_change <- NA_real_
# Calculate pseudocount once using all mean values for consistency
all_means <- error_bar_pivot_longer_tibble_summarised_ordered$mean
pseudocount <- calculate_pseudocount(all_means)
for (i in daa_results_filtered_sub_df$feature){
# Get mean values for this feature
feature_means <- error_bar_pivot_longer_tibble_summarised_ordered[error_bar_pivot_longer_tibble_summarised_ordered$name %in% i,]
# Get group1 and group2 names for this feature from DAA results
feature_row <- daa_results_filtered_sub_df[daa_results_filtered_sub_df$feature == i, ]
group1_name <- feature_row$group1[1]
group2_name <- feature_row$group2[1]
# Get mean values for each group in the correct order
mean_group1 <- feature_means[feature_means$group == group1_name, ]$mean
mean_group2 <- feature_means[feature_means$group == group2_name, ]$mean
# Calculate log2 fold change using unified function
if (length(mean_group1) > 0 && length(mean_group2) > 0) {
log2_fc <- calculate_log2_fold_change(mean_group1, mean_group2, pseudocount = pseudocount)
daa_results_filtered_sub_df[daa_results_filtered_sub_df$feature==i,]$log2_fold_change <- log2_fc
} else {
# Fallback using unified function with auto-calculated pseudocount
mean_vals <- feature_means$mean
if (length(mean_vals) >= 2) {
log2_fc <- calculate_log2_fold_change(mean_vals[1], mean_vals[2])
daa_results_filtered_sub_df[daa_results_filtered_sub_df$feature==i,]$log2_fold_change <- log2_fc
}
}
}
}
daa_results_filtered_sub_df$feature <- factor(daa_results_filtered_sub_df$feature,levels = rev(daa_results_filtered_sub_df$feature))
# This panel plots a single log2 fold change bar per feature. There is
# no grouping to map to `fill`, so the color is set directly on
# geom_bar() instead of routing through a one-level fill scale with a
# dummy constant column. The old code used `aes(fill = group_nonsense)`
# + `scale_fill_manual(values = log2_fold_change_color)` to accomplish
# the same visual result via an extra indirection.
p_values_bar <- daa_results_filtered_sub_df %>%
ggplot2::ggplot(ggplot2::aes(feature, log2_fold_change)) +
ggplot2::geom_bar(stat = "identity",
position = ggplot2::position_dodge(width = 0.8),
width = 0.8,
fill = log2_fold_change_color) +
ggplot2::labs(y = "log2 fold change", x = NULL) +
GGally::geom_stripped_cols() +
ggplot2::geom_hline(ggplot2::aes(yintercept = 0),
linetype = 'dashed',
color = 'black') +
ggprism::theme_prism(base_size = 12) +
ggplot2::scale_y_continuous(expand = c(0, 0)) +
ggplot2::theme(
axis.ticks.y = ggplot2::element_blank(),
axis.line.y = ggplot2::element_blank(),
axis.line.x = ggplot2::element_line(linewidth = 0.5),
axis.ticks.x = ggplot2::element_line(linewidth = 0.5),
panel.grid.major.y = ggplot2::element_blank(),
panel.grid.major.x = ggplot2::element_blank(),
axis.text = ggplot2::element_text(size = 10, color = "black"),
axis.text.y = ggplot2::element_blank(),
axis.text.x = ggplot2::element_text(
size = 10,
color = "black",
margin = ggplot2::margin(b = 6)
),
axis.title.x = ggplot2::element_text(
size = 11,
color = "black",
hjust = 0.5
),
legend.position = "none"
) +
ggplot2::coord_flip()
if (ko_to_kegg) {
# Calculate label y-position as the center of each rectangle
# The -0.5 offset was causing misalignment (see demos/alignment_debug_analysis.R)
pathway_class_y <- (ymax + ymin) / 2
# This side-panel draws pathway-class labels aligned to the main plot.
# All labels share the same x, so we set `x = ""` directly in aes()
# instead of padding the data frame with a constant dummy column.
pathway_class_plot_df <- data.frame(
pathway_class_y = as.numeric(pathway_class_y),
pathway_class = rev(unique(daa_results_filtered_sub_df$pathway_class)),
stringsAsFactors = FALSE
)
# Number of features for y-axis limits
n_features <- nrow(daa_results_filtered_sub_df)
pathway_class_annotation <-
pathway_class_plot_df %>% ggplot2::ggplot(ggplot2::aes(x = "", y = pathway_class_y)) + ggplot2::geom_text(
ggplot2::aes(label = pathway_class),
size = pathway_class_final_text_size,
color = pathway_class_final_text_color,
fontface = pathway_class_text_face,
family = "sans",
angle = pathway_class_text_angle,
hjust = if (pathway_class_position == "left") 1 else 0
) +
# Use scale_y_continuous to properly align with the discrete bar plot y-axis
# The bar plot's discrete y-axis maps factor levels to positions 1, 2, ..., n
# We need matching limits (0.5 to n+0.5) to align the annotation labels
ggplot2::scale_y_continuous(limits = c(0.5, n_features + 0.5), expand = c(0, 0)) +
ggprism::theme_prism(base_size = 12) +
ggplot2::theme(
axis.ticks = ggplot2::element_blank(),
axis.line = ggplot2::element_blank(),
panel.grid.major.y = ggplot2::element_blank(),
panel.grid.major.x = ggplot2::element_blank(),
panel.background = ggplot2::element_blank(),
axis.text = ggplot2::element_blank(),
# Dynamic margin based on position: more space on the side where text is
# Margin order is: top, right, bottom, left
plot.margin = if (pathway_class_position == "left") {
ggplot2::unit(c(0, 0.2, 0, 2.0), "cm") # Increased left margin to 2.0cm
} else {
ggplot2::unit(c(0, 2.0, 0, 0.2), "cm") # Increased right margin to 2.0cm
},
axis.title.y = ggplot2::element_blank(),
axis.title.x = ggplot2::element_blank(),
legend.position = "none"
) +
ggplot2::coord_cartesian(clip = "off")
}
# Enhanced p-value formatting using the new system
format_p_value <- function(p) {
if (pvalue_format != "numeric") {
format_pvalue_smart(p,
format = pvalue_format,
stars = pvalue_stars,
thresholds = pvalue_thresholds,
star_symbols = pvalue_star_symbols)
} else {
ifelse(p < 0.001, sprintf("%.1e", p), sprintf("%.3f", p))
}
}
# Calculate p-value colors if enabled
pvalue_text_colors <- if (pvalue_colors) {
get_significance_colors(daa_results_filtered_sub_df$p_adjust,
thresholds = pvalue_thresholds,
colors = c("#d73027", "#fc8d59", "#fee08b"),
default_color = "black")
} else {
rep("black", nrow(daa_results_filtered_sub_df))
}
# Calculate smart text size for p-values
pvalue_text_size <- if (pvalue_size == "auto") {
calculate_smart_text_size(nrow(daa_results_filtered_sub_df),
base_size = 2.5, min_size = 1.8, max_size = 3.0)
} else {
pvalue_size
}
daa_results_filtered_sub_df$unique <-
nrow(daa_results_filtered_sub_df) - seq_len(nrow(daa_results_filtered_sub_df)) + 1
# All p-value labels share a single x anchor (this panel is a vertical
# strip), so we set `x = ""` in aes() instead of adding a constant
# dummy column just to have a variable name to map.
p_annotation <- daa_results_filtered_sub_df %>%
ggplot2::ggplot(ggplot2::aes(x = "", y = p_adjust)) +
ggplot2::geom_text(
ggplot2::aes(x = "", y = unique,
label = format_p_value(p_adjust),
color = I(pvalue_text_colors)),
size = pvalue_text_size,
fontface = "bold",
family = "sans",
angle = pvalue_angle
) +
ggplot2::labs(y = "p-value (adjusted)") +
ggplot2::scale_y_discrete(position = "right") +
ggprism::theme_prism(base_size = 12) +
ggplot2::theme(
axis.ticks = ggplot2::element_blank(),
axis.line = ggplot2::element_blank(),
panel.grid.major.y = ggplot2::element_blank(),
panel.grid.major.x = ggplot2::element_blank(),
panel.background = ggplot2::element_blank(),
axis.text = ggplot2::element_blank(),
plot.margin = ggplot2::unit(c(0, 0.2, 0, 0), "cm"),
axis.title.y = ggplot2::element_text(
size = 11,
color = "black",
vjust = 0
),
axis.title.x = ggplot2::element_blank(),
legend.position = "none"
)
if (p_value_bar) {
if (ko_to_kegg) {
combination_bar_plot <-
pathway_class_annotation + bar_errorbar + p_values_bar + p_annotation + patchwork::plot_layout(ncol = 4, widths =
c(2.5, 2.0, 0.7, 0.3))
}
else{
combination_bar_plot <-
bar_errorbar + p_values_bar + p_annotation + patchwork::plot_layout(ncol = 3, widths = c(2.3, 0.7, 0.3))
}
}else{
if (ko_to_kegg) {
combination_bar_plot <-
pathway_class_annotation + bar_errorbar + p_annotation + patchwork::plot_layout(ncol = 3, widths =
c(2.5, 2.0, 0.3))
}
else{
combination_bar_plot <-
bar_errorbar + p_annotation + patchwork::plot_layout(ncol = 2, widths = c(2.5, 0.2))
}
}
return(combination_bar_plot)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.