Nothing
#' The function pathway_errorbar() is used to visualize the results of functional pathway differential abundance analysis as error bar plots.
#'
#' @name pathway_errorbar
#' @param abundance A data frame with row names representing pathways and column names representing samples. Each element represents the relative abundance of the corresponding pathway in the corresponding sample.
#' @param daa_results_df A data frame containing the results of the differential abundance analysis of the pathways, generated by the pathway_daa function. x_lab should be a column name of daa_results_df.
#' @param Group A data frame or a vector that assigns each sample to a group. The groups are used to color the samples in the figure.
#' @param ko_to_kegg A logical parameter indicating whether there was a convertion that convert ko abundance to kegg abundance.
#' @param p_values_threshold A numeric parameter specifying the threshold for statistical significance of differential abundance. Pathways with p-values below this threshold will be considered significant.
#' @param order A parameter controlling the ordering of the rows in the figure. The options are: "p_values" (order by p-values), "name" (order by pathway name), "group" (order by the group with the highest mean relative abundance), or "pathway_class" (order by the pathway category).
#' @param select A vector of pathway names to be included in the figure. This can be used to limit the number of pathways displayed. If NULL, all pathways will be displayed.
#' @param p_value_bar A logical parameter indicating whether to display a bar showing the p-value threshold for significance. If TRUE, the bar will be displayed.
#' @param colors A vector of colors to be used to represent the groups in the figure. Each color corresponds to a group.
#' @param x_lab A character string to be used as the x-axis label in the figure. The default value is "description" for KOs'descriptions and "pathway_name" for KEGG pathway names.
#' @importFrom stats sd
#' @value A ggplot2 plot (`plot`)
#' The plot visualizes the differential abundance results of a specific differential abundance analysis method. The corresponding dataframe contains the results used to create the plot.
#' @return A figure showing the error bar plot of the differential abundance analysis results for the functional pathways.
#' @export
#'
#' @examples
#' \dontrun{
#' # Example 1: Analyzing KEGG pathway abundance
#' metadata <- read_delim(
#' "path/to/your/metadata.txt",
#' delim = "\t",
#' escape_double = FALSE,
#' trim_ws = TRUE
#' )
#'
#' # data(metadata)
#'
#' kegg_abundance <- ko2kegg_abundance(
#' "path/to/your/pred_metagenome_unstrat.tsv"
#' )
#'
#' # data(kegg_abundance)
#'
#' # Please change group to "your_group_column" if you are not using example dataset
#' group <- "Environment"
#'
#' daa_results_df <- pathway_daa(
#' abundance = kegg_abundance,
#' metadata = metadata,
#' group = group,
#' daa_method = "ALDEx2",
#' select = NULL,
#' reference = NULL
#' )
#'
#' # Please check the unique(daa_results_df$method) and choose one
#' daa_sub_method_results_df <- daa_results_df[daa_results_df$method
#' == "ALDEx2_Welch's t test", ]
#'
#' daa_annotated_sub_method_results_df <- pathway_annotation(
#' pathway = "KO",
#' daa_results_df = daa_sub_method_results_df,
#' ko_to_kegg = TRUE
#' )
#'
#' # Please change Group to metadata$your_group_column if you are not using example dataset
#' Group <- metadata$Environment
#'
#' p <- pathway_errorbar(
#' abundance = kegg_abundance,
#' daa_results_df = daa_annotated_sub_method_results_df,
#' Group = Group,
#' p_values_threshold = 0.05,
#' order = "pathway_class",
#' select = daa_annotated_sub_method_results_df %>%
#' arrange(p_adjust) %>%
#' slice(1:20) %>%
#' select("feature") %>% pull(),
#' ko_to_kegg = TRUE,
#' p_value_bar = TRUE,
#' colors = NULL,
#' x_lab = "pathway_name"
#' )
#'
#' # Example 2: Analyzing EC, MetaCyc, KO without conversions
#' metadata <- read_delim(
#' "path/to/your/metadata.txt",
#' delim = "\t",
#' escape_double = FALSE,
#' trim_ws = TRUE
#' )
#' # data(metadata)
#'
#' ko_abundance <- read.delim("path/to/your/metacyc_abundance.tsv")
#'
#' # data(metacyc_abundance)
#'
#' group <- "Environment"
#'
#' daa_results_df <- pathway_daa(
#' abundance = metacyc_abundance %>% column_to_rownames("pathway"),
#' metadata = metadata,
#' group = group,
#' daa_method = "LinDA",
#' select = NULL,
#' reference = NULL
#' )
#'
#'
#' daa_annotated_results_df <- pathway_annotation(
#' pathway = "MetaCyc",
#' daa_results_df = daa_results_df,
#' ko_to_kegg = FALSE
#' )
#'
#' Group <- metadata$Environment
#'
#' p <- pathway_errorbar(
#' abundance = metacyc_abundance %>% column_to_rownames("pathway"),
#' daa_results_df = daa_annotated_results_df,
#' Group = Group,
#' p_values_threshold = 0.05,
#' order = "group",
#' select = NULL,
#' ko_to_kegg = FALSE,
#' p_value_bar = TRUE,
#' colors = NULL,
#' x_lab = "description"
#' )
#' }
utils::globalVariables(c("group", "name", "value", "feature", "negative_log10_p", "group_nonsense", "nonsense", "pathway_class", "p_adjust", "log_2_fold_change", "transform_sample_counts", "column_to_rownames", "txtProgressBar", "setTxtProgressBar", "utils"))
pathway_errorbar <-
function(abundance,
daa_results_df,
Group,
ko_to_kegg = FALSE,
p_values_threshold = 0.05,
order = "group",
select = NULL,
p_value_bar = TRUE,
colors = NULL,
x_lab = NULL) {
# Identify pathways with missing annotation
missing_pathways <- daa_results_df[is.na(daa_results_df$pathway_name), "feature"]
# Inform the user about the missing annotations
if(length(missing_pathways) > 0) {
message("The following pathways are missing annotations and have been excluded: ",
paste(missing_pathways, collapse = ", "))
message("You can use the 'pathway_annotation' function to add annotations for these pathways.")
}
# Get the names of all columns in the data frame
column_names <- colnames(daa_results_df)
# Check if there are more than two 'group' columns
group_columns <- grepl("^group", column_names)
if (sum(group_columns) > 2) {
p_value_bar <- FALSE
message(
"There are more than two 'group' columns in the 'daa_results_df' data frame. As a result, it is not possible to compute the log2 fold values. The 'p_value_bar' has been automatically set to FALSE."
)
}
# Exclude rows with missing pathway annotation
daa_results_df <- daa_results_df[!is.na(daa_results_df[,x_lab]),]
if (is.null(x_lab)){
if (ko_to_kegg == TRUE){
x_lab <- "pathway_name"
}else{
x_lab <- "description"
}
if (is.null(daa_results_df$pathway_name)&is.null(daa_results_df$description)){
message(
"Please utilize the 'pathway_annotation' function to annotate the 'daa_results_df' data frame."
)
}
}
if (!(x_lab %in% colnames(daa_results_df))){
message(
"The 'x_lab' you defined does not exist as a column in the 'daa_results_df' data frame."
)
}
if (nlevels(factor(daa_results_df$method)) != 1) {
message(
"The 'method' column in the 'daa_results_df' data frame contains more than one method. Please filter it to contain only one method."
)
}
if (nlevels(factor(daa_results_df$group1)) != 1 || nlevels(factor(daa_results_df$group2)) != 1) {
message(
"The 'group1' or 'group2' column in the 'daa_results_df' data frame contains more than one group. Please filter each to contain only one group."
)
}
if (is.null(colors)) {
colors <- c("#d93c3e", "#3685bc", "#6faa3e", "#e8a825", "#c973e6", "#ee6b3d", "#2db0a7", "#f25292")[1:nlevels(as.factor(Group))]
}
errorbar_abundance_mat <- as.matrix(abundance)
daa_results_filtered_df <-
daa_results_df[daa_results_df$p_adjust < p_values_threshold,]
if (!is.null(select)) {
daa_results_filtered_sub_df <-
daa_results_filtered_df[daa_results_filtered_df$feature %in% select, ]
} else {
daa_results_filtered_sub_df <- daa_results_filtered_df
}
if (nrow(daa_results_filtered_sub_df) > 30) {
message(
paste0(
"The number of features with statistical significance exceeds 30, leading to suboptimal visualization. ",
"Please use 'select' to reduce the number of features.\n",
"Currently, you have these features: ",
paste(paste0('"', daa_results_filtered_sub_df$feature, '"'), collapse = ", "), ".\n",
"You can find the statistically significant features with the following command:\n",
"daa_results_df %>% filter(p_adjust < 0.05) %>% select(c(\"feature\",\"p_adjust\"))"
)
)
stop()
}
if (nrow(daa_results_filtered_sub_df) == 0){
stop(
"Visualization with 'pathway_errorbar' cannot be performed because there are no features with statistical significance. ",
"For possible solutions, please check the FAQ section of the tutorial."
)
}
# Convert to relative abundance
relative_abundance_mat <- apply(t(errorbar_abundance_mat), 1, function(x)
x / sum(x))
# Subset to only include the features present in daa_results_filtered_sub_df$feature
sub_relative_abundance_mat <- relative_abundance_mat[rownames(relative_abundance_mat) %in% daa_results_filtered_sub_df$feature,]
# Create a matrix for the error bars
error_bar_matrix <- cbind(
sample = colnames(sub_relative_abundance_mat),
group = Group,
t(sub_relative_abundance_mat)
)
error_bar_df <- as.data.frame(error_bar_matrix)
error_bar_df$group <- factor(Group,levels = levels(as.factor(Group)))
error_bar_pivot_longer_df <- tidyr::pivot_longer(error_bar_df,-c(sample, group))
error_bar_pivot_longer_tibble <-
mutate(error_bar_pivot_longer_df, group = as.factor(group))
error_bar_pivot_longer_tibble$sample <-
factor(error_bar_pivot_longer_tibble$sample)
error_bar_pivot_longer_tibble$name <-
factor(error_bar_pivot_longer_tibble$name)
error_bar_pivot_longer_tibble$value <-
as.numeric(error_bar_pivot_longer_tibble$value)
error_bar_pivot_longer_tibble_summarised <-
error_bar_pivot_longer_tibble %>% group_by(name, group) %>%
summarise(mean = mean(value), sd = stats::sd(value))
error_bar_pivot_longer_tibble_summarised <-
error_bar_pivot_longer_tibble_summarised %>% mutate(group2 = "nonsense")
switch(
order,
"p_values" = {
order <- order(daa_results_filtered_sub_df$p_adjust)
},
"name" = {
order <- order(daa_results_filtered_sub_df$feature)
},
"group" = {
daa_results_filtered_sub_df$pro <- 1
for (i in levels(error_bar_pivot_longer_tibble_summarised$name)) {
error_bar_pivot_longer_tibble_summarised_sub <-
error_bar_pivot_longer_tibble_summarised[error_bar_pivot_longer_tibble_summarised$name ==
i,]
pro_group <-
error_bar_pivot_longer_tibble_summarised_sub[error_bar_pivot_longer_tibble_summarised_sub$mean ==
max(error_bar_pivot_longer_tibble_summarised_sub$mean),]$group
pro_group <- as.vector(pro_group)
daa_results_filtered_sub_df[daa_results_filtered_sub_df$feature ==
i,]$pro <- pro_group
}
order <-
order(daa_results_filtered_sub_df$pro,
daa_results_filtered_sub_df$p_adjust)
},
"pathway_class" = {
if (!"pathway_class" %in% colnames(daa_results_filtered_sub_df)) {
stop(
"The 'pathway_class' column is missing in the 'daa_results_filtered_sub_df' data frame. ",
"Please use the 'pathway_annotation' function to annotate the 'pathway_daa' results."
)
}
order <- order(
daa_results_filtered_sub_df$pathway_class,
daa_results_filtered_sub_df$p_adjust
)
},
{
order <- order
}
)
daa_results_filtered_sub_df <-
daa_results_filtered_sub_df[order,]
error_bar_pivot_longer_tibble_summarised_ordered <-
data.frame(
name = NULL,
group = NULL,
mean = NULL,
sd = NULL
)
for (i in daa_results_filtered_sub_df$feature) {
error_bar_pivot_longer_tibble_summarised_ordered <-
rbind(
error_bar_pivot_longer_tibble_summarised_ordered,
error_bar_pivot_longer_tibble_summarised[error_bar_pivot_longer_tibble_summarised$name ==
i,]
)
}
if (ko_to_kegg == FALSE){
error_bar_pivot_longer_tibble_summarised_ordered[, x_lab] <-
rep(daa_results_filtered_sub_df[, x_lab], each = length(levels(
factor(error_bar_pivot_longer_tibble_summarised_ordered$group)
)))
}
if (ko_to_kegg == TRUE) {
error_bar_pivot_longer_tibble_summarised_ordered$pathway_class <-
rep(daa_results_filtered_sub_df$pathway_class,
each = length(levels(
factor(error_bar_pivot_longer_tibble_summarised_ordered$group)
)))
}
error_bar_pivot_longer_tibble_summarised_ordered$name <- factor(error_bar_pivot_longer_tibble_summarised_ordered$name, levels = rev(daa_results_filtered_sub_df$feature))
bar_errorbar <-
ggplot2::ggplot(error_bar_pivot_longer_tibble_summarised_ordered, # nolint: object_usage_linter.
ggplot2::aes(mean, name, fill = group)) + # nolint
ggplot2::geom_errorbar(
ggplot2::aes(xmax = mean + sd, xmin = 0),
position = ggplot2::position_dodge(width = 0.8),
width = 0.5,
size = 0.5,
color = "black"
) +
ggplot2::geom_bar(stat = "identity",
position = ggplot2::position_dodge(width = 0.8),
width = 0.8) +
GGally::geom_stripped_cols(width = 10) +
ggplot2::scale_fill_manual(values = colors) +
ggplot2::scale_color_manual(values = colors) +
ggprism::theme_prism() +
ggplot2::scale_x_continuous(expand = c(0, 0),
guide = "prism_offset_minor",) +
ggplot2::scale_y_discrete(labels = rev(daa_results_filtered_sub_df[, x_lab])) +
ggplot2::labs(x = "Relative Abundance", y = NULL) +
ggplot2::theme(
axis.ticks.y = ggplot2::element_blank(),
axis.line.y = ggplot2::element_blank(),
axis.line.x = ggplot2::element_line(size = 0.5),
axis.ticks.x = ggplot2::element_line(size = 0.5),
panel.grid.major.y = ggplot2::element_blank(),
panel.grid.major.x = ggplot2::element_blank(),
axis.text = ggplot2::element_text(size = 10, color = "black"), # nolint
axis.text.x = ggplot2::element_text(margin = ggplot2::margin(r = 0)), # nolint
axis.text.y = ggplot2::element_text(
size = 10,
color = "black",
margin = ggplot2::margin(b = 6)
),
axis.title.x = ggplot2::element_text(
size = 10,
color = "black",
hjust = 0.5
),
legend.position = "top",
legend.key.size = ggplot2::unit(0.1, "cm"),
legend.direction = "vertical",
legend.justification = "left",
legend.text = ggplot2::element_text(size = 8, face = "bold"),
legend.box.just = "right",
plot.margin = ggplot2::margin(0, 0.5, 0.5, 0, unit = "cm")
) + ggplot2::coord_cartesian(clip = "off")
if (ko_to_kegg == TRUE) {
pathway_class_group_mat <-
daa_results_filtered_sub_df$pathway_class %>%
table() %>%
data.frame() %>% column_to_rownames(".")
pathway_class_group <- data.frame(.= unique(daa_results_filtered_sub_df$pathway_class),Freq = pathway_class_group_mat[unique(daa_results_filtered_sub_df$pathway_class),])
start <-
c(1, rev(pathway_class_group$Freq)[1:(length(pathway_class_group$Freq) - 1)]) %>%
cumsum()
end <- cumsum(rev(pathway_class_group$Freq))
ymin <- start - 1 / 2
ymax <- end + 1 / 2
nPoints <- length(start)
pCol <- c("#D51F26","#272E6A","#208A42","#89288F","#F47D2B",
"#FEE500","#8A9FD1","#C06CAB","#E6C2DC","#90D5E4",
"#89C75F","#F37B7D","#9983BD","#D24B27","#3BBCA8",
"#6E4B9E","#0C727C", "#7E1416","#D8A767","#3D3D3D")[1:nPoints]
pFill <- pCol
for (i in 1:nPoints) {
bar_errorbar <- bar_errorbar +
ggplot2::annotation_custom(
grob = grid::rectGrob(
gp = grid::gpar(
col = pCol[i],
fill = pFill[i],
lty = NULL,
lwd = NULL,
alpha = 0.2
)
),
xmin = ggplot2::unit(-2, 'native'),
xmax = ggplot2::unit(0, 'native'),
ymin = ggplot2::unit(ymin[i], 'native'),
ymax = ggplot2::unit(ymax[i], 'native')
)
}
}
daa_results_filtered_sub_df <-
cbind(
daa_results_filtered_sub_df,
negative_log10_p = -log10(daa_results_filtered_sub_df$p_adjust),
group_nonsense = "nonsense",
log_2_fold_change = NA
)
for (i in daa_results_filtered_sub_df$feature){
mean <- error_bar_pivot_longer_tibble_summarised_ordered[error_bar_pivot_longer_tibble_summarised_ordered$name %in% i,]$mean
daa_results_filtered_sub_df[daa_results_filtered_sub_df$feature==i,]$log_2_fold_change <- log2(mean[1]/mean[2])
}
daa_results_filtered_sub_df$feature <- factor(daa_results_filtered_sub_df$feature,levels = rev(daa_results_filtered_sub_df$feature))
p_values_bar <- daa_results_filtered_sub_df %>%
ggplot2::ggplot(ggplot2::aes(feature, log_2_fold_change, fill = group_nonsense)) +
ggplot2::geom_bar(stat = "identity",
position = ggplot2::position_dodge(width = 0.8),
width = 0.8) +
ggplot2::labs(y = "log2 fold change", x = NULL) +
GGally::geom_stripped_cols() +
ggplot2::scale_fill_manual(values = "#87ceeb") +
ggplot2::scale_color_manual(values = "#87ceeb") +
ggplot2::geom_hline(ggplot2::aes(yintercept = 0),
linetype = 'dashed',
color = 'black') +
ggprism::theme_prism() +
ggplot2::scale_y_continuous(expand = c(0, 0),
guide = "prism_offset_minor") +
ggplot2::theme(
axis.ticks.y = ggplot2::element_blank(),
axis.line.y = ggplot2::element_blank(),
axis.line.x = ggplot2::element_line(size = 0.5),
axis.ticks.x = ggplot2::element_line(size = 0.5),
panel.grid.major.y = ggplot2::element_blank(),
panel.grid.major.x = ggplot2::element_blank(),
axis.text = ggplot2::element_text(size = 10, color = "black"),
axis.text.y = ggplot2::element_blank(),
axis.text.x = ggplot2::element_text(
size = 10,
color = "black",
margin = ggplot2::margin(b = 6)
),
axis.title.x = ggplot2::element_text(
size = 11,
color = "black",
hjust = 0.5
),
legend.position = "non"
) +
ggplot2::coord_flip()
if (ko_to_kegg == TRUE) {
pathway_class_y <- (ymax + ymin) / 2 - 0.5
pathway_class_plot_df <-
as.data.frame(
cbind(
nonsense = "nonsense",
pathway_class_y = pathway_class_y,
pathway_class = rev(unique(
daa_results_filtered_sub_df$pathway_class
))
)
)
pathway_class_plot_df$pathway_class_y <-
as.numeric(pathway_class_plot_df$pathway_class_y)
pathway_class_annotation <-
pathway_class_plot_df %>% ggplot2::ggplot(ggplot2::aes(nonsense, pathway_class_y)) + ggplot2::geom_text(
ggplot2::aes(nonsense, pathway_class_y, label = pathway_class),
size = 3.5,
color = "black",
fontface = "bold",
family = "sans"
) +
ggplot2::scale_y_discrete(position = "right") +
ggprism::theme_prism() +
ggplot2::theme(
axis.ticks = ggplot2::element_blank(),
axis.line = ggplot2::element_blank(),
panel.grid.major.y = ggplot2::element_blank(),
panel.grid.major.x = ggplot2::element_blank(),
panel.background = ggplot2::element_blank(),
axis.text = ggplot2::element_blank(),
plot.margin = ggplot2::unit(c(0, 0.2, 0, 0), "cm"),
axis.title.y = ggplot2::element_blank(),
axis.title.x = ggplot2::element_blank(),
legend.position = "non"
)
}
daa_results_filtered_sub_df$p_adjust <-
as.character(daa_results_filtered_sub_df$p_adjust)
daa_results_filtered_sub_df$unique <-
nrow(daa_results_filtered_sub_df) - seq_len(nrow(daa_results_filtered_sub_df)) + 1
daa_results_filtered_sub_df$p_adjust <-
substr(daa_results_filtered_sub_df$p_adjust, 1, 5)
p_annotation <- daa_results_filtered_sub_df %>%
ggplot2::ggplot(ggplot2::aes(group_nonsense, p_adjust)) +
ggplot2::geom_text(
ggplot2::aes(group_nonsense, unique, label = p_adjust),
size = 3.5,
color = "black",
fontface = "bold",
family = "sans"
) +
ggplot2::labs(y = "p-value (adjusted)") +
ggplot2::scale_y_discrete(position = "right") +
ggprism::theme_prism() +
ggplot2::theme(
axis.ticks = ggplot2::element_blank(),
axis.line = ggplot2::element_blank(),
panel.grid.major.y = ggplot2::element_blank(),
panel.grid.major.x = ggplot2::element_blank(),
panel.background = ggplot2::element_blank(),
axis.text = ggplot2::element_blank(),
plot.margin = ggplot2::unit(c(0, 0.2, 0, 0), "cm"),
axis.title.y = ggplot2::element_text(
size = 11,
color = "black",
vjust = 0
),
axis.title.x = ggplot2::element_blank(),
legend.position = "non"
)
if (p_value_bar == TRUE) {
if (ko_to_kegg == TRUE) {
combination_bar_plot <-
pathway_class_annotation + bar_errorbar + p_values_bar + p_annotation + patchwork::plot_layout(ncol = 4, widths =
c(1, 1.2, 0.5, 0.1))
}
else{
combination_bar_plot <-
bar_errorbar + p_values_bar + p_annotation + patchwork::plot_layout(ncol = 3, widths = c(2.3, 0.7, 0.3))
}
}else{
if (ko_to_kegg == TRUE) {
combination_bar_plot <-
pathway_class_annotation + bar_errorbar + p_annotation + patchwork::plot_layout(ncol = 3, widths =
c(1, 1.2, 0.1))
}
else{
combination_bar_plot <-
bar_errorbar + p_annotation + patchwork::plot_layout(ncol = 2, widths = c(2.5, 0.2))
}
}
return(combination_bar_plot)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.