R/pathway_errorbar.R

Defines functions pathway_errorbar

Documented in pathway_errorbar

#' The function pathway_errorbar() is used to visualize the results of functional pathway differential abundance analysis as error bar plots.
#'
#' @name pathway_errorbar
#' @param abundance A data frame with row names representing pathways and column names representing samples. Each element represents the relative abundance of the corresponding pathway in the corresponding sample.
#' @param daa_results_df A data frame containing the results of the differential abundance analysis of the pathways, generated by the pathway_daa function. x_lab should be a column name of daa_results_df.
#' @param Group A data frame or a vector that assigns each sample to a group. The groups are used to color the samples in the figure.
#' @param ko_to_kegg A logical parameter indicating whether there was a convertion that convert ko abundance to kegg abundance.
#' @param p_values_threshold A numeric parameter specifying the threshold for statistical significance of differential abundance. Pathways with p-values below this threshold will be considered significant.
#' @param order A parameter controlling the ordering of the rows in the figure. The options are: "p_values" (order by p-values), "name" (order by pathway name), "group" (order by the group with the highest mean relative abundance), or "pathway_class" (order by the pathway category).
#' @param select A vector of pathway names to be included in the figure. This can be used to limit the number of pathways displayed. If NULL, all pathways will be displayed.
#' @param p_value_bar A logical parameter indicating whether to display a bar showing the p-value threshold for significance. If TRUE, the bar will be displayed.
#' @param colors A vector of colors to be used to represent the groups in the figure. Each color corresponds to a group.
#' @param x_lab A character string to be used as the x-axis label in the figure. The default value is "description" for KOs'descriptions and "pathway_name" for KEGG pathway names.
#' @importFrom stats sd
#' @value A ggplot2 plot (`plot`)
#' The plot visualizes the differential abundance results of a specific differential abundance analysis method. The corresponding dataframe contains the results used to create the plot.
#' @return A figure showing the error bar plot of the differential abundance analysis results for the functional pathways.
#' @export
#'
#' @examples
#' \dontrun{
#' # Example 1: Analyzing KEGG pathway abundance
#' metadata <- read_delim(
#'   "path/to/your/metadata.txt",
#'   delim = "\t",
#'   escape_double = FALSE,
#'   trim_ws = TRUE
#' )
#'
#' # data(metadata)
#'
#' kegg_abundance <- ko2kegg_abundance(
#'   "path/to/your/pred_metagenome_unstrat.tsv"
#' )
#'
#' # data(kegg_abundance)
#'
#' # Please change group to "your_group_column" if you are not using example dataset
#' group <- "Environment"
#'
#' daa_results_df <- pathway_daa(
#'   abundance = kegg_abundance,
#'   metadata = metadata,
#'   group = group,
#'   daa_method = "ALDEx2",
#'   select = NULL,
#'   reference = NULL
#' )
#'
#' # Please check the unique(daa_results_df$method) and choose one
#' daa_sub_method_results_df <- daa_results_df[daa_results_df$method
#' == "ALDEx2_Welch's t test", ]
#'
#' daa_annotated_sub_method_results_df <- pathway_annotation(
#'   pathway = "KO",
#'   daa_results_df = daa_sub_method_results_df,
#'   ko_to_kegg = TRUE
#' )
#'
#' # Please change Group to metadata$your_group_column if you are not using example dataset
#' Group <- metadata$Environment
#'
#' p <- pathway_errorbar(
#'   abundance = kegg_abundance,
#'   daa_results_df = daa_annotated_sub_method_results_df,
#'   Group = Group,
#'   p_values_threshold = 0.05,
#'   order = "pathway_class",
#'   select = daa_annotated_sub_method_results_df %>%
#'   arrange(p_adjust) %>%
#'   slice(1:20) %>%
#'   select("feature") %>% pull(),
#'   ko_to_kegg = TRUE,
#'   p_value_bar = TRUE,
#'   colors = NULL,
#'   x_lab = "pathway_name"
#' )
#'
#' # Example 2: Analyzing EC, MetaCyc, KO without conversions
#' metadata <- read_delim(
#'   "path/to/your/metadata.txt",
#'   delim = "\t",
#'   escape_double = FALSE,
#'   trim_ws = TRUE
#' )
#' # data(metadata)
#'
#' ko_abundance <- read.delim("path/to/your/metacyc_abundance.tsv")
#'
#' # data(metacyc_abundance)
#'
#' group <- "Environment"
#'
#' daa_results_df <- pathway_daa(
#'   abundance = metacyc_abundance %>% column_to_rownames("pathway"),
#'   metadata = metadata,
#'   group = group,
#'   daa_method = "LinDA",
#'   select = NULL,
#'   reference = NULL
#' )
#'
#'
#' daa_annotated_results_df <- pathway_annotation(
#'   pathway = "MetaCyc",
#'   daa_results_df = daa_results_df,
#'   ko_to_kegg = FALSE
#' )
#'
#' Group <- metadata$Environment
#'
#' p <- pathway_errorbar(
#'   abundance = metacyc_abundance %>% column_to_rownames("pathway"),
#'   daa_results_df = daa_annotated_results_df,
#'   Group = Group,
#'   p_values_threshold = 0.05,
#'   order = "group",
#'   select = NULL,
#'   ko_to_kegg = FALSE,
#'   p_value_bar = TRUE,
#'   colors = NULL,
#'   x_lab = "description"
#' )
#' }
utils::globalVariables(c("group", "name", "value", "feature", "negative_log10_p", "group_nonsense", "nonsense", "pathway_class", "p_adjust", "log_2_fold_change", "transform_sample_counts", "column_to_rownames", "txtProgressBar", "setTxtProgressBar", "utils"))
pathway_errorbar <-
  function(abundance,
           daa_results_df,
           Group,
           ko_to_kegg = FALSE,
           p_values_threshold = 0.05,
           order = "group",
           select = NULL,
           p_value_bar = TRUE,
           colors = NULL,
           x_lab = NULL) {
    # Identify pathways with missing annotation
    missing_pathways <- daa_results_df[is.na(daa_results_df$pathway_name), "feature"]

    # Inform the user about the missing annotations
    if(length(missing_pathways) > 0) {
      message("The following pathways are missing annotations and have been excluded: ",
              paste(missing_pathways, collapse = ", "))
      message("You can use the 'pathway_annotation' function to add annotations for these pathways.")
    }

    # Get the names of all columns in the data frame
    column_names <- colnames(daa_results_df)

    # Check if there are more than two 'group' columns
    group_columns <- grepl("^group", column_names)
    if (sum(group_columns) > 2) {
      p_value_bar <- FALSE
      message(
        "There are more than two 'group' columns in the 'daa_results_df' data frame. As a result, it is not possible to compute the log2 fold values. The 'p_value_bar' has been automatically set to FALSE."
      )
    }

    # Exclude rows with missing pathway annotation
    daa_results_df <- daa_results_df[!is.na(daa_results_df[,x_lab]),]

    if (is.null(x_lab)){
      if (ko_to_kegg == TRUE){
        x_lab <- "pathway_name"
      }else{
        x_lab <- "description"
      }

      if (is.null(daa_results_df$pathway_name)&is.null(daa_results_df$description)){
        message(
          "Please utilize the 'pathway_annotation' function to annotate the 'daa_results_df' data frame."
        )
      }
    }

    if (!(x_lab %in% colnames(daa_results_df))){
      message(
        "The 'x_lab' you defined does not exist as a column in the 'daa_results_df' data frame."
      )
    }

    if (nlevels(factor(daa_results_df$method)) != 1) {
      message(
        "The 'method' column in the 'daa_results_df' data frame contains more than one method. Please filter it to contain only one method."
      )
    }

    if (nlevels(factor(daa_results_df$group1)) != 1 || nlevels(factor(daa_results_df$group2)) != 1) {
      message(
        "The 'group1' or 'group2' column in the 'daa_results_df' data frame contains more than one group. Please filter each to contain only one group."
      )
    }

    if (is.null(colors)) {
      colors <- c("#d93c3e", "#3685bc", "#6faa3e", "#e8a825", "#c973e6", "#ee6b3d", "#2db0a7", "#f25292")[1:nlevels(as.factor(Group))]
    }
    errorbar_abundance_mat <- as.matrix(abundance)
    daa_results_filtered_df <-
      daa_results_df[daa_results_df$p_adjust < p_values_threshold,]
    if (!is.null(select)) {
      daa_results_filtered_sub_df <-
        daa_results_filtered_df[daa_results_filtered_df$feature %in% select, ]
    } else {
      daa_results_filtered_sub_df <- daa_results_filtered_df
    }
    if (nrow(daa_results_filtered_sub_df) > 30) {
      message(
        paste0(
          "The number of features with statistical significance exceeds 30, leading to suboptimal visualization. ",
          "Please use 'select' to reduce the number of features.\n",
          "Currently, you have these features: ",
          paste(paste0('"', daa_results_filtered_sub_df$feature, '"'), collapse = ", "), ".\n",
          "You can find the statistically significant features with the following command:\n",
          "daa_results_df %>% filter(p_adjust < 0.05) %>% select(c(\"feature\",\"p_adjust\"))"
        )
      )
      stop()
    }
    if (nrow(daa_results_filtered_sub_df) == 0){
      stop(
        "Visualization with 'pathway_errorbar' cannot be performed because there are no features with statistical significance. ",
        "For possible solutions, please check the FAQ section of the tutorial."
      )
    }
    # Convert to relative abundance
    relative_abundance_mat <- apply(t(errorbar_abundance_mat), 1, function(x)
      x / sum(x))

    # Subset to only include the features present in daa_results_filtered_sub_df$feature
    sub_relative_abundance_mat <- relative_abundance_mat[rownames(relative_abundance_mat) %in% daa_results_filtered_sub_df$feature,]

    # Create a matrix for the error bars
    error_bar_matrix <- cbind(
      sample = colnames(sub_relative_abundance_mat),
      group = Group,
      t(sub_relative_abundance_mat)
    )
    error_bar_df <- as.data.frame(error_bar_matrix)
    error_bar_df$group <- factor(Group,levels = levels(as.factor(Group)))
      error_bar_pivot_longer_df <- tidyr::pivot_longer(error_bar_df,-c(sample, group))
    error_bar_pivot_longer_tibble <-
      mutate(error_bar_pivot_longer_df, group = as.factor(group))
    error_bar_pivot_longer_tibble$sample <-
      factor(error_bar_pivot_longer_tibble$sample)
    error_bar_pivot_longer_tibble$name <-
      factor(error_bar_pivot_longer_tibble$name)
    error_bar_pivot_longer_tibble$value <-
      as.numeric(error_bar_pivot_longer_tibble$value)
    error_bar_pivot_longer_tibble_summarised <-
      error_bar_pivot_longer_tibble %>% group_by(name, group) %>%
      summarise(mean = mean(value), sd = stats::sd(value))
    error_bar_pivot_longer_tibble_summarised <-
      error_bar_pivot_longer_tibble_summarised %>% mutate(group2 = "nonsense")
    switch(
      order,
      "p_values" = {
        order <- order(daa_results_filtered_sub_df$p_adjust)
      },
      "name" = {
        order <- order(daa_results_filtered_sub_df$feature)
      },
      "group" = {
        daa_results_filtered_sub_df$pro <- 1
        for (i in levels(error_bar_pivot_longer_tibble_summarised$name)) {
          error_bar_pivot_longer_tibble_summarised_sub <-
            error_bar_pivot_longer_tibble_summarised[error_bar_pivot_longer_tibble_summarised$name ==
                                                       i,]
          pro_group <-
            error_bar_pivot_longer_tibble_summarised_sub[error_bar_pivot_longer_tibble_summarised_sub$mean ==
                                                           max(error_bar_pivot_longer_tibble_summarised_sub$mean),]$group
          pro_group <- as.vector(pro_group)
          daa_results_filtered_sub_df[daa_results_filtered_sub_df$feature ==
                                        i,]$pro <- pro_group
        }
        order <-
          order(daa_results_filtered_sub_df$pro,
                daa_results_filtered_sub_df$p_adjust)
      },
      "pathway_class" = {
        if (!"pathway_class" %in% colnames(daa_results_filtered_sub_df)) {
          stop(
            "The 'pathway_class' column is missing in the 'daa_results_filtered_sub_df' data frame. ",
            "Please use the 'pathway_annotation' function to annotate the 'pathway_daa' results."
          )
        }
        order <- order(
          daa_results_filtered_sub_df$pathway_class,
          daa_results_filtered_sub_df$p_adjust
        )
      },
      {
        order <- order
      }
    )
    daa_results_filtered_sub_df <-
      daa_results_filtered_sub_df[order,]
    error_bar_pivot_longer_tibble_summarised_ordered <-
      data.frame(
        name = NULL,
        group = NULL,
        mean = NULL,
        sd = NULL
      )
    for (i in daa_results_filtered_sub_df$feature) {
      error_bar_pivot_longer_tibble_summarised_ordered <-
        rbind(
          error_bar_pivot_longer_tibble_summarised_ordered,
          error_bar_pivot_longer_tibble_summarised[error_bar_pivot_longer_tibble_summarised$name ==
                                                     i,]
        )
    }
    if (ko_to_kegg == FALSE){
      error_bar_pivot_longer_tibble_summarised_ordered[, x_lab] <-
        rep(daa_results_filtered_sub_df[, x_lab], each = length(levels(
          factor(error_bar_pivot_longer_tibble_summarised_ordered$group)
        )))
    }

    if (ko_to_kegg == TRUE) {
      error_bar_pivot_longer_tibble_summarised_ordered$pathway_class <-
        rep(daa_results_filtered_sub_df$pathway_class,
            each = length(levels(
              factor(error_bar_pivot_longer_tibble_summarised_ordered$group)
            )))
    }
    error_bar_pivot_longer_tibble_summarised_ordered$name <- factor(error_bar_pivot_longer_tibble_summarised_ordered$name, levels = rev(daa_results_filtered_sub_df$feature))

    bar_errorbar <-
      ggplot2::ggplot(error_bar_pivot_longer_tibble_summarised_ordered, # nolint: object_usage_linter.
             ggplot2::aes(mean, name, fill = group)) + # nolint
      ggplot2::geom_errorbar(
        ggplot2::aes(xmax = mean + sd, xmin = 0),
        position = ggplot2::position_dodge(width = 0.8),
        width = 0.5,
        size = 0.5,
        color = "black"
      ) +
      ggplot2::geom_bar(stat = "identity",
               position = ggplot2::position_dodge(width = 0.8),
               width = 0.8) +
      GGally::geom_stripped_cols(width = 10) +
      ggplot2::scale_fill_manual(values = colors) +
      ggplot2::scale_color_manual(values = colors) +
      ggprism::theme_prism() +
      ggplot2::scale_x_continuous(expand = c(0, 0),
                         guide = "prism_offset_minor",) +
      ggplot2::scale_y_discrete(labels = rev(daa_results_filtered_sub_df[, x_lab])) +
      ggplot2::labs(x = "Relative Abundance", y = NULL) +
      ggplot2::theme(
        axis.ticks.y = ggplot2::element_blank(),
        axis.line.y = ggplot2::element_blank(),
        axis.line.x = ggplot2::element_line(size = 0.5),
        axis.ticks.x = ggplot2::element_line(size = 0.5),
        panel.grid.major.y = ggplot2::element_blank(),
        panel.grid.major.x = ggplot2::element_blank(),
        axis.text = ggplot2::element_text(size = 10, color = "black"), # nolint
        axis.text.x = ggplot2::element_text(margin = ggplot2::margin(r = 0)), # nolint
        axis.text.y = ggplot2::element_text(
          size = 10,
          color = "black",
          margin = ggplot2::margin(b = 6)
        ),
        axis.title.x =  ggplot2::element_text(
          size = 10,
          color = "black",
          hjust = 0.5
        ),
        legend.position = "top",
        legend.key.size = ggplot2::unit(0.1, "cm"),
        legend.direction = "vertical",
        legend.justification = "left",
        legend.text = ggplot2::element_text(size = 8, face = "bold"),
        legend.box.just = "right",
        plot.margin = ggplot2::margin(0, 0.5, 0.5, 0, unit = "cm")
      ) + ggplot2::coord_cartesian(clip = "off")

    if (ko_to_kegg == TRUE) {
      pathway_class_group_mat <-
        daa_results_filtered_sub_df$pathway_class %>%
        table() %>%
        data.frame() %>% column_to_rownames(".")
      pathway_class_group <- data.frame(.= unique(daa_results_filtered_sub_df$pathway_class),Freq = pathway_class_group_mat[unique(daa_results_filtered_sub_df$pathway_class),])
      start <-
        c(1, rev(pathway_class_group$Freq)[1:(length(pathway_class_group$Freq) - 1)]) %>%
        cumsum()
      end <- cumsum(rev(pathway_class_group$Freq))
      ymin <- start - 1 / 2
      ymax <- end + 1 / 2
      nPoints <- length(start)
      pCol <- c("#D51F26","#272E6A","#208A42","#89288F","#F47D2B",
                 "#FEE500","#8A9FD1","#C06CAB","#E6C2DC","#90D5E4",
                 "#89C75F","#F37B7D","#9983BD","#D24B27","#3BBCA8",
                 "#6E4B9E","#0C727C", "#7E1416","#D8A767","#3D3D3D")[1:nPoints]
      pFill <- pCol
      for (i in 1:nPoints)  {
        bar_errorbar <- bar_errorbar +
          ggplot2::annotation_custom(
            grob = grid::rectGrob(
              gp = grid::gpar(
                col = pCol[i],
                fill = pFill[i],
                lty = NULL,
                lwd = NULL,
                alpha = 0.2
              )
            ),
            xmin = ggplot2::unit(-2, 'native'),
            xmax = ggplot2::unit(0, 'native'),
            ymin = ggplot2::unit(ymin[i], 'native'),
            ymax = ggplot2::unit(ymax[i], 'native')
          )
      }
    }
    daa_results_filtered_sub_df <-
      cbind(
        daa_results_filtered_sub_df,
        negative_log10_p = -log10(daa_results_filtered_sub_df$p_adjust),
        group_nonsense = "nonsense",
        log_2_fold_change = NA
      )

    for (i in daa_results_filtered_sub_df$feature){
      mean <- error_bar_pivot_longer_tibble_summarised_ordered[error_bar_pivot_longer_tibble_summarised_ordered$name %in% i,]$mean
      daa_results_filtered_sub_df[daa_results_filtered_sub_df$feature==i,]$log_2_fold_change <- log2(mean[1]/mean[2])
    }
    daa_results_filtered_sub_df$feature <- factor(daa_results_filtered_sub_df$feature,levels = rev(daa_results_filtered_sub_df$feature))
    p_values_bar <- daa_results_filtered_sub_df %>%
      ggplot2::ggplot(ggplot2::aes(feature, log_2_fold_change, fill = group_nonsense)) +
      ggplot2::geom_bar(stat = "identity",
               position = ggplot2::position_dodge(width = 0.8),
               width = 0.8) +
      ggplot2::labs(y = "log2 fold change", x = NULL) +
      GGally::geom_stripped_cols() +
      ggplot2::scale_fill_manual(values = "#87ceeb") +
      ggplot2::scale_color_manual(values = "#87ceeb") +
      ggplot2::geom_hline(ggplot2::aes(yintercept = 0),
                 linetype = 'dashed',
                 color = 'black') +
      ggprism::theme_prism() +
      ggplot2::scale_y_continuous(expand = c(0, 0),
                         guide = "prism_offset_minor") +
      ggplot2::theme(
        axis.ticks.y = ggplot2::element_blank(),
        axis.line.y = ggplot2::element_blank(),
        axis.line.x = ggplot2::element_line(size = 0.5),
        axis.ticks.x = ggplot2::element_line(size = 0.5),
        panel.grid.major.y = ggplot2::element_blank(),
        panel.grid.major.x = ggplot2::element_blank(),
        axis.text = ggplot2::element_text(size = 10, color = "black"),
        axis.text.y = ggplot2::element_blank(),
        axis.text.x = ggplot2::element_text(
          size = 10,
          color = "black",
          margin = ggplot2::margin(b = 6)
        ),
        axis.title.x =  ggplot2::element_text(
          size = 11,
          color = "black",
          hjust = 0.5
        ),
        legend.position = "non"
      ) +
      ggplot2::coord_flip()

    if (ko_to_kegg == TRUE) {
      pathway_class_y <- (ymax + ymin) / 2 - 0.5
      pathway_class_plot_df <-
        as.data.frame(
          cbind(
            nonsense = "nonsense",
            pathway_class_y = pathway_class_y,
            pathway_class = rev(unique(
              daa_results_filtered_sub_df$pathway_class
            ))
          )
        )
      pathway_class_plot_df$pathway_class_y <-
        as.numeric(pathway_class_plot_df$pathway_class_y)
      pathway_class_annotation <-
        pathway_class_plot_df %>% ggplot2::ggplot(ggplot2::aes(nonsense, pathway_class_y)) + ggplot2::geom_text(
          ggplot2::aes(nonsense, pathway_class_y, label = pathway_class),
          size = 3.5,
          color = "black",
          fontface = "bold",
          family = "sans"
        ) +
        ggplot2::scale_y_discrete(position = "right") +
        ggprism::theme_prism() +
        ggplot2::theme(
          axis.ticks = ggplot2::element_blank(),
          axis.line = ggplot2::element_blank(),
          panel.grid.major.y = ggplot2::element_blank(),
          panel.grid.major.x = ggplot2::element_blank(),
          panel.background = ggplot2::element_blank(),
          axis.text = ggplot2::element_blank(),
          plot.margin = ggplot2::unit(c(0, 0.2, 0, 0), "cm"),
          axis.title.y =  ggplot2::element_blank(),
          axis.title.x = ggplot2::element_blank(),
          legend.position = "non"
        )
    }
    daa_results_filtered_sub_df$p_adjust <-
      as.character(daa_results_filtered_sub_df$p_adjust)
    daa_results_filtered_sub_df$unique <-
      nrow(daa_results_filtered_sub_df) - seq_len(nrow(daa_results_filtered_sub_df)) + 1
    daa_results_filtered_sub_df$p_adjust <-
      substr(daa_results_filtered_sub_df$p_adjust, 1, 5)
    p_annotation <- daa_results_filtered_sub_df %>%
      ggplot2::ggplot(ggplot2::aes(group_nonsense, p_adjust)) +
      ggplot2::geom_text(
        ggplot2::aes(group_nonsense, unique, label = p_adjust),
        size = 3.5,
        color = "black",
        fontface = "bold",
        family = "sans"
      ) +
      ggplot2::labs(y = "p-value (adjusted)") +
      ggplot2::scale_y_discrete(position = "right") +
      ggprism::theme_prism() +
      ggplot2::theme(
        axis.ticks = ggplot2::element_blank(),
        axis.line = ggplot2::element_blank(),
        panel.grid.major.y = ggplot2::element_blank(),
        panel.grid.major.x = ggplot2::element_blank(),
        panel.background = ggplot2::element_blank(),
        axis.text = ggplot2::element_blank(),
        plot.margin = ggplot2::unit(c(0, 0.2, 0, 0), "cm"),
        axis.title.y =  ggplot2::element_text(
          size = 11,
          color = "black",
          vjust = 0
        ),
        axis.title.x = ggplot2::element_blank(),
        legend.position = "non"
      )
    if (p_value_bar == TRUE) {
      if (ko_to_kegg == TRUE) {
        combination_bar_plot <-
          pathway_class_annotation + bar_errorbar + p_values_bar + p_annotation + patchwork::plot_layout(ncol = 4, widths =
                                                                                                c(1, 1.2, 0.5, 0.1))
      }
      else{
        combination_bar_plot <-
          bar_errorbar + p_values_bar + p_annotation + patchwork::plot_layout(ncol = 3, widths = c(2.3, 0.7, 0.3))
      }
    }else{
      if (ko_to_kegg == TRUE) {
        combination_bar_plot <-
          pathway_class_annotation + bar_errorbar + p_annotation + patchwork::plot_layout(ncol = 3, widths =
                                                                                                           c(1, 1.2, 0.1))
      }
      else{
        combination_bar_plot <-
          bar_errorbar + p_annotation + patchwork::plot_layout(ncol = 2, widths = c(2.5,  0.2))
      }
    }
    return(combination_bar_plot)
  }

Try the ggpicrust2 package in your browser

Any scripts or data that you put into this service are public.

ggpicrust2 documentation built on Nov. 8, 2023, 5:08 p.m.