R/plot_stacked_bar.R

#' plot_stacked_bar
#'
#' This is a function for stacked barplot. PS: When faceting the stacked bar plot, use facet_wrap(
#' vars(column_name), scales = "free_x")
#'
#' @param phyloseq A phyloseq object contain otu table, taxonomy table, sample metadata and
#' phylogenetic tree. Set it to NULL if using an OTU table and metadata to draw the stacked barplot.
#'
#' @param feature The feature that will show as a colorbar under x axis. Default is NA.
#'
#' @param level A taxonomy level to plot stacked bar. Default is NA. Required when using phyloseq
#' object.
#'
#' @param legend_position Legend position. Default is "top". legend_position should be one of "none",
#' "left", "right", "bottom", "top".
#'
#' @param order A vector of arranged SampleID in wanted order. Default is NULL. If NULL,
#' plot_stacked_bar will automatically arrange samples by the most abundance taxonomy in decreasing
#' order.
#'
#' @param otu_table An OTU table. Taxa are colnames, SampleID are rownames (just like phyloseq). Set
#' it to NULL if using phyloseq.
#'
#' @param metadata A metadata for the OTU table. SampleID are rownames (just like phyloseq). Set it
#' to NULL if using phyloseq.
#'
#' @param relative_abundance Turn plot into relative abundance or not. Default is TRUE.
#'
#' @param colors A vector of colors. The number of colors should be larger than the number of level.
#' Default is NULL, if NULL, plot_stacked_bar will use distinctive_colors for the plot.
#'
#' @param collapse Default is 0.01, collapse taxonomy which abundance is lower than 1 percent of
#' total into "Others". Set it to FALSE if want to show all taxonomy levels.
#'
#' @param title Default is NA, value for ggplot2::labs(title).
#'
#' @param x_text_angle Default is 90, value for ggplot2::ggplot(themt(axis.text.x = element_text(
#' angle))).
#'
#' @export
#'
#' @examples
#' # Minimum usage
#' plot_stacked_bar(demo_phyloseq_object, level = "Genus")

plot_stacked_bar <- function (
  phyloseq = NULL,
  level = NA,
  feature = NA,
  otu_table = NULL,
  metadata = NULL,
  order = NULL,
  relative_abundance = TRUE,
  legend_position = "top",
  colors = NULL,
  collapse = 0.01,
  title = NA,
  x_text_angle = 90
) {
  # Detact variables
  if (is.null(phyloseq)) {
    if (is.null(otu_table)) {
      # If no phyloseq, require otu_table
      stop(paste0("Argument 'phyloseq' and 'otu_table' are both missing, please input a phyloseq ",
                  "object or an OTU table with metadata."))
    } else {
      if (is.null(metadata)) {
        # If have otu_table, require metadata
        stop(paste0("Argument 'metadata' is missing, please input a metadata for the OTU table."))
      }
    }
  } else {
    if (!is.null(otu_table)) {
      # If have phyloseq, require no otu_table
      stop(paste0("Argument 'phyloseq' and 'otu_table' are both detected, please input one of them",
                  ", not both."))
    } else if (is.na(level)) {
      # If have phyloseq, require level
      stop(paste0("Argument 'level' is missing. Plaese choose a taxonomy level to plot stacked ",
                  "bar."))
    }
  }
  # First construct otu then convert to percentage
  if (!is.null(phyloseq)) {
    otu <- construct_otu_table(phyloseq, level)
  } else {
    otu <- otu_table
  }
  if (relative_abundance) {
    otu <- otu %>% convert_to_percentage(row_sum = TRUE)
    # Construct table for stacked bar plot
    plot_tab <- otu %>%
      # First re-order taxonomy by total counts
      .[,order(colSums(.), decreasing = TRUE)] %>%
      # Then re-order sample by the most abundance taxonomy
      .[order(.[,1], decreasing = TRUE),] %>%
      # Turn SampleID to a column
      rownames_to_column(var = "SampleID")
    y_label <- "Relative Abundance"
  } else {
    plot_tab <- otu %>%
      # First re-order taxonomy by total counts
      .[,order(colSums(.), decreasing = TRUE)] %>%
      # Then re-order sample by total abundance
      .[order(rowSums(.), decreasing = TRUE),] %>%
      # Turn SampleID to a column
      rownames_to_column(var = "SampleID")
    y_label <- "Raw Count"
  }
  # Prepare levels for taxonomy levels (order within each bar)
  levels_level <- colnames(plot_tab)[2:ncol(plot_tab)]
  # Extract metadata
  if (is.null(otu_table)) {
    sample_feature <- extract_metadata_phyloseq(phyloseq)
  } else {
    sample_feature <- metadata %>% rownames_to_column("SampleID")
  }
  # Prepare levels for SampleID (order of x.axis)
  if (is.null(order)) {
    #if (!is.na(feature)) {
    #  # Save origin colnames
    #  original_colnames <- colnames(plot_tab)
    #  # Add a new column that is rowsum
    #  plot_tab$total <- plot_tab %>%
    #    column_to_rownames("SampleID") %>%
    #    rowSums()
    #  plot_tab <- plot_tab %>%
    #    # Join metadata
    #    left_join(sample_feature) %>%
    #    # Group by feature parameter
    #    group_by_(feature) %>% # group_by_() can pass variable to group_by()
    #    # Arrange rows by total abundance and by group
    #    arrange(desc(total), .by_group = TRUE) %>% # Set .by_group = TRUE to arrange by group
    #    select(original_colnames)
    #  levels_SampleID <- plot_tab$SampleID
    #} else {
      levels_SampleID <- plot_tab$SampleID
    #}
  } else if (length(order) != length(plot_tab$SampleID)) {
    stop("The length of order and SampleID does not match.")
  } else if (all(order %in% plot_tab$SampleID)) {
    if (all(plot_tab$SampleID %in% order)) {
      levels_SampleID <- order
    }
  } else {
    stop("Given order must contain all SampleID.")
  }
  # Join metadata
  plot_tab <- left_join(plot_tab, sample_feature)
  # Add levels to SampleID
  plot_tab$SampleID <- factor(plot_tab$SampleID, levels = levels_SampleID)
  ## Prepare levels for feature (colors of x.axis)
  ## 要想X轴或Y轴显示颜色,必须添加一组颜色的vector,顺序和X轴或Y轴顺序一致,vector内容是不同的level
  ## Add "theme(axis.text.x = element_text(color = levels_feature))" to ggplot.
  ## "color" parameter can only work with factor.
  #if (!is.na(feature)) {
  #  if (is.null(order)) {
  #    levels_feature <- plot_tab[[feature]] %>% as.factor()
  #  } else {
  #    levels_feature <- as.data.frame(order)
  #    colnames(levels_feature) <- "Sort"
  #    levels_feature <- levels_feature %>%
  #      # Arrange feature order by 'order' variable
  #      left_join(select(plot_tab, SampleID, !!feature), by = c("Sort" = "SampleID")) %>%
  #      # Drop levels
  #      .[[2]] %>% as.character() %>% as.factor()
  #  }
  #} else {
  #  levels_feature <- "black"
  #}
  # Turn plot_table to a long table for plotting
  plot_tab <- tidyr::gather(
    data = plot_tab, key = level, value = "abundance",
    all_of(levels_level) # A selection of columns
  )
  if (isFALSE(collapse)) {
    collapse <- FALSE
  } else {
    if (relative_abundance) {
      # If abundance is lower than 1%, set level to "Others"
      plot_tab$level <- ifelse(
        plot_tab$abundance < collapse, "Others", plot_tab$level
      )
    } else {
      # Calculate total abundance
      plot_tab <- plot_tab %>% group_by(SampleID) %>% dplyr::mutate(total = sum(abundance))
      # If abundance is lower than 1%, set level to "Others"
      plot_tab$level <- ifelse(
        plot_tab$abundance / plot_tab$total < collapse, "Others", plot_tab$level
      )
    }
    # Make level for taxonomy levels
    levels_level <- plot_tab %>% .$level %>% unique()
    levels_level <- levels_level[-c(which(levels_level=="Others"))]
    levels_level <- c(levels_level, "Others")
    distinctive_colors <- distinctive_colors[1:length(levels_level) - 1]
    distinctive_colors <- c(distinctive_colors, "grey")
  }
  # Add levels to taxonomy levels
  plot_tab$level <- factor(plot_tab$level, levels = levels_level)
  # Bar plot
  p1 <- ggplot(plot_tab, aes(x = SampleID, y = abundance)) +
    geom_bar(mapping = aes(fill = level), stat = "identity") +
    theme_classic() +
    theme(
      axis.text.x = element_text(size = 14, angle = x_text_angle),
      axis.text.y = element_text(size = 14),
      axis.title = element_text(size = 16),
      legend.text = element_text(size = 12),
      legend.position = legend_position,
      legend.title = element_blank()
    ) +
    ylab(y_label)
  if (!is.na(title)) {
    p1 <- p1 + ggtitle(title)
  }
  if (is.null(colors)) {
    p1 <- p1 + scale_fill_manual(values = distinctive_colors)
  } else if (length(colors) < length(levels_level)) {
    stop(paste0("The number of colors is smaller than the number of ", level, "."))
  } else {
    p1 <- p1 + scale_fill_manual(values = colors)
  }
  if (is.na(feature)) {
    p1
  } else {
    # If feature is given, make abundance plot (p1) & feature colorbar (p2) and align them
    p1 <- p1 +
      theme(
        #panel.grid = element_blank(), # Remove grid line under the plot
        #panel.border = element_blank(), # Remove border
        axis.title.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.text.x = element_blank(),
        axis.text.y = element_text(size = 14),
        axis.title = element_text(size = 16),
        legend.text = element_text(size = 12),
        legend.position = legend_position,
        legend.title = element_blank()
      )
    x = "SampleID"
    y = 1
    fill = feature
    p2 <- ggplot(data = plot_tab, aes_string(x = x, y = y)) +
      geom_tile(aes_string(fill = fill)) +
      theme_minimal() +
      theme(
        panel.grid = element_blank(),
        axis.ticks.x = element_blank(),
        axis.title.x = element_blank(),
        #axis.text.x = element_blank(),
        axis.text.x = element_text(size = 14, angle = x_text_angle),
        axis.title.y=element_blank(),
        axis.text.y=element_blank(),
        axis.ticks.y=element_blank(),
        legend.text = element_text(size = 12),
        legend.position = "bottom"
      ) +
      # p2一定要用完整的distinctive_colors,因为p1的distinctive_colors可能会进行subset,
      # 导致用在p2时color的数量不足。
      scale_fill_manual(values = xviz::distinctive_colors[4:length(xviz::distinctive_colors)])
    p12 <- ggpubr::ggarrange(p1, p2, nrow = 2, heights = c(8, 1.5), align = "v")
    p12
  }
}
yeguanhuav/visual16S documentation built on Feb. 19, 2022, 10:32 a.m.