R/plot_alpha_diversity.R

Defines functions plot_alpha_diversity

Documented in plot_alpha_diversity

#' plot_alpha_diversity
#'
#' This is a function for plotting alpha diversity. Alpha diversity can only accept intergers (
#' counts) OTU table!
#'
#' @param phyloseq A phyloseq object contain OTU table, taxonomy table, sample metadata and
#' phylogenetic tree, or a phyloseq object only contain constructed OTU table and metadata. OTU
#' table of the phyloseq can only be intergers (counts) !
#'
#' @param feature The column name of the feature you want to select from metadata.
#'
#' @param feature2 The column name of another feature you want to select from metadata. Default is
#' NA.
#'
#' @param level Which taxonomy level will be used to calculate alpha diversity. Default is NA. If NA,
#' the original OTU table in phyloseq will be used. If level is given, first an OTU table with
#' taxonomy will be constructed using construct_otu_table, then the OTU table will be used to
#' calculate alpha diversity through phyloseq::plot_richness(). The level name should be one of
#' "Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species".
#'
#' @param measures The measures to calculate alpha diversity. Default is NA. If NA, all available
#' alpha diversity measures will be calculated and generate a table. If not NA, measures should be
#' one of c("Observed", "Chao1", "ACE", "Shannon", "Simpson", "InvSimpson", "Fisher"). Note:
#' "Observed", "Chao1", "ACE", "Fisher" these 4 measures can accepts only integers.
#'
#' @param p_test The p-value to test alpha diversity. Default is FALSE, will not calculate p-value.
#' p_test should be either "wilcox" or "kruskal". PS: "wilcox" can only work with two groups.
#'
#' @param colors A vector of colors. The number of colors should be larger than the number of
#' feature. Default is NULL, if NULL, plot_alpha_diversity will use ggsci::jco theme for the plot.
#'
#' @param dotsize See geom_dotplot(dotsize). Default is 0.3.
#'
#' @param print_to_screen Default is FALSE. If TRUE, print alpha-diversity table to the screen.
#'
#' @param label Default is FALSE. If TRUE, add sample name labels to plot.
#'
#' @export
#'
#' @examples
#' plot_alpha_diversity(demo_phyloseq_object, feature = "diagnosis", measures = "Chao1",
#' p_test = "kruskal")

plot_alpha_diversity <- function(
  phyloseq,
  feature,
  feature2 = NA,
  level = NA,
  measures = NA,
  p_test = FALSE,
  colors = NULL,
  dotsize = 0.3,
  label = FALSE,
  print_to_screen = FALSE
  ) {
  set.seed(99)
  # 将string转换成name,才能通过`!!`符号传入dplyr
  feature_sym <- rlang::sym(feature)
  if(!is.na(measures)) {
    if(!measures %in% c("Observed", "Chao1", "ACE", "Shannon", "Simpson", "InvSimpson", "Fisher")) {
      stop(paste0('Argument "measures" should be one of c("Observed", "Chao1", "ACE", "Shannon", ',
                  '"Simpson", "InvSimpson", "Fisher").'))
    } else {
      ## Step 1: Use plot_richness function to calculate alpha diversity
      if(is.na(level)) {
        # 如果没有level,直接用phyloseq计算alpha diversity
        alpha_diversity <- plot_richness(phyloseq, x = feature, measures = measures)
      } else if(!level %in% c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")) {
        # 如果level输入有误,停止运行
        stop(paste0('Argument "level" should be one of "Kingdom", "Phylum", "Class", "Order", ',
                    '"Family", "Genus", "Species".'))
      } else {
        # 如果有level,用level生成OTU table,再计算alpha diversity
        otu <- construct_otu_table(phyloseq, level = level)
        alpha_diversity <- plot_richness(otu_table(otu, taxa_are_rows = F), measures = measures)
        # 重新添加metadata,使其和直接用phyloseq计算alpha diversity输出的结果一样
        metadata <- extract_metadata_phyloseq(phyloseq) %>%
          dplyr::rename(samples = SampleID)
        alpha_diversity$data <- alpha_diversity$data %>% left_join(metadata)
      }
      ## Step 2: Calculate p-value
      if (isFALSE(p_test)) {
        p_test <- FALSE
      } else {
        if (p_test == "wilcox") {
          # Prepare feature table for calculating Mann-Whitney U test
          feature_tab_4_MWtest <- extract_metadata_phyloseq(phyloseq, feature)
          # Extract feature levels
          feature_0 <- feature_tab_4_MWtest[[feature]] %>% unique() %>% .[1]
          feature_1 <- feature_tab_4_MWtest[[feature]] %>% unique() %>% .[2]
          replace_feature <- c(as.character(feature_0), as.character(feature_1))
          # Revalue feature levels as 0 and 1
          feature_tab_4_MWtest[[feature]] <- plyr::mapvalues(
            feature_tab_4_MWtest[[feature]], to = c(0, 1), from = replace_feature
          )
          p_value <- wilcox.test(alpha_diversity$data$value ~ feature_tab_4_MWtest[[feature]]) %>%
            .$p.value
        } else if (p_test == "kruskal") {
          # Kruskal test
          p_value <- kruskal.test(alpha_diversity$data$value,
                                  factor(alpha_diversity$data[,feature])) %>%
            .$p.value
        } else {
          stop("The input p_test is not supported.")
        }
      }
      ## Step 3: Plot alpha diversity
      # Print alpha-diversity table
      if (print_to_screen) {
        alpha_diversity$data %>%
          dplyr::select(samples, !!feature_sym, variable, value) %>%
          dplyr::arrange(!!feature_sym) %>%
          print()
      }
      ## Step 4: Draw alpha diversity plot
      y <- "value"
      if (is.na(feature2)) {
        p <- ggplot(data = alpha_diversity$data,
                    # Use aes_string() to pass variables to ggplot
                    aes_string(x = feature, y = y)) +
          geom_boxplot(aes_string(fill = feature)) +
          # The geom_jitter's dot is more than original data and I don't know why :( so don't use it.
          # geom_jitter(
          #   aes_string(color = feature),
          #   size = size
          # ) +
          # The geom_dotplot can show the exact number of original data. But when using geom_dotplot,
          # it does not accept "shape" aesthetic mapping, because the underlying drawing is done
          # with circleGrob(), so remove "feature2" argument.
          geom_dotplot(
            aes_string(fill = feature),
            binaxis = 'y', # Required by geom_dotplot, line dot with y-axis values
            stackdir = 'center', # stack dotplot to center
            dotsize = dotsize
          ) +
          ylab(paste0(measures, " Index")) +
          theme_classic() +
          theme(panel.grid = element_blank(),
                axis.text.y = element_text(size = 14),
                axis.text.x = element_text(size = 14),
                axis.title = element_text(size = 16),
                legend.text = element_text(size = 12))
      } else {
        p <- ggplot(data = alpha_diversity$data,
                    # Use aes_string() to pass variables to ggplot
                    aes_string(x = feature, y = y)) +
          geom_boxplot() +
          geom_dotplot(aes_string(fill = feature2),
                       binaxis = 'y', # Required by geom_dotplot, line dot with y-axis values
                       stackdir = 'center', # stack dotplot to center
                       dotsize = dotsize) +
          ylab(paste0(measures, " Index")) +
          theme_classic() +
          theme(panel.grid = element_blank(),
                axis.text.y = element_text(size = 14),
                axis.text.x = element_text(size = 14),
                axis.title = element_text(size = 16),
                legend.text = element_text(size = 12))
      }
      if (!isFALSE(p_test)) {
        p <- p + annotate("text",
                          x = ((alpha_diversity$data[[feature]] %>% unique() %>% length() + 1)/2),
                          y = (max(alpha_diversity$data$value) * 1.1),
                          label = paste0(p_test, " p-value = ", round(p_value, 3)),
                          size = 3)
      }
      # 添加label
      if(!is.na(level)) {
        p <- p + labs(title = paste0(measures, " Index, ", level, " Level"))
      } else {
        p <- p + labs(title = paste0(measures, " Index"))
      }
      if (label) {
        samples <- "samples"
        if(is.na(feature2)) {
          p <- p + ggrepel::geom_label_repel(
            aes_string(x = feature, y = y, label = samples, color = feature)
          )
        } else {
          p <- p + ggrepel::geom_label_repel(
            aes_string(x = feature, y = y, label = samples, color = feature2)
          )
        }
      }
      if (is.null(colors)) {
        p <- p + ggsci::scale_fill_jco() + ggsci::scale_color_jco()
      } else if (length(colors) < length(unique(alpha_diversity$data[[feature]]))) {
        stop(paste0("The number of colors is smaller than the number of ", feature, "."))
      } else {
        p <- p + scale_fill_manual(values = colors) + scale_color_manual(values = colors)
      }
      p
    }
  } else {
    alpha_diversity <- plot_richness(phyloseq, x = feature) %>%
      .[["data"]] %>%
      dplyr::select(-se) %>%
      tidyr::spread(variable, value)
    return(alpha_diversity)
  }
}
yeguanhuav/visual16S documentation built on Feb. 19, 2022, 10:32 a.m.