#' plot_alpha_diversity
#'
#' This is a function for plotting alpha diversity. Alpha diversity can only accept intergers (
#' counts) OTU table!
#'
#' @param phyloseq A phyloseq object contain OTU table, taxonomy table, sample metadata and
#' phylogenetic tree, or a phyloseq object only contain constructed OTU table and metadata. OTU
#' table of the phyloseq can only be intergers (counts) !
#'
#' @param feature The column name of the feature you want to select from metadata.
#'
#' @param feature2 The column name of another feature you want to select from metadata. Default is
#' NA.
#'
#' @param level Which taxonomy level will be used to calculate alpha diversity. Default is NA. If NA,
#' the original OTU table in phyloseq will be used. If level is given, first an OTU table with
#' taxonomy will be constructed using construct_otu_table, then the OTU table will be used to
#' calculate alpha diversity through phyloseq::plot_richness(). The level name should be one of
#' "Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species".
#'
#' @param measures The measures to calculate alpha diversity. Default is NA. If NA, all available
#' alpha diversity measures will be calculated and generate a table. If not NA, measures should be
#' one of c("Observed", "Chao1", "ACE", "Shannon", "Simpson", "InvSimpson", "Fisher"). Note:
#' "Observed", "Chao1", "ACE", "Fisher" these 4 measures can accepts only integers.
#'
#' @param p_test The p-value to test alpha diversity. Default is FALSE, will not calculate p-value.
#' p_test should be either "wilcox" or "kruskal". PS: "wilcox" can only work with two groups.
#'
#' @param colors A vector of colors. The number of colors should be larger than the number of
#' feature. Default is NULL, if NULL, plot_alpha_diversity will use ggsci::jco theme for the plot.
#'
#' @param dotsize See geom_dotplot(dotsize). Default is 0.3.
#'
#' @param print_to_screen Default is FALSE. If TRUE, print alpha-diversity table to the screen.
#'
#' @param label Default is FALSE. If TRUE, add sample name labels to plot.
#'
#' @export
#'
#' @examples
#' plot_alpha_diversity(demo_phyloseq_object, feature = "diagnosis", measures = "Chao1",
#' p_test = "kruskal")
plot_alpha_diversity <- function(
phyloseq,
feature,
feature2 = NA,
level = NA,
measures = NA,
p_test = FALSE,
colors = NULL,
dotsize = 0.3,
label = FALSE,
print_to_screen = FALSE
) {
set.seed(99)
# 将string转换成name,才能通过`!!`符号传入dplyr
feature_sym <- rlang::sym(feature)
if(!is.na(measures)) {
if(!measures %in% c("Observed", "Chao1", "ACE", "Shannon", "Simpson", "InvSimpson", "Fisher")) {
stop(paste0('Argument "measures" should be one of c("Observed", "Chao1", "ACE", "Shannon", ',
'"Simpson", "InvSimpson", "Fisher").'))
} else {
## Step 1: Use plot_richness function to calculate alpha diversity
if(is.na(level)) {
# 如果没有level,直接用phyloseq计算alpha diversity
alpha_diversity <- plot_richness(phyloseq, x = feature, measures = measures)
} else if(!level %in% c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")) {
# 如果level输入有误,停止运行
stop(paste0('Argument "level" should be one of "Kingdom", "Phylum", "Class", "Order", ',
'"Family", "Genus", "Species".'))
} else {
# 如果有level,用level生成OTU table,再计算alpha diversity
otu <- construct_otu_table(phyloseq, level = level)
alpha_diversity <- plot_richness(otu_table(otu, taxa_are_rows = F), measures = measures)
# 重新添加metadata,使其和直接用phyloseq计算alpha diversity输出的结果一样
metadata <- extract_metadata_phyloseq(phyloseq) %>%
dplyr::rename(samples = SampleID)
alpha_diversity$data <- alpha_diversity$data %>% left_join(metadata)
}
## Step 2: Calculate p-value
if (isFALSE(p_test)) {
p_test <- FALSE
} else {
if (p_test == "wilcox") {
# Prepare feature table for calculating Mann-Whitney U test
feature_tab_4_MWtest <- extract_metadata_phyloseq(phyloseq, feature)
# Extract feature levels
feature_0 <- feature_tab_4_MWtest[[feature]] %>% unique() %>% .[1]
feature_1 <- feature_tab_4_MWtest[[feature]] %>% unique() %>% .[2]
replace_feature <- c(as.character(feature_0), as.character(feature_1))
# Revalue feature levels as 0 and 1
feature_tab_4_MWtest[[feature]] <- plyr::mapvalues(
feature_tab_4_MWtest[[feature]], to = c(0, 1), from = replace_feature
)
p_value <- wilcox.test(alpha_diversity$data$value ~ feature_tab_4_MWtest[[feature]]) %>%
.$p.value
} else if (p_test == "kruskal") {
# Kruskal test
p_value <- kruskal.test(alpha_diversity$data$value,
factor(alpha_diversity$data[,feature])) %>%
.$p.value
} else {
stop("The input p_test is not supported.")
}
}
## Step 3: Plot alpha diversity
# Print alpha-diversity table
if (print_to_screen) {
alpha_diversity$data %>%
dplyr::select(samples, !!feature_sym, variable, value) %>%
dplyr::arrange(!!feature_sym) %>%
print()
}
## Step 4: Draw alpha diversity plot
y <- "value"
if (is.na(feature2)) {
p <- ggplot(data = alpha_diversity$data,
# Use aes_string() to pass variables to ggplot
aes_string(x = feature, y = y)) +
geom_boxplot(aes_string(fill = feature)) +
# The geom_jitter's dot is more than original data and I don't know why :( so don't use it.
# geom_jitter(
# aes_string(color = feature),
# size = size
# ) +
# The geom_dotplot can show the exact number of original data. But when using geom_dotplot,
# it does not accept "shape" aesthetic mapping, because the underlying drawing is done
# with circleGrob(), so remove "feature2" argument.
geom_dotplot(
aes_string(fill = feature),
binaxis = 'y', # Required by geom_dotplot, line dot with y-axis values
stackdir = 'center', # stack dotplot to center
dotsize = dotsize
) +
ylab(paste0(measures, " Index")) +
theme_classic() +
theme(panel.grid = element_blank(),
axis.text.y = element_text(size = 14),
axis.text.x = element_text(size = 14),
axis.title = element_text(size = 16),
legend.text = element_text(size = 12))
} else {
p <- ggplot(data = alpha_diversity$data,
# Use aes_string() to pass variables to ggplot
aes_string(x = feature, y = y)) +
geom_boxplot() +
geom_dotplot(aes_string(fill = feature2),
binaxis = 'y', # Required by geom_dotplot, line dot with y-axis values
stackdir = 'center', # stack dotplot to center
dotsize = dotsize) +
ylab(paste0(measures, " Index")) +
theme_classic() +
theme(panel.grid = element_blank(),
axis.text.y = element_text(size = 14),
axis.text.x = element_text(size = 14),
axis.title = element_text(size = 16),
legend.text = element_text(size = 12))
}
if (!isFALSE(p_test)) {
p <- p + annotate("text",
x = ((alpha_diversity$data[[feature]] %>% unique() %>% length() + 1)/2),
y = (max(alpha_diversity$data$value) * 1.1),
label = paste0(p_test, " p-value = ", round(p_value, 3)),
size = 3)
}
# 添加label
if(!is.na(level)) {
p <- p + labs(title = paste0(measures, " Index, ", level, " Level"))
} else {
p <- p + labs(title = paste0(measures, " Index"))
}
if (label) {
samples <- "samples"
if(is.na(feature2)) {
p <- p + ggrepel::geom_label_repel(
aes_string(x = feature, y = y, label = samples, color = feature)
)
} else {
p <- p + ggrepel::geom_label_repel(
aes_string(x = feature, y = y, label = samples, color = feature2)
)
}
}
if (is.null(colors)) {
p <- p + ggsci::scale_fill_jco() + ggsci::scale_color_jco()
} else if (length(colors) < length(unique(alpha_diversity$data[[feature]]))) {
stop(paste0("The number of colors is smaller than the number of ", feature, "."))
} else {
p <- p + scale_fill_manual(values = colors) + scale_color_manual(values = colors)
}
p
}
} else {
alpha_diversity <- plot_richness(phyloseq, x = feature) %>%
.[["data"]] %>%
dplyr::select(-se) %>%
tidyr::spread(variable, value)
return(alpha_diversity)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.