#' plot_beta_diversity
#'
#' This is a function for plotting beta diversity.
#'
#' @param phyloseq A phyloseq object contain OTU table, taxonomy table, sample metadata and
#' phylogenetic tree, or a phyloseq object only contain constructed OTU table and metadata.
#'
#' @param feature The column name of the feature you want to select from metadata, e.g. "Phenotype".
#'
#' @param feature2 The column name of another feature you want to select from metadata, which will
#' show in different shape, e.g. "Gender". Default is NA.
#'
#' @param feature2_shape The shape values for feature2. See ggplot2::scale_shape_manual.
#'
#' @param level Which taxonomy level will be used to calculate beta diversity. Default is NA. If NA,
#' the original OTU table in phyloseq will be used. If level is given, first an OTU table with
#' taxonomy will be constructed using construct_otu_table, then the OTU table will be used to
#' calculate beta diversity through phyloseq::distance. The level name should be one of "Kingdom",
#' "Phylum", "Class", "Order", "Family", "Genus", "Species".
#'
#' @param method The method to calculate beta diversity. Method should be one of c("bray", "jaccard",
#' "unifrac", "wunifrac"). Default is "bray". PS: "unifrac" and "wunifrac" require a phylogenetic
#' tree.
#'
#' @param colors A vector of colors. The number of colors should be larger than the number of
#' feature. Default is NULL, if NULL, plot_beta_diversity will use ggsci::jco theme for the plot.
#'
#' @param size Value for geom_point(size). Default is 3.
#'
#' @param print_to_screen Default is FALSE. If TRUE, print beta-diversity table to the screen.
#'
#' @param label Default is FALSE. If TRUE, add sample name labels to plot.
#'
#' @param group_column The first parameter for adding arrows to the plot. Choose a column to group.
#' The arrows will be added to each group.
#'
#' @param arrange_column The second parameter for adding arrows to the plot. Choose a column to
#' arrange. This column should be continuous variables. Each group will be arrange by variables in
#' this column, the first variable is where the arrow starts, pointing to the second and to the last
#' variable.
#'
#' @param add_pc1 Default is FALSE. If TRUE, plot out top 3 most correlated OTUs to PC1.
#'
#' @param add_pc1_otu_level Default is FALSE. If FALSE, add_pc1 will extract otu_table from phyloseq
#' directly. If phyloseq object has both otu_table and tax_table, add_pc1_otu_level can get
#' different taxonomy for top 3 most correlated OTUs in add_pc1. If "all", will plot all taxonomy
#' level. Note: If `level` argument is provided, the corresponding OTU with taxonomy will be used to
#' compute correlation with PC1 and PC2 values, `add_pc1_otu_level` and `add_pc2_otu_level` need to
#' be set to FALSE.
#'
#' @param add_pc1_cor_method The method for computing correlation between PC1 and OTUs. One of
#' "spearman" (default), "kendall", or "pearson".
#'
#' @param add_pc2 Same as add_pc1.
#'
#' @param add_pc2_otu_level Same as add_pc1_otu_level.
#'
#' @param add_pc2_cor_method Same as add_pc1_cor_method.
#'
#' @export
#'
#' @examples
#' plot_beta_diversity(demo_phyloseq_object, feature = "diagnosis")
plot_beta_diversity <- function(
phyloseq,
feature,
feature2 = NA,
feature2_shape = c(0:21),
level = NA,
method = "bray",
colors = NULL,
size = 3,
label = FALSE,
group_column = NA,
arrange_column = NA,
add_pc1 = FALSE,
add_pc1_otu_level = FALSE,
add_pc1_cor_method = "spearman",
add_pc2 = FALSE,
add_pc2_otu_level = FALSE,
add_pc2_cor_method = "spearman",
print_to_screen = FALSE
) {
set.seed(99)
## Step 1: Calculate beta diversity
if(!method %in% c("bray", "jaccard", "unifrac", "wunifrac")) {
stop(
paste0('Beta diversity method should be one of c("bray", "jaccard", "unifrac", "wunifrac").')
)
} else {
if(is.na(level)) {
# 如果没有给level,就直接用phyloseq的otu计算beta diversity
beta <- cmdscale(phyloseq::distance(physeq = phyloseq, method = method), eig = TRUE)
} else if(!level %in% c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")) {
# 如果level输入错误,停止运行
stop(paste0('Argument "level" should be one of "Kingdom", "Phylum", "Class", "Order", ',
'"Family", "Genus", "Species".'))
} else {
# 如果level输入正确:
# 1.先判断是否是"unifrac"或"wunifrac"
if(method %in% c("unifrac", "wunifrac")) {
# "unifrac"和"wunifrac"需要input有phy_tree的phyloseq object才能计算
stop(
paste0('Calculate Unifrac and Weighted-Unifrac requires phyloseq object with phy_tree.')
)
}
# 2.判断是否重复提供add_pc1_otu_level和add_pc2_otu_level
if(!isFALSE(add_pc1_otu_level) | !isFALSE(add_pc2_otu_level)) {
# 如果提供了`level`,就强制用`level`去画pc1,否则会变成画pcoa时用的`level`,画pc1计算
# correlation时用otu,产生误导。
# 如果没有提供`level`但提供了`add_pc1_otu_level`,那么就用otu计算correlation,和画pcoa时用
# otu level保持一致,再用得到的前3个OTU去找taxonomy。
stop(
paste0("If `level` argument is provided, the corresponding OTU with taxonomy will be used",
" to compute correlation with PC1/PC2 values, `add_pc1_otu_level` and ",
"`add_pc2_otu_level` need to be set to FALSE.")
)
}
# 生成对应level的otu再计算beta
otu <- construct_otu_table(phyloseq, level = level)
beta <- cmdscale(
phyloseq::distance(physeq = otu_table(otu, taxa_are_rows = F), method = method),
eig = TRUE
)
}
}
if(method == "bray") {
plot_title <- "Bray–Curtis"
} else if(method == "jaccard") {
plot_title <- "Jaccard"
} else if(method == "unifrac") {
plot_title <- "UniFrac"
} else if(method == "wunifrac") {
plot_title <- "Weighted Unifrac"
}
## Step 2: Construct plot table
# Extract PC1 and PC2
PC <- as.data.frame(beta$points)
colnames(PC) <- c("PC1", "PC2")
PC <- rownames_to_column(PC, var = "SampleID")
# Extract metadata
metadata <- extract_metadata_phyloseq(phyloseq)
# Join two tables
beta_plot <- left_join(PC, metadata)
# Print beta-diversity table
if(print_to_screen) {
if(is.na(feature2)) {
beta_plot %>%
dplyr::select(SampleID, !!feature, PC1, PC2) %>%
arrange_(feature) %>%
print()
} else {
beta_plot %>%
dplyr::select(SampleID, !!feature, !!feature2, PC1, PC2) %>%
arrange_(feature) %>%
print()
}
}
## Step 3: Plot beta diversity
# Make x-axis and y-axis names for aes_string
x_name <- "PC1"
y_name <- "PC2"
# Make factor for color, prevent "Error: Continuous value supplied to discrete scale"
beta_plot[[feature]] <- beta_plot[[feature]] %>% as.factor()
# 为add_pc1_info添加facet_grid(rows = vars(...))所需的column
beta_plot$PCoA <- "PCoA"
# Plot beta diversity
if(is.na(feature2)) {
p <- ggplot(data = beta_plot,
# In the first layer (i.e. ggplot()), aes() can only contain x and y,
# color, shape, etc. should be in upper layer (i.e. geom_point).
aes_string(x = x_name, y = y_name)) + # Use aes_string() to pass variables to ggplot
# Use 'color' instead of 'fill' in geom_point
geom_point(aes_string(color = feature), size = size) +
xlab(paste("PC1:", round(100*as.numeric(beta$eig[1]/sum(beta$eig)), 2), "%", sep = " ")) +
ylab(paste("PC2:", round(100*as.numeric(beta$eig[2]/sum(beta$eig)), 2), "%", sep = " ")) +
#labs(title = plot_title) +
theme_classic() +
theme(panel.grid = element_blank(),
axis.text.y = element_text(size = 14),
axis.text.x = element_text(size = 14),
axis.title = element_text(size = 16),
legend.text = element_text(size = 12))
} else {
p <- ggplot(data = beta_plot,
# In the first layer (i.e. ggplot()), aes() can only contain x and y,
# color, shape, etc. should be in upper layer (i.e. geom_point).
aes_string(x = x_name, y = y_name)) + # Use aes_string() to pass variables to ggplot
# Use 'color' instead of 'fill' in geom_point
geom_point(aes_string(color = feature, shape = feature2), size = size) +
xlab(paste("PC1:", round(100*as.numeric(beta$eig[1]/sum(beta$eig)), 2), "%", sep = " ")) +
ylab(paste("PC2:", round(100*as.numeric(beta$eig[2]/sum(beta$eig)), 2), "%", sep = " ")) +
#labs(title = plot_title) +
scale_shape_manual(values = feature2_shape) +
theme_classic() +
theme(panel.grid = element_blank(),
axis.text.y = element_text(size = 14),
axis.text.x = element_text(size = 14),
axis.title = element_text(size = 16),
legend.text = element_text(size = 12))
}
# 添加label
if(!is.na(level)) {
p <- p + labs(title = paste0(plot_title, ", ", level, " Level"))
} else {
p <- p + labs(title = plot_title)
}
if(label) {
label_name = "SampleID"
p <- p + ggrepel::geom_label_repel(aes_string(x = x_name, y = y_name, label = label_name,
color = feature))
}
if(is.null(colors)) {
p <- p + ggsci::scale_fill_jco() + ggsci::scale_color_jco()
} else if(length(colors) < length(unique(beta_plot[[feature]]))) {
stop(paste0("The number of colors is smaller than the number of ", feature, "."))
} else {
p <- p + scale_fill_manual(values = colors) + scale_color_manual(values = colors)
}
# Add arrows to plot
if(!is.na(group_column) & !is.na(arrange_column)) {
# 将string转换成name,才能通过!!符号传入dplyr
group_column_sym <- rlang::sym(group_column)
arrange_column_sym <- rlang::sym(arrange_column)
p_data <- p$data %>%
# 按照group_column进行group
dplyr::group_by(!!group_column_sym) %>%
# 按照arrange_column排序,添加.by_group才能按照group排序
dplyr::arrange(!!arrange_column_sym, .by_group = TRUE)
# 用for loop将每根线都添加到plot
for(i in 1:nrow(p_data)) {
# 先按照group slice,提取X,Y end的坐标
xyend <- p_data %>%
dplyr::slice((i + 1))
# 如果所有样本都没有X,Y end的坐标,则停止
if(nrow(xyend) == 0) {
break
}
# 通过group_column找到X,Y end对应的X,Y坐标
xy <- p_data %>%
dplyr::filter(!!group_column_sym %in% as.character(xyend[[group_column]])) %>%
dplyr::slice(i)
# 创建geom_segment所需的dataframe,必须有 x, xend, y, yend 4列
arrows_tab <- left_join(
dplyr::select(xy, !!group_column_sym, x = PC1, y = PC2),
dplyr::select(xyend, !!group_column_sym, xend = PC1, yend = PC2)
)
# 添加到beta diversity plot
p <- p + geom_segment(
data = arrows_tab, aes(x = x, xend = xend, y = y, yend = yend),
arrow = arrow(length = unit(0.2, "cm")), color = "darkgrey"
)
}
} else if(is.na(group_column) & !is.na(arrange_column)) {
stop(paste0("group_column is missing."))
} else if(!is.na(group_column) & is.na(arrange_column)) {
stop(paste0("arrange_column is missing."))
}
if(add_pc1 | add_pc2) {
# 为add_pc1和add_pc2准备好OTU table
# 取出OTU(rows是SampleID,和PC一样)
if(is.na(level)) {
if(!taxa_are_rows(phyloseq)) {
otu <- otu_table(phyloseq) %>% as.matrix() %>% as.data.frame()
} else {
otu <- otu_table(phyloseq) %>% as.matrix() %>% t() %>% as.data.frame()
}
} else if(!level %in% c("Kingdom", "Phylum", "Class", "Order", "Family", "Genus", "Species")) {
# 如果level输入错误,停止运行
stop(paste0('Argument "level" should be one of "Kingdom", "Phylum", "Class", "Order", ',
'"Family", "Genus", "Species".'))
} else {
# 如果`level`不是NA,那么一开始计算distance的时候就会生成对应level的otu
otu <- otu
}
}
if(add_pc1) {
# 计算所有OTU与PC1的correlation
cor_pc1 <- NULL
for(i in 1:ncol(otu)) {
cor_pc1[i] <- cor(otu[i], PC$PC1, method = add_pc1_cor_method)
}
# 取出correlation最高的3个OTU
cor_pc1 <- data.frame(cor = abs(cor_pc1), row.names = colnames(otu)) %>%
rownames_to_column("OTU") %>%
dplyr::arrange(desc(cor)) %>%
dplyr::filter(rownames(.) %in% rownames(.)[1:3])
top3otu <- cor_pc1$OTU
# 取出top 3 otu的abundance
otu_pc1 <- dplyr::select(otu, !!top3otu) %>% rownames_to_column("SampleID")
# 将3个OTU合为一列,用于facet_wrap;将abundance合为一列,用于plot
data_pc1 <- left_join(PC, otu_pc1) %>%
tidyr::gather(key = "OTU", value = "abundance", !!top3otu)
data_pc1$OTU <- factor(data_pc1$OTU, levels = top3otu)
# 用计算correlation后得到的OTU去找taxonomy
if(!isFALSE(add_pc1_otu_level)) {
if(is.null(tax_table(phyloseq, errorIfNULL = F))) {
stop(paste0("Taxonomy table is empty."))
} else {
tax <- tax_table(phyloseq) %>% as.data.frame() %>% rownames_to_column("OTU")
# 如果add_pc1_otu_level == "all",则将所有level都合并成一个level
if(add_pc1_otu_level == "all" | add_pc1_otu_level == "All") {
tax <- tax %>% tidyr::unite(2:ncol(.), col = !!add_pc1_otu_level, sep = "|")
}
tax <- tax %>% dplyr::select(OTU, !!add_pc1_otu_level)
data_pc1 <- left_join(data_pc1, tax) %>%
dplyr::select(-OTU) %>%
dplyr::rename(OTU = !!add_pc1_otu_level)
}
}
# plot pc1
pc1 <- ggplot(data = data_pc1, aes(x = PC1, y = abundance, color = OTU)) +
geom_line() +
facet_grid(rows = vars(OTU), # rows相当于表格里的行,所以facet后plot会分成几行
scales = "free") +
xlab(paste("PC1:", round(100*as.numeric(beta$eig[1]/sum(beta$eig)), 2), "%", sep = " ")) +
ylab("Abundance") +
theme_bw() +
theme(
panel.grid = element_blank(),
axis.text.y = element_text(size = 14),
axis.text.x = element_text(size = 14),
axis.title = element_text(size = 16),
legend.text = element_text(size = 12),
strip.text.y = element_blank() #去掉facet的label
)
}
if(add_pc2) {
# 计算所有OTU与PC2的correlation
cor_pc2 <- NULL
for(i in 1:ncol(otu)) {
cor_pc2[i] <- cor(otu[i], PC$PC2, method = add_pc2_cor_method)
}
# 取出correlation最高的3个OTU
cor_pc2 <- data.frame(cor = abs(cor_pc2), row.names = colnames(otu)) %>%
rownames_to_column("OTU") %>%
dplyr::arrange(desc(cor)) %>%
dplyr::filter(rownames(.) %in% rownames(.)[1:3])
top3otu <- cor_pc2$OTU
# 取出top 3 otu的abundance
otu_pc2 <- dplyr::select(otu, !!top3otu) %>% rownames_to_column("SampleID")
# 将3个OTU合为一列,用于facet_wrap;将abundance合为一列,用于plot
data_pc2 <- left_join(PC, otu_pc2) %>%
tidyr::gather(key = "OTU", value = "abundance", !!top3otu)
data_pc2$OTU <- factor(data_pc2$OTU, levels = top3otu)
# 用计算correlation后得到的OTU去找taxonomy
if(!isFALSE(add_pc2_otu_level)) {
if(is.null(tax_table(phyloseq, errorIfNULL = F))) {
stop(paste0("Taxonomy table is empty."))
} else {
tax <- tax_table(phyloseq) %>% as.data.frame() %>% rownames_to_column("OTU")
# 如果add_pc2_otu_level == "all",则将所有level都合并成一个level
if(add_pc2_otu_level == "all" | add_pc2_otu_level == "All") {
tax <- tax %>% tidyr::unite(2:ncol(.), col = !!add_pc2_otu_level, sep = "|")
}
tax <- tax %>% dplyr::select(OTU, !!add_pc2_otu_level)
data_pc2 <- left_join(data_pc2, tax) %>%
dplyr::select(-OTU) %>%
dplyr::rename(OTU = !!add_pc2_otu_level)
}
}
# plot pc2
pc2 <- ggplot(data = data_pc2, aes(x = PC2, y = abundance, color = OTU)) +
geom_line() +
facet_grid(cols = vars(OTU), # cols相当于表格里的列,所以facet后plot会分成几列
scales = "free") +
xlab(paste("PC2:", round(100*as.numeric(beta$eig[2]/sum(beta$eig)), 2), "%", sep = " ")) +
ylab("Abundance") +
theme_bw() +
theme(
panel.grid = element_blank(),
axis.text.y = element_text(size = 14),
axis.text.x = element_text(size = 14, angle = 270),
axis.title = element_text(size = 16),
legend.text = element_text(size = 12),
legend.position = "left",
strip.text.x = element_blank() #去掉facet的label
) +
coord_flip()
}
if(add_pc1 & add_pc2) {
# 同时添加pc1和pc2
#p <- ggpubr::ggarrange(p, pc2, pc1, nrow = 2, ncol = 2)
p_legend <- cowplot::get_legend(p)
p <- p +
facet_wrap(vars(PCoA)) +
theme(axis.title.x = element_blank(),
axis.text.x = element_blank(),
axis.title.y = element_blank(),
axis.text.y = element_blank(),
strip.text = element_blank(),
legend.position = "none",
title = element_blank())
# 去掉pc1的Y轴信息
pc1 <- pc1 + theme(axis.title.y = element_blank(), axis.text.y = element_blank(),
legend.position = "bottom")
# 去掉pc2的X轴信息
pc2 <- pc2 + theme(axis.title.x = element_blank(), axis.text.x = element_blank())
# 合并
p <- cowplot::plot_grid(pc2, p, p_legend, pc1, ncol = 2)
} else if(isTRUE(add_pc1) & isFALSE(add_pc2)) {
# 只添加pc1
p <- p +
facet_grid(rows = vars(PCoA)) + #和pc1的操作一样
theme(axis.title.x = element_blank(),
axis.text.x = element_blank(),
strip.text.y = element_blank())
p <- cowplot::plot_grid(p, pc1, nrow = 2, align = "v")
} else if(isFALSE(add_pc1) & isTRUE(add_pc2)) {
# 只添加pc2
p <- p +
facet_grid(cols = vars(PCoA)) + #和pc2的操作一样
theme(axis.title.y = element_blank(),
axis.text.y = element_blank(),
strip.text.x = element_blank())
p <- cowplot::plot_grid(pc2, p, ncol = 2, align = "h")
}
# Show plot
p
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.