# 绘制alpha多样性箱线图并添加统计分组 Alpha boxplot & ANOVA+Tukey/LSD test
#
# This is the first function named 'alpha_boxplot'
# which draw box plot with alpha and metadata, and return a ggplot2 object
#' @title Plotting alpha diversity boxplot for each group with ANOVA statistics
#' @description Input alpha index and metadata, and manual set alpha index and metadata column name
#' ANOVA+TukeyHSD/agricolae::LSD.test calculate p-value, and dplyr summary each group max for p-value groups position.
#' ggplot2 show boxplot, jitter and stat groups.
#' @param alpha_div alpha diversity matrix, typical output of usearch(-alpha_div)/QIIME/vegan
#' rowname is sample ID, colname is index of alpha diversity;
#' @param index index type of alpha diversity;
#' @param metadata matrix or data frame, including sample ID and group ID;
#' @param groupID column name for group ID.
#' @details
#' By default, returns richness diversity index
#' The available diversity indices include the following:
#' \itemize{
#' \item{most used indices: richness, simpson, shannon_2}
#' \item{other used indices: berger_parker, buzas_gibson, dominance, equitability, jost, jost1, reads, robbins, simpson, shannon_2, shannon_10}
#' }
#' @return ggplot2 object.
#' @author Contact: Yong-Xin Liu \email{yxliu@@genetics.ac.cn}
#' @references
#'
#' Yong-Xin Liu, Yuan Qin, Tong Chen, Meiping Lu, Xubo Qian, Xiaoxuan Guo & Yang Bai.
#' A practical guide to amplicon and metagenomic analysis of microbiome data.
#' Protein Cell, 2020(41), 1-16, DOI: \url{https://doi.org/10.1007/s13238-020-00724-8}
#'
#' @seealso alpha_barplot
#' @examples
#' # Input alpha index (alpha_div) and metadata, select richness (index type) and Group (catagory)
#' alpha_boxplot(alpha_div, metadata, "richness", "Group")
#' # Select shannon_2 (index type) and Site (catagory)
#' alpha_boxplot(alpha_div = alpha_div, metadata = metadata, index = "shannon_2", groupID = "Site")
#' @export
alpha_boxplot <- function(alpha_div, metadata, index = "richness", groupID = "Group",
outlier = TRUE
) {
# 依赖关系检测与安装
p_list = c("ggplot2", "dplyr", "multcompView") # "agricolae"
for(p in p_list){
if (!requireNamespace(p)){
install.packages(p)}
suppressPackageStartupMessages(library(p, character.only = TRUE, quietly = TRUE, warn.conflicts = FALSE))
}
# 测试默认参数
# library(amplicon)
# index = "richness"
# groupID = "Group"
# metadata = read.table("metadata2.txt", header=T, row.names=1, sep="\t", comment.char="", stringsAsFactors = F)
# metadata = subset(metadata, Group %in% c("KO","OE"))
# 交叉筛选
idx = rownames(metadata) %in% rownames(alpha_div)
metadata = metadata[idx,,drop=F]
alpha_div = alpha_div[rownames(metadata),]
# 提取样品组信息,默认为group可指定
sampFile = as.data.frame(metadata[, groupID],row.names = row.names(metadata))
# colnames(sampFile)[1] = "group"
# 合并alpha_div和metadata
df = cbind(alpha_div[rownames(sampFile),index], sampFile)
colnames(df) = c(index,"group")
# 统计各种显著性
model = aov(df[[index]] ~ group, data=df)
# 计算Tukey显著性差异检验
Tukey_HSD = TukeyHSD(model, ordered = TRUE, conf.level = 0.95)
# 提取比较结果
Tukey_HSD_table = as.data.frame(Tukey_HSD$group)
# 保存统计结果
# 保存一个制表符,解决存在行名时,列名无法对齐的问题
write.table(paste(date(), "\nGroup\t", groupID, "\n\t", sep=""), file=paste("alpha_boxplot_TukeyHSD.txt",sep=""),append = T, quote = F, eol = "", row.names = F, col.names = F)
# 保存统计结果,有warning正常
suppressWarnings(write.table(Tukey_HSD_table, file=paste("alpha_boxplot_TukeyHSD.txt",sep=""), append = T, quote = F, sep="\t", eol = "\n", na = "NA", dec = ".", row.names = T, col.names = T))
# 函数:将Tukey检验结果P值转换为显著字母分组
# 输入文件为图基检验结果和分组
generate_label_df = function(TUKEY, variable){
# library(multcompView)
# 转换P值为字母分组
## 提取图基检验中分组子表的第4列P adjust值
Tukey.levels = TUKEY[[variable]][,4]
# 方法1.multcompLetters函数将两两p值转换为字母,data.frame并生成列名为Letters的数据框
Tukey.labels = data.frame(multcompLetters(Tukey.levels)['Letters'])
# 方法2. 解决字母顺序相反的问题
# library(multcomp)
# tuk <- cld(glht(model, alternative = 'two.sided', linfct = mcp(group = 'Tukey')), sig = p, decreasing = TRUE)
# Tukey.labels <- data.frame(Letters=tuk$mcletters$Letters, stringsAsFactors = FALSE)
# 按分组名字母顺序
## 提取字母分组行名为group组名
Tukey.labels$group = rownames(Tukey.labels)
# 按组名的字母顺序排列,默认的Levels
Tukey.labels=Tukey.labels[order(Tukey.labels$group), ]
return(Tukey.labels)
}
# 当只有两组时,用LSD标注字母
if (length(unique(df$group)) == 2){
# LSD检验,添加差异组字母
library(agricolae)
out = LSD.test(model, "group", p.adj="none")
stat = out$groups
# 分组结果添入Index
df$stat=stat[as.character(df$group),]$groups
# 当大于两组时,用multcompView标注字母
}else{
# library(multcompView)
LABELS = generate_label_df(Tukey_HSD , "group")
df$stat=LABELS[as.character(df$group),]$Letters
}
# 设置分组位置为各组y最大值+高的5%
max=max(df[,c(index)])
min=min(df[,index])
x = df[,c("group",index)]
y = x %>% group_by(group) %>% summarise_(Max=paste('max(',index,')',sep=""))
y=as.data.frame(y)
rownames(y)=y$group
df$y=y[as.character(df$group),]$Max + (max-min)*0.05
if (outlier) {
# 绘图 plotting
p = ggplot(df, aes(x=group, y=.data[[index]], color=group)) +
geom_boxplot(alpha=1,
# outlier.shape = NA,
# outlier.size=0,
size=0.7,
width=0.5, fill="transparent") +
labs(x="Groups", y=paste(index, "index"), color=groupID) + theme_classic() +
geom_text(data=df, aes(x=group, y=y, color=group, label=stat)) +
geom_jitter(position=position_jitter(0.17), size=1, alpha=0.7)+
theme(text=element_text(family="sans", size=7))
p
} else{
# 绘图 plotting
p = ggplot(df, aes(x=group, y=.data[[index]], color=group)) +
geom_boxplot(alpha=1,
outlier.shape = NA,
outlier.size=0,
size=0.7,
width=0.5, fill="transparent") +
labs(x="Groups", y=paste(index, "index"), color=groupID) + theme_classic() +
geom_text(data=df, aes(x=group, y=y, color=group, label=stat)) +
geom_jitter(position=position_jitter(0.17), size=1, alpha=0.7)+
theme(text=element_text(family="sans", size=7))
p
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.