R/trans_diff.R
In microeco: Microbial Community Ecology Data Analysis

#' @title 
#' Create \code{trans_diff} object for the differential analysis on the taxonomic abundance
#'
#' @description
#' This class is a wrapper for a series of differential abundance test and indicator analysis methods, including 
#'  LEfSe based on the Segata et al. (2011) <doi:10.1186/gb-2011-12-6-r60>,
#'  random forest <doi:10.1016/j.geoderma.2018.09.035>, metastat based on White et al. (2009) <doi:10.1371/journal.pcbi.1000352>,
#'  non-parametric Kruskal-Wallis Rank Sum Test,
#'  Dunn's Kruskal-Wallis Multiple Comparisons based on the \code{FSA} package, Wilcoxon Rank Sum and Signed Rank Tests, t-test, anova, 
#'  Scheirer Ray Hare test, 
#'  R package \code{metagenomeSeq} Paulson et al. (2013) <doi:10.1038/nmeth.2658>, 
#'  R package \code{ANCOMBC} <doi:10.1038/s41467-020-17041-7>, R package \code{ALDEx2} <doi:10.1371/journal.pone.0067019; 10.1186/2049-2618-2-15>, 
#'  R package \code{MicrobiomeStat} <doi:10.1186/s13059-022-02655-5>, beta regression <doi:10.18637/jss.v034.i02>, R package \code{maaslin2},
#'  linear mixed-effects model and generalized linear mixed model.
#'  
#' @export
trans_diff <- R6Class(classname = "trans_diff",
	public = list(
		#' @param dataset default NULL; \code{\link{microtable}} object.
		#' @param method default "lefse". Some methods (e.g., "lefse", "KW", "wilcox", "anova", "lm", "betareg", "glmm" and "glmm_beta") 
		#' 	 invoke the \code{taxa_abund} list (generally relative abundance data) of input microtable object for the analysis.
		#' 	 Some (e.g., "metastat", "metagenomeSeq", "ALDEx2_t", "DESeq2", "edgeR", "ancombc2" and "linda") use the \code{otu_table} of input microtable object for the analysis. 
		#' 	 Available options include:
		#'   \describe{
		#'     \item{\strong{'lefse'}}{LEfSe method based on Segata et al. (2011) <doi:10.1186/gb-2011-12-6-r60>}
		#'     \item{\strong{'rf'}}{random forest and non-parametric test method based on An et al. (2019) <doi:10.1016/j.geoderma.2018.09.035>}
		#'     \item{\strong{'metastat'}}{Metastat method for all paired groups based on White et al. (2009) <doi:10.1371/journal.pcbi.1000352>}
		#'     \item{\strong{'metagenomeSeq'}}{zero-inflated log-normal model-based differential test method from \code{metagenomeSeq} package.}
		#'     \item{\strong{'KW'}}{KW: Kruskal-Wallis Rank Sum Test for all groups (>= 2)}
		#'     \item{\strong{'KW_dunn'}}{Dunn's Kruskal-Wallis Multiple Comparisons when group number > 2; see dunnTest function in \code{FSA} package}
		#'     \item{\strong{'wilcox'}}{Wilcoxon Rank Sum and Signed Rank Tests for all paired groups }
		#'     \item{\strong{'t.test'}}{Student's t-Test for all paired groups}
		#'     \item{\strong{'anova'}}{ANOVA for one-way or multi-factor analysis; see \code{cal_diff} function of \code{trans_alpha} class}
		#'     \item{\strong{'scheirerRayHare'}}{Scheirer Ray Hare test for nonparametric test used for a two-way factorial experiment; 
		#'     	  see \code{scheirerRayHare} function of \code{rcompanion} package}
		#'     \item{\strong{'lm'}}{Linear Model based on the \code{lm} function}
		#'     \item{\strong{'ALDEx2_t'}}{runs Welch's t and Wilcoxon tests with \code{ALDEx2} package; see also the test parameter in \code{ALDEx2::aldex} function;
		#'     	  ALDEx2 uses the centred log-ratio (clr) transformation and estimates per-feature technical variation within each sample using Monte-Carlo instances 
		#'     	  drawn from the Dirichlet distribution; Reference: <doi:10.1371/journal.pone.0067019> and <doi:10.1186/2049-2618-2-15>; 
		#'     	  require \code{ALDEx2} package to be installed 
		#'     	  (\href{https://bioconductor.org/packages/release/bioc/html/ALDEx2.html}{https://bioconductor.org/packages/release/bioc/html/ALDEx2.html})}
		#'     \item{\strong{'ALDEx2_kw'}}{runs Kruskal-Wallace and generalized linear model (glm) test with \code{ALDEx2} package; 
		#'     	  see also the \code{test} parameter in \code{ALDEx2::aldex} function.}
		#'     \item{\strong{'DESeq2'}}{Differential expression analysis based on the Negative Binomial (a.k.a. Gamma-Poisson) distribution based on the \code{DESeq2} package.}
		#'     \item{\strong{'edgeR'}}{The \code{exactTest} method of \code{edgeR} package is implemented.}
		#'     \item{\strong{'ancombc2'}}{Analysis of Compositions of Microbiomes with Bias Correction (ANCOM-BC) 
		#'        based on the \code{ancombc2} function from \code{ANCOMBC} package.
		#'        If the \code{fix_formula} parameter is not provided, the function can automatically assign it by using group parameter.
		#'        For this method, the \code{group} parameter is directly passed to the group parameter of \code{ancombc2} function.
		#'        Reference: <doi:10.1038/s41467-020-17041-7><10.1038/s41592-023-02092-7>; Require \code{ANCOMBC} package to be installed 
		#'        (\href{https://bioconductor.org/packages/release/bioc/html/ANCOMBC.html}{https://bioconductor.org/packages/release/bioc/html/ANCOMBC.html})}
		#'     \item{\strong{'linda'}}{Linear Model for Differential Abundance Analysis of High-dimensional Compositional Data 
		#'     	  based on the \code{linda} function of \code{MicrobiomeStat} package. 
		#'     	  For linda method, please provide either the group parameter or the formula parameter.
		#'     	  When the formula parameter is provided, it should start with '~' as it is directly used by the linda function.
		#'     	  If the group parameter is used, the prefix '~' is not necessary as the function can automatically add it.
		#'     	  The parameter \code{feature.dat.type = 'count'} has been fixed. Other parameters can be passed to the \code{linda} function.
		#'     	  Reference: <doi:10.1186/s13059-022-02655-5>}
		#'     \item{\strong{'maaslin2'}}{finding associations between metadata and potentially high-dimensional microbial multi-omics data based on the Maaslin2 package.
		#'     	  Using this option can invoke the \code{trans_env$cal_cor} function with \code{method = "maaslin2"}.}
		#'     \item{\strong{'betareg'}}{Beta Regression based on the \code{betareg} package. 
		#'     	  Please see the \code{beta_pseudo} parameter for the use of pseudo value when there is 0 or 1 in the data}
		#'     \item{\strong{'lme'}}{Linear Mixed Effect Model based on the \code{lmerTest} package.
		#'     	  In the return table, the significance of fixed factors are tested by function \code{anova}.
		#'     	  The significance of 'Estimate' in each term of fixed factors comes from the model.}
		#'     \item{\strong{'glmm'}}{Generalized linear mixed model (GLMM) based on the \code{glmmTMB} package.
		#'     	  The \code{formula} and \code{family} parameters are needed. 
		#'     	  Please refer to glmmTMB package to select the family function, e.g. \code{family = glmmTMB::lognormal(link = "log")}.
		#'     	  The usage of formula is similar with that in 'lme' method.
		#'     	  For more available parameters, please see \code{glmmTMB::glmmTMB} function and use parameter passing.
		#'     	  In the result, Conditional R2 and Marginal R2 represent the variance explained by both fixed and random effects and the variance explained by 
		#'     	  fixed effects, respectively. For more details on R2 calculation, please refer to the article <doi: 10.1098/rsif.2017.0213>.
		#'     	  The significance of fixed factors are tested by Chi-square test from function \code{car::Anova}.
		#'     	  The significance of 'Estimate' in each term of fixed factors comes from the model.}
		#'     \item{\strong{'glmm_beta'}}{Generalized linear mixed model with a family function of beta distribution, 
		#'     	  developed for the relative abundance (ranging from 0 to 1) of taxa specifically. 
		#'     	  This is an extension of the GLMM model in \code{'glmm'} option.
		#'     	  The only difference is in \code{glmm_beta} the family function is fixed with the beta distribution function, 
		#'     	  i.e. \code{family = glmmTMB::beta_family(link = "logit")}.
		#'     	  Please see the \code{beta_pseudo} parameter for the use of pseudo value when there is 0 or 1 in the data}
		#'   }
		#' @param group default NULL; sample group used for the comparision; a colname of input \code{microtable$sample_table};
		#' 	  It is necessary when method is not "anova" or method is "anova" but formula is not provided.
		#' 	  Once group is provided, the return res_abund will have mean and sd values for group.
		#' @param taxa_level default "all"; 'all' represents using abundance data of all taxonomic ranks; 
		#' 	  For testing at a specific rank, provide taxonomic rank name, such as "Genus".
		#' 	  If the provided taxonomic name is neither 'all' nor a colname in tax_table of input dataset (e.g., "ASV"), 
		#' 	  the function will use the features in input \code{microtable$otu_table} automatically.
		#' 	  Note that a specific level (e.g., "ASV") should be provided for \code{method}: 
		#' 	  "metastat", "metagenomeSeq", "ALDEx2_t", "DESeq2", "edgeR", "ancombc2", "linda", "maaslin2".
		#' @param filter_thres default 0; the abundance threshold, such as 0.0005 when the input is relative abundance; only available when method != "metastat".
		#' 	  The features with abundances lower than filter_thres will be filtered.
		#' @param alpha default 0.05; significance threshold to select taxa when method is "lefse" or "rf"; 
		#'    or used to generate significance letters when method is 'anova' or 'KW_dunn' like the alpha parameter in \code{cal_diff} of \code{trans_alpha} class.
		#' @param p_adjust_method default "fdr"; p.adjust method; see method parameter of \code{p.adjust} function for other available options; 
		#'    "none" means disable p value adjustment; So when \code{p_adjust_method = "none"}, P.adj is same with P.unadj.
		#' @param transformation default NULL; feature abundance transformation method in the class \code{\link{trans_norm}},
		#'    such as 'AST' for the arc sine square root transformation.
		#'    Only available when \code{method} is one of "KW", "KW_dunn", "wilcox", "t.test", "anova", "scheirerRayHare", "betareg" and "lme".
		#' @param remove_unknown default TRUE; whether remove unknown features that donot have clear classification information.
		#' @param lefse_subgroup default NULL; sample sub group used for sub-comparision in lefse; Segata et al. (2011) <doi:10.1186/gb-2011-12-6-r60>.
		#' @param lefse_min_subsam default 10; sample numbers required in the subgroup test.
		#' @param lefse_sub_strict default FALSE; whether remove the features strictly in the sub-checking.
		#'    FALSE means only removing the features that have different orders of medians across sub-groups with those across groups and the statistics are also significant.
		#'    TRUE means removing the features that are not significant in one (or more) sub-test or have different orders of medians across sub-groups with those across groups.
		#' @param lefse_sub_alpha default NULL; The significance threshold in the test for lefse sub-groups. NULL means it is same with \code{alpha}.
		#' @param lefse_norm default 1000000; normalization value used in lefse to scale abundances for each level. 
		#'    A \code{lefse_norm} value < 0 (e.g., -1) means no normalization same with the LEfSe python version.
		#' @param nresam default 0.6667; sample number ratio used in each bootstrap for method = "lefse" or "rf".
		#' @param boots default 30; bootstrap test number for method = "lefse" or "rf".
		#' @param rf_imp_type default 2; the type of feature importance in random forest when \code{method = "rf"}. 
		#'    Same with \code{type} parameter in \code{importance} function of \code{randomForest} package.
		#'    1=mean decrease in accuracy (MeanDecreaseAccuracy), 2=mean decrease in node impurity (MeanDecreaseGini).
		#' @param group_choose_paired default NULL; a vector used for selecting the required groups for paired testing instead of all paired combinations across groups;
		#'    Available when method is "metastat", "metagenomeSeq", "ALDEx2_t" or "edgeR".
		#' @param metagenomeSeq_count default 1; Filter features to have at least 'counts' counts.; see the count parameter in MRcoefs function of \code{metagenomeSeq} package.
		#' @param ALDEx2_sig default c("wi.eBH", "kw.eBH"); which column of the final result is used as the significance asterisk assignment;
		#'   applied to method = "ALDEx2_t" or "ALDEx2_kw"; the first element is provided to "ALDEx2_t"; the second is provided to "ALDEx2_kw";
		#'   for "ALDEx2_t", the available choice is "wi.eBH" (Expected Benjamini-Hochberg corrected P value of Wilcoxon test)
		#'   and "we.eBH" (Expected BH corrected P value of Welch's t test); for "ALDEx2_kw"; for "ALDEx2_t",
		#'   the available choice is "kw.eBH" (Expected BH corrected P value of Kruskal-Wallace test) and "glm.eBH" (Expected BH corrected P value of glm test).
		#' @param by_group default NULL; a column of sample_table used to perform the differential test 
		#'   among groups (\code{group} parameter) for each group (\code{by_group} parameter). So \code{by_group} has a higher level than \code{group} parameter.
		#'   Same with the \code{by_group} parameter in \code{trans_alpha} class. 
		#'   Only available when method is one of \code{c("KW", "KW_dunn", "wilcox", "t.test", "anova", "scheirerRayHare")}.
		#' @param by_ID default NULL; a column of sample_table used to perform paired t test or paired wilcox test for the paired data,
		#'   such as the data of plant compartments for different plant species (ID). 
		#'   So \code{by_ID} in sample_table should be the smallest unit of sample collection without any repetition in it.
		#'   Same with the \code{by_ID} parameter in trans_alpha class.
		#' @param beta_pseudo default .Machine$double.eps; the pseudo value used when the parameter \code{method} is \code{'betareg'} or \code{'glmm_beta'}.
		#'   As the beta distribution function limits 0 < response value < 1, a pseudo value will be added for the data that equal to 0.
		#'   The data that equal to 1 will be replaced by \code{1/(1 + beta_pseudo)}.
		#' @param ... parameters passed to \code{cal_diff} function of \code{trans_alpha} class when method is one of 
		#' 	 "KW", "KW_dunn", "wilcox", "t.test", "anova", "betareg", "lme", "glmm" or "glmm_beta";
		#' 	 passed to \code{randomForest::randomForest} function when method = "rf";
		#' 	 passed to \code{ANCOMBC::ancombc2} function when method is "ancombc2" (except tax_level, global and fix_formula parameters);
		#' 	 passed to \code{ALDEx2::aldex} function when method = "ALDEx2_t" or "ALDEx2_kw";
		#' 	 passed to \code{DESeq2::DESeq} function when method = "DESeq2";
		#' 	 passed to \code{MicrobiomeStat::linda} function when method = "linda";
		#' 	 passed to \code{trans_env$cal_cor} function when method = "maaslin2".
		#' @return res_diff and res_abund.\cr
		#'   \strong{res_abund} includes mean abundance of each taxa (Mean), standard deviation (SD), standard error (SE) and sample number (N) in the group (Group).\cr
		#'   \strong{res_diff} is the detailed differential test result depending on the method choice, may containing:\cr
		#'     \strong{"Comparison"}: The groups for the comparision, maybe all groups or paired groups. If this column is not found, all groups are used;\cr
		#'     \strong{"Group"}: Which group has the maximum median or mean value across the test groups; 
		#'        For non-parametric methods, median value; For t.test, mean value;\cr
		#'     \strong{"Taxa"}: which taxa is used in this comparision;\cr
		#'     \strong{"Method"}: Test method used in the analysis depending on the method input;\cr
		#'     \strong{"LDA" or others}: LDA: linear discriminant score in LEfSe; 
		#'     	  MeanDecreaseAccuracy and MeanDecreaseGini: mean decreasing in accuracy or in node impurity (gini index) in random forest;\cr
		#'     \strong{"P.unadj"}: original p value;\cr
		#'     \strong{"P.adj"}: adjusted p value;\cr
		#'     \strong{"Estimate" and "Std.Error"}: When method is "betareg", "lm", "lme" or "glmm", 
		#'        "Estimate" and "Std.Error" represent fitted coefficient and its standard error, respectively;\cr
		#'     \strong{Others}: qvalue: qvalue in metastat analysis.
		#' @examples
		#' \donttest{
		#' data(dataset)
		#' t1 <- trans_diff$new(dataset = dataset, method = "lefse", group = "Group")
		#' t1 <- trans_diff$new(dataset = dataset, method = "rf", group = "Group")
		#' t1 <- trans_diff$new(dataset = dataset, method = "metastat", group = "Group", taxa_level = "Genus")
		#' t1 <- trans_diff$new(dataset = dataset, method = "wilcox", group = "Group")
		#' }
		initialize = function(
			dataset = NULL,
			method = c("lefse", "rf", "metastat", "metagenomeSeq", "KW", "KW_dunn", "wilcox", "t.test", "anova", "scheirerRayHare", "lm",
				"ancombc2", "ALDEx2_t", "ALDEx2_kw", "DESeq2", "edgeR", "linda", "maaslin2", "betareg", "lme", "glmm", "glmm_beta")[1],
			group = NULL,
			taxa_level = "all",
			filter_thres = 0,
			alpha = 0.05,
			p_adjust_method = "fdr",
			transformation = NULL,
			remove_unknown = TRUE,
			lefse_subgroup = NULL,
			lefse_min_subsam = 10,
			lefse_sub_strict = FALSE,
			lefse_sub_alpha = NULL,
			lefse_norm = 1000000,
			nresam = 0.6667,
			boots = 30,
			rf_imp_type = 2,
			group_choose_paired = NULL,
			metagenomeSeq_count = 1,
			ALDEx2_sig = c("wi.eBH", "kw.eBH"),
			by_group = NULL,
			by_ID = NULL,
			beta_pseudo = .Machine$double.eps,
			...
			){
			if(is.null(p_adjust_method)){
				message("Redefine p_adjust_method = ", sQuote("fdr"), " instead of NULL. To disable p value adjustment, please set p_adjust_method = ", sQuote("none"), " ...")
				p_adjust_method <- "fdr"
			}
			if(is.null(dataset)){
				self$method <- NULL
				message("Input dataset is NULL. Please run the functions with customized data ...")
			}else{
				method <- match.arg(method, c("lefse", "rf", "metastat", "metagenomeSeq", "KW", "KW_dunn", "wilcox", "t.test", 
					"anova", "scheirerRayHare", "lm", "ancombc2", "ALDEx2_t", "ALDEx2_kw", "DESeq2", "edgeR", "linda", "maaslin2", "betareg", "lme", "glmm", "glmm_beta"))

				tmp_dataset <- clone(dataset)
				sampleinfo <- tmp_dataset$sample_table
				if(is.null(group) & ! method %in% c("anova", "scheirerRayHare", "lm", "betareg", "lme", "glmm", "glmm_beta", "maaslin2", "ancombc2", "linda", "DESeq2")){
					stop("The group parameter is necessary for differential test method: ", method, " !")
				}
				if(!is.null(group)){
					if(! method %in% c("linda", "DESeq2")){
						if(! group %in% colnames(sampleinfo)){
							stop("Please provide a correct colname of sample_table for group parameter!")
						}
						if(length(group) > 1){
							stop("Please provide only one colname of sample_table for group parameter!")
						}
					}
					if(group %in% colnames(sampleinfo)){
						if(is.factor(sampleinfo[, group])){
							self$group_order <- levels(sampleinfo[, group])
							sampleinfo[, group] %<>% as.character
						}else{
							self$group_order <- unique(as.character(sampleinfo[, group]))
						}
					}					
				}
				check_taxa_abund(tmp_dataset)
				
				if(method == "lefse"){
					if(lefse_norm > 0){
						# ensure normalization is performed
						tmp_dataset$taxa_abund %<>% lapply(function(x){
							if(nrow(x) == 1){
								x[1, ] <- 1
								x
							}else{
								as.data.frame(apply(x, 2, function(y){y/sum(y)}))
							}
						})
					}
					# check abnormal characters
					first_check <- unlist(lapply(tmp_dataset$taxa_abund, function(x){any(grepl("\t|\n", rownames(x)))}))
					if(any(first_check)){
						check_which <- names(tmp_dataset$taxa_abund)[first_check]
						stop("There are abnormal characters (\\t or \\n) in the row names of abundance table at level: ", paste0(check_which, collapse = " "), 
							" ! Please fix it in tax_table and rerun cal_abund function in microtable object!")
					}
				}
				
				if(method %in% c("metastat", "metagenomeSeq", "DESeq2", "edgeR", "ALDEx2_t", "ALDEx2_kw", "ancombc2", "linda", "maaslin2")){
					if(taxa_level == "all"){
						message("The taxa_level parameter cannot be 'all' for the current method! Automatically change it to 'ASV' ...")
						taxa_level <- "ASV"
					}
				}
				if(grepl("all", taxa_level, ignore.case = TRUE)){
					abund_table <- do.call(rbind, unname(tmp_dataset$taxa_abund))
				}else{
					if(! taxa_level %in% names(tmp_dataset$taxa_abund)){
						if(! taxa_level %in% colnames(tmp_dataset$tax_table)){
							message("Provided taxa_level: ", taxa_level, " not in tax_table of input data set; use features in otu_table ...")
							tmp_dataset$add_rownames2taxonomy(use_name = taxa_level)
						}
						suppressMessages(tmp_dataset$cal_abund(rel = TRUE))
					}
					abund_table <- tmp_dataset$taxa_abund[[taxa_level]]
				}
				
				message(nrow(abund_table), " input features ...")
				filter_output <- filter_lowabund_feature(abund_table = abund_table, filter_thres = filter_thres)
				abund_table <- filter_output$abund_table
				filter_features <- filter_output$filter_features
				
				if(method %in% c("lefse", "rf", "KW", "KW_dunn", "wilcox", "t.test", "anova", "scheirerRayHare", "lm", "betareg", "lme", "glmm", "glmm_beta")){
					if(remove_unknown){
						abund_table %<>% {.[!grepl("__$|uncultured$|Incertae..edis$|_sp$", rownames(.), ignore.case = TRUE), ]}
						message(nrow(abund_table), " features are remained after removing unknown features ...")
						if(nrow(abund_table) == 0){
							stop("No available feature! Please set the parameter: remove_unknown = FALSE")
						}
					}
					if(method == "lefse"){
						if(lefse_norm > 0){
							abund_table %<>% {. * lefse_norm}
							self$lefse_norm <- lefse_norm
						}
					}
					
					if(method %in% c("KW", "KW_dunn", "wilcox", "t.test", "anova", "scheirerRayHare", "lm", "betareg", "lme", "glmm", "glmm_beta")){
						abund_foralpha <- abund_table
						if(!is.null(transformation)){
							message("Perform data transformation with method: ", transformation, " ...")
							tmp <- suppressMessages(trans_norm$new(as.data.frame(t(abund_foralpha))))
							abund_foralpha <- tmp$norm(method = transformation) %>% t %>% as.data.frame
						}
						if(method == "KW_dunn"){
							# filter taxa with 1 across all samples
							abund_foralpha %<>% .[apply(., 1, function(x){length(unique(x)) != 1}), ]
						}
						if(method %in% c("betareg", "glmm_beta")){
							if(any(abund_foralpha > 1)){
								abund_foralpha %<>% {./(max(.) + beta_pseudo)}
							}
							if(any(abund_foralpha == 0)){
								abund_foralpha[abund_foralpha == 0] <- beta_pseudo
							}
							if(any(abund_foralpha == 1)){
								abund_foralpha[abund_foralpha == 1] <- 1/(1 + beta_pseudo)
							}
						}
						tem_data <- clone(tmp_dataset)
						tem_data$alpha_diversity <- as.data.frame(t(abund_foralpha))
						tem_data1 <- suppressMessages(trans_alpha$new(dataset = tem_data, group = group, by_group = by_group, by_ID = by_ID))
						tem_data1$cal_diff(method = method, p_adjust_method = p_adjust_method, alpha = alpha, ...)
						output <- tem_data1$res_diff
						if(!is.null(tem_data1$res_model)){
							self$res_model <- tem_data1$res_model
							message("The original ", method, " models list is stored in object$res_model ...")
						}
						colnames(output)[colnames(output) == "Measure"] <- "Taxa"
						method <- tem_data1$cal_diff_method
						if("Letter" %in% colnames(output)){
							output <- cbind.data.frame(output, Significance = output$Letter)
						}
					}
				}
				
				if(method %in% c("lefse", "rf")){
					group_vec <- sampleinfo[, group] %>% as.factor
					comparisions <- paste0(levels(group_vec), collapse = " - ")
					message("Start Kruskal-Wallis rank sum test for ", group, " ...")
					res_class <- suppressWarnings(lapply(seq_len(nrow(abund_table)), function(x) private$test_mark(abund_table[x, ], group_vec, method = "kruskal.test")))
					
					pvalue_raw <- unlist(lapply(res_class, function(x) x$p_value))
					names(pvalue_raw) <- rownames(abund_table)
					pvalue_raw[is.nan(pvalue_raw)] <- 1
					message(sum(pvalue_raw < alpha), " taxa found significant ...")

					pvalue <- p.adjust(pvalue_raw, method = p_adjust_method)
					sel_taxa <- pvalue < alpha
					message("After P value adjustment, ", sum(sel_taxa), " taxa found significant ...")
					private$check_taxa_number(sel_taxa, p_adjust_method)
					abund_table_sub <- abund_table[sel_taxa, ]
					pvalue_sub <- pvalue[sel_taxa]
					class_taxa_median_sub <- lapply(res_class, function(x) x$med) %>% do.call(cbind, .) %>% .[, sel_taxa]
				}
				if(method == "rf"){
					# change the name in case of additional problem from the taxonomic names
					nametable <- cbind.data.frame(name = rownames(abund_table_sub), repl = paste0("t", 1:nrow(abund_table_sub)), stringsAsFactors = FALSE)
					rownames(nametable) <- nametable$repl
					predictors <- t(abund_table_sub)
					colnames(predictors) <- nametable$repl
					res <- NULL
					if(all(table(as.character(sampleinfo[, group])) < 4)){
						if(nresam < 1){
							message("The sample number in all your group less than 4, automatically set nresam = 1 for random forest analysis !")
							nresam <- 1
						}
					}
					for(num in seq_len(boots)){
						# resampling
						sample_names_resample <- rownames(predictors)[base::sample(1:nrow(predictors), size = ceiling(nrow(predictors) * nresam))]
						predictors_sub <- predictors[sample_names_resample, ]
						sampleinfo_resample <- sampleinfo[sample_names_resample, , drop = FALSE]
						# make sure the groups and samples numbers right
						if(length(unique(sampleinfo_resample[, group])) != length(unique(sampleinfo[, group])) | min(table(sampleinfo_resample[, group])) < 2){
							next
						}
						rf_data <- data.frame(response = as.factor(sampleinfo_resample[, group]), predictors_sub, stringsAsFactors = FALSE)
						tem_classify <- randomForest::randomForest(response~., data = rf_data, importance = TRUE, ...)
						# use importance to evaluate
						imp <- randomForest::importance(tem_classify, type = rf_imp_type)
						colnames(imp)[1] <- num
						if(is.null(res)){
							res <- imp
						}else{
							res <- cbind(res, imp[rownames(res), , drop = FALSE])
						}
					}
					res <- apply(res, 1, mean) %>% as.data.frame
					use_method <- ifelse(length(levels(group_vec)) > 2, "Kruskal-Wallis rank sum test & Random Forest", "Wilcoxon rank sum test & Random Forest")
					Taxa_name <- nametable[rownames(res), "name"]
					res <- data.frame(
						Comparison = comparisions,
						Taxa = Taxa_name,
						Group = apply(class_taxa_median_sub, 2, function(x) rownames(class_taxa_median_sub)[which.max(x)])[Taxa_name], 
						Method = use_method,
						Metric = res[, 1], 
						stringsAsFactors = FALSE)
					output <- dplyr::arrange(res, dplyr::desc(Metric))
					if(rf_imp_type == 1){
						colnames(output)[colnames(output) == "Metric"] <- "MeanDecreaseAccuracy"
					}else{
						colnames(output)[colnames(output) == "Metric"] <- "MeanDecreaseGini"
					}
					rownames(output) <- output$Taxa
					output$P.unadj <- pvalue_raw[as.character(output$Taxa)]
					output$P.adj <- pvalue_sub[as.character(output$Taxa)]
					output$Significance <- generate_p_siglabel(output$P.adj, nonsig = "ns")
				}
				if(method == "lefse"){
					all_class_pairs <- combn(unique(as.character(group_vec)), 2)
					# check the difference among subgroups
					if(!is.null(lefse_subgroup)){
						message("Start lefse subgroup biomarkers check for ", lefse_subgroup, " ...")
						all_sub_number <- as.data.table(sampleinfo)[, .N, by = c(group, lefse_subgroup)] %>% as.data.frame %>% .$N
						if(all(all_sub_number < lefse_min_subsam)){
							stop("All sample numbers for subgroups < ", lefse_min_subsam, "! Please consider using small lefse_min_subsam parameter!")
						}
						remove_list_total <- list()
						# for each group paires
						for(i in seq_len(ncol(all_class_pairs))){
							y1 <- all_class_pairs[, i]
							y1_sub_pairs <- expand.grid(
								unique(sampleinfo[sampleinfo[, group] == y1[1], lefse_subgroup]), 
								unique(sampleinfo[sampleinfo[, group] == y1[2], lefse_subgroup]), 
								stringsAsFactors = FALSE
								) %>% t
							y1_sub_pairs <- y1_sub_pairs[, unlist(lapply(1:ncol(y1_sub_pairs), function(x){
								ifelse(any(c(sum(sampleinfo[, group] == y1[1] & sampleinfo[, lefse_subgroup] == y1_sub_pairs[1, x]) < lefse_min_subsam, 
									sum(sampleinfo[, group] == y1[2] & sampleinfo[, lefse_subgroup] == y1_sub_pairs[2, x]) < lefse_min_subsam)), FALSE, TRUE)
							})), drop = FALSE]
							if(ncol(y1_sub_pairs) == 0) next
							res_sub_total <- list()
							# check each subgroup pairs under fixed group pair condition
							for(j in 1:ncol(y1_sub_pairs)){
								y2 <- y1_sub_pairs[, j]
								abund_table_sub_y2 <- abund_table_sub[, c(
									rownames(sampleinfo[sampleinfo[, group] == y1[1] & sampleinfo[, lefse_subgroup] == y2[1], ]), 
									rownames(sampleinfo[sampleinfo[, group] == y1[2] & sampleinfo[, lefse_subgroup] == y2[2], ])
									)]
								group_vec_sub2 <- c(sampleinfo[sampleinfo[, group] == y1[1] & sampleinfo[, lefse_subgroup] == y2[1], group], 
									sampleinfo[sampleinfo[, group] == y1[2] & sampleinfo[, lefse_subgroup] == y2[2], group])
								res_sub <- lapply(seq_len(nrow(abund_table_sub_y2)), function(x) private$test_mark(abund_table_sub_y2[x,], group_vec_sub2))
								res_sub_total[[j]] <- res_sub
							}
							raw_median <- class_taxa_median_sub[y1, ] %>% 
								{.[1, ] > .[2, ]} %>% 
								as.vector
							check_median_sub <- sapply(res_sub_total, function(x) unlist(lapply(x, function(y) {y$med[y1, 1] %>% {.[1] > .[2]}}))) %>% as.data.frame
							check_median_sub[] <- lapply(check_median_sub, function(x) x == raw_median)
							check_p_sub <- sapply(res_sub_total, function(x) unlist(lapply(x, function(y) y$p_value))) %>% as.data.frame
							if(is.null(lefse_sub_alpha)){
								lefse_sub_alpha <- alpha
							}else{
								if(!is.numeric(lefse_sub_alpha)){
									stop("Please provide numeric value for lefse_sub_alpha parameter!")
								}
							}
							if(lefse_sub_strict){
								remove_list <- unlist(lapply(seq_len(nrow(check_median_sub)), function(x){
									if(all(unlist(check_p_sub[x, ]) < lefse_sub_alpha)){
										if(all(unlist(check_median_sub[x, ]))){
											FALSE
										}else{
											TRUE
										}
									}else{
										TRUE
									}
								}))
							}else{
								remove_list <- unlist(lapply(seq_len(nrow(check_median_sub)), function(x){
									if(all(unlist(check_median_sub[x, ]))){
										FALSE
									}else{
										if(any(check_p_sub[x, !unlist(check_median_sub[x, ])] < lefse_sub_alpha)){
											TRUE
										}else{
											FALSE
										}
									}
								}))
							}
							remove_list_total[[i]] <- remove_list
						}
						if(!identical(remove_list_total, list())){
							remove_list_total %<>% do.call(cbind, .) %>% apply(., 1, any)
							message("Remove ", sum(remove_list_total), " biomarkers after subgroup check ...")
							abund_table_sub %<>% .[!remove_list_total, ]
							if(nrow(abund_table_sub) == 0){
								stop("No biomarkers remained after subgroup check! stop running!")
							}
							pvalue_sub %<>% .[!remove_list_total]
							class_taxa_median_sub %<>% .[, !remove_list_total]
						}
					}
					res_lda <- list()
					for(num in seq_len(boots)){
						res_lda_pair <- list()
						sample_names_resample <- colnames(abund_table_sub)[base::sample(1:ncol(abund_table_sub), size = ceiling(ncol(abund_table_sub) * nresam))]
						abund_table_sub_resample <- abund_table_sub[, sample_names_resample]
						sampleinfo_resample <- sampleinfo[sample_names_resample, , drop = FALSE]
						# make sure the groups and samples number available
						if(sum(table(as.character(sampleinfo_resample[, group])) > 1) < 2){
							res_lda[[num]] <- NA
							next
						}
						for(i in seq_len(ncol(all_class_pairs))){
							sel_samples <- sampleinfo_resample[, group] %in% all_class_pairs[, i]
							if(length(table(as.character(sampleinfo_resample[sel_samples, group]))) < 2){
								res_lda_pair[[i]] <- NA
								next
							}
							if(min(table(as.character(sampleinfo_resample[sel_samples, group]))) < 2){
								res_lda_pair[[i]] <- NA
								next
							}
							group_vec_lda <- sampleinfo_resample[sel_samples, group] %>% as.character %>% as.factor
							abund_table_sub_lda <- abund_table_sub_resample[, sel_samples]
							abund_table_sub_lda %<>% .[apply(., 1, sd) > 1.0e-10, ]
							if(is.null(lefse_subgroup)){
								abund1 <- cbind.data.frame(t(abund_table_sub_lda), Group = group_vec_lda)
							}else{
								subgroup_vec <- sampleinfo_resample[sel_samples, lefse_subgroup] %>% as.character %>% as.factor
								# consider subgroup as an independent variable
								abund1 <- cbind.data.frame(t(abund_table_sub_lda), Group = group_vec_lda, lefse_subgroup = subgroup_vec)
							}
							check_res <- tryCatch(mod1 <- MASS::lda(Group ~ ., abund1, tol = 1.0e-10), error = function(e) { skip_to_next <- TRUE})
							if(rlang::is_true(check_res)) {
								res_lda_pair[[i]] <- NA
								next
							}else{
								w <- mod1$scaling[,1]
								if(is.null(names(w))){
									names(w) <- rownames(mod1$scaling)
								}
								w_unit <- w/sqrt(sum(w^2))
								w_unit %<>% {.[!grepl("lefse_subgroup", names(.))]}
								ss <- abund1[, !colnames(abund1) %in% c("Group", "lefse_subgroup")]
								xy_matrix <- as.matrix(ss)
								LD <- xy_matrix %*% w_unit
								effect_size <- tapply(LD, group_vec_lda, mean) %>% as.vector %>% {.[1] - .[2]} %>% abs
								coeff <- abs(w_unit * effect_size)
								coeff[is.nan(coeff)] <- 0
								names(coeff) %<>% gsub("^`|`$", "", .)
								rres <- mod1$means %>% as.data.frame
								colnames(rres) %<>% gsub("^`|`$", "", .)
								rres <- rres[, rownames(abund_table_sub_lda), drop = FALSE]
								rres1 <- apply(rres, 2, function(x) abs(x[1] - x[2]))
								res_lda_pair[[i]] <- (rres1 + coeff[names(rres1)]) *0.5
							}
						}
						res_lda[[num]] <- res_lda_pair
					}
					res <- sapply(rownames(abund_table_sub), function(k){
						unlist(lapply(seq_len(ncol(all_class_pairs)), function(p){
							unlist(lapply(res_lda, function(x){ x[[p]][k]})) %>% .[!is.na(.)] %>% mean
						})) %>% 
						.[!is.na(.)] %>% 
						.[!is.nan(.)] %>% 
						max
					})
					res <- sapply(res, function(x) {log10(1 + abs(x)) * ifelse(x > 0, 1, -1)})
					output <- cbind.data.frame(Group = apply(class_taxa_median_sub, 2, function(x) rownames(class_taxa_median_sub)[which.max(x)]), 
						LDA = res,
						P.unadj = pvalue_raw[names(pvalue_sub)],
						P.adj = pvalue_sub)
					output %<>% .[order(.$LDA, decreasing = TRUE), ]
					output <- cbind.data.frame(Comparison = comparisions, Taxa = rownames(output), Method = "LEfSe", output)
					message("Minimum LDA score: ", range(output$LDA)[1], " maximum LDA score: ", range(output$LDA)[2])
					output$Significance <- generate_p_siglabel(output$P.adj, nonsig = "ns")
				}
				if(method %in% c("metastat", "metagenomeSeq", "ALDEx2_t", "edgeR")){
					tmp_dataset$sample_table %<>% dropallfactors
					if(is.null(group_choose_paired)){
						all_name <- combn(unique(as.character(sampleinfo[, group])), 2)
					}else{
						all_name <- combn(unique(group_choose_paired), 2)
					}
					message("Total ", ncol(all_name), " paired group for test ...")
					output <- data.frame()
				}
				if(method == "metastat"){
					ranknumber <- which(colnames(tmp_dataset$tax_table) %in% taxa_level)
					abund <- tmp_dataset$otu_table
					tax <- tmp_dataset$tax_table[, 1:ranknumber, drop=FALSE]
					merged_taxonomy <- apply(tax, 1, paste, collapse="|")
					abund1 <- cbind.data.frame(Display = merged_taxonomy, abund) %>% 
						reshape2::melt(id.var = "Display", value.name= "Abundance", variable.name = "Sample")
					abund1 <- data.table(abund1)[, sum_abund:=sum(Abundance), by=list(Display, Sample)] %>% 
						.[, c("Abundance"):=NULL] %>% 
						setkey(Display, Sample) %>% 
						unique() %>% 
						as.data.frame()
					new_abund <- as.data.frame(data.table::dcast(data.table(abund1), Display~Sample, value.var= list("sum_abund"))) %>% 
						`row.names<-`(.[,1]) %>% 
						.[,-1, drop = FALSE]
					new_abund <- new_abund[order(apply(new_abund, 1, mean), decreasing = TRUE), rownames(sampleinfo), drop = FALSE]

					for(i in 1:ncol(all_name)) {
						message(paste0("Run ", i, " : ", paste0(as.character(all_name[,i]), collapse = " - "), " ..."))
						use_data <- new_abund[ , unlist(lapply(as.character(all_name[,i]), function(x) which(as.character(sampleinfo[, group]) %in% x)))]
						use_data %<>% .[!grepl("__$", rownames(.)), ]
						use_data <- use_data[apply(use_data, 1, sum) != 0, ]
						g <- sum(as.character(sampleinfo[, group]) == as.character(all_name[1, i])) + 1
						res <- private$calculate_metastat(inputdata = use_data, g = g)
						add_name <- paste0(as.character(all_name[, i]), collapse = " - ") %>% rep(., nrow(res))
						res <- cbind.data.frame(compare = add_name, res)
						output <- rbind.data.frame(output, res)
					}
					output %<>% dropallfactors(unfac2num = TRUE)
					colnames(output)[1:2] <- c("Comparison", "Taxa")
					max_group <- lapply(seq_len(nrow(output)), function(x){
						group_select <- unlist(strsplit(output[x, "Comparison"], split = " - "))
						ifelse(output[x, "mean(group1)"] > output[x, "mean(group2)"], group_select[1], group_select[2])
					}) %>% unlist
					output$Significance <- generate_p_siglabel(output$qvalue, nonsig = "ns")
					output <- data.frame(output, Group = max_group)
				}
				if(method == "metagenomeSeq"){
					if(!require("metagenomeSeq")){
						stop("metagenomeSeq package not installed !")
					}
					for(i in 1:ncol(all_name)) {
						message(paste0("Run ", i, " : ", paste0(as.character(all_name[, i]), collapse = " - "), " ...\n"))
						use_dataset <- clone(tmp_dataset)
						use_dataset$sample_table %<>% .[.[, group] %in% as.character(all_name[, i]), , drop = FALSE]
						newdata <- private$generate_microtable_unrel(use_dataset, taxa_level, filter_thres, filter_features)
						obj <- newMRexperiment(
							newdata$otu_table, 
							phenoData= AnnotatedDataFrame(newdata$sample_table)
	#						featureData = AnnotatedDataFrame(use_dataset$tax_table)
							)
						## Normalization and Statistical testing
						obj_1 <- cumNorm(obj)
						pd <- pData(obj)
						if(group != "Group"){
							if("Group" %in% colnames(pd)){
								pd <- pd[, colnames(pd) != "Group", drop = FALSE]
							}
							colnames(pd)[which(colnames(pd) == group)] <- "Group"
						}
						mod <- model.matrix(~1 + Group, data = pd)
						objres1 <- fitFeatureModel(obj_1, mod)
						tb <- data.frame(logFC = objres1@fitZeroLogNormal$logFC, se = objres1@fitZeroLogNormal$se)
						p <- objres1@pvalues
						if(p_adjust_method == "ihw-ubiquity" | p_adjust_method == "ihw-abundance"){
							padj <- MRihw(objres1, p, p_adjust_method, 0.1)
						}else{
							padj <- p.adjust(p, method = p_adjust_method)
						}
						srt <- order(p, decreasing = FALSE)
						valid <- 1:length(padj)
						if (metagenomeSeq_count > 0) {
							np <- rowSums(objres1@counts)
							valid <- intersect(valid, which(np >= metagenomeSeq_count))
						}
						srt <- srt[which(srt %in% valid)]
						res <- cbind(tb[, 1:2], p)
						res <- cbind(res, padj)
						res <- as.data.frame(res[srt, ])
						colnames(res) <- c(colnames(tb)[1:2], "P.unadj", "P.adj")
						res <- cbind.data.frame(Taxa = rownames(res), res)
						rownames(res) <- NULL
						add_name <- paste0(as.character(all_name[, i]), collapse = " - ") %>% rep(., nrow(res))
						res <- cbind.data.frame(Comparison = add_name, res)
						output <- rbind.data.frame(output, res)
					}
				}
				if(method == "DESeq2"){
					if(!require("DESeq2")){
						stop("DESeq2 package is not installed !")
					}
					use_dataset <- clone(tmp_dataset)
					newdata <- private$generate_microtable_unrel(use_dataset, taxa_level, filter_thres, filter_features)
					if(is.null(group)){
						all_parameters <- c(as.list(environment()), list(...))
						if(! "formula" %in% names(all_parameters)){
							stop("Either group or formula parameter should be provided!")
						}
						group <- all_parameters[["formula"]]
					}
					if(!grepl("^~", group)){
						group <- paste0("~", group)
					}
					use_formula <- stats::as.formula(group)
					deseq_obj <- DESeqDataSetFromMatrix(
						countData = newdata$otu_table,
						colData = newdata$sample_table,
						design = use_formula
						)
					res_deseq <- DESeq(deseq_obj, ...)
					self$res_diff_raw <- res_deseq
					message('Original result is stored in object$res_diff_raw ...')
					message('Merge the output tables ...')
					group %<>% gsub("^~", "", .)
					if(grepl("+", group, fixed = TRUE)){
						method <- paste0("DESeq formula: ", group)
					}
					all_groups <- strsplit(group, split = "+", fixed = TRUE) %>% unlist %>% gsub("^\\s+|\\s+$", "", .)
					output <- data.frame()
					res_names <- resultsNames(res_deseq) %>% .[. != "Intercept"]
					for(i in res_names){
						res <- results(res_deseq, name = i)
						res <- data.frame(res)
						if(grepl("_vs_", i, fixed = TRUE)){
							num <- lapply(all_groups, function(j){pmatch(j, i)}) %>% unlist %>% {!is.na(.)} %>% which
							i <- gsub(paste0(all_groups[num], "_"), "", i, fixed = TRUE)
							res$Comparison <- gsub("_vs_", " - ", i, fixed = TRUE)
						}else{
							res$Comparison <- i
						}
						res$Taxa <- rownames(res)
						rownames(res) <- NULL
						output <- rbind.data.frame(output, res)
					}
					colnames(output)[colnames(output) == "padj"] <- "P.adj"
					colnames(output)[colnames(output) == "pvalue"] <- "P.unadj"					
					output <- output[, c("Comparison", "Taxa", colnames(output)[1:(ncol(output) - 2)])]
				}
				if(method == "edgeR"){
					if(!require("edgeR")){
						stop("edgeR package is not installed!")
					}
					for(i in 1:ncol(all_name)) {
						message(paste0("Run ", i, " : ", paste0(as.character(all_name[,i]), collapse = " - "), " ...\n"))
						use_dataset <- clone(tmp_dataset)
						use_dataset$sample_table %<>% .[.[, group] %in% as.character(all_name[,i]), , drop = FALSE]
						newdata <- private$generate_microtable_unrel(use_dataset, taxa_level, filter_thres, filter_features)
						y <- DGEList(counts = newdata$otu_table, group = newdata$sample_table[, group])
						z <- calcNormFactors(y)
						z <- estimateTagwiseDisp(estimateCommonDisp(z))
						res_raw <- exactTest(z)
						st <- topTags(res_raw, n = nrow(newdata$otu_table), adjust.method = p_adjust_method, sort.by = "PValue")
						res <- st@.Data[[1]]
						colnames(res)[colnames(res) == "PValue"] <- "P.unadj"
						colnames(res)[colnames(res) == "FDR"] <- "P.adj"
						res <- cbind.data.frame(Taxa = rownames(res), res)
						rownames(res) <- NULL
						add_name <- paste0(as.character(all_name[, i]), collapse = " - ") %>% rep(., nrow(res))
						res <- cbind.data.frame(Comparison = add_name, res)
						output <- rbind.data.frame(output, res)
					}
				}
				if(method %in% c("ALDEx2_t", "ALDEx2_kw")){
					if(!require("ALDEx2")){
						stop("ALDEx2 package is not installed !")
					}
				}
				if(method == "ALDEx2_t"){
					for(i in 1:ncol(all_name)) {
						message(paste0("Run ", i, " : ", paste0(as.character(all_name[,i]), collapse = " - "), " ...\n"))
						use_dataset <- clone(tmp_dataset)
						use_dataset$sample_table %<>% .[.[, group] %in% as.character(all_name[,i]), , drop = FALSE]
						newdata <- private$generate_microtable_unrel(use_dataset, taxa_level, filter_thres, filter_features)
						res_raw <- ALDEx2::aldex(newdata$otu_table, newdata$sample_table[, group], test = "t", ...)
						res <- cbind.data.frame(Taxa = rownames(res_raw), res_raw)
						rownames(res) <- NULL
						add_name <- paste0(as.character(all_name[, i]), collapse = " - ") %>% rep(., nrow(res))
						res <- cbind.data.frame(Comparison = add_name, res) %>%
							.[, !grepl("^rab\\.", colnames(.))]
						output <- rbind.data.frame(output, res)
					}
				}
				if(method == "ALDEx2_kw"){
					use_dataset <- clone(tmp_dataset)
					use_dataset$sample_table %<>% dropallfactors
					newdata <- private$generate_microtable_unrel(use_dataset, taxa_level, filter_thres, filter_features)
					res_raw <- ALDEx2::aldex(newdata$otu_table, newdata$sample_table[, group], test = "kw", ...)
					res <- cbind.data.frame(Taxa = rownames(res_raw), res_raw)
					comparisions <- paste0(unique(as.character(use_dataset$sample_table[, group])), collapse = " - ")
					output <- cbind.data.frame(Comparison = comparisions, res)
				}
				if(method %in% c("ALDEx2_t", "ALDEx2_kw")){
					if(! any(colnames(output) %in% ALDEx2_sig)){
						stop("ALDEx2_sig is not found in the result! Please check the input!")
					}else{
						output$P.adj <- output[, ALDEx2_sig %>% .[. %in% colnames(output)] %>% .[1]]
					}
				}
				if(method == "ancombc2"){
					if(!require("ANCOMBC")){
						stop("ANCOMBC package is not installed !")
					}
					if(!require("file2meco")){
						stop("Please install file2meco package! The function meco2phyloseq is required!")
					}
					use_dataset <- clone(tmp_dataset)
					newdata <- private$generate_microtable_unrel(use_dataset, taxa_level, filter_thres, filter_features)
					newdata <- file2meco::meco2phyloseq(newdata)
					# check fix_formula parameter
					all_parameters <- c(as.list(environment()), list(...))
					if("fix_formula" %in% names(all_parameters)){
						res_raw <- ANCOMBC::ancombc2(newdata, tax_level = NULL, assay_name = "counts", group = group, ...)
					}else{
						res_raw <- ANCOMBC::ancombc2(newdata, tax_level = NULL, assay_name = "counts", group = group, fix_formula = group, ...)
					}
					self$res_diff_raw <- res_raw
					tmp <- res_raw$res
					if(is.null(tmp)){
						stop("The res in the ancombc2 results is NULL!")
					}
					message('Converting res to long format ...')
					res_convert <- data.frame()
					for(i in seq_len(nrow(tmp))){
						taxon_data <- tmp[i, ]
						raw_colnames <- colnames(taxon_data)
						all_factors <- raw_colnames %>% .[grepl("lfc_", .)] %>% gsub("lfc_", "", .)
						res_convert <- rbind(res_convert, data.frame(Taxa = tmp[i, "taxon"], 
							Factors = all_factors, 
							lfc = taxon_data %>% .[, grepl("^lfc_", raw_colnames)] %>% unlist,
							se = taxon_data %>% .[, grepl("^se_", raw_colnames)] %>% unlist,
							W = taxon_data %>% .[, grepl("^W_", raw_colnames)] %>% unlist,
							p = taxon_data %>% .[, grepl("^p_", raw_colnames)] %>% unlist,
							P.adj = taxon_data %>% .[, grepl("^q_", raw_colnames)] %>% unlist,
							diff = taxon_data %>% .[, grepl("^diff_", raw_colnames)] %>% unlist,
							passed_ss = taxon_data %>% .[, grepl("^passed_ss_", raw_colnames)] %>% unlist
							)
						)
					}
					output <- res_convert
				}
				if(method == "linda"){
					if(!require("MicrobiomeStat")){
						stop("MicrobiomeStat package is not installed!")
					}
					use_dataset <- clone(tmp_dataset)
					newdata <- private$generate_microtable_unrel(use_dataset, taxa_level, filter_thres, filter_features)
					if(is.null(group)){
						all_parameters <- c(as.list(environment()), list(...))
						if(! "formula" %in% names(all_parameters)){
							stop("Either group or formula parameter should be provided!")
						}
						group <- all_parameters[["formula"]]
						if(!grepl("^~", group)){
							stop("The input formula parameter should start with ~! Please read the document of formula parameter of MicrobiomeStat::linda function!")
						}
						res <- MicrobiomeStat::linda(as.matrix(newdata$otu_table), newdata$sample_table, feature.dat.type = 'count', ...)
					}else{
						if(!grepl("^~", group)){
							group <- paste0("~", group)
						}
						res <- MicrobiomeStat::linda(as.matrix(newdata$otu_table), newdata$sample_table, formula = group, feature.dat.type = 'count', ...)
					}
					self$res_diff_raw <- res
					message('Original result is stored in object$res_diff_raw ...')
					message('Merge the output tables ...')
					group %<>% gsub("^~", "", .)
					# different cases
					output <- data.frame()
					if(grepl("+", group, fixed = TRUE) | grepl("*", group, fixed = TRUE)){
						# multi-factor
						for(i in names(res$output)){
							tmp <- res$output[[i]]
							tmp <- data.frame(Taxa = rownames(tmp), Factors = i, tmp)
							output %<>% rbind(., tmp)
						}
						method <- paste0("linda formula: ", group)
					}else{
						# normal case
						if(group %in% colnames(newdata$sample_table)){
							all_groups <- newdata$sample_table[, group] %>% as.character %>% unique
							for(i in names(res$output)){
								tmp <- res$output[[i]]
								tmp_group_element1 <- gsub(group, "", i, fixed = TRUE)
								tmp_group_element2 <- paste0(group, all_groups) %>% .[!. %in% names(res$output)] %>% gsub(group, "", .)
								tmp <- data.frame(Comparison = paste0(tmp_group_element1, " - ", tmp_group_element2), Taxa = rownames(tmp), tmp)
								output %<>% rbind(., tmp)
							}
						}else{
							stop('The group is not found in the column names of sample_table! Skip the merging step!')
						}
					}
					colnames(output)[colnames(output) %in% c("pvalue", "padj")] <- c("P.unadj", "P.adj")
				}
				if(method == "maaslin2"){
					tmp_trans_env <- trans_env$new(dataset = tmp_dataset, env_cols = 1:ncol(tmp_dataset$sample_table))
					tmp_trans_env$cal_cor(use_data = taxa_level, method = method, filter_thres = filter_thres,
						plot_heatmap = FALSE, plot_scatter = FALSE, ...)
					output <- tmp_trans_env$res_cor
					self$res_trans_env <- tmp_trans_env
					message('Raw trans_env object for maaslin2 is stored in object$res_trans_env ...')
				}
				
				# use trans_alpha to get the abundance summary data
				if(grepl("lefse", method, ignore.case = TRUE)){
					if(lefse_norm > 0){
						tmp_abund_alpha <- abund_table_sub/lefse_norm
					}else{
						tmp_abund_alpha <- abund_table_sub
					}
				}else{
					if(grepl("rf", method, ignore.case = TRUE)){
						tmp_abund_alpha <- abund_table_sub
					}else{
						tmp_abund_alpha <- abund_table
					}
				}
				if(!is.null(group)){
					if(group %in% colnames(sampleinfo)){
						tmp_mt <- clone(tmp_dataset)
						tmp_mt$alpha_diversity <- as.data.frame(t(tmp_abund_alpha))
						tmp_alpha <- suppressMessages(trans_alpha$new(dataset = tmp_mt, group = group, by_group = by_group))
						self$res_abund_tmp <- tmp_alpha
						res_abund <- tmp_alpha$data_stat
						colnames(res_abund)[colnames(res_abund) == "Measure"] <- "Taxa"
					}else{
						res_abund <- NULL
					}
				}else{
					res_abund <- NULL
				}
				if(is.null(res_abund)){
					res_abund <- reshape2::melt(rownames_to_column(abund_table, "Taxa"), id.vars = "Taxa")
				}
				self$res_abund <- res_abund
				message('Taxa abundance table is stored in object$res_abund ...')
				
				if(!is.null(output)){
					if("P.adj" %in% colnames(output)){
						if(!"Significance" %in% colnames(output)){
							output$Significance <- generate_p_siglabel(output$P.adj, nonsig = "")
						}
					}
				}
				if(method == "ancombc2"){
					message("Original ancombc2 results are stored in object$res_diff_raw ...")
					if(!is.null(output)){
						message("Converted test result is stored in object$res_diff ...")
					}
					method <- "ancombc2_formula"
				}else{
					message(method , " analysis result is stored in object$res_diff ...")
				}
				if(!is.null(output)){
					if("Factors" %in% colnames(output)){
						output[, "Factors"] %<>% gsub("\\s+$", "", .)
					}
					if(! "Method" %in% colnames(output)){
						output$Method <- method
					}
				}
				self$res_diff <- output
				
				self$method <- method
				self$taxa_level <- taxa_level
				# save abund_table for the cladogram
				self$abund_table <- abund_table
				self$by_group <- by_group
			}
		},
		#' @description
		#' Plot the abundance of taxa. 
		#' 
		#' The significance can be optionally added in the plot. 
		#' The taxa displayed are based on the taxa in the 'res_diff' table, selected using parameters. 
		#' If the user filters out the non-significant taxa from the 'res_diff' table, these taxa will also be filtered from the plot.
		#' 
		#' @param use_number default 1:10; numeric vector; the sequences of taxa (1:n) selected in the plot; 
		#'   If n is larger than the number of total significant taxa, automatically use the total number as n.
		#' @param color_values default \code{RColorBrewer::brewer.pal}(8, "Dark2"); color pallete for groups.
		#' @param select_taxa default NULL; character vector to provide taxa names. 
		#' 	 The taxa names should be same with the names shown in the plot, not the 'Taxa' column names in \code{object$res_diff$Taxa}.
		#' @param simplify_names default TRUE; whether use the simplified taxonomic name.
		#' @param keep_prefix default TRUE; whether retain the taxonomic prefix.
		#' @param group_order default NULL; a vector to order groups, i.e. reorder the legend and colors in plot; 
		#' 	  If NULL, the function can first check whether the group column of sample_table is factor. If yes, use the levels in it.
		#' 	  If provided, overlook the levels in the group of sample_table.
		#' @param order_x_mean default FALSE; whether order the taxa in x axis by the means of abundances from large to small. 
		#' 	  If \code{TRUE}, all other factors in the data will become invalid.
		#' @param coord_flip default TRUE; whether flip cartesian coordinates so that horizontal becomes vertical, and vertical becomes horizontal.
		#' @param add_sig default TRUE; whether add the significance label to the plot.
		#' @param xtext_angle default 45; number (e.g. 45). Angle of text in x axis.
		#' @param xtext_size default 13; x axis text size. NULL means the default size in ggplot2. If \code{coord_flip = TRUE}, it represents the text size of the y axis.
		#' @param ytitle_size default 17; y axis title size. If \code{coord_flip = TRUE}, it represents the title size of the x axis (i.e. "Relative abundance").
		#' @param ... parameters passed to \code{plot_alpha} function of \code{\link{trans_alpha}} class.
		#' @return ggplot.
		#' @examples
		#' \donttest{
		#' t1 <- trans_diff$new(dataset = dataset, method = "anova", group = "Group", taxa_level = "Genus")
		#' t1$plot_diff_abund(use_number = 1:10)
		#' t1$plot_diff_abund(use_number = 1:10, add_sig = TRUE)
		#' t1 <- trans_diff$new(dataset = dataset, method = "wilcox", group = "Group")
		#' t1$plot_diff_abund(use_number = 1:20)
		#' t1$plot_diff_abund(use_number = 1:20, add_sig = TRUE)
		#' t1 <- trans_diff$new(dataset = dataset, method = "lefse", group = "Group")
		#' t1$plot_diff_abund(use_number = 1:20)
		#' t1$plot_diff_abund(use_number = 1:20, add_sig = TRUE)
		#' }
		plot_diff_abund = function(
			use_number = 1:10,
			color_values = RColorBrewer::brewer.pal(8, "Dark2"),
			select_taxa = NULL,
			simplify_names = TRUE,
			keep_prefix = TRUE,
			group_order = NULL,
			order_x_mean = FALSE,
			coord_flip = TRUE,
			add_sig = TRUE,
			xtext_angle = 45,
			xtext_size = 13,
			ytitle_size = 17,
			...
			){
			method <- self$method
			diff_data <- self$res_diff
			
			if(grepl("formula", method)){
				stop("This function can not be applied to multi-factor analysis!")
			}
			if(is.null(self$res_abund_tmp)){
				stop("This function can not be applied for the current method!")
			}
			tmp_transalpha <- clone(self$res_abund_tmp)
			group <- tmp_transalpha$group

			colnames(tmp_transalpha$data_alpha)[colnames(tmp_transalpha$data_alpha) == "Measure"] <- "Taxa"
			tmp_transalpha$by_group <- "Taxa"
			tmp_transalpha$data_alpha$Measure <- "all"
			colnames(tmp_transalpha$data_stat)[colnames(tmp_transalpha$data_stat) == "Measure"] <- "Taxa"
			tmp_transalpha$by_group <- "Taxa"
			tmp_transalpha$data_stat$Measure <- "all"

			# sort and select taxa
			if(method == "metastat"){
				message('Reorder taxa according to qvalue in res_diff from low to high ...')
				diff_data %<>% .[order(.$qvalue, decreasing = FALSE), ]
			}else{
				# lefse and rf are ordered
				if(! method %in% c("lefse", "rf", "anova")){
					if("P.adj" %in% colnames(diff_data)){
						message('Reorder taxa according to P.adj in res_diff from low to high ...')
						diff_data %<>% .[order(.$P.adj, decreasing = FALSE), ]
					}
				}
			}
			if(nrow(diff_data) == 0){
				stop("No significant taxa can be used to plot the abundance!")
			}
			if(simplify_names == T){
				diff_data$Taxa %<>% gsub(".*\\|", "", .)
				tmp_transalpha$data_alpha$Taxa %<>% gsub(".*\\|", "", .)
				tmp_transalpha$data_stat$Taxa %<>% gsub(".*\\|", "", .)
			}
			if(keep_prefix == F){
				diff_data$Taxa %<>% gsub(".__", "", .)
				tmp_transalpha$data_alpha$Taxa %<>% gsub(".__", "", .)
				tmp_transalpha$data_stat$Taxa %<>% gsub(".__", "", .)
			}
			if(is.null(select_taxa)){
				if(length(use_number) > length(unique(as.character(diff_data$Taxa)))){
					message("The length of use_number is larger than taxa number in object$diff_data. Use all taxa ...")
					use_number <- 1:length(unique(as.character(diff_data$Taxa)))
				}
				diff_data %<>% .[.$Taxa %in% unique(as.character(diff_data$Taxa))[use_number], ]
				diff_data$Taxa %<>% factor(., levels = unique(as.character(.)))
			}else{
				message('Use provided select_taxa to filter and reorder taxa ...')
				diff_data %<>% .[.$Taxa %in% select_taxa, ]
				if(nrow(diff_data) == 0){
					stop("No taxa is remained! Please check the input select_taxa parameter!")
				}
				diff_data$Taxa %<>% factor(., levels = select_taxa)
			}

			tmp_transalpha$data_alpha %<>% .[.$Taxa %in% levels(diff_data$Taxa), ]
			tmp_transalpha$data_stat %<>% .[.$Taxa %in% levels(diff_data$Taxa), ]
			tmp_transalpha$data_alpha$Taxa %<>% factor(., levels = levels(diff_data$Taxa))
			tmp_transalpha$data_stat$Taxa %<>% factor(., levels = levels(diff_data$Taxa))
			if(order_x_mean){
				mean_orders <- names(sort(tapply(tmp_transalpha$data_alpha$Value, tmp_transalpha$data_alpha[, "Taxa"], mean), decreasing = TRUE))
				tmp_transalpha$data_alpha[, "Taxa"] %<>% factor(., levels = mean_orders)
				tmp_transalpha$data_stat[, "Taxa"] %<>% factor(., levels = mean_orders)
				diff_data$Taxa %<>% factor(., levels = mean_orders)
			}
			if(is.null(group_order)){
				if(!is.factor(tmp_transalpha$data_alpha[, group])){
					tmp_transalpha$data_alpha[, group] %<>% factor(., levels = self$group_order)
				}
				if(!is.factor(tmp_transalpha$data_stat[, group])){
					tmp_transalpha$data_stat[, group] %<>% factor(., levels = self$group_order)
				}
			}else{
				tmp_transalpha$data_alpha[, group] %<>% factor(., levels = group_order)
				tmp_transalpha$data_stat[, group] %<>% factor(., levels = group_order)
			}

			if(coord_flip){
				tmp_transalpha$data_alpha[, group] %<>% factor(., levels = rev(levels(.)))
				tmp_transalpha$data_stat[, group] %<>% factor(., levels = rev(levels(.)))
				color_values <- expand_colors(color_values, length(unique(tmp_transalpha$data_alpha[, group]))) %>% rev

				tmp_transalpha$data_alpha$Taxa %<>% factor(., levels = rev(levels(.)))
				tmp_transalpha$data_stat$Taxa %<>% factor(., levels = rev(levels(.)))
				diff_data$Taxa %<>% factor(., levels = rev(levels(.)))
				
				xtext_angle <- 0
			}
			
			# manipulate the diff table
			if(add_sig){
				diff_data$Measure <- "all"
				colnames(diff_data)[colnames(diff_data) == "Taxa"] <- "by_group"
				
				tmp_transalpha$res_diff <- diff_data
				# for groups > 3, show the global comparision
				if((length(unique(tmp_transalpha$data_alpha[, group])) > 2 & method %in% c("lefse", "rf", "KW", "ALDEx2_kw")) | 
					(length(unlist(gregexpr(" - ", diff_data$Comparison[1]))) > 1 & method == "ancombc2")){
					tmp_transalpha$cal_diff_method <- "KW"
				}else{
					if("Letter" %in% colnames(diff_data)){
						tmp_transalpha$cal_diff_method <- "anova"
					}else{
						tmp_transalpha$cal_diff_method <- "wilcox"
					}
				}
			}
			
			p <- tmp_transalpha$plot_alpha(measure = "all", 
				color_values = color_values, 
				order_x_mean = FALSE, 
				add_sig = add_sig, 
				xtext_angle = xtext_angle,
				ggtheme = theme_bw(),
				...)
			
			p <- p + ylab("Relative abundance")
			
			if(coord_flip){
				p <- p + coord_flip() + 
					theme(axis.title.x = element_text(size = ytitle_size), axis.text.x = element_text(size = ytitle_size * 0.618), axis.text.y = element_text(size = xtext_size)) +
					theme(panel.grid.minor.y = element_blank(), panel.grid.major.y = element_blank()) +
					theme(legend.position = "right") +
					guides(color = guide_legend(reverse = TRUE), fill = guide_legend(reverse = TRUE))
			}

			p
		},
		#' @description
		#' Bar plot for indicator index, such as LDA score and P value.
		#'
		#' @param color_values default \code{RColorBrewer::brewer.pal}(8, "Dark2"); colors palette for different groups.
		#' @param color_group_map default FALSE; whether match the colors to groups in order to fix the color in each group when part of groups are not shown in the plot.
		#'    When \code{color_group_map = TRUE}, the group_order inside the object will be used as full groups set to guide the color extraction.
		#' @param use_number default 1:10; numeric vector; the taxa numbers used in the plot, i.e. 1:n.
		#' @param threshold default NULL; threshold value of indicators for selecting taxa, such as 3 for LDA score of LEfSe.
		#' @param select_group default NULL; this is used to select the paired group when multiple comparisions are generated;
		#'   The input select_group must be one of \code{object$res_diff$Comparison}.
		#' @param keep_full_name default FALSE; whether keep the taxonomic full lineage names.
		#' @param keep_prefix default TRUE; whether retain the taxonomic prefix, such as "g__".
		#' @param group_order default NULL; a vector to order the legend and colors in plot; 
		#' 	  If NULL, the function can first determine whether the group column of \code{microtable$sample_table} is factor. If yes, use the levels in it.
		#' 	  If provided, this parameter can overwrite the levels in the group of \code{microtable$sample_table}.
		#' @param group_aggre default TRUE; whether aggregate the features for each group.
		#' @param group_two_sep default TRUE; whether display the features of two groups on opposite sides of the coordinate axes when there are only two groups in total.
		#' @param coord_flip default TRUE; whether flip cartesian coordinates so that horizontal becomes vertical, and vertical becomes horizontal.
		#' @param add_sig default FALSE; whether add significance label (asterisk) above the bar.
		#' @param add_sig_increase default 0.1; the axis position (\code{Value + add_sig_increase * max(Value)}) from which to add the significance label; 
		#' 	  only available when \code{add_sig = TRUE}.
		#' @param add_sig_text_size default 5; the size of added significance label; only available when \code{add_sig = TRUE}.
		#' @param xtext_angle default 45; number ranging from 0 to 90; used to make x axis text generate angle to reduce text overlap; 
		#' 	  only available when coord_flip = FALSE.
		#' @param xtext_size default 10; text size of x axis.
		#' @param ytext_size default NULL; text size of y axis. NULL means default ggplot2 value.
		#' @param axis_text_y deprecated. Please use \code{ytext_size} argument instead.
		#' @param heatmap_cell default "P.unadj"; the column of data for the cell of heatmap when formula with multiple factors is found in the method.
		#' @param heatmap_sig default "Significance"; the column of data for the significance label of heatmap.
		#' @param heatmap_x default "Factors"; the column of data for the x axis of heatmap.
		#' @param heatmap_y default "Taxa"; the column of data for the y axis of heatmap.
		#' @param heatmap_lab_fill default "P value"; legend title of heatmap.
		#' @param ... parameters passing to \code{geom_bar} for the bar plot or 
		#' 	  \code{plot_cor} function in \code{\link{trans_env}} class for the heatmap of multiple factors when formula is found in the method.
		#' @return ggplot.
		#' @examples
		#' \donttest{
		#' t1$plot_diff_bar(use_number = 1:20)
		#' }
		plot_diff_bar = function(
			color_values = RColorBrewer::brewer.pal(8, "Dark2"),
			color_group_map = FALSE,
			use_number = 1:10,
			threshold = NULL,
			select_group = NULL,
			keep_full_name = FALSE,
			keep_prefix = TRUE,
			group_order = NULL,
			group_aggre = TRUE,
			group_two_sep = TRUE,
			coord_flip = TRUE,
			add_sig = FALSE,
			add_sig_increase = 0.1,
			add_sig_text_size = 5,
			xtext_angle = 45,
			xtext_size = 10,
			ytext_size = NULL,
			axis_text_y = deprecated(),
			heatmap_cell = "P.unadj",
			heatmap_sig = "Significance",
			heatmap_x = "Factors",
			heatmap_y = "Taxa",
			heatmap_lab_fill = "P value",
			...
			){
			
			if(lifecycle::is_present(axis_text_y)) {
				lifecycle::deprecate_warn("1.13.1", "plot_diff_bar(axis_text_y)", "plot_diff_bar(ytext_size)")
				ytext_size <- axis_text_y
			}
			
			use_data <- self$res_diff
			method <- self$method
			if(is.null(method)){
				if(is.null(use_data)){
					stop("The res_diff should be provided when method is NULL!")
				}else{
					if(!"Value" %in% colnames(use_data)){
						stop("No Value column found in the object$res_diff!")
					}
				}
			}
			if(keep_full_name == F){
				if(any(grepl("\\..__", use_data$Taxa))){
					use_data$Taxa %<>% gsub(".*(.__.*?$)", "\\1", .)
				}else{
					use_data$Taxa %<>% gsub(".*\\|", "", .)
				}
			}
			if(keep_prefix == F){
				use_data$Taxa %<>% gsub(".__", "", .)
			}

			if((! grepl("formula", method)) & (! method %in% c("maaslin2"))){
				if(method == "lefse"){
					colnames(use_data)[colnames(use_data) == "LDA"] <- "Value"
					ylab_title <- "LDA score"
				}else{
					if(method == "rf"){
						if("MeanDecreaseAccuracy" %in% colnames(use_data)){
							ylab_title <- "MeanDecreaseAccuracy"
						}else{
							ylab_title <- "MeanDecreaseGini"
						}
						colnames(use_data)[colnames(use_data) %in% c("MeanDecreaseAccuracy", "MeanDecreaseGini")] <- "Value"
					}else{
						if(method == "metastat"){
							use_data %<>% .[.$qvalue < 0.05, ]
							use_data$Value <- 1 - use_data$qvalue
							ylab_title <- "1 - qvalue"
						}else{
							if(method != "anova"){
								use_data %<>% .[.$P.adj < 0.05, ]
								use_data$Value <- 1 - use_data$P.adj
								ylab_title <- "1 - P.adjust"
							}else{
								stop("This function can not be used to ", method," currently!")
							}
						}
					}
				}
				if(!method %in% c("lefse", "rf")){
					if(length(unique(use_data$Comparison)) > 1){
						# make sure the Group not replicated for multiple comparisions
						if(is.null(select_group)){
							message('Multiple comparisions found. But select_group parameter not provided. Select the first group pair to show ...')
							select_group <- unique(use_data$Comparison)[1]
						}else{
							if(length(select_group) > 1){
								stop("The select_group parameter should only have one element! Please check the input!")
							}
							if(! select_group %in% use_data$Comparison){
								stop("The select_group parameter must be one of elements of object$res_diff$Comparison!")
							}
						}
						use_data %<>% .[.$Comparison %in% select_group, ]
					}
				}
				use_data %<>% .[!duplicated(.$Taxa), ]
				if(nrow(use_data) == 0){
					stop("No available data can be used to show!")
				}
				if(is.null(threshold)){
					sel_num <- use_number
				}else{
					sel_num <- sum(use_data$Value > threshold)
					if(sel_num == 0){
						stop("Too large threshold provided, no data selected!")
					}
					sel_num <- 1:sel_num
				}
				if(length(sel_num) > nrow(use_data)){
					sel_num <- 1:nrow(use_data)
				}
				use_data %<>% .[sel_num, ]
				if("Group" %in% colnames(use_data)){
					if(is.null(group_order)){
						if(!is.factor(use_data$Group)){
							if((!is.null(self$group_order)) & (length(unique(use_data$Group)) == length(self$group_order))){
								use_data$Group %<>% factor(., levels = self$group_order)
							}else{
								use_data$Group %<>% as.character %>% as.factor
							}
						}
					}else{
						use_data$Group %<>% factor(., levels = group_order)
					}
					if(color_group_map){
						# fix colors for each group
						all_groups <- self$group_order
						# make sure colors length enough for selection
						color_values <- expand_colors(color_values, length(all_groups))
						use_groups <- levels(use_data$Group)
						color_values %<>% .[match(use_groups, all_groups)]
					}else{
						color_values <- expand_colors(color_values, length(levels(use_data$Group)))
					}
				}
				
				if(group_aggre & "Group" %in% colnames(use_data)){
					# rearrange orders
					use_data$Taxa %<>% as.character
					if(length(levels(use_data$Group)) == 2 & group_two_sep){
						feature_orders <- lapply(levels(use_data$Group), function(x){
							if(x == levels(use_data$Group)[1]){
								use_data[as.character(use_data$Group) %in% x, ] %>% .[order(.$Value, decreasing = TRUE), "Taxa"]
							}else{
								use_data[as.character(use_data$Group) %in% x, ] %>% .[order(.$Value, decreasing = FALSE), "Taxa"]
							}
						}) %>% unlist %>% unique
						use_data[use_data$Group == levels(use_data$Group)[2], "Value"] %<>% {. * -1}
					}else{
						feature_orders <- lapply(levels(use_data$Group), function(x){
							use_data[as.character(use_data$Group) %in% x, ] %>% .[order(.$Value, decreasing = TRUE), "Taxa"]
						}) %>% unlist %>% unique
					}
				}else{
					use_data %<>% .[order(.$Value, decreasing = TRUE), ]
					feature_orders <- use_data$Taxa
				}

				if(coord_flip){
					use_data$Taxa %<>% factor(., levels = rev(feature_orders))
				}else{
					use_data$Taxa %<>% factor(., levels = feature_orders)
				}

				self$plot_diff_bar_taxa <- levels(use_data$Taxa) %>% rev
				if(add_sig){
					added_value <- add_sig_increase * max(abs(use_data$Value))
					use_data$add_sig_position <- ifelse(use_data$Value > 0, use_data$Value + added_value, use_data$Value - added_value)
				}
				
				if("Group" %in% colnames(use_data)){
					p <- ggplot(use_data, aes(x = Taxa, y = Value, color = Group, fill = Group, group = Group)) +
						scale_color_manual(values = color_values) +
						scale_fill_manual(values = color_values)
				}else{
					p <- ggplot(use_data, aes(x = Taxa, y = Value))
				}
				p <- p +
					geom_bar(stat = "identity", position = position_dodge(), ...) +
					theme_bw() +
					ylab(ylab_title) +
					xlab("") +
					theme(axis.title = element_text(size = 17), axis.text.y = element_text(size = ytext_size, color = "black")) +
					theme(panel.grid.minor.y = element_blank(), panel.grid.major.y = element_blank(), panel.grid.minor.x = element_blank()) +
					theme(panel.border = element_blank()) +
					theme(axis.line.x = element_line(color = "grey60", linetype = "solid", lineend = "square"))
				
				if(add_sig){
					p <- p + geom_text(aes(x = Taxa, y = add_sig_position, label = Significance), data = use_data, size = add_sig_text_size)
				}
				
				if(coord_flip){
					p <- p + coord_flip()
				}else{
					p <- p + ggplot_xtext_anglesize(xtext_angle = xtext_angle, xtext_size = xtext_size)
				}
				p
			}else{
				# heatmap for multi-factor
				message("Perform heatmap instead of bar plot as formula is found ...")
				tmp <- use_data
				tmp_trans_env <- convert_diff2transenv(tmp, heatmap_x, heatmap_y, heatmap_cell, heatmap_sig, heatmap_lab_fill)
				tmp_trans_env$plot_cor(keep_full_name = keep_full_name, keep_prefix = keep_prefix, 
					xtext_angle = xtext_angle, xtext_size = xtext_size, ytext_size = ytext_size, ...)
			}
		},
		#' @description
		#' Plot the cladogram using taxa with significant difference.
		#'
		#' @param color default \code{RColorBrewer::brewer.pal}(8, "Dark2"); color palette used in the plot.
		#' @param group_order default NULL; a vector to order the legend in plot; 
		#' 	  If NULL, the function can first check whether the group column of sample_table is factor. If yes, use the levels in it.
		#' 	  If provided, this parameter can overwrite the levels in the group of sample_table. 
		#' 	  If the number of provided group_order is less than the number of groups in \code{res_diff$Group}, the function will select the groups of group_order automatically.
		#' @param use_taxa_num default 200; integer; The taxa number used in the background tree plot; select the taxa according to the mean abundance .
		#' @param filter_taxa default NULL; The mean relative abundance used to filter the taxa with low abundance.
		#' @param use_feature_num default NULL; integer; The feature number used in the plot; 
		#'	  select the features according to the metric (method = "lefse" or "rf") from high to low.
		#' @param clade_label_level default 4; the taxonomic level for marking the label with letters, root is the largest.
		#' @param select_show_labels default NULL; character vector; The features to show in the plot with full label names, not the letters.
		#' @param only_select_show default FALSE; whether only use the the select features in the parameter \code{select_show_labels}.
		#' @param sep default "|"; the seperate character in the taxonomic information.
		#' @param branch_size default 0.2; numberic, size of branch.
		#' @param alpha default 0.2; shading of the color.
		#' @param clade_label_size default 2; basic size for the clade label; please also see \code{clade_label_size_add} and \code{clade_label_size_log}.
		#' @param clade_label_size_add default 5; added basic size for the clade label; see the formula in \code{clade_label_size_log} parameter.
		#' @param clade_label_size_log default \code{exp(1)}; the base of \code{log} function for added size of the clade label; the size formula: 
		#'   \code{clade_label_size + log(clade_label_level + clade_label_size_add, base = clade_label_size_log)}; 
		#'   so use \code{clade_label_size_log}, \code{clade_label_size_add} and \code{clade_label_size}
		#'   can totally control the label size for different taxonomic levels.
		#' @param node_size_scale default 1; scale for the node size.
		#' @param node_size_offset default 1; offset for the node size.
		#' @param annotation_shape default 22; shape used in the annotation legend.
		#' @param annotation_shape_size default 5; size used in the annotation legend.
		#' @return ggplot.
		#' @examples
		#' \dontrun{
		#' t1$plot_diff_cladogram(use_taxa_num = 100, use_feature_num = 30, select_show_labels = NULL)
		#' }
		plot_diff_cladogram = function(
			color = RColorBrewer::brewer.pal(8, "Dark2"),
			group_order = NULL,
			use_taxa_num = 200,
			filter_taxa = NULL,
			use_feature_num = NULL,
			clade_label_level = 4,
			select_show_labels = NULL,
			only_select_show = FALSE,
			sep = "|",
			branch_size = 0.2,
			alpha = 0.2,
			clade_label_size = 2,
			clade_label_size_add = 5,
			clade_label_size_log = exp(1),
			node_size_scale = 1,
			node_size_offset = 1,
			annotation_shape = 22,
			annotation_shape_size = 5
			){
			# developed based on microbiomeMarker package
			abund_table <- self$abund_table
			if(! all(grepl("^.__", rownames(abund_table)))){
				stop("It seems that there is no standard prefix for taxonomic information (e.g., k__ and p__), ",  
					"which is necessary for this analysis. Please use the tidy_taxonomy function to tidy the tax_table of the microtable object, ",
					"re-run cal_abund function to calculate relative abundance, and then re-perform the LEfSe analysis!")
			}
			marker_table <- self$res_diff %>% dropallfactors
			method <- self$method
			
			if(! method %in% c("lefse", "rf")){
				stop("This function currently can only be used for method = 'lefse' or 'rf' !")
			}
			if(self$taxa_level != "all"){
				stop("This function is available only when taxa_level = 'all' !")
			}
			if(!is.null(use_feature_num)){
				if(use_feature_num > nrow(marker_table)){
					message("Input use_feature_num ", use_feature_num, " larger than available features number ", nrow(marker_table), " ! ", 
						"Use ", nrow(marker_table), " instead of it ...")
				}else{
					message("Select ", use_feature_num, " significant features ...")
					marker_table %<>% .[1:use_feature_num, ]
				}
			}
			if(only_select_show == T){
				marker_table %<>% .[.$Taxa %in% select_show_labels, ]
			}
			# color legend order settings
			if(is.null(group_order)){
				if(! is.null(self$group_order)){
					color_groups <- self$group_order
				}else{
					color_groups <- marker_table$Group %>% as.character %>% as.factor %>% levels
				}
			}else{
				color_groups <- group_order
			}
			
			# filter redundant groups
			if(! all(color_groups %in% unique(marker_table$Group))){
				tmp_message <- color_groups[! color_groups %in% unique(marker_table$Group)]
				message("Part of groups in group_order, ", paste(tmp_message, collapse = " "), ", not found in the feature table ...")
				color_groups %<>% .[. %in% unique(marker_table$Group)]
			}
			if(! all(unique(marker_table$Group) %in% color_groups)){
				tmp_message <- unique(marker_table$Group)[! unique(marker_table$Group) %in% color_groups]
				message("Part of groups in the feature table, ", paste(tmp_message, collapse = " "), ", not found in the group_order ...")
				marker_table %<>% .[.$Group %in% color_groups, ]
			}
			# get the color palette
			if(length(color) < length(unique(marker_table$Group))){
				stop("Please provide enough color palette! There are ", length(unique(marker_table$Group)), 
					" groups, but only ", length(color), " colors provideed in color parameter!")
			}else{
				color <- color[1:length(unique(marker_table$Group))]
			}

			tree <- private$plot_backgroud_tree(abund_table = abund_table, use_taxa_num = use_taxa_num, filter_taxa = filter_taxa, sep = sep)
			
			# generate annotation
			annotation <- private$generate_cladogram_annotation(marker_table, tree = tree, color = color, color_groups = color_groups, sep = sep)
			# check again filtered groups
			if(!is.null(use_taxa_num)){
				if(! all(color_groups %in% unique(annotation$enrich_group))){
					tmp_message <- color_groups[! color_groups %in% unique(annotation$enrich_group)]
					message("Biomarkers in group(s), ", paste(tmp_message, collapse = " "), ", not found in the background tree!", 
						" Please try to enlarge parameter use_taxa_num from ", use_taxa_num, " to a larger one ...")
					color_groups %<>% .[. %in% unique(annotation$enrich_group)]
				}
			}

			annotation_info <- dplyr::left_join(annotation, tree$data, by = c("node" = "label")) %>%
				dplyr::mutate(label = .data$node, id = .data$node.y, level = as.numeric(.data$node_class))
			hilight_para <- dplyr::transmute(
				annotation_info,
				node = .data$id,
				fill = .data$color,
				alpha = alpha,
				extend = private$get_offset(.data$level)
			)
			hilights_g <- purrr::pmap(hilight_para, ggtree::geom_hilight)
			tree <- purrr::reduce(hilights_g, `+`, .init = tree)
			
			# hilight legend
			hilights_df <- dplyr::distinct(annotation_info, .data$enrich_group, .data$color)
			hilights_df$x <- 0
			hilights_df$y <- 1
			# resort the table used for the legend color and text
			hilights_df %<>% `row.names<-`(.$enrich_group) %>% .[color_groups, ]
			# make sure the right order in legend
			hilights_df$enrich_group %<>% factor(., levels = color_groups)

			# add legend
			tree <- tree + 
				geom_rect(aes(xmin = x, xmax = x, ymax = y, ymin = y, fill = enrich_group), data = hilights_df, inherit.aes = FALSE) +
				guides(fill = guide_legend(title = NULL, order = 1, override.aes = list(fill = hilights_df$color)))

			# set nodes color and size
			nodes_colors <- rep("white", nrow(tree$data))
			nodes_colors[annotation_info$id] <- annotation_info$color
			node_size <- node_size_scale*log(tree$data$abd) + node_size_offset
			tree$data$node_size <- node_size
			tree <- tree + ggtree::geom_point2(aes(size = I(node_size)), fill = nodes_colors, shape = 21)

			## add clade labels
			clade_label <- dplyr::transmute(
				annotation_info,
				node = .data$id,
				offset = private$get_offset(.data$level)-0.4,
				offset.text = 0,
				angle = purrr::map_dbl(.data$id, private$get_angle, tree = tree),
				label = .data$label,
				fontsize = clade_label_size + log(.data$level + clade_label_size_add, base = clade_label_size_log),
				barsize = 0,
				extend = 0.2,
				hjust = 0.5,
				level = .data$level
			) %>% dplyr::arrange(desc(.data$level))

			clade_label$offset.text <- unlist(lapply(seq_len(nrow(clade_label)), function(x){
				if(clade_label$angle[x] < 180){ 0.2 }else{ 0 }}))
			clade_label$angle <- unlist(lapply(clade_label$angle, function(x){
				if(x < 180){ x - 90 }else{ x + 90 }}))
			clade_label_new <- clade_label

			# add letters label to replace long taxonomic label
			if(is.null(select_show_labels)){
				# outer circle --> larger level; label smaller levels, i.e. finer taxonomy
				ind <- clade_label$level < clade_label_level	
			}else{
				ind <- ! clade_label$label %in% select_show_labels
			}
			ind_num <- sum(ind)

			if(ind_num > 0){
				if(ind_num < 27){
					use_letters <- letters
				}else{
					if(ind_num < 326){
						use_letters <- apply(combn(letters, 2), 2, function(x){paste0(x, collapse = "")})
					}else{
						stop("Too much features to be labelled with letters, consider to use use_feature_num parameter to reduce the number!")
					}
				}
				clade_label_new$label_legend <- clade_label_new$label_show <- clade_label_new$label_raw <- clade_label_new$label
				clade_label_new$label_show[ind] <- use_letters[1:ind_num]
				clade_label_new$label_legend[ind] <- paste0(clade_label_new$label_show[ind], ": ", clade_label_new$label[ind])
				clade_label_new$label <- clade_label_new$label_show
				# delete redundant columns to avoid warnings
				clade_label <- clade_label_new %>% .[, which(! colnames(.) %in% c("label_raw", "label_show", "label_legend", "level"))]
			}

			clade_label_g <- purrr::pmap(clade_label, ggtree::geom_cladelabel)
			tree <- purrr::reduce(clade_label_g, `+`, .init = tree)

			# if letters are used, add guide labels
			if(ind_num > 0){
				guide_label <- clade_label_new[ind, ] %>%
					dplyr::mutate(color = annotation_info$color[match(.data$label_raw, annotation_info$label)])
				tree <- tree + 
					geom_point(data = guide_label, inherit.aes = FALSE, aes_(x = 0, y = 0, shape = ~label_legend), size = 0, stroke = 0) +
						scale_shape_manual(values = rep(annotation_shape, nrow(guide_label))) +
						guides(shape = guide_legend(override.aes = list(
							size = annotation_shape_size, shape = annotation_shape, fill = guide_label$color)))
			}
			tree <- tree + theme(legend.position = "right", legend.title = element_blank())
			tree
		}
	),
	private = list(
		# group test for lefse or rf
		test_mark = function(dataframe, group, min_num_nonpara = 1, method = NULL){
			d1 <- as.data.frame(t(dataframe))
			taxaname <- colnames(d1)[1]
			d1$Group <- group
			colnames(d1)[1] <- "Value"
			formu <- reformulate("Group", "Value")
			if(any(table(as.character(group))) < min_num_nonpara){
				list(p_value = NA, med = NA)
			}else{
				if(! is.null(method)){
					method <- match.arg(method, c("wilcox.test", "kruskal.test"))
					if(method == "wilcox.test"){
						res1 <- wilcox.test(formula = formu, data = d1)
					}else{
						res1 <- kruskal.test(formula = formu, data = d1)
					}
				}else{
					if(length(unique(as.character(group))) == 2){
						res1 <- wilcox.test(formula = formu, data = d1)
					}else{
						res1 <- kruskal.test(formula = formu, data = d1)
					}
				}
				if(is.nan(res1$p.value)){
					res1$p.value <- 1
				}
				med <- tapply(d1[, 1], group, median) %>% as.data.frame
				# if all values are same, use mean
				if(length(unique(med[, 1])) == 1){
					med <- tapply(d1[, 1], group, mean) %>% as.data.frame
				}
				colnames(med) <- taxaname
				list(p_value = res1$p.value, med = med)
			}
		},
		check_taxa_number = function(sel_taxa, p_adjust_method){
			if(sum(sel_taxa) == 0){
				if(p_adjust_method == "none"){
					stop('No significant feature found!')
				}else{
					stop('No significant feature found! To disable p value adjustment, please use p_adjust_method = "none"!')
				}
			}
			if(sum(sel_taxa) == 1){
				if(p_adjust_method == "none"){
					stop('Only one significant feature found! Stop running subsequent process!')
				}else{
					stop('Only one significant feature found! Stop running subsequent process! To disable p value adjustment, please use p_adjust_method = "none"!')
				}
			}
		},
		generate_microtable_unrel = function(use_dataset, taxa_level, filter_thres, filter_features){
			use_dataset$tidy_dataset()
			suppressMessages(use_dataset$cal_abund(rel = FALSE))
			if(! taxa_level %in% names(use_dataset$taxa_abund)){
				stop("The taxa_level is not found in the column names of tax_table! ", 
				"Such case is generally caused by the customized use of taxa_abund list in your input microtable object! ", 
				"However, for the current method, the original abundance in otu_table is used. So please try other method!")
			}
			use_feature_table <- use_dataset$taxa_abund[[taxa_level]]
			if(filter_thres > 0){
				use_feature_table %<>% .[! rownames(.) %in% names(filter_features), ]
			}
			message("Available feature number: ", nrow(use_feature_table))
			newdata <- microtable$new(otu_table = use_feature_table, sample_table = use_dataset$sample_table)
			newdata$tidy_dataset()
			newdata
		},
		# plot the background tree according to raw abundance table
		plot_backgroud_tree = function(abund_table, use_taxa_num = NULL, filter_taxa = NULL, sep = "|"){
			# filter the taxa with unidentified classification or with space, in case of the unexpected error in the following operations
			abund_table %<>% {.[!grepl("\\|.__\\|", rownames(.)), ]} %>%
				{.[!grepl("\\s", rownames(.)), ]} %>%
				# also filter uncleared classification to make it in line with the lefse above
				{.[!grepl("Incertae_sedis|unculture", rownames(.), ignore.case = TRUE), ]}
			if(nrow(abund_table) <= 2){
				stop("After filtering out non-standard taxonomy information, the abundance table only has ", nrow(abund_table), " feature(s)! ", 
					"Is there an issue with the taxonomy table? ", 
					"Please first use the tidy_taxonomy function to process the taxonomy information table before constructing the microtable object.")
			}
			if(!is.null(use_taxa_num)){
				if(use_taxa_num < nrow(abund_table)){
					message("Select ", use_taxa_num, " most abundant taxa as the background cladogram ...")
					abund_table %<>% .[names(sort(apply(., 1, mean), decreasing = TRUE)[1:use_taxa_num]), ]
				}else{
					message("Provided use_taxa_num: ", use_taxa_num, " >= ", " total effective taxa number. Skip the selection ...")
				}
			}
			if(!is.null(filter_taxa)){
				abund_table %<>% .[apply(., 1, mean) > (self$lefse_norm * filter_taxa), ]
			}
			abund_table %<>% .[sort(rownames(.)), ]
			tree_table <- data.frame(taxa = row.names(abund_table), abd = rowMeans(abund_table), stringsAsFactors = FALSE) %>%
				dplyr::mutate(taxa = paste("r__Root", .data$taxa, sep = sep), abd = .data$abd/max(.data$abd)*100)
			taxa_split <- strsplit(tree_table$taxa, split = sep, fixed = TRUE)
			nodes <- purrr::map_chr(taxa_split, utils::tail, n = 1)
			# check whether some nodes duplicated from bad classification
			if(any(duplicated(nodes))){
				del <- nodes %>% .[duplicated(.)] %>% unique
				for(i in del){
					tree_table %<>% .[!grepl(paste0("\\|", i, "($|\\|)"), .$taxa), ]
				}
				taxa_split <- strsplit(tree_table$taxa, split = sep, fixed = TRUE)
				nodes <- purrr::map_chr(taxa_split, utils::tail, n = 1)
			}
			# add root node
			nodes %<>% c("r__Root", .)
			# levels used for extending clade label
			label_levels <- purrr::map_chr(nodes, ~ gsub("__.*$", "", .x)) %>%
				factor(levels = rev(unlist(lapply(taxa_split, function(x) gsub("(.)__.*", "\\1", x))) %>% .[!duplicated(.)]))

			# root must be a parent node
			nodes_parent <- purrr::map_chr(taxa_split, ~ .x[length(.x) - 1]) %>% c("root", .)

			# add index for nodes
			is_tip <- !nodes %in% nodes_parent
			index <- vector("integer", length(is_tip))
			index[is_tip] <- 1:sum(is_tip)
			index[!is_tip] <- (sum(is_tip) + 1):length(is_tip)

			edges <- cbind(parent = index[match(nodes_parent, nodes)], child = index)
			edges <- edges[!is.na(edges[, 1]), ]
			if(! inherits(edges, "matrix")){
				stop("The parent and child nodes are not correctly recognized! ",
					"Please try to use tidy_taxonomy function to tidy taxonomic table in your microtable object! Then rerun cal_abund function and trans_diff class!")
			}
			# donot label tips
			node_label <- nodes[!is_tip]
			phylo <- structure(list(
				edge = edges, 
				node.label = node_label, 
				tip.label = nodes[is_tip], 
				edge.length = rep(1, nrow(edges)), 
				Nnode = length(node_label)
				), class = "phylo")
			mapping <- data.frame(
				node = index, 
				abd = c(100, tree_table$abd),
				node_label = nodes, 
				stringsAsFactors = FALSE)
			mapping$node_class <- label_levels
			tree <- tidytree::treedata(phylo = phylo, data = tibble::as_tibble(mapping))
			tree <- ggtree::ggtree(tree, size = 0.2, layout = 'circular')
			tree
		},
		# generate the cladogram annotation table
		generate_cladogram_annotation = function(marker_table, tree, color, color_groups, sep = "|") {
			use_marker_table <- marker_table
			feature <- use_marker_table$Taxa
			label <- strsplit(feature, split = sep, fixed = TRUE) %>% 
				purrr::map_chr(utils::tail, n =1)
			plot_color <- use_marker_table$Group %>% 
				as.character
			for(i in seq_along(color_groups)){
				plot_color[plot_color == color_groups[i]] <- color[i]
			}
			annotation <- data.frame(
				node = label,
				color = plot_color,
				enrich_group = use_marker_table$Group,
				stringsAsFactors = FALSE
			)
			# filter the feature with bad classification
			annotation %<>% .[label %in% tree$data$label, ]
			annotation
		},
		get_angle = function(tree, node){
			if (length(node) != 1) {
				stop("The length of `node` must be 1")
			}
			tree_data <- tree$data
			sp <- tidytree::offspring(tree_data, node)$node
			sp2 <- c(sp, node)
			sp.df <- tree_data[match(sp2, tree_data$node),]
			mean(range(sp.df$angle))
		},
		get_offset = function(x){(x*0.2+0.2)^2},
		# dependent functions in utility
		calculate_metastat = function(inputdata, g, pflag = FALSE, threshold = NULL, B = NULL){
			trans_data <- load_frequency_matrix(input = inputdata)
			res <- detect_differentially_abundant_features(jobj = trans_data, g = g, pflag = pflag, threshold = threshold, B = B)
			res
		}
	),
	lock_class = FALSE,
	lock_objects = FALSE
)