MicrobiomeAnalystR: MicrobiomeAnalystR - A comprehensive R package for statistical, visual, and functional analysis of the microbiome.

Documented in CoreMicrobeAnalysis Perform16FunAnot_mem PerformAlphaDiversityComp PerformBetaDiversity PerformCategoryComp PerformNetworkCorrelation PlotAlphaBoxData PlotAlphaData PlotBoxDataCorr PlotDataPieFromPie PlotFunAnotSummary PlotGroupPieGraph PlotOverallPieGraph PlotPhylogeneticTree PlotPiechart PlotRarefactionCurve PlotSamplePieGraph PlotSampleTaxaAundanceBar PlotTaxaAbundanceArea PlotTaxaAbundanceBarSamGrp PlotTaxaAundanceBar PrepareHeatTreePlot PrepareHeatTreePlotDataParse_cmf PrepareHeatTreePlotDataParse_cmf_diff_table PrepareHeatTreePlotDataParse_cmf_plot SavePiechartImg UpdatePieData

##################################################
## R script for MicrobiomeAnalyst
## Description: Data/resource management functions
## Author: Jeff Xia, jeff.xia@mcgill.ca
###################################################

#'Perform alpha diversity
#'@description This functions performs alpha diversity.
#'@param mbSetObj Input the name of the mbSetObj.
#'@param opt Character, input the name of the statistical method
#'used to calculate the significance of the alpha diversity measure.
#'"tt" for T-test or ANOVA, and "nonpar" for Mann-Whitney or
#'Kruskall-Wallis.
#'@param metadata Character, input the name of the experimental factor
#'to group the samples.
#'@author Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
PerformAlphaDiversityComp <- function(mbSetObj, opt, metadata, pair.wise = "false"){
  #print(metadata)
  mbSetObj <- .get.mbSetObj(mbSetObj); 
  data <- mbSetObj$analSet$alpha;
  cls <- as.factor(data[,metadata]);
  x <- data$value;
  stat.info <- NULL;
  print(c("cls",cls))
  if(length(levels(cls)) > 2){
    if(opt=="nonpar"){
      res <- kruskal.test(x ~ cls);
      stat.info <- paste("p-value: ", signif(res$p.value, 5), "; [Kruskal-Wallis] statistic: ", signif(res$statistic, 5) , sep="");
    }else{
      res <- anova(aov(x ~ cls));
      stat.info <- paste("p-value: ", signif(res$"Pr(>F)"[1], 5), "; [ANOVA] F-value: ", signif(res$"F value"[1], 5), sep="");
    }

    if(pair.wise != "false"){
        # get pairs
          co    <- combn(unique(as.character(cls)),2);
          nco   <- NCOL(co);
          out   <- data.frame(matrix(NA, nrow=nco, ncol=3));
          dimnames(out)[[2]] <- c('pairs', 'statistic', 'pval');

          for(j in 1:nco) {

            inx1 <- cls %in% co[1,j];
            inx2 <- cls %in% co[2,j];

            if(opt=="tt"){
                res <- t.test(x[inx1], x[inx2]);
            }else if(opt=="ht"){
                res <- ecolTest::Hutcheson_t_test(x[inx1], x[inx2], shannon.base = 10);
            }else{
                res <- wilcox.test(x[inx1], x[inx2]);
            }

            out[j,1] <- paste(co[1,j], 'vs', co[2,j]);
            out[j,2] <- res$statistic;
            out[j,3] <- res$p.value;
         }
         
         # add adjusted p value
         out$p.adj <- p.adjust(out$pval, method="fdr");
    }

  }else{
    inx1 <- which(cls==levels(cls)[1]);
    inx2 <- which(cls==levels(cls)[2]);
    
    if(opt=="tt"){
      res <- t.test(x[inx1], x[inx2]);
      stat.info <- paste("p-value: ", signif(res$p.value, 5), "; [T-test] statistic: ", signif(res$statistic, 5), sep="");
    }else if(opt=="ht"){
       res <- ecolTest::Hutcheson_t_test(x[inx1], x[inx2], shannon.base = 10);
       stat.info <- paste("p-value: ", signif(res$p.value, 5), "; [Hutcheson T-test] statistic: ", signif(res$statistic, 5), sep="");
    }else{
      res <- wilcox.test(x[inx1], x[inx2]);
      stat.info <- paste("p-value: ", signif(res$p.value, 5), "; [Mann-Whitney] statistic: ", signif(res$statistic, 5), sep="");
    }

    if(pair.wise != "false"){ # same for two group case
        out <- list();
        out$pairs <- paste(levels(cls)[1], 'vs', levels(cls)[2]);
        out$statistic <- signif(res$statistic, 5);
        out$p.adj <- out$p.value <- signif(res$p.value, 5);
        out<- data.frame(out);
    }
  }
  
  mbSetObj$analSet$alpha.stat.info <- stat.info;
  if(pair.wise != "false"){ 
    rownames(out) <- out$pairs;
    out$pairs <- NULL;
    mbSetObj$analSet$alpha.stat.pair <- mbSetObj$analSet$resTable <- signif(out,5);
    fast.write(mbSetObj$analSet$resTable, file="pairwise_alpha.csv");
  }

  if(.on.public.web){
    .set.mbSetObj(mbSetObj)
    return(stat.info);
  }else{
    return(.set.mbSetObj(mbSetObj))
  }
}
####################################
###########Correlation Networks###########
#####################################


#'Function to call for correlation network
#'@description This function runs the fastspar or cor.test 
#'@param mbSetObj Input the name of the mbSetObj.
#'@param taxrank Character, input the taxonomic level
#' to perform partial correlation analysis.
#'@param cor.method Character, input the correlation method.
#'Supported methods are pearson, spearman, kendall and sparcc.
#'@param colorOpt Character, input what to color the nodes by. Default
#'the nodes will be colored by their expression levels.
#'@param permNum Numeric, input the number of permutations to perform. Default
#'is set to 100.
#'@param pvalCutoff Numeric, input the p-value cutoff. Default is set to 0.05.
#'@param corrCutoff Numeric, input the correlation cutoff. Default is set to 0.3.
#'@param abundOpt Character, default is set to "mean".
#'@param corr.net.name Character, input the name of the plot to save.
#'@param plotNet Boolean. Set to TRUE if you would like a network visualization
#'outputted to your current working directory. 
#'@param networkType Character, "static" to create a static image or
#'"interactive" to create an interactive network saved as an html 
#'in your working directory.
#'@param netLayout Character, layout from ggraph. "kk" for the spring-based algorithm by Kamada and Kawai
#'as default. "drl" for force directed algorithm from the DrL toolbox. "lgl" for Large Graph Layout. "fr" for
#'force-directed of Fruchterman and Reingold.
#'@param netTextSize Numeric, input the preferred font size to be used in the network
#'plot.
#'@import ppcor
#'@import igraph
#'@export

PerformNetworkCorrelation <- function(mbSetObj, taxrank, cor.method="pearson", colorOpt="expr", 
                                      permNum=100, pvalCutoff=0.05, corrCutoff=0.3, abundOpt="mean", 
                                      corr.net.name, plotNet = FALSE, netType="static", netLayout="kk",
                                      netTextSize = 2.5){
  if(!exists("my.corr.net")){ # public web on same user dir
    .load.scripts.on.demand("utils_corrnet.Rc");    
  }
  return(my.corr.net(mbSetObj, taxrank, cor.method, colorOpt, permNum, pvalCutoff, corrCutoff, abundOpt, corr.net.name, plotNet, netType, netLayout,netTextSize));
}

#'Function to create box plots of important features
#'@description This functions plots box plots of a selected feature.
#'@param mbSetObj Input the name of the mbSetObj.
#'@param boxplotName Character, input the name of the 
#'box plot.
#'@param feat Character, input the name of the selected 
#'feature.
#'@param format Character, by default the plot format
#'is "png".
#'@param dpi Dots per inch. Numeric, by default
#'it is set to 72.
#'@parm colorPal Character, input the name of the preferred color palette.
#'Use "default" for the RColor brewer Set1 palette, "virdis" for the viridis color palette, and
#'"dark" for the RColor brewer Dark2 palette.
#'@author Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
#'@import grid
#'@import gridExtra
PlotBoxDataCorr<-function(mbSetObj, boxplotName, feat, format="png", sel.meta="", dpi=72){
  mbSetObj <- .get.mbSetObj(mbSetObj);
  colorPal <- "dark";
  
  load_ggplot();
  load_grid();
  load_gridExtra();
  if(mbSetObj$module.type == "meta"){
    merged <- qs::qread("merged.data.qs");
    merged <- subsetPhyloseqByDataset(mbSetObj, merged);
    data <- t(as.data.frame(as(otu_table(merged), "matrix"),check.names=FALSE));
    data <- as.data.frame(data);
    variable <- sel.meta;
    data$class <- as.vector(as.matrix(sample_data(merged))[,sel.meta])
    
    a <- as.numeric(data[,feat]);
  }else{
    variable <- mbSetObj$analSet$var.typecor;
    data <- mbSetObj$analSet$boxdatacor;
    a <- as.numeric(data[,feat]);
  }
  min.val <- min(abs(a[a!=0]))/5;
  data$log_feat <- log2((a + sqrt(a^2 + min.val))/2);
  boxplotName = paste(boxplotName,".",format, sep="");
  
  numGrps <- length(levels(data$class))
  
  if(numGrps == 2){
    width <- 325
  }else if(numGrps < 4){
    width <- 350
  }else if(numGrps < 6){
    width <- 375
  }else{
    width <- 400
  }
  
  Cairo::Cairo(file=boxplotName, width=width, height=300, type=format, bg="white", dpi=dpi);
  
  box <- ggplot(data, aes(x=data$class, y = data$log_feat, fill=as.factor(class))) + stat_boxplot(geom ='errorbar') + 
    geom_boxplot(outlier.shape = NA) + geom_jitter() + theme_bw() + labs(y="Log-transformed Counts\n", x=paste0("\n",variable), fill=variable) +
    ggtitle(feat) + theme(plot.title = element_text(hjust=0.5, size=13, face="bold"), axis.title=element_text(size=11), legend.title=element_text(size=11), axis.text=element_text(size=10));
  #remove grid
  box <- box + theme(panel.grid.major = element_blank(), panel.grid.minor = element_blank(), panel.background = element_blank(), panel.border = element_rect(colour = "#787878", fill=NA, size=0.5))
  
  if(colorPal == "viridis"){
    box <- box + scale_fill_viridis_d()
  }else if(colorPal == "set1"){
    box <- box + scale_fill_brewer(palette="Set1")
  }else if(colorPal == "dark"){
    box <- box + scale_fill_brewer(palette="Dark2")
  }
  
  print(box)
  dev.off();
  return(.set.mbSetObj(mbSetObj))
}

SparccToNet <- function(mbSetObj=NULL, corr.net.name, networkType="static", netLayout="kk", netTextSize){
  if(!exists("my.sparcc.net")){ # public web on same user dir
    .load.scripts.on.demand("utils_sparcc.Rc");    
  }
  return(my.sparcc.net(mbSetObj, corr.net.name, networkType, netLayout, netTextSize));
}

simpleCap <- function(x) {
  s <- strsplit(x, " ")[[1]]
  paste(toupper(substring(s, 1,1)), substring(s, 2),
        sep="", collapse=" ")
}

PerformLayOut <- function(g){

  vc <- vcount(g);
    if(vc > 5000) {
      pos.xy <- layout_with_lgl(g);
    }else if(vc < 100){
      pos.xy <- layout_with_kk(g);
    }else{
      pos.xy <- layout_with_fr(g);
    }
  pos.xy;
}

PrepareCorrExpValues <- function(mbSetObj, meta, taxalvl, color, layoutOpt, comparison, wilcox.cutoff){
  load_phyloseq();

  mbSetObj <- .get.mbSetObj(mbSetObj);
  load_metacoder();

  tax_o <- taxalvl;
  dm <- mbSetObj$dataSet$proc.phyobj;
  dims <- ncol(tax_table(dm))
  tax_table_new = data.frame("Kingdom" = "Root", as(tax_table(dm), "matrix")[, 1:dims],check.names=FALSE) # add root to tax table
  tax_table(dm) <- as.matrix(tax_table_new);
  
  dm_samples = as(sample_data(dm), "data.frame");
  dm_samples <- cbind.data.frame("sample_id" = row.names(dm_samples), dm_samples); # add sample_id to sample table
  row.names(dm_samples) <- c();
  dm_samples[] <- lapply(dm_samples, as.character);

  if(comparison != "all"){
    grp.nms <- mbSetObj$corr.net$comparison;
    if(is.null(grp.nms)){
      AddErrMsg("Please specify groups first!");
      return(0);
    }
    dm_samples_cmf <- dm_samples[dm_samples[[meta]] %in% grp.nms, ]; #subset sample data by meta variable
  }else{
    grp.nms <- unique(dm_samples[[meta]]) # all 
    dm_samples_cmf <- dm_samples
  }

  otu_dm <- as.data.frame(as(otu_table(dm), "matrix"),check.names=FALSE);
  tax_dm <- as.data.frame(as(tax_table(dm), "matrix"),check.names=FALSE);
  tax_dm[] <- lapply(tax_dm, as.character);#make sure characters in tax_dm;

  depth <- ncol(tax_dm)
  rank_dm <- c("r", "p", "c", "o", "f", "g", "s");
  names(tax_dm) <- rank_dm[1:depth];
  
  for (i in 1:ncol(tax_dm)){
    for (j in 1:nrow(tax_dm)){
      if (is.na(tax_dm[j, i])){
        tax_dm[j, i] <- "";
      } else {
        tax_dm[j, i] <- paste(names(tax_dm)[i],
                              tax_dm[j, i],
                              sep = "__");
      }
    }
  } #add __ to tax table
  
  if(taxalvl == "Phylum"){
    tax <- "p";
  } else if(taxalvl == "Class"){
    tax <- "c";
  } else if(taxalvl == "Order"){
    tax <- "o";
  } else if(taxalvl == "Family"){
    tax <- "f";
  } else if(taxalvl == "Genus"){
    tax <- "g"
  } else {
    tax <- "s";
  }; # get tax rank for heat tree
  tax_dm <- tax_dm[, 1:which(names(tax_dm) == tax)]; #subset tax table
  rank_dm_new <- rank_dm[1:which(rank_dm == tax)];
  tax_dm$lineage <- apply(tax_dm[, rank_dm_new], 1, paste, collapse = ";"); #collapse all tax ranks
  dm_otu <- cbind.data.frame("otu_id" = row.names(tax_dm),
                             "lineage" = tax_dm$lineage,
                             otu_dm); #make new otu table
  row.names(dm_otu) <- c();
  dm_otu$lineage <- gsub(";+$", "", dm_otu$lineage); #remove empty tax names
  
  dm_otu_cmf <- dm_otu[, c("otu_id", "lineage", dm_samples_cmf$sample_id)]; #make otu table ready for heat tree  
  PrepareHeatTreePlotDataParse_cmf_res <- PrepareHeatTreePlotDataParse_cmf(dm_otu_cmf, dm_samples_cmf, meta);
  dm_obj_cmf = PrepareHeatTreePlotDataParse_cmf_res;
  mbSetObj$dataSet$selected.grps = dm_samples_cmf$sample_id
  mbSetObj$dataSet$comparison = grp.nms 
  mbSetObj$dataSet$meta = meta
  cls = data.frame(dm_obj_cmf$data$class_data,check.names=FALSE)
  cls = cls[!duplicated(cls$taxon_id),]
  cls = cls[,c(1,4)]
  diff_table = data.frame(dm_obj_cmf$data$diff_table,check.names=FALSE)
  df = merge(diff_table, cls, by="taxon_id")
  df = df[which(df$tax_name != "Root"),]
  df = df[order(df$tax_name),]
  mbSetObj$analSet$diff_table <- df;
  
  if(.on.public.web){
    .set.mbSetObj(mbSetObj);
    PrepareBoxPlot(mbSetObj, taxalvl, mbSetObj$dataSet$meta);
    return(1)
  }else{
    mbSetObj <- PrepareBoxPlot(mbSetObj, taxalvl, mbSetObj$dataSet$meta);
    return(.set.mbSetObj(mbSetObj));
  }
}

PrepareBoxPlot <- function(mbSetObj, taxrank, variable){

  load_phyloseq();
  
  mbSetObj <- .get.mbSetObj(mbSetObj);
  
  selSamples <- mbSetObj$dataSet$selected.grps
  
  grps = mbSetObj$dataSet$comparison
  taxrank_boxplot <- taxrank;
  claslbl_boxplot <- as.factor(sample_data(mbSetObj$dataSet$norm.phyobj)[[variable]]);
  claslbl_boxplot <- claslbl_boxplot[which(claslbl_boxplot %in% grps)];
  # build phyloslim obj in fly
  filt.dataphy <- mbSetObj$dataSet$filt.data;
  filt.dataphy <- apply(filt.dataphy, 2, as.integer);
  filt.dataphy <- otu_table(filt.dataphy, taxa_are_rows =TRUE);
  sample_table_boxplot <- sample_data(mbSetObj$dataSet$proc.phyobj, errorIfNULL=TRUE);
  filt.dataphy <- merge_phyloseq(filt.dataphy, sample_table_boxplot);
  taxa_names(filt.dataphy) <- rownames(mbSetObj$dataSet$filt.data);
  data_boxplot <- filt.dataphy;
  
  if(mbSetObj$module.type=="mdp"){
    mbSetObj$dataSet$taxa_table <- tax_table(mbSetObj$dataSet$proc.phyobj);
    data_boxplot <- merge_phyloseq(data_boxplot, mbSetObj$dataSet$taxa_table);
  }
  
  #using by default names for shotgun data
  if(mbSetObj$module.type=="sdp"){
    taxrank_boxplot<-"OTU";
  }
  
  if(taxrank_boxplot!="OTU"){
    #merging at taxonomy levels
    data_boxplot <- fast_tax_glom_mem(data_boxplot, taxrank_boxplot);
    if(is.null(data_boxplot)){
      AddErrMsg("Errors in projecting to the selected taxanomy level!");
      return(0);
    }
    
    nm_boxplot <- tax_table(data_boxplot)[,taxrank_boxplot];
    nm_boxplot <- as.character(nm_boxplot);
    #converting NA values to unassigned
    nm_boxplot[is.na(nm_boxplot)] <- "Not_Assigned";
    data1_boxplot <- as.matrix(otu_table(data_boxplot));
    rownames(data1_boxplot) <- nm_boxplot;
    
    #all NA club together
    data1_boxplot <- as.matrix(t(sapply(by(data1_boxplot, rownames(data1_boxplot), colSums), identity)));
    data1_boxplot <- otu_table(data1_boxplot,taxa_are_rows=T);
    data_boxplot <- merge_phyloseq(data1_boxplot, sample_data(data_boxplot));
  }
  
  nm_boxplot <- taxa_names(data_boxplot);
  dat3t_boxplot <- as.data.frame(t(otu_table(data_boxplot)),check.names=FALSE);
  colnames(dat3t_boxplot) <- nm_boxplot; 
  
  #individual boxplot for features
  box_data <- as.data.frame(dat3t_boxplot[which(rownames(dat3t_boxplot) %in% selSamples),],check.names=FALSE);
  box_data$class <- claslbl_boxplot;
  mbSetObj$analSet$boxdatacor <- box_data;
  mbSetObj$analSet$var.typecor <- variable;
  
  return(.set.mbSetObj(mbSetObj));
}

#####################################
###########Core Microbiome###########
#####################################

#'Perform core microbiome analysis
#'@description This functions performs core microbiome analysis.
#'@param mbSetObj Input the name of the mbSetObj.
#'@param imgName Input the name of the core microbiome analysis plot.
#'@param preval Numeric, input the sample prevalence cutoff. This is
#'the minimum number of samples that share this microbe.
#'@param detection Numeric, input the relative abundance cutoff. This
#'is the minimum detection threshold of X percent relative abundance.
#'@param palette Character, input the color palette.
#'"bwm" for the default color palette /(blue, white, red/), "gbr"
#'for the red and green color palette, "heat" for the yellow to red color
#'palette, "topo" for the blue, green, and yellow color palette, 
#'"gray" for the gray color palette, "byr" for the blue, yellow and red color
#'palette, "viridis" for the viridis default color palette and "plasma"
#'for the plasma viridis color palette.
#'@param format Character, input the preferred
#'format of the plot. By default it is set to "png".
#'@param dpi Numeric, input the dots per inch. By default
#'it is set to 72.
#'@param interactive Boolean, if set to TRUE, saves the plot
#'as an interactive html plot.
#'@author Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
#'@import RColorBrewer
#'@import viridis
CoreMicrobeAnalysis<-function(mbSetObj, imgName, preval, detection, taxrank,
                              palette, viewOpt, analOpt, expFact, group, expFact2,
                              format="png", dpi=72, width=NA, interactive = FALSE){
  #print(viewOpt)
  mbSetObj <- .get.mbSetObj(mbSetObj);
  
  data <- mbSetObj$dataSet$proc.phyobj;
 
  expFact <- expFact
  expFact2 <- expFact2
  group <- group
 
 if(analOpt == "smpl_grp_all" & viewOpt=="bar"){
 
  # grp <- as.character(mbSetObj[["dataSet"]][["sample_data"]][[expFact2]])
  # data <- eval(parse(text = paste("phyloseq:::subset_samples(data,", expFact2, "%in%", "\"", grp, "\"", ")", sep="")))
   mbSetObj <- core_comp_grp(mbSetObj,imgName, preval, detection, taxrank,
                          palette, viewOpt,expFact2,format, dpi, width)
   
  }else{
    if(analOpt == "smpl_grp"){
        data <- eval(parse(text = paste("phyloseq:::subset_samples(data,", expFact, "==", "\"", group, "\"", ")", sep="")));
    }else if(analOpt == "smpl_grp_all"){
        grp <- as.character(mbSetObj[["dataSet"]][["sample_data"]][[expFact2]])
        data <- eval(parse(text = paste("phyloseq:::subset_samples(data,", expFact2, "%in%", "\"", grp, "\"", ")", sep="")))
 }

    # check min 2 reps 
    samples_left <- nsamples(data)
    
    if(samples_left<2){
      AddErrMsg("More than 2 replicates are required in your group!")
      return(0)
    }

    if(taxrank=="OTU"){
        data <- otu_table(data,taxa_are_rows=T);
    }else{
        #merging at taxonomy levels
        data <- fast_tax_glom_mem(data, taxrank);
        if(is.null(data)){
            AddErrMsg("Errors in projecting to the selected taxanomy level!");
            return(0);
        }
        nm <- as.character(tax_table(data)[,taxrank]);
        y <- which(is.na(nm)==TRUE);
        #converting NA values to unassigned
        nm[y] <- "Not_Assigned";
        data1 <- as.matrix(otu_table(data));
        rownames(data1) <- nm;
        #all NA club together
        data1 <- as.matrix(t(sapply(by(data1, rownames(data1), colSums), identity)));
        data <- otu_table(data1, taxa_are_rows=T);
    }
 
    #transform data to relative abundances, then obtain full phyloseq obj of just core microbiota
    data.compositional <- transform_sample_counts(data,function(x) x / sum(x));
    data.core <- core(data.compositional, detection = detection, prevalence = preval);
 
    core.nm <- data.frame(prevalence(data.compositional, detection = detection, sort = TRUE),check.names=FALSE);
    colnames(core.nm)[1] <- "Prevelance";
    fileName <- "core_microbiome.csv";
    fast.write(core.nm, file=fileName);
  
    imgName = paste(imgName, ".", format, sep="");
    mbSetObj$imgSet$core <- imgName;

    #setting the size of plot
    if(is.na(width)){
        minW <- 800;
        myW <- 10*18 + 200;
        if(myW < minW){
            myW <- minW;
        }
        w <- round(myW/72,2);
    }
  
    myH <- nrow(data.core)*18 + 150;
    h <- round(myH/65);
  
    #if more than 1500 features will be present; subset to most abundant=>1500 features.
    #OTUs already in unique names;
    if(ntaxa(data.core)>1500){
        data.core = prune_taxa(names(sort(taxa_sums(data.core), TRUE))[1:1500], data.core);
        viewOpt == "overview";
        if(is.na(width)){
            if(w >9.3){
                w <- 9.3;
            }
        }
        if(h > w){
            h <- w;
        }
    }
 

  if(viewOpt!="bar"){
    Cairo::Cairo(file=imgName, unit="in",width=w, height=h, type=format, bg="white",dpi=dpi);
  
     if(palette=="gbr"){
        colors <- grDevices::colorRampPalette(c("green", "black", "red"), space="rgb")(10);
    }else if(palette == "heat"){
        colors <- heat.colors(10);
    }else if(palette == "topo"){
        colors <- topo.colors(10);
    }else if(palette == "gray"){
        colors <- grDevices::colorRampPalette(c("grey90", "grey10"), space="rgb")(10);
    }else if(palette == "viridis") {
        colors <- rev(viridis::viridis(10))
    }else if(palette == "plasma") {
        colors <- rev(viridis::plasma(10))
    }else {
        load_rcolorbrewer();
        colors <- rev(grDevices::colorRampPalette(RColorBrewer::brewer.pal(10, "RdBu"))(10));
    }
    p <- plot_core(data.core, plot.type=viewOpt, colours = colors, prevalences = seq(.05, 1, .05), 
                 detections = 10^seq(log10(detection), log10(max(abundances(data.core))), length = 10)) + 
    ylab(paste0("\n", taxrank)) + xlab("\nDetection Threshold (Relative Abundance (%))") + 
    guides(fill = guide_legend(keywidth = 1.5, keyheight = 1)) + 
    theme(axis.text=element_text(size=10), axis.title=element_text(size=11.5), legend.title=element_text(size=11));
  
  }else{
   Cairo::Cairo(file=imgName, unit="in",width=h, height=w/1.5, type=format, bg="white",dpi=dpi);
  
    dt = abundances(data.core)
    prev = apply(dt,1,function(x) sum(x>detection)/length(x))
    dt = data.frame(feat = rownames(dt),prev = prev,stringsAsFactors = F) 
    dt <- dt[order(-dt$prev), ];

    if(palette=="gbr"){
        colors <- grDevices::colorRampPalette(c("green", "black", "red"), space="rgb")(nrow(dt));
    }else if(palette == "heat"){
        colors <- heat.colors(nrow(dt));
    }else if(palette == "topo"){
        colors <- topo.colors(nrow(dt));
    }else if(palette == "gray"){
        colors <- grDevices::colorRampPalette(c("grey90", "grey10"), space="rgb")(nrow(dt));
    }else if(palette == "viridis") {
        colors <- rev(viridis::viridis(nrow(dt)))
    }else if(palette == "plasma") {
        colors <- rev(viridis::plasma(nrow(dt)))
    }else {
        load_rcolorbrewer();
        colors <- rev(grDevices::colorRampPalette(RColorBrewer::brewer.pal(10, "RdBu"))(nrow(dt)));
    }
    
    p <- ggplot(dt, aes(x = reorder(feat, -prev), y = prev, fill = reorder(feat, -prev))) +
        geom_bar(stat = "identity", color = "black") +
        scale_fill_manual(values = colors)  +
        ylab(paste0("\n", "Prevalence")) +   xlab(paste0("\n", taxrank)) + 
        guides(fill = "none") + 
        theme_bw()+
        theme(axis.text=element_text(size=10), 
            axis.title=element_text(size=11.5), 
            axis.text.x = element_text(angle = 45, hjust = 1))
  }

  print(p);
  dev.off();
  
  if(interactive){
    library(plotly)
    library(htmlwidgets)
    ax <- list(
      zeroline = FALSE,
      showline = FALSE,
      showgrid = FALSE
    )
    p <- plotly::ggplotly(p)
    p <- p %>% layout(xaxis = ax, yaxis = ax)
    htmlwidgets::saveWidget(p, "core_interactive.html")
  }
  mbSetObj$analSet$core<-signif(as.matrix(core.nm),5);
  mbSetObj$analSet$core.taxalvl<-taxrank;
  mbSetObj$paramSet$core <- list(
        taxalvl=taxrank,
        preval=preval, 
        detection=detection,
        analOpt = analOpt,
        expFact = expFact, 
        group = group,
        expFact2 = expFact2
    )
   };
  return(.set.mbSetObj(mbSetObj));
}

core_comp_grp <- function(mbSetObj,imgName, preval, detection, taxrank,
                          palette, viewOpt,expFact2,format="png", dpi=72, width=NA){
  
  data <- mbSetObj$dataSet$proc.phyobj;
  grp <- unique(as.character(mbSetObj[["dataSet"]][["sample_data"]][[expFact2]]))
  dt <- lapply(grp,function(group) eval(parse(text = paste("phyloseq:::subset_samples(data,", expFact2, "==", "\"", group, "\"", ")", sep=""))))
  names(dt) <- grp 
  dt <-lapply(dt,function(data){
    if(taxrank=="OTU"){
      data <- otu_table(data,taxa_are_rows=T);
    }else{
      #merging at taxonomy levels
      data <- fast_tax_glom_mem(data, taxrank);
      if(is.null(data)){
        AddErrMsg("Errors in projecting to the selected taxanomy level!");
        return(0);
      }
      nm <- as.character(tax_table(data)[,taxrank]);
      y <- which(is.na(nm)==TRUE);
      #converting NA values to unassigned
      nm[y] <- "Not_Assigned";
      data1 <- as.matrix(otu_table(data));
      rownames(data1) <- nm;
      #all NA club together
      data1 <- as.matrix(t(sapply(by(data1, rownames(data1), colSums), identity)));
      data <- otu_table(data1, taxa_are_rows=T);
    }
  })
  dt.compositional <- lapply(dt,function(data) transform_sample_counts(data,function(x) x / sum(x))) ;
  data.core <- lapply(dt.compositional,function(data.compositional) core(data.compositional, detection = detection, prevalence = preval));
  
  core.nm <- lapply(dt.compositional,function(data.compositional) data.frame(prevalence(data.compositional, detection = detection, sort = TRUE),check.names=FALSE));
  core.nm <- lapply(names(core.nm), function(name) {
    x <- core.nm[[name]]
    z <- tibble::rownames_to_column(x, "Feature")
    names(z)[2] <- "Prevalence"
    z$Group <- name  
    return(z)
  })
  core.nm = do.call(rbind,core.nm)
  fileName <- "core_microbiome.csv";
  fast.write(core.nm, file=fileName);
  
  
  imgName = paste( imgName, ".", format, sep="");
  mbSetObj$imgSet$core <- imgName;
 

   if(viewOpt!="bar"){
   if(palette=="gbr"){
    colors <- grDevices::colorRampPalette(c("green", "black", "red"), space="rgb")(10);
  }else if(palette == "heat"){
    colors <- heat.colors(10);
  }else if(palette == "topo"){
    colors <- topo.colors(10);
  }else if(palette == "gray"){
    colors <- grDevices::colorRampPalette(c("grey90", "grey10"), space="rgb")(10);
  }else if(palette == "viridis") {
    colors <- rev(viridis::viridis(10))
  }else if(palette == "plasma") {
    colors <- rev(viridis::plasma(10))
  }else {
    load_rcolorbrewer();
    colors <- rev(grDevices::colorRampPalette(RColorBrewer::brewer.pal(10, "RdBu"))(10));
  }
  library(gridExtra)
  library(cowplot)   
  num_groups <- length(data.core)
  ncol <- min(3, ceiling(sqrt(num_groups)))   
   nrow <- ceiling(num_groups / ncol)   
   num_features <- length(unique(unlist(lapply(data.core,function(x) rownames(x)))))
   axis_text_size <- ifelse(num_features > 50, 6, ifelse(num_features > 30, 8, 10))
 
   plots <- lapply(seq_along(data.core), function(i) {
         coredt <- data.core[[i]]
    plot <- plot_core(coredt, plot.type = viewOpt, colours = colors, 
                    prevalences = seq(.05, 1, .05), 
                    detections = 10^seq(log10(detection), log10(max(abundances(coredt))), length = 10)) + 
    guides(fill = guide_legend(keywidth = 1.5, keyheight = 1)) +
     ggtitle(names(data.core)[i]) + 
    theme(
      axis.text.y = element_text(size = axis_text_size),        
       axis.text.x = element_text(size = axis_text_size, angle = 45, vjust = 0.5, hjust = 1),         
      axis.title = element_text(angle = 90, size = 12)  )  

  if ((i - 1) %% ncol == 0) {
    plot <- plot + ylab(paste0("\n", taxrank))
  } else {
    plot <- plot + theme(axis.title.y = element_blank())
  }
     
  if (i > (nrow - 1) * ncol) {
    plot <- plot + xlab("\nDetection Threshold (Relative Abundance (%))")
  } else {
    plot <- plot + theme(axis.title.x = element_blank())
  }
   
  return(plot)
})
 
shared_legend <- get_legend(plots[[1]])
 
plots <- lapply(plots, function(p) p + theme(legend.position = "none"))
 
combined_plot <- plot_grid(
  plot_grid(plotlist = plots, ncol = ncol, align = "hv"),   
  shared_legend,                                           
  rel_widths = c(4, 0.5)                                 
)

w <- max(13, ncol * 5.5)  
h <- max(5, nrow * 5)  
print(c(ncol,nrow,w,h))
 
Cairo::Cairo(file = imgName, unit = "in", width = w, height = h, type = format, bg = "white", dpi = dpi)
print(combined_plot)
dev.off()
  
  }else{
  
  dtls = lapply(data.core, abundances)  
  prev =lapply(dtls,function(dt) apply(dt,1,function(x) sum(x>detection)/length(x)))
  dtls= lapply(prev,function(dt) data.frame(feat = names(dt),prev = dt,stringsAsFactors = F) )
  dtls <- lapply(dtls,function(dt) dt[order(-dt$prev), ])
  dtls <- lapply(names(dtls), function(name) {
    x <- dtls[[name]]
    x$group <- name  # Add a new column with the name of each element
    return(x)
  })
  
  dt = do.call(rbind,dtls)
 
 
  num_features <- length(unique(dt$feat)) 
  num_groups <- length(unique(dt$group))
  axis_text_size <- ifelse(num_features > 80, 6, ifelse(num_features > 30, 8, 10))
  axis_title_size <- ifelse(num_features > 80, 9, ifelse(num_features > 30, 11, 12))

library(ggsci)
library(dplyr)
library(tidyr)
df_complete <- dt %>%
  complete(feat, group, fill = list(prev = 0))
df_complete <- df_complete %>%
  arrange(group, desc(prev)) %>%
  mutate(feat = factor(feat, levels = unique(feat))) # Order feat within each group by descending prev

 plot<- ggplot(df_complete, aes(x = feat, y = prev, fill = group)) +
  geom_bar(stat = "identity") +
    ylab("\nPrevalence") + 
    xlab(paste0("\n", taxrank)) + 
  scale_fill_d3() + # Apply the D3 color palette
  facet_grid(group ~ ., scales = "free_x", space = "free_x") + # Facet by group in one column
  theme_minimal() +
  theme(
    strip.text.y = element_text(angle = 0, size = 12), # Rotate facet labels to make them more readable
    legend.position = "none", # Remove the legend
    panel.spacing = unit(0.5, "lines"),
    # Adjust x-axis text size and show only on the bottom panel
    axis.text.x = element_text(angle = 45, hjust = 1, size = axis_text_size), # Adjust font size
    axis.ticks.x = element_line(),
    axis.title.x = element_text(vjust = -0.5)
  )
 
 w <- min(max(9, num_features * 0.2),13)
 h <- max(6, num_groups * 2)          
 print(c(w,h))
 Cairo::Cairo(file = imgName, unit = "in", width = w, height = h, type = format, bg = "white", dpi = dpi)
 print(plot)
 dev.off()
}
core.nm$Prevalence =  signif(core.nm$Prevalence ,5)
 mbSetObj$analSet$core<- as.matrix(core.nm);
  mbSetObj$analSet$core.taxalvl<-taxrank;
 mbSetObj$paramSet$core <- list(
   taxalvl=taxrank,
   preval=preval, 
   detection=detection,
   analOpt = "smpl_grp_all",
   expFact2 = expFact2
 )
 return(mbSetObj)
}

######################Adapted from the Microbiome R package#########################

core<-function(x, detection, prevalence, include.lowest=FALSE) {
  xorig <- x
  # TODO: add optional renormalization such that the core member
  # abundances would
  # sum up to 1 ?
  taxa <- core_members(x, detection, prevalence,
                       include.lowest=include.lowest)
  prune_taxa(taxa, xorig)
}

core_members<-function(x, detection=detection, prevalence=prevalence,
                       include.lowest=FALSE) {
  # Pick taxa x samples matrix
  x <- abundances(x)

  if (include.lowest) {
    taxa <- names(which(prevalence(x, detection,
                                   include.lowest=include.lowest) >= prevalence))
  } else {
    taxa <- names(which(prevalence(x, detection,
                                   include.lowest=include.lowest) > prevalence))
  }
  taxa
}

abundances<-function(x, transform="identity") {
  # Pick the OTU data
  if (class(x) == "phyloseq") {
    # Pick OTU matrix
    otu <- get_taxa(x)
    # Ensure that taxa are on the rows
    if (all(c(!taxa_are_rows(x), phyloseq::ntaxa(x) > 1, nsamples(x) > 1))) {
      otu <- t(otu)
    }
    if (phyloseq::ntaxa(x) == 1) {
      otu <- matrix(otu, nrow=1)
      rownames(otu) <- taxa(x)
      colnames(otu) <- sample_names(x)
    }
    if (nsamples(x) == 1) {
      otu <- matrix(otu, ncol=1)
      rownames(otu) <- taxa(x)
      colnames(otu) <- sample_names(x)
    }
  } else if (is.vector(x)) {
    otu <- as.matrix(x, ncol=1)
  } else {
    # If x is not vector or phyloslim object then let us assume it is a
    # taxa x samples
    # count matrix
    otu <- x
  }
  # Apply the indicated transformation
  if (!transform == "identity") {
    otu <- transform(otu, transform)
  }
  otu
}

plot_core<-function(x, prevalences=seq(.1, 1, 0.1), detections=20,
                    plot.type="lineplot", colours=NULL, # gray(seq(0, 1, length=5)),
                    min.prevalence=NULL, taxa.order=NULL, horizontal=FALSE) {
  
  if (length(detections) == 1) {
    detections <- 10^seq(log10(0.001), log10(max(abundances(x),
                                                 na.rm=TRUE)), length=detections)
  }
  
  if (plot.type == "lineplot") {
    # Calculate the core matrix (prevalences x abundance thresholds)
    coremat <- core_matrix(x, prevalences, detections)
    res <- core_lineplot(coremat)
  } else if (plot.type == "heatmap") {
    # Here we use taxon x abundance thresholds table indicating prevalences
    res <- core_heatmap(abundances(x),
                        dets=detections, cols=colours,
                        min.prev=min.prevalence, taxa.order=taxa.order)

  } 
  p <- res$plot;
  if (horizontal) {
    p <- p + coord_flip() + theme(axis.text.x=element_text(angle=90))
  }
  p
}

#'@import data.table
core_heatmap<-function(x, dets, cols, min.prev, taxa.order){
  
  load_datatable();
  
  data <- x;
  
  DetectionThreshold <- Taxa <- Prevalence <- NULL
  # Prevalences with varying dets
  prev <- lapply(dets, function(th) {
    prevalence(data, detection=th)
  })
  prev <- do.call("cbind", prev)
  colnames(prev) <- as.character(dets)
  
  # # Exclude rows and cols that never exceed the given prevalence
  if (!is.null(min.prev)) {
    prev <- prev[rowMeans(prev > min.prev) > 0,
                 colMeans(prev > min.prev) > 0]
  }
  
  df <- as.data.frame(prev,check.names=FALSE)
  df$ID <- rownames(prev)

  df <- data.table::melt(as.data.table(df), "ID");
  names(df) <- c("Taxa", "DetectionThreshold", "Prevalence")
  df$DetectionThreshold <- as.numeric(as.character(df$DetectionThreshold))
  df$Prevalence <- as.numeric(as.character(df$Prevalence))
  if (is.null(taxa.order)) {
    o <- names(sort(rowSums(prev)))
  } else {
    o <- taxa.order
  }
  df$Taxa <- factor(df$Taxa, levels=o)
  theme_set(theme_bw(10))
  p <- ggplot(df, aes(x=DetectionThreshold, y=Taxa, fill=Prevalence))
  p <- p + geom_tile()
  p <- p + xlab("Detection Threshold")
  p <- p + scale_x_log10(breaks=round(as.numeric(names(split(df$DetectionThreshold, sort(df$DetectionThreshold%%3)))),3))
  if (!is.null(cols)) {
    p <- p + scale_fill_gradientn("Prevalence",
                                  breaks=seq(from=0, to=1, by=0.1),
                                  colours=cols,
                                  limits=c(0, 1))
  }
  return(list(plot=p, data=df))
}

prevalence <- function(x, detection=0, sort=FALSE, count=FALSE,
                       include.lowest=FALSE) {
  if (is.null(detection)) {
    detection <- (-Inf)
  }
  if (is.null(x)) {
    warning("x is NULL - returning NULL")
    return(NULL)
  }
  # Convert phyloslim to matrix
  if (class(x) == "phyloseq") {
    x <- abundances(x)
  }
  if (is.vector(x)) {
    if (include.lowest) {
      prev <- sum(x >= detection)
    } else {
      prev <- sum(x > detection)
    }
  } else if (any(c(is.matrix(x), is.data.frame(x)))) {
    
    if (include.lowest) {
      prev <- rowSums(x >= detection)
    } else {
      prev <- rowSums(x > detection)
    }
  }
  if (!count) {
    prev <- prev/prevalence_nsamples(x)
  }
  if (sort) {
    prev <- rev(sort(prev))
  }
  prev
}

# Internal auxiliary function
prevalence_nsamples <- function(x) {
  if (is.vector(x)) {
    n <- length(x)
  } else if (any(c(is.matrix(x), is.data.frame(x)))) {
    n <- ncol(x)
  }
  n
}

###########################
### Pie Chart Functions ###
###########################

#'Main function to plot pie graphs of microbiome data.
#'@description This functions plots pie graphs of microbiome data. 
#'In particular, it plots the overall pie chart (all samples). The
#'data used to calculate the pie-chart is saved in your working 
#'directory as ""piechart_abundances.csv".
#'@param mbSetObj Input the name of the mbSetObj.
#'@param taxalvl Character, input the taxonomic level to perform
#'classification. For instance, "Genus" to use the Genus level.
#'@param feat_cnt Set the minimum feature count that is used to "bin"
#'together small taxa. 
#'@param calcmeth Merge small taxa based on the sum /("sum"/)
#'or median /("med"/) counts across all samples or groups.
#'@author Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
#'@import reshape
PlotOverallPieGraph<-function(mbSetObj, taxalvl, feat_cnt, calcmeth, 
                              toptaxapie, pietoptaxaopt){
  
  mbSetObj <- .get.mbSetObj(mbSetObj);
  
  load_reshape();
  load_phyloseq();

  set.seed(28053448);
  
  #using filtered data
  data <- mbSetObj$dataSet$filt.data;
  
  if("matrix" %in% class(mbSetObj$dataSet$filt.data)){
    data <- otu_table(data, taxa_are_rows =TRUE);
  }
  
  sample_table <- sample_data(mbSetObj$dataSet$proc.phyobj, errorIfNULL=TRUE);
  datapie <<- merge_phyloseq(data, tax_table(mbSetObj$dataSet$proc.phyobj), sample_table);
  
  #using reduce names
  data <- otu_table(datapie);
  data <- data.frame(data,check.names=FALSE);
  data_tax <- tax_table(datapie);
  
  #reshaping data
  data <- t(data);
  
  if(taxalvl=="OTU"){
    taxa_nm <- as.matrix(colnames(data));
    rownames(taxa_nm) <- colnames(data);
    rownames(taxa_nm) <- sub("^X", "", rownames(taxa_nm))
  }else{
    taxa_nm <- data.matrix(data_tax[,taxalvl]);
  }
  
  y <- which(is.na(taxa_nm)==TRUE);
  
  #converting NA values to unassigned
  taxa_nm[y] <- "Not_Assigned";
  colnames(data) <- taxa_nm[,1];
  nms <- colnames(data);
  data <- data.frame(data %*% sapply(unique(nms),"==",nms),check.names=FALSE);
  colnames(data)<- gsub("\\."," ",colnames(data));
  
  if(pietoptaxaopt == "bottom"){
    if(calcmeth=="sum"){
      ind <- which(colSums(data)>feat_cnt);
      ind1 <- which(colSums(data)<feat_cnt);
    }else{
      dt <- apply(data,2,median);
      ind <- which(dt>feat_cnt);
      ind1 <- which(dt<feat_cnt);
    }
    if(length(ind)==0){
      AddErrMsg("All features have lower read count than given minimum count filter. Please lower the cut off for minimum count.");
      return(0);
    }
    
    if(length(ind1)>0){
      colnames(data)[ind1] <- "Others";
      data <- as.data.frame(do.call(cbind, by(t(data),INDICES=names(data),FUN=colSums)),check.names=FALSE);
    }
    
    data$step <- factor(rownames(data));
    data <- melt(data,id='step');
    data$step <- as.numeric(data$step);
    piedata <- aggregate(. ~variable , data=data[-1], FUN=sum);
    
    # order by abundance
    ord.inx <- order(piedata$value, decreasing = TRUE);
    piedata <- piedata[ord.inx,];
    piedata_write <- piedata;
  } else {
    
    if(calcmeth=="sum"){
      order_taxa <- order(sapply(data, sum), decreasing = TRUE)
    } else {
      order_taxa <- order(sapply(data, median), decreasing = TRUE)
    }
    
    data <- data[, order_taxa];
    data <- data.frame("variable" = names(data),
                       "value" = apply(data, 2, sum),check.names=FALSE);
    row.names(data) <- NULL;
    data$variable <- as.character(data$variable);
    
    piedata_write <- data[order(data$value, decreasing = TRUE), ]
    piedata <- data;
    contain_notassign <- any(grepl("^Not_Assigned$", piedata$variable));
    if(nrow(piedata) < toptaxapie){
      piedata <- piedata;
    } else {
      if (! contain_notassign){
        top_piedata <- head(piedata, n = toptaxapie);
        bottom_piedata <- tail(piedata, n = nrow(piedata) - toptaxapie);
        other_piedata <- data.frame("variable" = "Others", 
                                    "value" = sum(bottom_piedata$value),check.names=FALSE);
        piedata <- rbind.data.frame(top_piedata, other_piedata);
      } else {
        if(nrow(piedata) == toptaxapie + 1){
          piedata <- piedata;
        } else {
          not_assigned_piedata <- piedata[which(piedata$variable == "Not_Assigned"), ];
          piedata <- piedata[which(piedata$variable != "Not_Assigned"), ];
          top_piedata <- head(piedata, n = toptaxapie);
          bottom_piedata <- tail(piedata, n = nrow(piedata) - toptaxapie);
          other_piedata <- data.frame("variable" = "Others", 
                                      "value" = sum(bottom_piedata$value),check.names=FALSE);
          piedata <- rbind.data.frame(top_piedata, other_piedata, not_assigned_piedata);
        }
      }
    }
    piedata <- piedata[order(piedata$value, decreasing = TRUE), ];
  }
  
  piedata_write$percentage <- round((piedata_write$value / sum(piedata_write$value) * 100), digits = 2)
  colnames(piedata_write) <- c("Taxa", "Abundance", "Percentage")
  fast.write(piedata_write, "piechart_abundances.csv");
  
  piedata <<- piedata;
  
  return(.set.mbSetObj(mbSetObj));
}

#'Main function to plot pie graphs of microbiome data.
#'@description This functions plots pie graphs of microbiome data. 
#'@param mbSetObj Input the name of the mbSetObj.
#'@param taxalvl Character, input the taxonomic level to perform
#'classification. For instance, "Genus" to use the Genus level.
#'@param metadata Character, select which grouping to use.
#'@param clslevel Character, input which group to plot.
#'@param feat_cnt Set the minimum feature count that is used to "bin"
#'together small taxa. 
#'@param calcmeth Merge small taxa based on the sum /("sum"/)
#'or median /("med"/) counts across all samples or groups.
#'@author Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
#'@import reshape
PlotGroupPieGraph <- function(mbSetObj, taxalvl, metadata, clslevel,
                              feat_cnt, calcmeth, toptaxapie, pietoptaxaopt){
  mbSetObj <- .get.mbSetObj(mbSetObj);
  
  load_reshape();
  load_phyloseq();
  
  set.seed(28053443);
  
  #using filtered data
  data <- mbSetObj$dataSet$filt.data;
  
  if("matrix" %in% class(mbSetObj$dataSet$filt.data)){
    data<-otu_table(data,taxa_are_rows =TRUE);
  }
  
  smpl <- data.frame(sample_data(mbSetObj$dataSet$proc.phyobj),check.names=FALSE);
  smpl1 <- sample_data(subset(smpl,smpl[metadata]==clslevel));
  data <- prune_samples(sample_names(smpl1), data);
  data <- merge_phyloseq(data, smpl1);
  mbSetObj$dataSet$taxa_table <- tax_table(mbSetObj$dataSet$proc.phyobj);
  datapie <<- merge_phyloseq(data, mbSetObj$dataSet$taxa_table);
  
  #using reduce names
  data <- t(data.frame(otu_table(datapie),check.names=FALSE));
  data_tax <- tax_table(datapie);
  
  if(taxalvl=="OTU"){
    taxa_nm <- as.matrix(colnames(data));
    rownames(taxa_nm) <- colnames(data);
    rownames(taxa_nm) <- sub("^X", "", rownames(taxa_nm))
  }else{
    taxa_nm <- data.matrix(data_tax[,taxalvl]);
  }
  
  #converting NA values to unassigned
  y <- which(is.na(taxa_nm)==TRUE);
  taxa_nm[y] <- "Not_Assigned";
  nms <- colnames(data) <- taxa_nm[,1];
  data <- data.frame(data %*% sapply(unique(nms),"==",nms),check.names=FALSE);
  colnames(data) <- gsub("\\."," ",colnames(data));
  
  if(pietoptaxaopt == "bottom"){
    if(calcmeth=="sum"){
      ind <- which(colSums(data)>feat_cnt);
      ind1 <- which(colSums(data)<feat_cnt);
    }else{
      dt <- apply(data,2,median);
      ind <- which(dt>feat_cnt);
      ind1 <- which(dt<feat_cnt);
    }
    if(length(ind)==0){
      AddErrMsg("All features have lower read count than given minimum count filter. Please lower the cut off for minimum count.");
      return(0);
    }
    
    if(length(ind1)>0){
      colnames(data)[ind1] <- "Others";
      data <- as.data.frame(do.call(cbind, by(t(data),INDICES=names(data),FUN=colSums)),check.names=FALSE);
    }
    
    data$step <- factor(rownames(data));
    data <- melt(data,id='step');
    data$step <- as.numeric(data$step);
    piedata <- aggregate(. ~variable , data=data[-1], FUN=sum);
    
    # order by abundance
    ord.inx <- order(piedata$value, decreasing = TRUE);
    piedata <- piedata[ord.inx,];
    piedata_write <- piedata;
  } else {
    
    if(calcmeth=="sum"){
      order_taxa <- order(sapply(data, sum), decreasing = TRUE)
    } else {
      order_taxa <- order(sapply(data, median), decreasing = TRUE)
    }
    
    data <- data[, order_taxa];
    data <- data.frame("variable" = names(data),
                       "value" = apply(data, 2, sum),check.names=FALSE);
    row.names(data) <- NULL;
    data$variable <- as.character(data$variable);
    
    piedata_write <- data[order(data$value, decreasing = TRUE), ]
    piedata <- data;
    contain_notassign <- any(grepl("^Not_Assigned$", piedata$variable));
    if(nrow(piedata) < toptaxapie){
      piedata <- piedata;
    } else {
      if (! contain_notassign){
        top_piedata <- head(piedata, n = toptaxapie);
        bottom_piedata <- tail(piedata, n = nrow(piedata) - toptaxapie);
        other_piedata <- data.frame("variable" = "Others", 
                                    "value" = sum(bottom_piedata$value),check.names=FALSE);
        piedata <- rbind.data.frame(top_piedata, other_piedata);
      } else {
        if(nrow(piedata) == toptaxapie + 1){
          piedata <- piedata;
        } else {
          not_assigned_piedata <- piedata[which(piedata$variable == "Not_Assigned"), ];
          piedata <- piedata[which(piedata$variable != "Not_Assigned"), ];
          top_piedata <- head(piedata, n = toptaxapie);
          bottom_piedata <- tail(piedata, n = nrow(piedata) - toptaxapie);
          other_piedata <- data.frame("variable" = "Others", 
                                      "value" = sum(bottom_piedata$value),check.names=FALSE);
          piedata <- rbind.data.frame(top_piedata, other_piedata, not_assigned_piedata);
        }
      }
    }
    piedata <- piedata[order(piedata$value, decreasing = TRUE), ];
  }
  piedata_write$percentage <- round((piedata_write$value / sum(piedata_write$value) * 100), digits = 2)
  colnames(piedata_write) <- c("Taxa", "Abundance", "Percentage")
  fast.write(piedata_write, "piechart_abundances.csv");
  piedata <<- piedata;
  return(.set.mbSetObj(mbSetObj));
}

#'Main function to plot sample-wise pie graphs of microbiome data.
#'@description This functions plots sample-wise pie graphs of microbiome data. 
#'@param mbSetObj Input the name of the mbSetObj.
#'@param taxalvl Character, input the taxonomic level to perform
#'classification. For instance, "Genus" to use the Genus level.
#'@param feat_cnt Set the minimum feature count that is used to "bin"
#'together small taxa. 
#'@author Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@exportmbSetObj, taxalvl, smplmbSetObj, taxalvl, smplnm, feat_cnt, calcmethnm, feat_cnt, calcmeth
#'@import reshape
PlotSamplePieGraph<-function(mbSetObj, taxalvl, smplnm, feat_cnt, toptaxapie, pietoptaxaopt){
  mbSetObj <- .get.mbSetObj(mbSetObj);

  load_reshape();
  load_phyloseq();
  set.seed(28053443);

  #using filtered data
  data <- mbSetObj$dataSet$filt.data;
  
  if("matrix" %in% class(mbSetObj$dataSet$filt.data)){
    data <- otu_table(data,taxa_are_rows =TRUE);
  }
  
  sample_table <- sample_data(mbSetObj$dataSet$proc.phyobj, errorIfNULL=TRUE);
  data <- merge_phyloseq(data, tax_table(mbSetObj$dataSet$proc.phyobj), sample_table);
  data <- prune_samples(smplnm, data);
  mbSetObj$dataSet$taxa_table <- tax_table(mbSetObj$dataSet$proc.phyobj);
  #datapie <<- merge_phyloseq(data, mbSetObj$dataSet$taxa_table)
  pie_try <- try(merge_phyloseq(data, mbSetObj$dataSet$taxa_table));
  if(class(pie_try) == "try-error"){
    datapie <<- data;
  } else {
    datapie <<- merge_phyloseq(data, mbSetObj$dataSet$taxa_table);
  }
  
  #using reduce names
  data <- otu_table(datapie);
  data <- data.frame(data,check.names=FALSE);
  zero_row <- which(data[[1]] != 0);
  data_tmp <- as.data.frame(data[zero_row, ],check.names=FALSE)
  row.names(data_tmp) <- row.names(data)[zero_row];
  names(data_tmp) <- smplnm;
  data <- data_tmp;
  
  data_tax <- tax_table(datapie);
  data_tax <- data_tax[zero_row, ]
  
  #reshaping data
  data <- t(data);
  
  if(taxalvl=="OTU"){
    taxa_nm <- as.matrix(colnames(data));
    rownames(taxa_nm) <- colnames(data);
    rownames(taxa_nm) <- sub("^X", "", rownames(taxa_nm))
  }else{
    taxa_nm <- data.matrix(data_tax[,taxalvl]);
  }
  
  y <- which(is.na(taxa_nm)==TRUE);
  #converting NA values to unassigned
  taxa_nm[y] <- "Not_Assigned";
  colnames(data) <- taxa_nm[,1];
  nms <- colnames(data);
  data <- data.frame(data %*% sapply(unique(nms),"==",nms),check.names=FALSE);
  colnames(data) <- gsub("\\."," ",colnames(data));
  
  if(pietoptaxaopt == "bottom"){
    ind <- which(colSums(data)>feat_cnt);
    ind1 <- which(colSums(data)<feat_cnt);
    
    if(length(ind)==0){
      AddErr("All features have lower read count than given minimum count filter. Please lower the cut off for minimum count.");
      return(0);
    }
    
    if(length(ind1)>0){
      colnames(data)[ind1] <- "Others";
      data<-as.data.frame(t(rowsum(t(data),group = colnames(data))),check.names=FALSE);
    }
    
    data$step <- factor(rownames(data));
    data <- melt(data,id='step');
    data$step <- as.numeric(data$step);
    piedata <- aggregate(. ~variable , data=data[-1], FUN=sum);
    
    # order by abundance
    ord.inx <- order(piedata$value, decreasing = TRUE);
    piedata <- piedata[ord.inx,];
  } else {
    piedata <- data.frame("variable" = colnames(data),
                          "value" = t(data)[, 1],check.names=FALSE)
    # order by abundance
    ord.inx <- order(piedata$value, decreasing = TRUE);
    piedata <- piedata[ord.inx,];
    contain_notassign <- any(grepl("^Not_Assigned$", piedata$variable));
    if(nrow(piedata) <= toptaxapie){
      piedata <- piedata;
    } else {
      if (! contain_notassign){
        top_piedata <- head(piedata, n = toptaxapie);
        bottom_piedata <- tail(piedata, n = nrow(piedata) - toptaxapie);
        other_piedata <- data.frame("variable" = "Others", 
                                    "value" = sum(bottom_piedata$value),check.names=FALSE);
        piedata <- rbind.data.frame(top_piedata, other_piedata);
      } else {
        if(nrow(piedata) == toptaxapie + 1){
          piedata <- piedata;
        } else {
          not_assigned_piedata <- piedata[which(piedata$variable == "Not_Assigned"), ];
          piedata <- piedata[which(piedata$variable != "Not_Assigned"), ];
          top_piedata <- head(piedata, n = toptaxapie);
          bottom_piedata <- tail(piedata, n = nrow(piedata) - toptaxapie);
          other_piedata <- data.frame("variable" = "Others", 
                                      "value" = sum(bottom_piedata$value),check.names=FALSE);
          piedata <- rbind.data.frame(top_piedata, other_piedata, not_assigned_piedata);
        }
      }
    }
  }
  piedata <<- piedata;
  return(.set.mbSetObj(mbSetObj));
}

#'Function to plot pie-chart data.
#'@description This functions plots pie charts of microbiome data. 
#'@param mbSetObj Input the name of the mbSetObj.
#'@param taxalvl Character, input the taxonomic level to perform
#'classification. For instance, "Genus" to use the Genus level.
#'@author Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
#'@import reshape

PlotDataPieFromPie<-function(mbSetObj, taxalvl, metadata, clslevel,
                             taxaposn, lowtaxa){
  
  mbSetObj <- .get.mbSetObj(mbSetObj);
  
  load_reshape();
  set.seed(280534431);
  high_taxa <<- as.character(piedata$variable[taxaposn]);
  
  if(high_taxa=="Not_Assigned"){
    AddErrMsg("Cannot map to Not_Assigned level!");
    return(0);
  }
  
  lowlvl_nm <<- lowtaxa;
  
  datataxa <- as.matrix(tax_table(datapie));
  subsettax_table <- tax_table(subset(datataxa,datataxa[,taxalvl]==high_taxa));
  data1 <- prune_taxa(taxa_names(subsettax_table),datapie);
  datapietaxatab <<- data_tax <- tax_table(data1);
  datapietaxa <<- data <- t(data.frame(otu_table(data1),check.names=FALSE));
  taxa_nm <- data.matrix(data_tax[,lowlvl_nm]);
  
  #converting NA values to unassigned
  y <- which(is.na(taxa_nm)==TRUE);
  taxa_nm[y] <- "Not_Assigned";
  colnames(data) <- taxa_nm[,1];
  nms <- colnames(data);
  data <- data.frame(data %*% sapply(unique(nms),"==",nms),check.names=FALSE);
  
  if(length(nms)==1){
    colnames(data)<-nms;
  }
  
  colnames(data) <- gsub("\\."," ",colnames(data));
  data$step <- factor(rownames(data));
  data <- melt(data,id='step');
  data$step <- as.numeric(data$step);
  
  fact <- factor(data$variable)
  levels(fact) <- sub("^X", "", levels(fact))
  
  color_var <- levels(fact);
  x <- length(color_var);
  x.colors <<- rep(col_vector,length.out=x);
  
  piedata2 <- aggregate(. ~variable , data=data[-1], FUN=sum);
  # order by abundance
  ord.inx <- order(piedata2$value, decreasing = TRUE);
  piedata2 <<- piedata2[ord.inx,];
  
  return(.set.mbSetObj(mbSetObj));
  
}

#'Function to update pie-chart data.
#'@description This functions updates pie chart data. 
#'@param mbSetObj Input the name of the mbSetObj.
#'@author Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
#'@import reshape
UpdatePieData<-function(mbSetObj, lowtaxa){
  
  mbSetObj <- .get.mbSetObj(mbSetObj);
  
  load_reshape();
  
  set.seed(280534432);
  
  data <- datapietaxa;
  data_tax <- datapietaxatab;
  high_taxa <- high_taxa;
  lowlvl_nm <<- lowtaxa;
  taxa_nm <- as.data.frame(data_tax[,lowlvl_nm],check.names=FALSE);
  taxa_nm <- as.matrix(taxa_nm);
  y <- which(is.na(taxa_nm)==TRUE);
  
  #converting NA values to unassigned
  taxa_nm[y] <- "Not_Assigned";
  colnames(data) <- taxa_nm[,1];
  nms <- colnames(data);
  data <- data %*% sapply(unique(nms),"==",nms);
  data <- data.frame(data,check.names=FALSE);
  
  if(length(nms)==1){
    colnames(data)<-nms;
  }
  
  colnames(data) <- gsub("\\."," ",colnames(data));
  data$step <- factor(rownames(data));
  data <- melt(data,id='step');
  data$step <- as.numeric(data$step);
  color_var <- levels(factor(data$variable));
  x <- length(color_var);
  x.colors <<- rep(col_vector,length.out=x);
  # piedatas is two column stats (variable and value)
  piedata2 <<- aggregate(. ~variable , data=data[-1], FUN=sum);
  
  return(.set.mbSetObj(mbSetObj));
  
}

#'Function to save pie-chart
#'@description This functions saves created pie chart plot. 
#'@param mbSetObj Input the name of the mbSetObj.
#'@param taxalvl Character, input the taxonomic level to perform
#'classification. For instance, "Genus" to use the Genus level.
#'@param pieName Character, input the name of the pie chart plot.
#'@param format Character, input the preferred
#'format of the plot. By default it is set to "png".
#'@param dpi Numeric, input the dots per inch. By default
#'it is set to 72.
#'@param interactive boolean, if true, return plotly else save png using Cairo
#'@author Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
SavePiechartImg <- function(mbSetObj, taxalvl, pieName="", format="png", dpi=72, interactive=F) {
  mbSetObj <- .get.mbSetObj(mbSetObj);
  set.seed(280);
  pieName = paste(pieName,".", format, sep="");
  orig.piedata <- piedata;
  piedata <- transform(transform(piedata, value=value/sum(value)));
  
  #rownames are still arranged by decending order
  piedataimg <- piedata;

  row.names(piedataimg) <- NULL;
  x.cols <- pie.cols;
  
  # java color code to R color code
  x.cols <- paste("#",x.cols, sep="");  

  
  mbSetObj$analSet$pie<-piedataimg;
  mbSetObj$analSet$pie.taxalvl<-taxalvl;
  piedataimg$rawValue <- orig.piedata$value
  if(interactive){
    fig <- plot_ly(piedataimg, labels = ~variable, values = ~rawValue, type = 'pie', 
                   textinfo = 'label+percent', insidetextorientation = 'radial', width=1000,
                   height=800) %>%
      layout(title = '',
             xaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE),
             yaxis = list(showgrid = FALSE, zeroline = FALSE, showticklabels = FALSE))
    
    .set.mbSetObj(mbSetObj)
    return(fig);
  }else{
    mbSetObj$imgSet$pie <- pieName;
    box <- ggplot(piedataimg, aes(x="", y = value, fill=reorder(variable,-value))) +
      geom_bar(width = 1, stat = "identity") + theme_bw() +
      coord_polar(theta = "y",direction=-1,start = 4.71239) + scale_fill_manual(values=c(x.cols))+
      geom_text(aes(x=1.6,label = scales::percent(round(value,2))), check_overlap = T,size=3,position = position_stack(vjust = 0.5),color="grey48") +
      theme(legend.position="right",axis.text = element_blank(),axis.ticks = element_blank(),panel.grid  = element_blank(), plot.title = element_text(hjust=0.5, face="bold"),legend.text=element_text(color="grey48")) +
      labs(x="", y="",fill="");
    Cairo::Cairo(file=pieName, width=630, height=500, type=format, bg="white", dpi=dpi);
    print(box);
    dev.off();
    return(.set.mbSetObj(mbSetObj))
  }
  
}

#'Function to create pie-chart plot.
#'@description This functions creates the pie chart plot. 
#'@param mbSetObj Input the name of the mbSetObj.
#'@param format Character, input the preferred
#'format of the plot. By default it is set to "png".
#'@param dpi Numeric, input the dots per inch. By default
#'it is set to 72.
#'@author Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
PlotPiechart <- function(mbSetObj, rel_perct, pieName, format="png", dpi=72) {
  
  mbSetObj <- .get.mbSetObj(mbSetObj);
  
  set.seed(28056188); 
  pieName = paste(pieName,".", format, sep="");
  piedata2 <- transform(transform(piedata2, value=value/sum(value)));
  ind <- which(piedata2[,"value"]>rel_perct);
  ind1 <- which(piedata2[,"value"]<rel_perct);
  
  if(length(ind)==0){
    AddErrMsg("All features have lower relative abundance than given minimum abundance. Please lower the cut off for relative abundance.");
    return(0);
  }
  
  if(length(ind1)>0){
    levels(piedata2$variable)[ind1] <- "Others";
    piedata2 <- aggregate(value~variable, piedata2, FUN=sum);
  }
  
  ind_zero <- which(piedata2[,"value"]==0);
  
  if(length(ind_zero)>0){
    piedata2 <- piedata2[-ind_zero,];
  }
  
  Cairo::Cairo(file=pieName,width=630, height=400, type=format, bg="white",dpi=dpi);
  
  box=ggplot(piedata2, aes(x="", y = value, fill=variable)) +
    geom_bar(width = 1, stat = "identity") + theme_bw() +
    coord_polar(theta = "y") + scale_fill_manual(values=c(x.colors))+
    geom_text(aes(x=1.7, label = scales::percent(value)), check_overlap = T,size=3, position = position_stack(vjust = 0.5)) +
    theme(legend.position="bottom",axis.text = element_blank(),axis.ticks = element_blank(),panel.grid  = element_blank(), plot.title = element_text(hjust=0.5, face="bold")) +
    labs(x="", y="",fill =lowlvl_nm) + ggtitle(high_taxa);
  print(box);
  
  dev.off();
  
  return(.set.mbSetObj(mbSetObj));
}

#######################################
###########Alpha-diversity#############
#######################################

#'Function to plot alpha-diversity analysis.
#'@description This functions creates a plot for alpha-diversity. 
#'@param mbSetObj Input the name of the mbSetObj.
#'@param data.src Character, input whether alpha diversity 
#'is calculated using the filtered /("filt"/) or raw data /("orig"/).
#'@param bargraphName Character, input the name of the plot.
#'@param distName Character, input the diversity measure
#'to calculate alpha-diversity. "Chao1", "Observed", "ACE", "Shannon", 
#'"Simpson", or "Fisher".
#'@param metadata Character, input the name of the experimental
#'factor to group the samples.
#'@param taxrank Character, input the taxonomic level to calculate
#'alpha-diversity. "Phylum", "Class", "Order",
#'"Family", "Genus", "Species" or "OTU".   
#'@param group Boolean, input whether or not to group the samples in the 
#'dot plot. 0 or 1.
#'@param colors Character, set to "default" to use the default colors, 
#'"viridis" to use the viridis color palette, "plasma" to use the plasma
#'color palette, "magma" to use the magma color palette, or
#'"magma" to use the magma color palette.
#'@param format Character, input the preferred
#'format of the plot. By default it is set to "png".
#'@param dpi Numeric, input the dots per inch. By default
#'it is set to 72.
#'@author Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
#'@import viridis
PlotAlphaData<-function(mbSetObj, data.src, bargraphName, distName, metadata, 
                        taxrank, colors = "default", format="png", dpi=72, interactive = FALSE){

  load_phyloseq();

  mbSetObj <- .get.mbSetObj(mbSetObj);  
  set.seed(13133);
  
  dataName <- mbSetObj$dataSet$name;
  module.type <- mbSetObj$module.type;
  
  if(module.type == "meta"){
    data <- qs::qread("merged.data.raw.qs");
    data <- subsetPhyloseqByDataset(mbSetObj, data);

  }else{
    if(data.src == "orig"){
      data <- readDataQs("orig.phyobj", module.type, dataName);
    }else{
      data <- mbSetObj$dataSet$proc.phyobj;
    }
  }

  if(taxrank!="OTU"){
    #merging at taxonomy levels
    data <- fast_tax_glom_mem(data, taxrank);
    if(is.null(data)){
      AddErrMsg("Errors in projecting to the selected taxanomy level!");
      return(0);
    }
  } 
  #reordering the sample (getting reordered rownames)
  sam <- sample_data(data);
  sam <- sam[order(sam[[metadata]])];
  smplord <- rownames(sam);
  
  smpl.num <- length(smplord);
  if(smpl.num < 25){
    width <- 600
  }else if(smpl.num >= 25 & smpl.num <= 50){
    width <- 750
  }else{
    width <- 900
  }

  bargraphName = paste(bargraphName, ".", format, sep="");
  mbSetObj$imgSet$alpha <- bargraphName;  
  
  box = plot_richness(data, color = metadata, measures = distName) + scale_x_discrete(limits=c(smplord));
  
  if(colors == "viridis"){
    box = box + viridis::scale_color_viridis(discrete=TRUE)
  }else if(colors %in% c("magma","plasma","inferno")){
    box <- box + viridis::scale_color_viridis(option=colors, discrete=TRUE)
  }
  
  mbSetObj$analSet$alpha <- box$data;
  fast.write(mbSetObj$analSet$alpha, file="alphadiversity.csv");
  box = box + theme_bw() + theme(axis.text.x = element_text(angle = 45, hjust = 1,vjust= 1));
  box$layers <- box$layers[-1];
  box <- box + geom_point(size=3, alpha=0.7);
  #getting scale for plot (using same for boxplot also)
  ylimits <<- layer_scales(box)$y$range$range;
  
  Cairo::Cairo(file=bargraphName, width, height=450, type=format, bg="white", dpi=dpi);

  if(mbSetObj$module.type == "meta"){
     box <- box + facet_grid(. ~ dataset, scales = "free", space = "free");
  }
  print(box);
  dev.off();
  
  mbSetObj$analSet$alpha.meth <- distName;
  mbSetObj$analSet$alpha.metadata <- metadata;
  mbSetObj$analSet$alpha.taxalvl <- taxrank;
  
  if(interactive){
    library(plotly)
    library(htmlwidgets)
    p <- plotly::ggplotly(box)
    p[["x"]][["layout"]][["annotations"]][[1]][["y"]] <- -0.075 # Samples
    p[["x"]][["layout"]][["annotations"]][[2]][["x"]] <- -0.06 # Alpha
    p <- p %>% layout(margin = list(l = 20, r = 20, b = 20, t = 40))
    htmlwidgets::saveWidget(p, "alpha_richness_interactive.html")
  }
  
  return(.set.mbSetObj(mbSetObj))
}

#'Function to create bar plots of selected taxa level.
#'@description This functions creates bar plots of a selected taxa level. 
#'@param mbSetObj Input the name of the mbSetObj.
#'@param barplotName Character, input the name of the bar plot.
#'@param taxalvl Character, input the taxonomic level to perform
#'classification. For instance, "Genus" to use the Genus level.
#'@param format Character, input the preferred
#'format of the plot. By default it is set to "png".
#'@param dpi Numeric, input the dots per inch. By default
#'it is set to 72.
#'@author Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
#'@import reshape
PlotSampleTaxaAundanceBar<-function(mbSetObj, barplotName, taxalvl, samplnm,
                                    imgOpt, feat_cnt, toptaxa, abunTopTaxaOpt, 
                                    appendnm, format="png", dpi=72){
  mbSetObj <- .get.mbSetObj(mbSetObj);
  
  if(.on.public.web){
    load_reshape();
    load_phyloseq();
  }
  if(mbSetObj$module.type == "meta"){
    merged <- qs::qread("merged.data.qs");
    data <- subsetPhyloseqByDataset(mbSetObj, merged);
  }else{
    #using filtered data
    data <- mbSetObj$dataSet$filt.data;
  }
  if("matrix" %in% class(mbSetObj$dataSet$filt.data)){
    data<-otu_table(data, taxa_are_rows =TRUE);
  }
  
  sample_table <- sample_data(mbSetObj$dataSet$proc.phyobj, errorIfNULL=TRUE);
  data1 <- merge_phyloseq(data, tax_table(mbSetObj$dataSet$proc.phyobj), sample_table);
  
  yLbl <- "Actual Abundance";
  
  data <- as.matrix(otu_table(data1));
  data <- data[,samplnm,drop=FALSE];
  data <- t(data);
  data_tax <- tax_table(data1);
  
  #reshaping data
  if(taxalvl=="OTU"){
    taxa_nm <- as.matrix(colnames(data));
    rownames(taxa_nm) <- colnames(data);
    rownames(taxa_nm) <- sub("^X", "", rownames(taxa_nm))
  }else{
    
    taxa_nm <- as.character(data_tax[,taxalvl]);
    
    y <- which(is.na(taxa_nm)==TRUE);
    #converting NA values to unassigned
    taxa_nm[y] <- "Not_Assigned";
    
    if(appendnm=="T"){
      all_nm <- colnames(tax_table(data1));
      hg_nmindx <- which(all_nm==taxalvl)-1;
      
      if(hg_nmindx!=0){
        nma <- as.character(tax_table(data1)[,hg_nmindx]);
        y1 <- which(is.na(nma)==TRUE);
        nma[y1] <- "Not_Assigned";
        nm <- paste0(nma,"_",taxa_nm);
        ind <- which(nm=="Not_Assigned_Not_Assigned");
        nm[ind] <- "Not_Assigned";
        nm <- gsub("_Not_Assigned", "",nm, perl = TRUE);
        taxa_nm <- nm;
      }
    }
  }
  
  taxa_nm <- as.matrix(taxa_nm);
  
  if(appendnm=="T"){
    y <- which(grepl("Not_Assigned", taxa_nm))
  }else{
    y <- which(is.na(taxa_nm)==TRUE);
  }
  
  #converting NA values to unassigned; before order it to last position using ZZZ as its name
  taxa_nm[y] <- "ZZZ";
  colnames(data) <- taxa_nm[,1];
  nms <- colnames(data);
  data <- data %*% sapply(unique(nms),"==",nms);
  data <- data.frame(data,check.names=FALSE);
  data <- data[ , order(names(data))];
  indx <- which(colnames(data)=="ZZZ");
  colnames(data)[indx] <- "NA";
  
  if(abunTopTaxaOpt == "bottom"){
    ind <- which(colSums(data)>feat_cnt);
    ind1 <- which(colSums(data)<feat_cnt);
    
    if(length(ind)==0){
      AddErrMsg("All features have lower read count than given minimum count filter. Please lower the cut off for minimum count.");
      return(0);
    }
    
    if(length(ind1)>0){
      colnames(data)[ind1] <- "Others";
      data <- as.data.frame(t(rowsum(t(data),group = colnames(data))),check.names=FALSE);
    }
    
    if(imgOpt=="barnorm"){
      data <- as.data.frame(apply(data,1, function(x) x/sum(x)),check.names=FALSE);
      data <- as.data.frame(t(data),check.names=FALSE);
      yLbl <- "Relative Abundance";
    }
    
    feat_no<-ncol(data);
    fast.write(t(data), file="taxa_abund.csv");
    data$step <- factor(rownames(data));
    data <- melt(data,id='step');
    data$step <- as.numeric(data$step);
    data <- data[order(data[,2]),];
    data <- data[,-1];
  } else {
    if(imgOpt=="barnorm"){
      data <- as.data.frame(apply(data,1, function(x) x/sum(x)),check.names=FALSE);
      data <- as.data.frame(t(data),check.names=FALSE);
      yLbl <- "Relative Abundance";
    }
    
    fast.write(t(data), file="taxa_abund.csv");
    data$step <- factor(rownames(data));
    data <- melt(data,id='step');
    data$step <- as.numeric(data$step);
    data <- data[order(data[,2]),];
    data <- data[,-1];
    
    data <- data[which(data$value != 0), ]
    data$variable <- as.character(data$variable);
    data <- data[order(data$value, decreasing = TRUE), ];
    
    contain_notassign <- any(grepl('^NA$', data$variable));
    if(nrow(data) <= toptaxa){
      data <- data;
    } else {
      if (! contain_notassign){
        data <- head(data, n = toptaxa);
        bottom_data <- tail(data, n = nrow(data) - toptaxa);
        other_data <- data.frame("variable" = "Others", 
                                 "value" = sum(bottom_data$value),check.names=FALSE);
        data <- rbind.data.frame(top_data, other_data);
      } else {
        if(nrow(data) == toptaxa + 1){
          data <- data;
        } else {
          not_assigned_data <- data[which(data$variable == "NA"), ];
          data <- data[which(data$variable != "NA"), ];
          top_data <- head(data, n = toptaxa);
          bottom_data <- tail(data, n = nrow(data) - toptaxa);
          other_data <- data.frame("variable" = "Others", 
                                   "value" = sum(bottom_data$value),check.names=FALSE);
          data <- rbind.data.frame(top_data, other_data, not_assigned_data);
        }
      }
    }
    feat_no<-nrow(data);
  }
  
  w<-600;
  
  if(feat_no < 3){
    w<-w;
  } else if (feat_no <5){
    w<-w+100;
  } else if (feat_no < 10){
    w<-w+200;
  } else if (feat_no < 20){
    w<-w+400;
  } else if (feat_no > 20){
    w<-w+500;
  }
  
  a <- feat_no;
  
  if(length(a)<50){
    h<-feat_no*50;
  }else{
    h<-feat_no*25;
  }
  
  #sorting by descending
  rdaName = paste(barplotName, ".rda", sep="");
  jsonName = paste(barplotName, ".json", sep="")
  barplotName = paste(barplotName, ".",format, sep="");
  mbSetObj$imgSet$stack<-barplotName;
  mbSetObj$imgSet$stackRda <-rdaName;
  mbSetObj$imgSet$stackType <- "sample";
  
  Cairo::Cairo(file=barplotName,width=w, height=h, type=format, bg="white",dpi=dpi);
  box <- ggplot(data, aes(x=reorder(variable,value),y=value))+geom_bar(stat="identity",width=0.6,fill="steelblue")+theme_bw()+
    theme(axis.text.x = element_text(angle = 0,vjust=0.5))+
    labs(y=yLbl,x="",fill=taxalvl)+coord_flip()+
    theme(panel.background = element_blank(),
          panel.grid.major = element_blank(), panel.grid.minor = element_blank(),legend.position="none");
  print(box);
  dev.off();

  save(box,file=rdaName);
  
    p <- ggplotly_modified(box, tempfile_path = paste0(getwd(), "/temp_file4plotly"));

jsonlist <- RJSONIO::toJSON(p, pretty = T,force = TRUE,.na = "null");
sink(jsonName);
cat(jsonlist);
sink();

  mbSetObj$analSet$stack<-data;
  mbSetObj$analSet$stack.taxalvl<-taxalvl;
  mbSetObj$analSet$plot<-"Stacked Bar";
  return(.set.mbSetObj(mbSetObj));
}

#'Function to create box plots for alpha diversity analysis
#'@description This functions performs metagenome seq analysis on the microbiome data.
#'@param mbSetObj Input the name of the mbSetObj.
#'@param boxplotName Character, input the name of the boxplot.
#'@param distName Character, input the diversity measure
#'to calculate alpha-diversity. "Chao1", "Observed", "ACE", "Shannon", 
#'"Simpson", or "Fisher".
#'@param metadata Input the name of the experimental factor to group the samples.
#'@param colors Use "default", "viridis" to 
#'use the viridis color palette, "plasma" to use the plasma
#'color palette, "magma" to use the magma color palette, or
#'"magma" to use the magma color palette.
#'@param format Character, input the preferred
#'format of the plot. By default it is set to "png".
#'@param dpi Numeric, input the dots per inch. By default
#'it is set to 72.
#'@author Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
#'@import ggplot2
#'@import viridis
  
  PlotAlphaBoxData<-function(mbSetObj, boxplotName, distName, metadata, 
                             colors="default", format="png", dpi=72){
    mbSetObj <- .get.mbSetObj(mbSetObj);
    load_viridis();
    
    set.seed(1313397);
    data <- mbSetObj$analSet$alpha;
    CLASS <- data[,metadata];
    vals <- data[,"value"]
    boxplotName = paste(boxplotName, ".", format, sep="");
    mbSetObj$imgSet$alpha.box<-boxplotName;
    
    grp_size <- length(levels(CLASS))
    
    if(grp_size <= 2){
      width <- 400;
    }else if(grp_size >= 3 & grp_size < 6){
      width <- 600;
    }else{
      width <- 700;
    }
    
    if(colors %in% c("magma","plasma","inferno","viridis")){
      box1 = ggplot(data, aes(CLASS, vals, fill = CLASS))
    }else{
      box1 = ggplot(data, aes(CLASS, vals, color = CLASS))
    }
    
    box1 = box1 + stat_boxplot(geom ='errorbar', width=0.2) +
      geom_boxplot(alpha=0.7, outlier.shape = NA,
                   position = position_dodge(width = 0), width=0.3) +
      geom_jitter(width = 0.1) + #reduce the width of jitter
      stat_summary(fun.y=mean, #add mean point
                   geom = "point",
                   shape = 18,
                   size = 4,
                   color = "black") +
      theme_bw() +
      theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1, size = 11),
            axis.text.y = element_text(size = 11),
            legend.text = element_text(size = 11)) + #adjust titles
      labs(title = "",
           y= paste("Alpha-diversity Index:", as.character(data$variable[[1]]), sep = " "),
           x="") + #remove x = CLASS, add title name, change y name
      theme(axis.title.y = element_text(size=14)) + 
      coord_cartesian(ylim = c(ylimits[1], ylimits[2]));
    
    if(colors == "viridis"){
      box1 <- box1 + viridis::scale_fill_viridis(discrete = TRUE);
    }else if(colors %in% c("magma","plasma","inferno")){
      box1 <- box1 + viridis::scale_fill_viridis(option=colors, discrete=TRUE)
    }
    
    Cairo::Cairo(file=boxplotName, width=width, height=400, type=format, bg="white", dpi=dpi);
    if(mbSetObj$module.type == "meta"){
      box1 <- box1 + facet_grid(. ~ dataset, scales = "free", space = "free");
    }
    print(box1);
    dev.off();
    
    return(.set.mbSetObj(mbSetObj))
  }
  

######################################
###########Beta-diversity#############
######################################

#'Function to plot beta diversity.
#'@description This functions creates beta diversity plots.
#'@param mbSetObj Input the name of the mbSetObj.
#'@param plotNm Character, input the name of the beta-diversity score plot.
#'@param ordmeth Ordination method. Character, input "PCoA" to create a PCoA plot and
#'"NMDS" to create a NMDS plot.
#'@param distName Character, input the name of the distance method. "bray" for
#'Bray-Curtis Index, "jsd" for Jensen-Shannon Divergence, "jaccard" for Jaccard Index,
#'"unifrac" for Unweighted Unifrac Distance and "wunifrac" for Weighted Unifrac Distance.
#'@param colopt Character, input whether the data points should be colored by
#'experimental factor with "expfac", taxon abundance with "taxa" or alpha diversity 
#'with "alphadiv".
#'@param metadata Input the name of the preferred experimental factor, only used if 
#'colopt is "expfac".
#'@param showlabel Character, input whether or not to label samples in the plot.
#'"none" to label no samples, "samnm" to label samples by their name, and "Class" for
#'their group classification.
#'@param taxrank Character, input the taxonomic level to calculate
#'beta-diversity.
#'@param taxa Character, input the specific taxon used to color the data points. Only
#'used if colopt is set to "taxa".
#'@param alphaopt Character, input the name of the alpha-diversity metric. "Chao1",
#' "Observed", "ACE", "Shannon", "Simpson", or "Fisher".
#'@param ellopt Character, input "yes" to show ellipses and "no" to not.
#'@param format Character, input the preferred
#'format of the plot. By default it is set to "png".
#'@param dpi Numeric, input the dots per inch. By default
#'it is set to 72.
#'@param custom_col Set to "none" to use the default color palette, "viridis" to use the 
#'default viridis color palette, "plasma" to use the plasma viridis color palette and
#'"magma" to use the magma color palette.
#'@param interactive Boolean, if set to TRUE, saves the plot
#'as an interactive html plot.
#'@author Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
#'@import data.table
#'@import ape

PerformBetaDiversity <- function(mbSetObj, plotNm, ordmeth, distName, colopt, metadata, 
                                 showlabel, taxrank, taxa, alphaopt, ellopt, comp.method, format="png", dpi=72,
                                 custom_col = "none",pairwise, interactive = FALSE){
  #save.image("beta.Rdata");
  combined <- F;
  mbSetObj <- .get.mbSetObj(mbSetObj);
  module.type <- mbSetObj$module.type;
  load_ggplot()
  load_datatable();
  load_viridis();
  load_phyloseq();
  err.vec <<- ""
  set.seed(13134);
  if(all(c(module.type == "meta", !combined))){
    mdata.all <- mbSetObj$mdata.all;
    sel.nms <- names(mdata.all)[mdata.all==1];
    dataNames <- sel.nms;
  }else{
    dataNames <- mbSetObj$dataSet$name
  }
  ord.list <- list();
  for(i in 1:length(dataNames)){
    dataName <- dataNames[i];
    if(module.type == "meta"){
      mbSetObj$dataSet <- readDataset(dataName);
    }
    if(all(c(module.type == "meta", combined))){
      proc.phyobj <- qs::qread("merged.data.raw.qs");
      norm.phyobj <- qs::qread("merged.data.qs");
      proc.phyobj <- subsetPhyloseqByDataset(mbSetObj, proc.phyobj);
      norm.phyobj <- subsetPhyloseqByDataset(mbSetObj, norm.phyobj);

    }else{
      proc.phyobj <- mbSetObj$dataSet$proc.phyobj;
      norm.phyobj <- mbSetObj$dataSet$norm.phyobj;
    }    
    
    #using normalized data
    
    phyloseq_objs <- readDataQs("phyloseq_objs.qs",mbSetObj$module.type,dataName)
    
    data <- phyloseq_objs$merged_obj[[taxrank]]
    if(is.null(data)){
      AddErrMsg("Errors in projecting to the selected taxanomy level!");
      return(0);
    }
    
    if(colopt=="taxa"){
      if(taxrank=="OTU"){
        data1 <- as.matrix(otu_table(data));
        feat_data <- as.numeric(data1[taxa,]);
      }else{
        nm <- as.character(tax_table(data)[,taxrank]);
        if(sum(is.na(nm))/length(nm) > 0.7){
          AddErrMsg("More than 70% values are missing at this taxa!");
          return(0);
        }
        #converting NA values to unassigned
        nm[is.na(nm)] <- "Not_Assigned";
        data1 <- as.matrix(otu_table(data));
        rownames(data1) <- nm;
        #all NA club together
        data1 <- as.matrix(t(sapply(by(data1, rownames(data1), colSums), identity)));
        feat_data <- data1[taxa,];
      }
      sample_data(data)$taxa <- feat_data;
      indx <- which(colnames(sample_data(data))=="taxa");
      colnames(sample_data(data))[indx] <- taxa;
      taxa1 <- colnames(sample_data(data))[indx];
      taxaorig <- taxa;
      #if the taxa names are numeric then X is appending to column name
      
      if(!is.na(as.numeric(taxa))=="TRUE"){
        taxa <- paste("X",taxa, sep = "", collapse = NULL);
      }
    }else if(colopt=="alphadiv") {
      data1 <- proc.phyobj;
      box <- plot_richness(data1, measures = alphaopt);
      alphaboxdata <- box$data;
      sam_nm <- sample_names(data);
      alphaboxdata <- alphaboxdata[alphaboxdata$samples %in% sam_nm,];
      alphaval <- alphaboxdata$value;
      sample_data(data)$alphaopt <- alphaval;
      indx <- which(colnames(sample_data(data))=="alphaopt");
      colnames(sample_data(data))[indx] <- alphaopt;
    }else if(colopt=="continuous") {
      require("MMUPHin");
      require("vegan");
      
      proc.phyobj <- qs::qread("merged.data.raw.qs");
      data1 <- proc.phyobj;
      
      #sub_sam_data <- sam_data[which(sam_data[,metadata] == meta.grp), ]
      #sub_data <- data@otu_table[, rownames(sub_sam_data)];
      sub_sam_data <- sample_data(data1); 
      
      #sub_data <- as.matrix(otu_table(data1)); 
      sub_data <- data1@otu_table[, rownames(sub_sam_data)];
      sub_data <- apply(sub_data, 2, function(x) x / sum(x))
      dist.data <- vegdist(t(sub_data));
      
      fit_continuous <- continuous_discover(feature_abd = sub_data,
                                            batch = "dataset",
                                            data = sub_sam_data,
                                            control = list(var_perc_cutoff = 0.5,
                                                           verbose = FALSE));
      
      loading <- data.frame(feature = rownames(fit_continuous$consensus_loadings),
                            loading1 = fit_continuous$consensus_loadings[, 1]);
      
      sample_data(data1)$loading <- fit_continuous$consensus_scores[, 1];
      oldDF <- as(sample_data(data1), "data.frame");
      newDF <- oldDF[oldDF$dataset %in% dataName, ];
      sample_data(data) <- sample_data(newDF);
      
    }else{
      data<-data;
    }
    if(distName=="wunifrac"){
      
      load_ape();
      pg_tree <- qs::qread("tree.qs");
      pg_tb <- tax_table(data);
      pg_ot <- otu_table(data);
      pg_sd <- sample_data(data);
      pg_tree <- prune_taxa(taxa_names(pg_ot), pg_tree);
      data <- merge_phyloseq(pg_tb, pg_ot, pg_sd, pg_tree);
      # check if pg_tree is null
      if(is.null(data@phy_tree)){
        AddErrMsg("Tree tip labels do not match feature names in the OTU/taxonomic tables!");
        return(0)
      }
      
      qs::qsave(data, "data_unifra.qs");


      if(ordmeth=="PCA"){
          ord <- prcomp(t(data@otu_table@.Data), center=TRUE, scale=F)
      }else{
          ord <- ordinate(data,method = ordmeth,"unifrac",weighted=TRUE);
      }
    } else if (distName=="unifrac") {
      
      load_ape();      
      pg_tree <- qs::qread("tree.qs");
      pg_tb <- tax_table(data);
      pg_ot <- otu_table(data);
      pg_sd <- sample_data(data);
      pg_tree <- prune_taxa(taxa_names(pg_ot), pg_tree);
      data <- merge_phyloseq(pg_tb, pg_ot, pg_sd, pg_tree);
      
      # check if pg_tree is null
      if(is.null(data@phy_tree)){
        AddErrMsg("Tree tip labels do not match feature names in the OTU/taxonomic tables!");
        return(0)
      }
      
      qs::qsave(data, "data_unifra.qs");
   
  if(ordmeth=="PCA"){
          ord <- prcomp(t(data@otu_table@.Data), center=TRUE, scale=F)
      }else{
             ord <- ordinate(data, method = ordmeth,"unifrac");
      }
    }else{ 
      if(ordmeth=="PCA"){
          ord <- prcomp(t(data@otu_table@.Data), center=TRUE, scale=F)
      }else{
           ord <- ordinate(data, method = ordmeth,distName);
      }

    }   
    if(ordmeth == "NMDS"){
      ord$vectors <- ord$points;
      colnames(ord$vectors) <- c("Axis.1", "Axis.2")
    }else if(ordmeth == "PCA"){
        ord$vectors <- ord$x;
      colnames(ord$vectors)[1:3] <- c("Axis.1", "Axis.2", "Axis.3")
    }
    
    if(colopt == "continuous"){
      ord$vectors <- as.data.frame(ord$vectors);
      ord$vectors <- cbind(sample_data(data)$loading, ord$vectors);
      colnames(ord$vectors)[1] <- "loading"
    }else if(all(c(colopt == "alphadiv", module.type == "meta"))){
      ord$vectors <- as.data.frame(ord$vectors);
      
      ord$vectors <- cbind(sample_data(data)[,alphaopt], ord$vectors);
      colnames(ord$vectors)[1] <- alphaopt
      
    }
    .set.mbSetObj(mbSetObj);
    PerformCategoryComp(mbSetObj, taxrank, comp.method,distName, metadata,pairwise);
    mbSetObj <- .get.mbSetObj(mbSetObj);
    ord$stat.info <- mbSetObj$analSet$stat.info;
    
    ord.list[[dataName]] <- ord;
    
  }
  plotNm = paste(plotNm, ".", format, sep="");
  mbSetObj$imgSet$beta2d<-plotNm;
  
  if(all(c(module.type == "meta", !combined))){
    dat.num <- length(ord.list);
    if(dat.num > 3){
      colNum=2;
    }else{
      colNum=1;
    }
    merged.data <- qs::qread("merged.data.qs");
    merged.data <- subsetPhyloseqByDataset(mbSetObj, merged.data);
    sam_data <- sample_data(merged.data);
    pdataframe = plyr::ldply(ord.list, function(x){
      df = sam_data;
      return(x$vectors)
    })
    pdataframe <- cbind(sam_data, pdataframe);
    pdataframe$sample_id <- rownames(pdataframe)

    res <- lapply(ord.list, function(x){ return(x$stat.info)})
    stats <- unlist(res);
    for(i in 1:length(stats)){
      stats[i] <- paste0(dataNames[i], ": ", stats[i]);
    }
    names(stats) <- dataNames;
    
    if(colopt =="continuous") {
      box <- ggplot(pdataframe, aes(Axis.1, Axis.2, color=loading)) + scale_colour_gradient2(low="green",mid="black", high="red");
    }else if(colopt =="alphadiv"){
      box <- ggplot(pdataframe, aes(Axis.1, Axis.2, color=pdataframe[,alphaopt])) + scale_colour_gradient2(low="green", high="red") + labs(color = alphaopt);
    }else{
      box <- ggplot(pdataframe, aes(Axis.1, Axis.2, color=study_condition, fill=study_condition));
    }
    box <- box + geom_point(size=4) +
      facet_wrap(~dataset, scales="free", ncol= colNum, labeller = labeller(dataset = stats));
    if(colNum == 1){
      height.multiplier <- dat.num;
      width <- 720;
    }else{
      height.multiplier <- ceiling(dat.num/2)
      width <- 1220;
    }
    height <- 100 + 400 * height.multiplier;
    
  }else{
    sam_data <- as.data.frame(sample_data(data),check.names=FALSE);
    if(colopt=="taxa"){
      box = plot_ordination(data, ord, color=taxa) + labs(aesthetic=taxaorig) + scale_colour_gradient(low="green", high="red");
    }else if(colopt=="alphadiv") {
      box = plot_ordination(data, ord, color=alphaopt)+ scale_colour_gradient(low="green", high="red");
    }else if(colopt=="continuous") {
      box = plot_ordination(data, ord, color="loading")+ scale_colour_gradient2(low="green", high="red");
    }else{
      box = plot_ordination(data, ord, color=metadata);
    }  
    height <- 500;
    width <- 720;
  }
  
  box$layers <- box$layers[-1];
  
  if(showlabel=="samnm"){
    box = box + ggrepel::geom_text_repel(aes(label=sample_id), hjust=0.5, vjust=2, size=3, fontface="bold");
    box = box + geom_point(size=4, alpha=0.6) + theme_bw();
  }else if(showlabel=="none"){
    box=box+geom_point(size=4, alpha=0.8) + theme_bw();
  }else{
    showlabel <<- showlabel;
    bx_data <<- data.frame(box$data,check.names=FALSE);
    box = box + ggrepel::geom_text_repel(aes(label=bx_data[ ,showlabel]), hjust=0.5, vjust=2, size=3, fontface="bold");
    box = box + geom_point(size=4, alpha=0.6) + theme_bw();
  }
  
  #used for area color for ellipse
  if(colopt=="expfac"){
    clsLbl <- quo(sam_data[[metadata]]);
    
    if (ellopt=="yes"){
      box = box + stat_ellipse(type="norm", linetype=2, geom = "polygon", alpha = 0.2, aes(fill = !!clsLbl), show.legend=FALSE);
    }
  }
  
  if(custom_col == "viridis"){
    box = box + viridis::scale_color_viridis(discrete = TRUE) + viridis::scale_fill_viridis(discrete = TRUE)
  }else if(custom_col == "plasma"){
    box = box + viridis::scale_color_viridis(option="plasma", discrete = TRUE) + viridis::scale_fill_viridis(option="plasma", discrete = TRUE)
  }else if(custom_col == "magma"){
    box = box + viridis::scale_color_viridis(option="magma", discrete = TRUE) + viridis::scale_fill_viridis(option="magma", discrete = TRUE)
  }
  
  box = box + theme(text = element_text(size = 14));
  
  Cairo::Cairo(file=plotNm, width=width, height=height, type=format, bg="white",dpi=dpi);
  print(box);
  dev.off();
  
  #saving info for report generation
  mbSetObj$analSet$beta <- data;
  mbSetObj$analSet$beta.metadata <- metadata;
  mbSetObj$analSet$beta.meth <- ordmeth;
  mbSetObj$analSet$beta.dist <- distName;
  mbSetObj$analSet$beta.taxalvl <- taxrank;
  
  # if it is NMDS, show stress value in the plot
  if(ordmeth == "NMDS"){
    mbSetObj$analSet$beta.stress <- paste("[NMDS] Stress =", signif(ord$stress, 5));
  }
  return(.set.mbSetObj(mbSetObj))
}

#'Plot functional annotation summary
#'@description This functions plots the functional annotation summary.
#'@param mbSetObj Input the name of the mbSetObj.
#'@param format Character, input the preferred
#'format of the plot. By default it is set to "png".
#'@param dpi Numeric, input the dots per inch. By default
#'it is set to 72.
#'@author Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
PlotFunAnotSummary<-function(mbSetObj, imgName, format="png",funanno, dpi=72){
  
  mbSetObj <- .get.mbSetObj(mbSetObj);
  
  set.seed(280561499);
   
  if(funanno=="gg12"){
    func.file = "func.pred.gg12"
    mbSetObj$analSet$funcode <- "picrust_ko";
  }else if(funanno=="gg13"){
    func.file = "func.pred.gg13"
    mbSetObj$analSet$funcode <- "picrust_ko";
  }else if(funanno =="tax4fun2"){
    func.file = "func.pred.tax4fun2";
    mbSetObj$analSet$funcode <- "tax4fun2_ko";
  }else{
    func.file = "func.pred.tax4fun"
    mbSetObj$analSet$funcode <- "tax4fun_ko";
  }
  
  if(is.null(mbSetObj$analSet[[func.file]])){
    result <- qs::qread(func.file);
  }else{
    result <- mbSetObj$analSet[[func.file]];
    qs::qsave(mbSetObj$analSet[[func.file]], file=func.file);
    mbSetObj$analSet[[func.file]] <- NULL;
  }
 
  imgName = paste(imgName, ".",format, sep="");
  mbSetObj$imgSet[[func.file]] <- imgName;
  Cairo::Cairo(file=imgName, width=900, height=480, type=format, bg="white",dpi=dpi);
  box <- ggplot(stack(log(result)), aes(x = factor(ind, levels = names(result)), y=values)) + labs(x=NULL, y="log (KO Counts)") + geom_boxplot() + theme(axis.text.x = element_text(angle = 90, hjust = 1))
  print(box);
  dev.off();
  
  return(.set.mbSetObj(mbSetObj))
}

#'Plot functional annotation summary
#'@description This functions plots the functional annotation summary.
#'@param mbSetObj Input the name of the mbSetObj.
#'@param barplotName Character, input the name of the barplot.
#'@param viewOpt Character.
#'@param taxalvl Character, input the taxonomic level to perform
#'classification. For instance, "Genus" to use the Genus level.
#'@param metadata If users wish to merge samples to groups in the stacked bar plot,
#'set this to the preferred grouping.  
#'@param feat_cnt Set the minimum feature count that is used to "bin"
#'together small taxa. 
#'@param colpalopt Select the color palette options. "set3", 
#'which is the Set3 from the R Color Brewer, "cont21" which is
#'a set of 21 colors, "cont28" which is a set of 28 colors, and 
#'"cont42" which is a set of 42 colors. For users who wish to use
#'a color palette robust to colorblindness 
#'/(https://cran.r-project.org/web/packages/viridis/vignettes/intro-to-viridis.html/), 
#'use "viridis", "magma", "plasma", or "inferno".
#'@param calcmeth Merge small taxa based on the sum /("sum"/)
#'or median /("med"/) counts across all samples or groups.
#'@param format Character, input the preferred
#'format of the plot. By default it is set to "png".
#'@param dpi Numeric, input the dots per inch. By default
#'it is set to 72.
#'@author Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
#'@import reshape
#'@import viridis
PlotTaxaAbundanceArea<-function(mbSetObj, barplotName, viewOpt, taxalvl, metadata,
                                feat_cnt, colpalopt, calcmeth, toptaxa, abunTopTaxaOpt, 
                                appendnm=FALSE, format="png", dpi=72){

  mbSetObj <- .get.mbSetObj(mbSetObj);
  load_reshape();
  load_viridis();
  load_phyloseq();
  
  if(mbSetObj$module.type == "meta"){
    if(viewOpt=="smpl_grp"){
      AddErrMsg("Taxa area can not be plotted for merged samples.");
      return(0);
    }
    data1 <- qs::qread("merged.data.raw.qs");
    data1 <- subsetPhyloseqByDataset(mbSetObj, data1);

    sample_table <- sample_data(data1);
    data <- as.data.frame(t(otu_table(data1)),check.names=FALSE);
  }else{
    #using filtered data
    data <- mbSetObj$dataSet$filt.data;
    if("matrix" %in% class(mbSetObj$dataSet$filt.data)){
      data <- otu_table(data,taxa_are_rows =TRUE);
    }
    sample_table <- sample_data(mbSetObj$dataSet$proc.phyobj, errorIfNULL=TRUE);
    data1 <- merge_phyloseq(data, tax_table(mbSetObj$dataSet$proc.phyobj), sample_table);
  }
  
  if(viewOpt=="smpl_grp"){
    data <- as.data.frame(t(otu_table(data1)),check.names=FALSE);
    data <- cbind(data,variable=data1@sam_data[[metadata]]);
    data <- aggregate(. ~variable,data,sum);
    gp_nm <- rownames(data)<-data[,1];
    data <- data[,-1];
    data <- data[order(rownames(data)),];
    clsLbl <- sort(unique(factor(data1@sam_data[[metadata]])));
    colvec <- NULL;
  }else {
    data <- data.frame(otu_table(data1),check.names=FALSE);
    
    # reorder data based on groups
    sam <- sample_data(data1);
    smpl_nm <- row.names(sam);
    
    if(metadata == "none"){
      sam$newnewnew <- rep("one", nrow(sam));
      metadata <- "newnewnew";
    }
    clsLbl <- factor(sam[[metadata]]);
    if(all(c(length(levels(clsLbl)) > 9, min(table(clsLbl)) < 3))){
      AddErrMsg("Too many facets to be displayed - please select a more meaningful facet option with at least 3 samples per group.");
      return(0);
    }
    ord.inx <- order(clsLbl);
    smpl_nm <- smpl_nm[ord.inx];
    clsLbl <- clsLbl[ord.inx];
    colvec <- as.numeric(clsLbl)+1;
    data <- t(data[,ord.inx]);
  }
  
  data_tax <- tax_table(data1);
  
  #reshaping data
  if(taxalvl=="OTU"){
    taxa_nm <- as.matrix(colnames(data));
    rownames(taxa_nm) <- colnames(data);
    rownames(taxa_nm) <- sub("^X", "", rownames(taxa_nm))
  }else{
    
    taxa_nm <- as.character(data_tax[,taxalvl]);
    
    y <- which(is.na(taxa_nm)==TRUE);
    #converting NA values to unassigned
    taxa_nm[y] <- "Not_Assigned";
    
    if(appendnm=="T"){
      all_nm <- colnames(tax_table(data1));
      hg_nmindx <- which(all_nm==taxalvl)-1;
      
      if(hg_nmindx!=0){
        nma <- as.character(tax_table(data1)[,hg_nmindx]);
        y1 <- which(is.na(nma)==TRUE);
        nma[y1] <- "Not_Assigned";
        nm <- paste0(nma,"_",taxa_nm);
        ind <- which(nm=="Not_Assigned_Not_Assigned");
        nm[ind] <- "Not_Assigned";
        nm <- gsub("_Not_Assigned", "",nm, perl = TRUE);
        taxa_nm <- nm;
      }
    }
  }
  
  taxa_nm <- as.matrix(taxa_nm);
  
  if(appendnm=="T"){
    y <- which(grepl("Not_Assigned", taxa_nm))
  }else{
    y <- which(is.na(taxa_nm)==TRUE);
  }
  
  #converting NA values to unassigned; before order it to last position using ZZZ as its name
  taxa_nm[y] <- "ZZZ";
  colnames(data) <- taxa_nm[,1];
  nms <- colnames(data);
  data <- as.matrix(data);
  data <- data %*% sapply(unique(nms),"==",nms);
  data <- data.frame(data,check.names=FALSE);
  data <- data[ ,order(names(data))];
  indx <- which(colnames(data)=="ZZZ");
  colnames(data)[indx] <- "NA";
  
  if(abunTopTaxaOpt == "top"){
    if(calcmeth=="sum"){
      feat_cnt <- sort(sapply(data, sum), decreasing = TRUE)[toptaxa];
    }else {
      feat_cnt <- sort(sapply(data, median), decreasing = TRUE)[toptaxa];
    }
  }  
  
  if(calcmeth=="sum"){
    ind<-which(colSums(data)>feat_cnt);
    ind1<-which(colSums(data)<feat_cnt);
  } else {
    dt<-apply(data,2,median);
    ind<-which(dt>feat_cnt);
    ind1<-which(dt<feat_cnt);
  }
  
  if(length(ind)==0){
    AddErrMsg("All features have lower read count than given minimum count filter. Please lower the cut off for minimum count.");
    return(0);
  }
  
  if(length(ind1)>0){
    colnames(data)[ind1] <- "Others";
    data<- as.data.frame(do.call(cbind, by(t(data),INDICES=names(data),FUN=colSums)),check.names=FALSE);
  }

  feat_no<-ncol(data);
  #adjust height according to number of legends
  h<-540;
  if(feat_no < 10){
    h<-h;
  } else if (feat_no < 20){
    h<-h+100;
  } else if (feat_no < 50){
    h<-h+200;
  } else if (feat_no < 100){
    h<-h+400;
  } else if (feat_no > 100){
    h<-h+500;
  }
  
  # width calculation
  #a <- nsamples(data1);
  a <- nrow(sample_table);
  min.w <- 540;
  if(length(a)<50){
    w <- a*35;
  }else{
    w <- a*25;
  }
  if(w < min.w){
    w <- min.w;
  }
  print(c(w,h))
  fast.write(t(data), file="taxa_abund.csv");
  data$facetOpt <- as.character(clsLbl);
  data$step <- factor(rownames(data), levels = rownames(data));
  data <- melt(data,id=c('step', 'facetOpt'));
  data$step <- as.numeric(data$step);
  # data$sample <- data$step;
  data <- data[order(data[,3]),];
  data <- data[,-1];

  if(viewOpt=="smpl_grp"){
    data$step <- rep(1:length(gp_nm),feat_no);
    lbl <- gp_nm;
  }else {
    data$step <- rep(1:a,feat_no);
    lbl <- smpl_nm;
  }
  
  tmp_df <- aggregate(data$value, by=list(data$variable), FUN=mean)
  var_level <- tmp_df[order(tmp_df$x, decreasing = TRUE), ][[1]]
  data$variable <- factor(data$variable,
                          levels = var_level) # change the factor level of taxa
  levels(data$variable) <- sub("^X", "", levels(data$variable)) 
  
  
  data$facetOpt <- factor(data$facetOpt,levels=(unique(data$facetOpt)) )
  
  #color schema
  color_var <- levels(factor(data$variable));
  x <- length(color_var);
  if(colpalopt=="grad"){
    indx <- which(color_var=="NA");
    #color schema for ggplot
    x.colors <- hcl(h=seq(15,375,length=(x+1)),l=65,c=100)[1:x];
    x.colors[indx] <- "#666666";
  }else if (colpalopt=="cont21"){
    x.colors <- rep(custom_col21,length.out=x);
  }else if (colpalopt=="cont28"){
    x.colors <- rep(custom_final28,length.out=x);
  }else {
    x.colors <- rep(custom_col42,length.out=x);
  }
  
  jsonName = paste(barplotName, ".json", sep="");
  rdaName = paste(barplotName, ".rda", sep="");
  barplotName = paste(barplotName, ".",format, sep="");
  mbSetObj$imgSet$stack <- barplotName;
  mbSetObj$imgSet$stackRda <-rdaName;
  mbSetObj$imgSet$stackType <- "area";
 
  Cairo::Cairo(file=barplotName,width=min(w,1000), height=h, type=format, bg="white",dpi=dpi);
  
  box <- ggplot(data,aes(x=step,y=value)) + theme_bw() +
    theme(axis.text.x = element_text(angle = 90, hjust =1,vjust=0.5)) +
    geom_area(aes(fill=variable), position='fill') +
    facet_grid(~ facetOpt, space = "free", scales = "free") +
    scale_x_continuous(breaks=seq(1,length(unique(data$step)),1),labels=lbl) +
    labs(y="Relative abundance",x="",fill=taxalvl) +
    theme(legend.position="bottom",legend.box = "vertical") +
    theme(axis.text.x = element_text(colour="black"),axis.title.x=element_blank());
  
  if(colpalopt=="set3"){
    cols.needed <- length(unique(data$variable))
    if(cols.needed > 12){
      col.func <- grDevices::colorRampPalette(RColorBrewer::brewer.pal(12, "Set3"))
      box <- box + scale_fill_manual(values=col.func(cols.needed),
                                     guide = guide_legend(direction = "horizontal",
                                                          ncol = 5))
    } else {
      box <- box + scale_fill_brewer(palette = "Set3",
                                     guide = guide_legend(direction = "horizontal",
                                                          ncol = 5))
    }
  }else if(colpalopt=="viridis"){
    box <- box + viridis::scale_fill_viridis(discrete=TRUE)
  }else if(colpalopt %in% c("magma","plasma","inferno")){
    box <- box + viridis::scale_fill_viridis(option=colpalopt, discrete=TRUE)
  }else{
    box <- box + scale_fill_manual(values=c(x.colors))
  }
  
  if(metadata == "newnewnew"){
    box <- box + theme(strip.text.x = element_blank())
  }
  if(mbSetObj$module.type == "meta"){
    #box <- box + facet_grid(. ~ dataset  , scales = "free", space = "free");
  }
  
  print(box);
  dev.off();
  # for plotly
  save(box,file=rdaName);
    p <- ggplotly_modified(box, tempfile_path = paste0(getwd(), "/temp_file4plotly"));

 narm <- p[["x"]][["data"]]
   for(i in 1:length(narm)){
      narm[[i]]$y[is.na(narm[[i]]$y)]=0;
   }
   p[["x"]][["data"]] <- narm

jsonlist <- RJSONIO::toJSON(p, pretty = T,force = TRUE,.na = "null");
sink(jsonName);
cat(jsonlist);
sink();
  mbSetObj$analSet$stack<-data;
  mbSetObj$analSet$stack.taxalvl<-taxalvl;
  mbSetObj$analSet$plot<-"Stacked Area";
  
  return(.set.mbSetObj(mbSetObj));
  
}

#'Function to plot bar charts for alpha diversity
#'@description This functions plots bar charts of different taxonomic levels
#'for alpha diversity.
#'@param mbSetObj Input the name of the mbSetObj.
#'@param barplotName Character, input the name of the barplot.
#'@param taxalvl Character, input the taxonomic level to perform
#'classification. For instance, "Genus" to use the Genus level.
#'@param metadata If users wish to merge samples to groups in the stacked bar plot,
#'set this to the preferred grouping.  
#'@param facet Character, set the group to separate the bar plots.
#'@param imgOpt Character, set the graph type. Stacked bar
#'using the actual abundance, use "barraw". Stacked bar using
#'the percentage abundance, use "barnorm".
#'@param feat_cnt Set the minimum feature count that is used to "bin"
#'together small taxa. 
#'@param colpalopt Select the color palette options. "set3", 
#'which is the Set3 from the R Color Brewer, "cont21" which is
#'a set of 21 colors, "cont28" which is a set of 28 colors, and 
#'"cont42" which is a set of 42 colors. For users who wish to use
#'a color palette robust to colorblindness 
#'/(https://cran.r-project.org/web/packages/viridis/vignettes/intro-to-viridis.html/), 
#'use "viridis", "magma", "plasma", or "inferno".
#'@param calcmeth Merge small taxa based on the sum /("sum"/)
#'or median /("med"/) counts across all samples or groups.
#'@param format Character, input the preferred
#'format of the plot. By default it is set to "png".
#'@param dpi Numeric, input the dots per inch. By default
#'it is set to 72.
#'@param interactive Boolean, if TRUE, will save an interactive
#'version of the Stacked Bar plot using plotly.
#'@author Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
#'@import reshape
#'@import ggplot2
#'@import viridis
PlotTaxaAundanceBar<-function(mbSetObj, barplotName, taxalvl, facet, facet2, imgOpt, 
                              feat_cnt, colpalopt, calcmeth, toptaxa, abunTopTaxaOpt, 
                              appendnm, format="png", dpi=72, interactive = FALSE){
  load_reshape();
  load_ggplot();
  load_viridis();
  load_phyloseq(); 
  #save.image("tax.RData");
  mbSetObj <- .get.mbSetObj(mbSetObj);
  
  if(mbSetObj$module.type == "meta"){
    data1 <- qs::qread("merged.data.raw.qs");
    data1 <- subsetPhyloseqByDataset(mbSetObj, data1);
    sample_table <- sample_data(data1);
    data <- as.data.frame(otu_table(data1),check.names=FALSE);
    #facet2="dataset";
  }else{
    data <- mbSetObj$dataSet$filt.data;
    if("matrix" %in% class(mbSetObj$dataSet$filt.data)){
      data <- otu_table(data, taxa_are_rows =TRUE);
    }
    sample_table <- sample_data(mbSetObj$dataSet$proc.phyobj, errorIfNULL=TRUE);
    data1 <- merge_phyloseq(data, tax_table(mbSetObj$dataSet$proc.phyobj), sample_table);
    data <- as.data.frame(otu_table(data1),check.names=FALSE);
  }
  # reorder data based on groups
  sam <- sample_data(data1);
  
  if(facet == "none"){
    sam$newnewnew <- rep("one", nrow(sam));
    facet <- "newnewnew";
  }
  
  clsLbl <- factor(sam[[facet]]);
  if(length(clsLbl)==0){
    AddErrMsg("Invalid class label! Ensure there are no special characters (i.e. punctuation marks) in your group names!")
    return(0)
  }
  
  if(all(c(length(levels(clsLbl)) > 9, min(table(clsLbl)) < 3))){
    AddErr("Too many facets to be displayed - please select a more meaningful facet option with at least 3 samples per group.");
    return(0);
  }
  
  sample_data(data1) <- sam;
  
  metalp <- as(sample_data(data1), "data.frame") #extract metadata table
  smpl_nm <- sample_names(data1);
  
  ord.inx <- order(clsLbl);
  smpl_nm <- smpl_nm[ord.inx];
  clsLbl <- clsLbl[ord.inx];
  colvec <- as.numeric(clsLbl)+1;
  data <- t(data[,ord.inx]);
  
  data_tax <- tax_table(data1);
  
  if(taxalvl=="OTU"){
    taxa_nm <- as.matrix(colnames(data));
    rownames(taxa_nm) <- colnames(data);
    rownames(taxa_nm) <- sub("^X", "", rownames(taxa_nm))
  }else{
    
    taxa_nm <- as.character(data_tax[,taxalvl]);
    y <- which(is.na(taxa_nm)==TRUE | taxa_nm == "");
    
    #converting NA and empty values to unassigned
    taxa_nm[y] <- "Not_Assigned";
    
    if(appendnm=="T"){
      all_nm <- colnames(tax_table(data1));
      hg_nmindx <- which(all_nm==taxalvl)-1;
      
      if(hg_nmindx!=0){
        nma <- as.character(tax_table(data1)[,hg_nmindx]);    
        y1 <- which(is.na(nma)==TRUE | nma == "");
        nma[y1] <- "Not_Assigned";
        nm <- paste0(nma,"_",taxa_nm);
        ind <- which(nm=="Not_Assigned_Not_Assigned");
        nm[ind] <- "Not_Assigned";
        nm <- gsub("_Not_Assigned", "",nm, perl = TRUE);
        taxa_nm <- nm;
      }
    }
  }
  
  #reshaping data
  taxa_nm <- as.matrix(taxa_nm);
  
  if(appendnm=="T"){
    y <- which(grepl("Not_Assigned", taxa_nm))
  }else{
    y <- which(is.na(taxa_nm)==TRUE | taxa_nm== "");
  }
  #converting NA values to unassigned; before order it to last position using ZZZ as its name
  taxa_nm[y] <- "ZZZ";
  colnames(data) <- taxa_nm[,1];
  nms <- colnames(data);
  data <- as.matrix(data);
  data <- data %*% sapply(unique(nms),"==",nms);
  data <- data.frame(data,check.names=FALSE);
  
  data <- data[ , order(names(data))];
  indx <- which(colnames(data)=="ZZZ");
  
  colnames(data)[indx] <- "NA";
  
  
  if(abunTopTaxaOpt == "top"){
    if(calcmeth=="sum"){
      feat_cnt <- sort(sapply(data, sum), decreasing = TRUE)[toptaxa];
    }else {
      feat_cnt <- sort(sapply(data, median), decreasing = TRUE)[toptaxa];
    }
  }
  
  if(calcmeth=="sum"){
    ind <- which(colSums(data)>feat_cnt);
    ind1 <- which(colSums(data)<feat_cnt);
  } else {
    dt <- apply(data,2,median);
    ind <- which(dt>feat_cnt);
    ind1 <- which(dt<feat_cnt);
  }
  
  if(length(ind)==0){
    AddErrMsg("All features have lower read count than given minimum count filter. Please lower the cut off for minimum count.");
    return(0);
  }
  
  if(length(ind1)>0){
    colnames(data)[ind1] <- "Others";
    data <- as.data.frame(do.call(cbind, by(t(data),INDICES=names(data),FUN=colSums)),check.names=FALSE);
  }
  
  yLbl <- "Actual Abundance";
  
  if(imgOpt=="barnorm"){
    data <- as.data.frame(apply(data,1, function(x) x/sum(x)),check.names=FALSE);
    data<-as.data.frame(t(data),check.names=FALSE);
    yLbl <- "Relative Abundance";
  }
  
  feat_no <- ncol(data);
  h <- 630;
  if(feat_no < 10){
    h <- h;
  } else if (feat_no < 20){
    h <- h+100;
  } else if (feat_no < 50){
    h <- h+200;
  } else if (feat_no < 100){
    h <- h+300;
  } else {
    h <- h+300 +3*(feat_no-100);
  }
  w <- h + 200;
  sam_num <- nrow(data);
  if(sam_num>50){
    w <- h + sam_num*7;
  }

  data <- data[row.names(metalp),]
  data[[get("facet")]] <- metalp[[get("facet")]]
  data$sample <- row.names(data);
  fast.write(t(data), file="taxa_abund.csv");
  data <- melt(data, id = c("sample", get("facet")))
  tmp_df <- aggregate(data$value, by=list(data$variable), FUN=mean)
  var_level <- tmp_df[order(tmp_df$x, decreasing = TRUE), ][[1]]
  
  data$variable <- factor(data$variable, levels = var_level) # change the factor level of taxa
  levels(data$variable) <- sub("^X", "", levels(data$variable));
  
  if(all(c(facet2 != "null", facet2 != "none", facet2 != facet))){
    data$variable2 <- sam[[facet2]][match(data$sample, row.names(sam))]; 
  }
  
  #color schema
  color_var <- levels(factor(data$variable));
  x <- length(color_var);
  
  if(colpalopt=="grad"){
    indx <- which(color_var=="NA");
    #color schema for ggplot
    x.colors <- hcl(h=seq(15,375,length=(x+1)),l=65,c=100)[1:x];
    x.colors[indx] <- "#666666";
  }else if (colpalopt=="cont21"){
    x.colors <- rep(custom_col21,length.out=x);
  }else if (colpalopt=="cont28"){
    x.colors <- rep(custom_final28,length.out=x);
  }else {
    x.colors <- rep(custom_col42,length.out=x);
  }
  
  jsonName = paste(barplotName,".json", sep="");
  rdaName = paste(barplotName,".rda", sep="");
  barplotName = paste(barplotName, ".",format, sep="");
  
  mbSetObj$imgSet$stack <- barplotName;
  mbSetObj$imgSet$stackRda <-rdaName;
  mbSetObj$imgSet$stackType <- "default";
  
  
  if(length(unique(data$sample)) <= 10){
    guide_num = 3
  }else if (length(unique(data$sample)) <= 20){
    guide_num = 4
  }else{
    guide_num = 5
  }
  Cairo::Cairo(file=barplotName,width=w, height=h, type=format, bg="white",dpi=dpi);
  box <- ggplot(data = data,
                aes(x = sample,
                    y = value,
                    fill = variable)) +
    geom_bar(stat="identity", position="stack") +
    labs(y=yLbl,x="", fill = taxalvl)+
    theme(legend.position="bottom",legend.box = "vertical",
          axis.text.x = element_text(angle = 45,
                                     hjust =1,vjust=1), #remove colour=colvec
          axis.title.x=element_blank(),
          panel.background = element_rect(fill = "grey90",
                                          colour = "grey90",
                                          size = 0.5, linetype = "solid"),
          axis.line = element_line(colour = "lightgrey"));
  
  if(any(c(facet2 == "null", facet2 == "none", facet2 == facet))){
    box <- box +
      facet_grid(as.formula(paste("~", facet)), scales = "free", space = "free") 
  } else {
    box <- box +
      facet_grid(as.formula(paste("variable2", "~", facet)), scales = "free", space = "free") 
  }
  
  if(colpalopt %in% c("set1", "set2", "set3")){
    
    cols.needed <- length(unique(data$variable))
    colpalopt <- tools::toTitleCase(tolower(colpalopt))
    
    if(colpalopt == "Set1"){
      n.cols <- 9
    }else if(colpalopt == "Set2"){
      n.cols <- 8
    }else{
      n.cols <- 12
    }
    
    if(cols.needed > n.cols){
      col.func <- grDevices::colorRampPalette(RColorBrewer::brewer.pal(n.cols, colpalopt))
      box <- box + scale_fill_manual(values=col.func(cols.needed), guide = guide_legend(direction = "horizontal", ncol=guide_num))
    }else{
      box <- box + scale_fill_brewer(palette = colpalopt, guide = guide_legend(direction = "horizontal", ncol=guide_num))
    }
  } else if(colpalopt=="viridis"){
    box <- box + viridis::scale_fill_viridis(discrete=TRUE) + guides(color=guide_legend(ncol=guide_num)) 
  }else if(colpalopt %in% c("magma","plasma","inferno")){
    box <- box + viridis::scale_fill_viridis(option=colpalopt, discrete=TRUE) + guides(color=guide_legend(ncol=guide_num))
  }else{
    box <- box + scale_fill_manual(values=c(x.colors)) + guides(color=guide_legend(ncol=guide_num))
  }
  
  if(facet == "newnewnew"){
    box <- box + theme(strip.text.x = element_blank())
  }
    box <- box + theme( text = element_text(size = 14), axis.text.x = element_text(size = 9))

  save(box,file=rdaName);
  print(box)
  dev.off();
  p <- ggplotly_modified(box, tempfile_path = paste0(getwd(), "/temp_file4plotly"));
  
  jsonlist <- RJSONIO::toJSON(p, pretty = T,force = TRUE,.na = "null");
  sink(jsonName);
  cat(jsonlist);
  sink();
  
  mbSetObj$analSet$stack <- data;
  mbSetObj$analSet$stack.taxalvl <- taxalvl;
  mbSetObj$analSet$plot <- "Stacked Bar";
  
  return(.set.mbSetObj(mbSetObj));
}


#'Function to perform categorical comparison.
#'@description This functions performs categorical comparisons.
#'@param mbSetObj Input the name of the mbSetObj.
#'@param taxaLvl Input the name of the taxonomic level to calculate Beta-diversity comparison.
#'@param method Statistical method to calculate
#'beta-diversity significance. Use "adonis" for Permutational MANOVA, "anosim" for
#'Analysis of Group Similarities and "permdisp" for
#'Homogeneity of Group Dispersions.
#'@param distnm Character, input the name of the distance method. "bray" for
#'Bray-Curtis Index, "jsd" for Jensen-Shannon Divergence, "jaccard" for Jaccard Index,
#'"unifrac" for Unweighted Unifrac Distance and "wunifrac" for Weighted Unifrac Distance.
#'@param variable Input the name of the experimental factor to group the samples.
#'@param covariates Boolean. TRUE to consider covariates, FALSE to only consider the
#'main effect. Only valid for PERMANOVA.
#'@param cov.vec Character vector. Input the names of the covariates to consider in the 
#'PERMANOVA model.
#'@param model.additive Boolean. If TRUE, the model will be additive (i.e. data ~ var1 + var2),
#'making the assumption that the two factor variables are independent. 
#'However, if FALSE, the model will consider the synergistic effects of the variables - interaction (i.e. data ~ var1*var2).
#'"Different explanatory variables the effects on the outcome of a change in one variable may either not depend on the
#'level of the other variable (additive model) or it may depend on the level of the other variable (interaction model)." 
#'@author Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
#'@import vegan
PerformCategoryComp <- function(mbSetObj, taxaLvl, method, distnm, variable, pairwise=FALSE,
                                covariates = FALSE, cov.vec = NA, model.additive = TRUE){
  
  mbSetObj <- .get.mbSetObj(mbSetObj);
  load_vegan();

  if(distnm %in% c("wunifrac", "unifrac")) {
    data <- qs::qread("data_unifra.qs");
  } else {
     if(!(exists("phyloseq_objs"))){
      phyloseq_objs <- readDataQs("phyloseq_objs.qs",mbSetObj$module.type,mbSetObj$dataSet$name)
   }
     data <- phyloseq_objs$merged_obj[[taxaLvl]]
     if(is.null(data)){
        AddErrMsg("Errors in projecting to the selected taxanomy level!");
        return(0);
      }

  }
  
  data <- transform_sample_counts(data, function(x) x/sum(x));

  data.dist <- phyloseq::distance(data, method=distnm);
  group <- phyloseq::get_variable(data,variable);
  stat.info <- "";
  resTab <- list();
 
  if(method=="adonis"){
    sampledf <- data.frame(sample_data(data), check.names=FALSE); 
    outcome <- "data.dist";
    variables <- variable;
    f <- as.formula(paste(outcome,paste(variables, collapse = " * "), sep = " ~ "));
    
    # advanced feature to include co-variates and different model options (NOT implemented in web?)
    if(covariates){
      variables <- c(variable, cov.vec)
      
      if(model.additive){
        f <- as.formula(
          paste(outcome,
                paste(variables, collapse = " + "),
                sep = " ~ "))
      }else{
        f <- as.formula(
          paste(outcome,
                paste(variables, collapse = " * "),
                sep = " ~ "))
      }
    }
   
    res <- adonis2(formula = f, data = sampledf);
    resTab <- res[1,];
    stat.info <- paste("[PERMANOVA] F-value: ", signif(resTab$F, 5),  "; R-squared: ", signif(resTab$R2, 5), "; p-value: ", signif(resTab$Pr, 5), sep="");   
    stat.info.vec <- c(signif(resTab$F, 5), signif(resTab$R2, 5), signif(resTab$Pr, 5));
    names(stat.info.vec) <- c("F-value", "R-squared", "p-value");

    if(pairwise != "false"){
        grp <- sample_data(mbSetObj$dataSet$norm.phyobj)[[variable]]
       if(length(levels(grp)) == 2){ # same as before  
            res <- data.frame(t(c(stat.info.vec, p.adj=signif(resTab$Pr, 5))));
            rownames(res) <- paste(levels(grp)[1], ".vs.", levels(grp)[2]);
       }else{
            res <- .permanova_pairwise(x = data.dist,  grp);
            rownames(res) <- res$pairs;
            res$pairs <- NULL;
      }
      mbSetObj$analSet$beta.stat.pair <- mbSetObj$analSet$resTable <- signif(res,5);
      fast.write(mbSetObj$analSet$resTable, file="pairwise_permanova.csv");
   }
 
}else if(method=="anosim"){ # just one group
    anosim <- anosim(data.dist, group=group);
    resTab$Rval <- anosim$statistic;
    resTab$pval <- anosim$signif;
    stat.info <- paste("[ANOSIM] R: ", signif(resTab$Rval, 5), "; p-value < ", signif(resTab$pval, 5), sep="");
    stat.info.vec <- c(signif(resTab$Rval, 5), signif(resTab$pval, 5));
    names(stat.info.vec) <- c("R", "p-value");

  }else if (method=="permdisp"){ # just one group
    beta <- betadisper(data.dist, group=group);
    resTab <- anova(beta);
    stat.info <- paste("[PERMDISP] F-value: ", signif(resTab$"F value"[1], 5), "; p-value: ", signif(resTab$"Pr(>F)"[1], 5), sep="");
    stat.info.vec <- c(signif(resTab$"F value"[1], 5), signif(resTab$"Pr(>F)"[1], 5));
    names(stat.info.vec) <- c("F-value", "p-value");
  }else if(method=="mirkat"){
    
    if(!exists("MiRKAT")){ # public web on same user dir
      .load.scripts.on.demand("utils_mirkat.Rc"); 
    }
    exclud <- which(colnames(data@sam_data) %in% variable)
    X = data@sam_data[,-exclud]
    X = apply(X, 2, function(x) as.numeric(factor(x)))
    K.weighted <- D2K(as.matrix(data.dist))
    Gp <- as.numeric(factor(group))
    res<- MiRKAT(y =Gp, X = X, Ks = K.weighted, out_type = "C", method = "davies", returnKRV = TRUE, returnR2 = TRUE)
    resTab <- do.call(cbind,res);
    stat.info <- paste("[MiRKAT] R-squared: ", signif(resTab[1,"R2"], 5), "; p-value: ", signif(resTab[1,"p_values"], 5),"; KRV: ",signif(resTab[1,"KRV"], 5), sep="");
    stat.info.vec <- signif(resTab[1, c("R2", "p_values", "KRV")], 5);
    names(stat.info.vec) <- c("R-squared", "p-value", "KRV");
  }

  mbSetObj$analSet$stat.info <- stat.info;
  mbSetObj$analSet$stat.info.vec <- stat.info.vec;
  return(.set.mbSetObj(mbSetObj));
}

################################################
###### generate figure barplot with group#######
################################################
#'Function to plot group-wise bar charts.
#'@description This functions plots group-wise bar charts of a specified 
#'taxa for alpha diversity analysis.
#'@param mbSetObj Input the name of the mbSetObj.
#'@author Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
#'@import reshape
PlotTaxaAbundanceBarSamGrp<-function(mbSetObj, barplotName, taxalvl, metadata, facet2, imgOpt,
                                     feat_cnt, colpalopt, calcmeth, toptaxa,abunTopTaxaOpt, 
                                     appendnm, format="png", dpi=80, interactive = FALSE){
  load_phyloseq();
  
  mbSetObj <- .get.mbSetObj(mbSetObj);
  
  if(.on.public.web){
    load_reshape();
    load_viridis();
  }
  
  if(mbSetObj$module.type == "meta"){
    data1 <- qs::qread("merged.data.raw.qs");
    sample_table <- sample_data(data1);
    data <- as.data.frame(t(otu_table(data1)),check.names=FALSE);
    #if(facet2 == "none"){
      #facet2="dataset";
    #}
  }else{
    #using filtered data
    data <- mbSetObj$dataSet$filt.data;
    if("matrix" %in% class(mbSetObj$dataSet$filt.data)){
      data<-otu_table(data,taxa_are_rows =TRUE);
    }
    
    sample_table <- sample_data(mbSetObj$dataSet$proc.phyobj, errorIfNULL=TRUE);
    data1 <- merge_phyloseq(data, tax_table(mbSetObj$dataSet$proc.phyobj), sample_table);
  }
  
  yLbl <- "Actual Abundance";
  data <- as.data.frame(t(otu_table(data1)),check.names=FALSE);
  flag <- (facet2 == "null" || facet2 == "none" || facet2 == metadata);
  
  if(flag){
    data <- cbind(data, variable=data1@sam_data[[metadata]]);
    clsLbl <- unique(factor(data1@sam_data[[metadata]]));
  } else {
    data$variable <- paste(data1@sam_data[[metadata]], data1@sam_data[[facet2]], sep = ";");
    clsLbl <- unique(factor(paste(data1@sam_data[[metadata]],data1@sam_data[[facet2]], sep = ";")));
  }
  
  data <- aggregate(. ~variable,data,sum);
  rownames(data) <- data[,1];
  data <- data[,-1];
  data <- data[rownames(data),];
  
  data_tax <- tax_table(data1);
  #reshaping data
  if(taxalvl=="OTU"){
    taxa_nm <- as.matrix(colnames(data));
    rownames(taxa_nm) <- colnames(data);
    rownames(taxa_nm) <- sub("^X", "", rownames(taxa_nm))
  }else{
    taxa_nm <- as.character(data_tax[,taxalvl]);
    y <- which(is.na(taxa_nm)==TRUE);
    #converting NA values to unassigned
    taxa_nm[y] <- "Not_Assigned";
    
    if(appendnm=="T"){
      all_nm <- colnames(tax_table(data1));
      hg_nmindx <- which(all_nm==taxalvl)-1;
      
      if(hg_nmindx!=0){
        nma <- as.character(tax_table(data1)[,hg_nmindx]);
        y1 <- which(is.na(nma)==TRUE);
        nma[y1] <- "Not_Assigned";
        nm <- paste0(nma,"_",taxa_nm);
        ind <- which(nm=="Not_Assigned_Not_Assigned");
        nm[ind] <- "Not_Assigned";
        nm <- gsub("_Not_Assigned", "",nm, perl = TRUE);
        taxa_nm <- nm;
      }
    }
  }
  
  taxa_nm <- as.matrix(taxa_nm);
  
  if(appendnm=="T"){
    y <- which(grepl("Not_Assigned", taxa_nm))
  }else{
    y <- which(is.na(taxa_nm)==TRUE);
  }
  
  #converting NA values to unassigned; before order it to last position using ZZZ as its name
  taxa_nm[y] <- "ZZZ";
  colnames(data) <- taxa_nm[,1];
  nms <- colnames(data);
  data <- as.matrix(data);
  data <- data %*% sapply(unique(nms),"==",nms);
  data <- data.frame(data,check.names=FALSE);
  data <- data[ , order(names(data))];
  indx <- which(colnames(data)=="ZZZ");
  colnames(data)[indx] <- "NA";
  if(abunTopTaxaOpt == "top"){
    if(calcmeth=="sum"){
      feat_cnt <- sort(sapply(data, sum), decreasing = TRUE)[toptaxa];
    }else {
      feat_cnt <- sort(sapply(data, median), decreasing = TRUE)[toptaxa];
    }
  }
  if(calcmeth=="sum"){
    ind <- which(colSums(data)>feat_cnt);
    ind1 <- which(colSums(data)<feat_cnt);
  } else {
    dt <- apply(data,2,median);
    ind <- which(dt>feat_cnt);
    ind1 <- which(dt<feat_cnt);
  }
  
  if(length(ind)==0){
    AddErrMsg("All features have lower read count than given minimum count filter. Please lower the cut off for minimum count.");
    return(0);
  }
  
  if(length(ind1)>0){
    colnames(data)[ind1] <- "Others";
    data <- as.data.frame(do.call(cbind, by(t(data),INDICES=names(data),FUN=colSums)),check.names=FALSE);
  }
  
  if(imgOpt=="barnorm"){
    data <- as.data.frame(apply(data,1, function(x) x/sum(x)),check.names=FALSE);
    data <- as.data.frame(t(data),check.names=FALSE);
    yLbl <- "Relative Abundance";
  }
  
  w <- 650;
  #adjust height according to number of taxa 
  feat_no <- ncol(data); 
  if(feat_no < 10){
    h<-w;
  } else if (feat_no < 20){
    w<-w+100;
    h<-w/2+100;
  } else if (feat_no < 50){
    w<-w+200;
    h<-w/2+150;
  } else if (feat_no < 100){
    w<-w+350
    h<-w/2+200;
  } else if (feat_no > 100){
    w<-w+500
    h<-w/2+300;
  }
  
  if(flag){
  } else {
    feat_no2 <- nlevels(as.factor(data1@sam_data[[facet2]]));
    if(feat_no2 < 3){
      w <- w;
    } else if (feat_no2 < 4){
      w <- w * 1.1
    } else if (feat_no2 < 6){
      w <- w * 1.2;
    } else if (feat_no2 < 8){
      w <- w * 1.3;
    } else  if (feat_no2 < 10) {
      w <- w * 1.4;
    } else  if (feat_no2 < 12) {
      w <- w * 1.5;
    } else  if (feat_no2 < 14) {
      w <- w * 1.6;
    } else {
      w <- w * 2;
    }
  }
  # final check
  if(w > 1800){
    w <- 1800;
  }
  if(h < w/3){
    h <- w/3
  }
  if(h < 480){
    h <- 480;
  }
  
  fast.write(t(data), file="taxa_abund.csv");
  data$step <- factor(rownames(data), levels = rownames(data));
  gp_nm <- data$step
  data <- melt(data,id='step');
  data$step <- as.numeric(data$step);
  data <- data[order(data[,2]),];
  data <- data[,-1];
  data$step <- rep(gp_nm,feat_no);
  
  tmp_df <- aggregate(data$value, by=list(data$variable), FUN=mean)
  var_level <- tmp_df[order(tmp_df$x, decreasing = TRUE), ][[1]]
  #as.character()# get the order of taxa relative abundance
  data$variable <- factor(data$variable, levels = var_level) # change the factor level of taxa
  levels(data$variable) <- sub("^X", "", levels(data$variable)) 
  
  if(flag){
  }else {
    data$variable2 <- sapply(strsplit(as.character(data$step), ";", fixed = TRUE), "[", 2);
    data$step <- sapply(strsplit(as.character(data$step), ";", fixed = TRUE), "[", 1);
  }
  
  #color schema
  color_var <- levels(factor(data$variable));
  x <- length(color_var);
  
  if(colpalopt=="grad"){
    indx <- which(color_var=="NA");
    #color schema for ggplot
    x.colors <- hcl(h=seq(15,375,length=(x+1)),l=65,c=100)[1:x];
    x.colors[indx] <- "#666666";
  }else if (colpalopt=="cont21"){
    x.colors <- rep(custom_col21,length.out=x);
  }else if (colpalopt=="cont28"){
    x.colors <- rep(custom_final28,length.out=x);
  }else {
    x.colors<-rep(custom_col42,length.out=x);
  }
  
  if(x <= 10){
    guide_num = 3
  }else if (x <= 20){
    guide_num = 4
  }else{
    guide_num = 5
  }
  
  rdaName = paste(barplotName, ".rda", sep="");
  jsonName = paste(barplotName, ".json", sep="");
  barplotName = paste(barplotName, ".",format, sep="");
  mbSetObj$imgSet$stack<-barplotName;
  mbSetObj$imgSet$stackRda <-rdaName;
  mbSetObj$imgSet$stackType <- "group";
  
  stackdata <<- data
  Cairo::Cairo(file=barplotName,width=w, height=h, type=format, bg="white",dpi=dpi);
  box <- ggplot(data,aes(x = step, y = value, fill = variable))+
    geom_bar(stat="identity", position="stack", width = 0.4)+
    #scale_y_continuous(expand = c(0, 0, 0.3, 0)) +
    theme_bw() + theme(legend.position="bottom",legend.box = "vertical")+
    labs(y=yLbl,x="",fill=taxalvl)+
    theme(axis.text.x = element_text(angle = 0,vjust=0.5))+
    coord_flip()+
    theme(panel.background = element_rect(fill = "grey90",
                                          colour = "grey90",
                                          size = 0.5, linetype = "solid"),
          panel.border = element_blank())
  
  if(flag){
    box <- box;
    #if(mbSetObj$module.type == "meta" && "variable2" %in% colnames(data)){
    #  box <- box + facet_grid(variable2 ~ . , scales = "free", space = "free");
    #}
  } else {
    box <- box + facet_grid(variable2 ~ ., scales = "free");
  }
  
  if(colpalopt %in% c("set1", "set2", "set3")){
    
    cols.needed <- length(unique(data$variable))
    colpalopt <- tools::toTitleCase(tolower(colpalopt))
    
    if(colpalopt == "Set1"){
      n.cols <- 9
    }else if(colpalopt == "Set2"){
      n.cols <- 8
    }else{
      n.cols <- 12
    }
    
    if(cols.needed > n.cols){
      col.func <- grDevices::colorRampPalette(RColorBrewer::brewer.pal(n.cols, colpalopt))
      box <- box + scale_fill_manual(values=col.func(cols.needed),
                                     guide = guide_legend(direction = "horizontal",
                                                          ncol = guide_num))
    } else {
      box <- box + scale_fill_brewer(palette = colpalopt,
                                     guide = guide_legend(direction = "horizontal",
                                                          ncol = guide_num))
    }
  }else if(colpalopt %in% c("magma","plasma","inferno","viridis")){
    box <- box + viridis::scale_fill_viridis(option=colpalopt, discrete=TRUE) + guides(fill=guide_legend(ncol=guide_num))
  }else{
    box <- box + scale_fill_manual(values=c(x.colors)) + guides(fill=guide_legend(ncol=guide_num))
  }
  
  save(box,file=rdaName);
  print(box+guides(fill=guide_legend(ncol=3)));
  dev.off();
  
  p <- ggplotly_modified(box, tempfile_path = paste0(getwd(), "/temp_file4plotly"));
  
  jsonlist <- RJSONIO::toJSON(p, pretty = T,force = TRUE,.na = "null");
  sink(jsonName);
  cat(jsonlist);
  sink();
  
  mbSetObj$analSet$stack<-data;
  mbSetObj$analSet$stack.taxalvl<-taxalvl;
  mbSetObj$analSet$plot<-"Stacked Bar";
  return(.set.mbSetObj(mbSetObj));
}

#'Function to create rarefraction curves of microbiome data
#'@description This functions plots rarefraction curves.
#'@param mbSetObj Input the name of the mbSetObj.
#'@param data.src Character, use "orig" to use the original data or
#'"proc" to use the processed data.
#'@param linecolor Character, input the metadata you wish to 
#'group the sample line colors.
#'@param linetype Character, input the metadata you wish to 
#'group the sample line types.
#'@param facet Character, input the metadata you wish to 
#'group the samples.
#'@param step Numeric, input the step number. Default is set to 5.
#'@param imgName Character, input the name of the plot to be saved.
#'@param format Character, input the type of plot to be saved. Default is 
#'set to png. 
#'@param dpi Numeric, input the dpi for the plot to be saved. Default
#'is set to 72.
#'@param interactive Boolean, if set to TRUE, saves the plot
#'as an interactive html plot.
#'@author Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
PlotRarefactionCurve <- function(mbSetObj, data.src, linecolor, linetype, facet, step=5, 
                                 imgName, format="png", dpi=72, interactive = FALSE){

  load_phyloseq();
 current.msg<<-"";
  mbSetObj <- .get.mbSetObj(mbSetObj);
  # should use unfiltered data
  if(data.src == "orig"){
    data_rare <- qs::qread("orig.phyobj");
  }else{
    data_rare <- mbSetObj$dataSet$proc.phyobj;
  }
  
  sample_table_msg <- sample_data(data_rare, errorIfNULL=TRUE);

  if(min(table(factor(sample_table_msg [[linecolor]]))) < 3 | min(table(factor(sample_table_msg [[linetype]]))) < 3 | min(table(factor(sample_table_msg [[facet]]))) < 3){
    AddErrMsg("Please select a more meaningful group option or adjust the library depth during the rarefaction step to ensure a minimum of three samples per group.");
    return(0);
  }
  
  #get good's coverage index
  goods_coverage <- ComputeGoods(data_rare)
 
  fast.write(goods_coverage, "goods_coverage.csv", row.names = FALSE, quote = FALSE);
  
  feat_no <- nsamples(data_rare);
 
  #adjust height according to number of legends
  w <- 600;
  
  if(feat_no < 10){
    w<-w;
  } else if (feat_no < 20){
    w<-w+100;
  } else if (feat_no < 50){
    w<-w+200;
  } else if (feat_no < 100){
    w<-w+400;
  } else if (feat_no > 100){
    w<-w+500;
  }
  
  imgName = paste(imgName,".", format, sep="");
  Cairo::Cairo(file=imgName, width = 1.5 * w, height = 540, type=format, bg="white", dpi=dpi);
  linecolor <- ifelse(linecolor == "none", "NULL", linecolor);
  linetype <- ifelse(linetype == "none", "NULL", linetype);

  box <- ggrare2(data_rare,
                 data.src = data.src,
                 color = linecolor,
                 label = "Sample",
                 linetype = linetype,
                 se = FALSE,  # this is not to meaningful
                 step = step);
 
  if(!is.null(facet) & facet != "none"){
    box <- box + facet_wrap(as.formula(paste("~", facet)))
  } else {
    box <- box;
  }
  print(box);
  dev.off();
  
  mbSetObj$analSet$rarefaction_curve <- imgName;
  mbSetObj$analSet$rarefaction_curve_data.src = ifelse(data.src == "orig", "original", "filtered");
  
  if(interactive){
    library(plotly)
    library(htmlwidgets)
    ax <- list(
      zeroline = FALSE,
      showline = FALSE,
      showgrid = FALSE
    )
    p <- plotly::ggplotly(box)
    p <- p %>% layout(xaxis = ax, yaxis = ax)
    htmlwidgets::saveWidget(p, "rarefaction_interactive.html")
  }
  
  return(.set.mbSetObj(mbSetObj))
}

#'Function to prepare data for phylogenetic tree.
#'@description This functions prepares the data to plot the phylogenetic tree.
#'@param mbSetObj Input the name of the mbSetObj.
#'@param format Character, input the preferred
#'format of the plot. By default it is set to "png".
#'@param dpi Numeric, input the dots per inch. By default
#'it is set to 72.
#'@author Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
PlotPhylogeneticTree <-function(mbSetObj, color, shape, taxa, treeshape, imgName, format="png", dpi=72){

  load_phyloseq();
  
  mbSetObj <- .get.mbSetObj(mbSetObj);
  
  data <- mbSetObj$dataSet$filt.data;
  
  if("matrix" %in% class(mbSetObj$dataSet$filt.data)){
    data <- otu_table(data,taxa_are_rows =TRUE);
  }
  
  sample_table <- sample_data(mbSetObj$dataSet$proc.phyobj, errorIfNULL=TRUE);
  
  if(min(table(factor(sample_table [[color]]))) < 3 | min(table(factor(sample_table [[shape]]))) < 3){
    AddErrMsg("Too many groups to be displayed - please select a more meaningful group option with at least 3 samples per group.");
    return(0);
  }
  
  data1 <- merge_phyloseq(data,tax_table(mbSetObj$dataSet$proc.phyobj), sample_table);
  pg_tree <- qs::qread("tree.qs");
  pg_tb <- tax_table(data1);
  pg_ot <- otu_table(data1);
  pg_sd <- sample_data(data1);
  pg_tree <- prune_taxa(taxa_names(pg_ot), pg_tree);
  data_tree <- merge_phyloseq(pg_tb, pg_ot, pg_sd, pg_tree);
  for (i in sample_variables(data_tree)){
    if(is.integer(sample_data(data_tree)[[i]]) | is.character(sample_data(data_tree)[[i]])){
      sample_data(data_tree)[[i]] <- factor(sample_data(data_tree)[[i]])
    }
  }
  
  data_tree <- fast_tax_glom_mem(data_tree, taxa);
  if(is.null(data_tree)){
    AddErrMsg("Errors in projecting to the selected taxanomy level!");
    return(0);
  } 
  
  feat_no <- length(phy_tree(data_tree)$tip.label);
  if(feat_no > 50){
    AddErrMsg("There are too many tree tips to be display, please select a higher taxonomy level");
    return(0);
  }
  
  #adjust height according to number of legends
  w<-600;
  if(feat_no < 10){
    w<-w;
  } else if (feat_no < 20){
    w<-w+100;
  } else if (feat_no < 50){
    w<-w+200;
  } else if (feat_no < 100){
    w<-w+400;
  } else if (feat_no > 100){
    w<-w+500;
  }
  
  
  imgName = paste(imgName,".", format, sep="");
  box <- plot_tree(data_tree,
                   ladderize = "left",
                   color = color,
                   shape = shape,
                   label.tips = taxa,
                   size = "abundance");
  
  if(treeshape == "rectangular"){
    Cairo(file=imgName, width = 1.5 * w, height = w, type=format, bg="white", dpi=dpi);
  }else{
    box <- box + coord_polar(theta="y");
    Cairo(file=imgName, width = 1.5*w, height = 1.5*w, type=format, bg="white", dpi=dpi);
  }
  print(box);
  dev.off();
  
  mbSetObj$analSet$phylogenetic_tree_curve <- imgName;
  mbSetObj$analSet$phylogenetic_tree_curve_tax_level <- taxa;
  
  return(.set.mbSetObj(mbSetObj))
}

#########################
#####plot heat tree######
#########################

GetHtGroupItems <- function(mbSetObj, meta){
  load_phyloseq();

  mbSetObj <- .get.mbSetObj(mbSetObj);
  dm <- mbSetObj$dataSet$proc.phyobj;  
  dm_samples = as(sample_data(dm), "data.frame");
  grp.nms <- as.character(unique(dm_samples[[meta]]));
  return(grp.nms);
}

SetGroupItems <- function(mbSetObj, groups){
  
  mbSetObj <- .get.mbSetObj(mbSetObj);
  num.groups <- length(groups)
  
  if(num.groups < 2){
    AddErrMsg("Minimum 2 groups for comparison!")
    return(0)
  }
  
  mbSetObj$corr.net$comparison <- groups
  return(.set.mbSetObj(mbSetObj))
}

GetHtMetaCpInfo <- function(mbSetObj, meta){
  load_phyloseq();

  mbSetObj <- .get.mbSetObj(mbSetObj);
  dm <- mbSetObj$dataSet$proc.phyobj;  
  dm_samples = as(sample_data(dm), "data.frame");
  
  grp.nms <- as.character(unique(dm_samples[[meta]]));
  myargs <- list();
  inx = 0
  for (m in 1:(length(grp.nms) - 1)) {
    for (n in (m + 1):length(grp.nms)) {
      inx <- inx + 1
      myargs[[inx]] <- paste(grp.nms[m], "_vs_", grp.nms[n], sep = "");
    }
  }
  return(unlist(myargs));
}

#'Function to plot heat tree
#'@description This functions plots the heat tree.
#'@param mbSetObj Input the name of the mbSetObj.
#'@param meta Input the name of the group.
#'@param taxalvl Character, input the taxonomic level to perform
#'classification. For instance, "Genus" to use the Genus level.
#'@param color Character, input the color palette code. "dbgr"
#'for dark blue, grey and red. "bgy" for greenblue, grey and yellow.
#'"ggr" for green, grey and red. "pgy" for purple, gray and yellow. 
#'"tgr" for teal, grey and red. "ggg" for green, grey and gold. Additionally,
#'the viridis R package can be used to generate color schemes. "plasma" for 
#'the plasma color scheme, "viridis" for the default viridis color scheme and
#'"cividis" for the cividis color scheme.
#'@param layoutOpt Character, input the layout of the heat tree. "dft"
#'for the default layout and "reda" for reingold-tilford.
#'@param comparison Character, input the group comparisons.
#'@param wilcox.cutoff Numeric, input the Wilcoxan p-value cutoff
#'for significant node labels.
#'@param imgName Character, input the name of the heat tree plot.
#'@param format Character, input the preferred
#'format of the plot. By default it is set to "png".
#'@param dpi Numeric, input the dots per inch. By default
#'it is set to 72.
#'@author Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
#'@import metacoder
PrepareHeatTreePlot <- function(mbSetObj, meta, taxalvl, color, layoutOpt, comparison, 
                                wilcox.cutoff, switchCmpDirection, colorMode, showLabels, imgName, format="png", dpi=72){
  load_metacoder();
  load_phyloseq();

  mbSetObj <- .get.mbSetObj(mbSetObj);  
  tax_o <- taxalvl;
  dm <- mbSetObj$dataSet$proc.phyobj;
  tax_table_new = data.frame("Kingdom" = "Root", as(tax_table(dm), "matrix")[, 1:ncol(tax_table(dm))],check.names=FALSE) # add root to tax table
  
  tax_table(dm) <- as.matrix(tax_table_new);
  
  dm_samples = as(sample_data(dm), "data.frame");
  dm_samples <- cbind.data.frame("sample_id" = row.names(dm_samples), dm_samples); # add sample_id to sample table
  row.names(dm_samples) <- c();
  dm_samples[] <- lapply(dm_samples, as.character);
  
  otu_dm <- as.data.frame(as(otu_table(dm), "matrix"),check.names=FALSE);
  tax_dm <- as.data.frame(as(tax_table(dm), "matrix"),check.names=FALSE);
  tax_dm[] <- lapply(tax_dm, as.character);#make sure characters in tax_dm;
  depth <- ncol(tax_dm)
  rank_dm <- c("r", "p", "c", "o", "f", "g", "s");
  names(tax_dm) <- rank_dm[1:depth];
  
  for (i in 1:ncol(tax_dm)){
    for (j in 1:nrow(tax_dm)){
      if (is.na(tax_dm[j, i])){
        tax_dm[j, i] <- "";
      } else {
        tax_dm[j, i] <- paste(names(tax_dm)[i],
                              tax_dm[j, i],
                              sep = "__");
      }
    }
  } #add __ to tax table
  
  if(taxalvl == "Phylum"){
    tax <- "p";
  } else if(taxalvl == "Class"){
    tax <- "c";
  } else if(taxalvl == "Order"){
    tax <- "o";
  } else if(taxalvl == "Family"){
    tax <- "f";
  } else if(taxalvl == "Genus"){
    tax <- "g"
  } else {
    tax <- "s";
  }; # get tax rank for heat tree
  
  tax_dm <- tax_dm[, 1:which(names(tax_dm) == tax)]; #subset tax table
  rank_dm_new <- rank_dm[1:which(rank_dm == tax)];
  tax_dm$lineage <- apply(tax_dm[, rank_dm_new], 1, paste, collapse = ";"); #collapse all tax ranks
  dm_otu <- cbind.data.frame("otu_id" = row.names(tax_dm),
                             "lineage" = tax_dm$lineage,
                             otu_dm); #make new otu table
  row.names(dm_otu) <- c();
  dm_otu$lineage <- gsub(";+$", "", dm_otu$lineage); #remove empty tax names
  
  grp.nms <- strsplit(comparison, "_vs_")[[1]];
  #dm_samples_cmf <- dm_samples[dm_samples[[meta]] %in% grp.nms, ]; #subset sample data by meta variable  
  grpDF1 <- dm_samples[dm_samples[[meta]] == grp.nms[1], ];
  grpDF2 <- dm_samples[dm_samples[[meta]] == grp.nms[2], ];
  if(switchCmpDirection == "false"){
    dm_samples_cmf <- rbind.data.frame(grpDF1, grpDF2);
  } else {
    dm_samples_cmf <- rbind.data.frame(grpDF2, grpDF1);
    comparison <- paste0(grp.nms[2], "_vs_", grp.nms[1]);
  }
  dm_otu_cmf <- dm_otu[, c("otu_id", "lineage", dm_samples_cmf$sample_id)]; #make otu table ready for heat tree  
  
  PrepareHeatTreePlotDataParse_cmf_res <- PrepareHeatTreePlotDataParse_cmf(dm_otu_cmf, dm_samples_cmf, meta); #parse otu table to heat tree object
  PrepareHeatTreePlotDataParse_cmf_diff_table_res <- PrepareHeatTreePlotDataParse_cmf_diff_table(PrepareHeatTreePlotDataParse_cmf_res); #generate diff table
  PrepareHeatTreePlotDataParse_cmf_res <<- PrepareHeatTreePlotDataParse_cmf_res; #generate heat tree
  
  PrepareHeatTreePlotDataParse_cmf_plot(mbSetObj, color, layoutOpt, comparison, wilcox.cutoff,colorMode, showLabels, imgName, format, dpi=72);
  
  #below is for PDF reporter
  mbSetObj$analSet$heat_tree_plot <- paste0(imgName,".",format); 
  mbSetObj$analSet$heat_tree_meta <- meta;
  mbSetObj$analSet$heat_tree_tax <- tax_o;
  mbSetObj$analSet$heat_tree_comparison <- comparison;
  current.msg<<-"Heat tree analysis successful!";
  return(.set.mbSetObj(mbSetObj));
};

#'Function to prepare heat tree data
#'@description This functions plots the heat tree.
#'@param meta Input the name of the group.
#'@author Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
#'@import metacoder
#'@import taxa
PrepareHeatTreePlotDataParse_cmf <- function(dm_otu_cmf,
                                             dm_samples_cmf,
                                             meta){
  dm_obj_cmf <- metacoder::parse_tax_data(dm_otu_cmf,
                                          class_cols = "lineage",
                                          class_sep = ";",
                                          class_regex = "^(.+)__(.+)$",
                                          class_key = c(tax_rank = "info",
                                                        tax_name = "taxon_name"));
  
  dm_obj_cmf$data$tax_data <- calc_obs_props(dm_obj_cmf, "tax_data"); # normalization
  
  dm_obj_cmf$data$tax_abund <- calc_taxon_abund(dm_obj_cmf, "tax_data",
                                                cols = dm_samples_cmf$sample_id); #calculate taxon abundance
  dm_obj_cmf$data$diff_table <- compare_groups(dm_obj_cmf, dataset = "tax_abund", #make diff table
                                               cols = dm_samples_cmf$sample_id, # What columns of sample data to use
                                               groups = dm_samples_cmf[[meta]]); # What category each sample is assigned to;
  return(dm_obj_cmf);
};

#'Function to prepare heat tree data
#'@description This functions plots the heat tree.
#'@author Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
#'@import metacoder
PrepareHeatTreePlotDataParse_cmf_diff_table <- function(PrepareHeatTreePlotDataParse_cmf_res){#generate diff table for downloading
  
  dm_obj_cmf = PrepareHeatTreePlotDataParse_cmf_res;
  table_tax_dm <- dm_obj_cmf$data$class_data[-2];
  table_tax_dm <- table_tax_dm[!duplicated(table_tax_dm$taxon_id), ]; #remove redundancy
  
  tax_diff_dm <- merge(dm_obj_cmf$data$diff_table, 
                       table_tax_dm,
                       by = "taxon_id"); #add tax names
  tax_diff_dm <- cbind.data.frame("id" = tax_diff_dm[, 1],
                                  tax_diff_dm[, 8:9],
                                  tax_diff_dm[, 2:7]);
  
  fast.write(tax_diff_dm, 
             "tax_diff_dm.csv", 
             row.names = FALSE, quote = FALSE);
  
  return(tax_diff_dm);
};

#'Function to prepare heat tree data
#'@description This functions plots the heat tree.
#'@param mbSetObj Input the name of the mbSetObj.
#'@param color Character, input the color palette code. "dbgr"
#'for dark blue, grey and red. "bgy" for greenblue, grey and yellow.
#'"ggr" for green, grey and red. "pgy" for purple, gray and yellow. 
#'"tgr" for teal, grey and red. "ggg" for green, grey and gold. Additionally,
#'the viridis R package can be used to generate color schemes. "plasma" for 
#'the plasma color scheme, "viridis" for the default viridis color scheme and
#'"cividis" for the cividis color scheme.
#'@param layoutOpt Character, input the layout of the heat tree. "dft"
#'for the default layout and "reda" for reingold-tilford.
#'@param comparison Character, input the group comparisons.
#'@param wilcox.cutoff Numeric, input the Wilcoxan p-value cutoff
#'for significant node labels.
#'@param imgName Character, input the name of the heat tree plot.
#'@param format Character, input the preferred
#'format of the plot. By default it is set to "png".
#'@param dpi Numeric, input the dots per inch. By default
#'it is set to 72.
#'@author Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
#'@import metacoder
#'@import viridis
PrepareHeatTreePlotDataParse_cmf_plot <- function(mbSetObj, color, layoutOpt, comparison, wilcox.cutoff, colorMode, showLabels, imgName, format, dpi=72){
  
  mbSetObj <- .get.mbSetObj(mbSetObj);
  load_viridis();
  
  dm_obj_cmf = PrepareHeatTreePlotDataParse_cmf_res;
  if(color == "ggr"){
    color_new <- c("#006B30", "gray", "#E21818");
  } else if(color == "dbgr") {
    color_new <- c("#2A4196", "gray", "#F71735");
  } else if(color == "tgr"){
    color_new <- c("#007777", "gray", "#F71735");
  }else if(color == "bgy"){
    color_new <- c("#2EAA9C", "gray", "#FCB932");
  }else if(color == "ggg"){
    color_new <- c("#007777", "gray", "#DAA520");
  }else if(color == "plasma") {
    color_new <- rev(c(viridis::plasma(10)[4:9], "lightgrey"))
  }else if(color == "viridis") {
    color_new <- rev(c(viridis::viridis(10)[4:9], "lightgrey"))
  }else if(color == "cividis") {
    color_new <- rev(c(viridis::cividis(10)[4:9], "lightgrey"))
  }else {
    color_new <- c("#764b93", "gray", "#F0C808");
  };
  
  mbSetObj$analSet$heat_tree_plot <- imgName; # for PDF reporter
  
  set.seed(56784);
  
  Cairo::Cairo(file=paste0(imgName, ".", format), height = 875, width = 1000, type=format, bg="white", dpi=96);
  wilcox.cutoff <- as.numeric(wilcox.cutoff)

   if(colorMode=="sig"){
    dm_obj_cmf$data$diff_table$log2_median_ratio[dm_obj_cmf$data$diff_table$wilcox_p_value>wilcox.cutoff]  <- 0
   }

if(showLabels=="true"){
  if(layoutOpt == "reda"){# two layouts are provided
    box <- heat_tree(dm_obj_cmf,
                     node_label =  ifelse(wilcox_p_value < wilcox.cutoff, taxon_names, NA),  #taxon names
                     node_size = n_obs, # n_obs is a function that calculates, in this case, the number of OTUs per taxon
                     node_color = log2_median_ratio, # A column from `obj$data$diff_table`
                     node_color_interval = c(-8, 8), # The range of `log2_median_ratio` to display
                     node_color_range = color_new, # The color palette used
                     node_size_axis_label = "Abundance level",
                     node_color_axis_label = "Median ratio (log2)",
                     layout = "davidson-harel", # The primary layout algorithm
                     initial_layout = "reingold-tilford",
                     title = comparison,
                     title_size = 0.05,
                     node_label_size_range = c(0.02, 0.05),
                     output_file = NULL);
  } else {
    box <- heat_tree(dm_obj_cmf,
                     node_label =  ifelse(wilcox_p_value < wilcox.cutoff, taxon_names, NA),
                     node_size = n_obs, # n_obs is a function that calculates, in this case, the number of OTUs per taxon
                     node_color = log2_median_ratio, # A column from `obj$data$diff_table`
                     node_color_interval = c(-8, 8), # The range of `log2_median_ratio` to display
                     node_color_range = color_new, # The color palette used
                     node_size_axis_label = "Abundance level",
                     node_color_axis_label = "Median ratio (log2)",
                     title = comparison,
                     title_size = 0.05,
                     node_label_size_range = c(0.02, 0.05),
                     output_file = NULL);

  }
}else{
if(layoutOpt == "reda"){# two layouts are provided
    box <- heat_tree(dm_obj_cmf,
                     node_label =  NA,  #taxon names
                     node_size = n_obs, # n_obs is a function that calculates, in this case, the number of OTUs per taxon
                     node_color = log2_median_ratio, # A column from `obj$data$diff_table`
                     node_color_interval = c(-8, 8), # The range of `log2_median_ratio` to display
                     node_color_range = color_new, # The color palette used
                     node_size_axis_label = "Abundance level",
                     node_color_axis_label = "Median ratio (log2)",
                     layout = "davidson-harel", # The primary layout algorithm
                     initial_layout = "reingold-tilford",
                     title = comparison,
                     title_size = 0.05,
                     node_label_size_range = c(0.02, 0.05),
                     output_file = NULL);
  } else {
    box <- heat_tree(dm_obj_cmf,
                     node_label =  NA,
                     node_size = n_obs, # n_obs is a function that calculates, in this case, the number of OTUs per taxon
                     node_color = log2_median_ratio, # A column from `obj$data$diff_table`
                     node_color_interval = c(-8, 8), # The range of `log2_median_ratio` to display
                     node_color_range = color_new, # The color palette used
                     node_size_axis_label = "Abundance level",
                     node_color_axis_label = "Median ratio (log2)",
                     title = comparison,
                     title_size = 0.05,
                     node_label_size_range = c(0.02, 0.05),
                     output_file = NULL);

  }

};
  print(box);
  dev.off();
  
  return(.set.mbSetObj(mbSetObj))
}

PlotGroupDataHeattree <- function(mbSetObj, meta, comparison, taxalvl, color, layoutOpt, 
                                  showLabels,imgName, format="png", dpi=72){
  load_metacoder();
  load_phyloseq();
  
  mbSetObj <- .get.mbSetObj(mbSetObj);
  tax_o <- taxalvl;
  
  dm <- mbSetObj$dataSet$proc.phyobj;
  tax_table_new = data.frame("Kingdom" = "Root", as(tax_table(dm), "matrix")[, 1:6],check.names=FALSE) # add root to tax table
  tax_table(dm) <- as.matrix(tax_table_new);
  
  dm_samples = as(sample_data(dm), "data.frame");
  dm_samples <- cbind.data.frame("sample_id" = row.names(dm_samples), dm_samples); # add sample_id to sample table
  row.names(dm_samples) <- c();
  dm_samples[] = lapply(dm_samples, FUN = function(x){gsub(" ", "", x)});
  comparison <- gsub(" ", "", comparison);
  if(sum(grepl("^[0-9]{1,}$", dm_samples[[meta]])) == nrow(dm_samples)){
    dm_samples[[meta]] <- paste(meta, dm_samples[[meta]], sep = "_");
    comparison <- paste(meta, comparison, sep = "_");
  };
  dm_samples_cmf <- dm_samples[dm_samples[[meta]] %in% comparison, ]; #subset sample data by meta variable
  dm_samples_cmf$meta_com <- comparison;
  
  flag <- FALSE;
  
  PrepareHeatTreePlotAbR(dm, tax_dm, taxalvl, dm_samples_cmf, color, showLabels,imgName, format, layoutOpt, comparison, flag);
  
  #below is for PDF reporter
  mbSetObj$analSet$heat_tree_plot <- imgName; 
  #mbSetObj$analSet$heat_tree_plot <- paste(imgName,".", format, sep="")
  mbSetObj$analSet$heat_tree_meta <- meta;
  mbSetObj$analSet$heat_tree_tax <- tax_o;
  mbSetObj$analSet$heat_tree_comparison <- comparison;
  current.msg<<-"Heat tree analysis successful!";
  return(.set.mbSetObj(mbSetObj));
};

PlotSampleDataHeattree <- function(mbSetObj,comparison, taxalvl, color, layoutOpt, 
                                  showLabels, imgName, format="png", dpi=72){
  load_metacoder();
  load_phyloseq();

  mbSetObj <- .get.mbSetObj(mbSetObj);
  tax_o <- taxalvl;
  
  dm <- mbSetObj$dataSet$proc.phyobj;
  tax_table_new = data.frame("Kingdom" = "Root", as(tax_table(dm), "matrix")[, 1:6],check.names=FALSE) # add root to tax table
  tax_table(dm) <- as.matrix(tax_table_new);
  
  dm_samples = as(sample_data(dm), "data.frame");
  dm_samples <- cbind.data.frame("sample_id" = row.names(dm_samples), dm_samples); # add sample_id to sample table
  row.names(dm_samples) <- c();
  dm_samples[] <- lapply(dm_samples, as.character);
  
  if(sum(grepl("^[0-9]{1,}$", dm_samples$sample_id)) == nrow(dm_samples)){
    dm_samples$sample_id <- paste("sample", dm_samples$sample_id, sep = "_");
    comparison <- paste("sample", comparison, sep = "_"); 
    flag <- TRUE;
  } else {
    flag <- FALSE; 
  };  
  
  dm_samples_cmf <- dm_samples[dm_samples$sample_id %in% comparison, ];
  dm_samples_cmf$meta_com <- comparison;
  
  PrepareHeatTreePlotAbR(dm, tax_dm, taxalvl, dm_samples_cmf, color,showLabels, imgName, format, layoutOpt, comparison, flag);
  
  #below is for PDF reporter
  mbSetObj$analSet$heat_tree_plot <- imgName; 
  mbSetObj$analSet$heat_tree_meta <- comparison;
  mbSetObj$analSet$heat_tree_tax <- tax_o;
  mbSetObj$analSet$heat_tree_comparison <- comparison;
  current.msg<<-"Heat tree analysis successful!";
  return(.set.mbSetObj(mbSetObj));
};

PlotOverviewDataHeattree <- function(mbSetObj, taxalvl, color, layoutOpt, 
                                    showLabels, imgName, format="png", dpi=72){
  load_metacoder();
  load_phyloseq();

  mbSetObj <- .get.mbSetObj(mbSetObj);
  tax_o <- taxalvl;
  
  dm <- mbSetObj$dataSet$proc.phyobj;
  tax_table_new = data.frame("Kingdom" = "Root", as(tax_table(dm), "matrix")[, 1:6],check.names=FALSE) # add root to tax table
  tax_table(dm) <- as.matrix(tax_table_new);
  
  dm_samples = as(sample_data(dm), "data.frame");
  dm_samples <- cbind.data.frame("sample_id" = row.names(dm_samples), dm_samples); # add sample_id to sample table
  row.names(dm_samples) <- c();
  dm_samples[] <- lapply(dm_samples, as.character);
  
  dm_samples_cmf <- dm_samples;
  dm_samples_cmf$meta_com <- "All_samples";
  comparison <- "All_samples";
  
  flag <- FALSE;
  
  PrepareHeatTreePlotAbR(dm, tax_dm, taxalvl, dm_samples_cmf, color,showLabels, imgName, format, layoutOpt, comparison, flag);  
  
  #below is for PDF reporter
  mbSetObj$analSet$heat_tree_plot <- imgName; 
  mbSetObj$analSet$heat_tree_meta <- "All samples";
  mbSetObj$analSet$heat_tree_tax <- tax_o;
  mbSetObj$analSet$heat_tree_comparison <- "All samples";
  current.msg<<-"Heat tree analysis successful!";
  return(.set.mbSetObj(mbSetObj));
};

PrepareHeatTreePlotAbR <- function(dm = dm, tax_dm = tax_dm, taxalvl = taxalvl, dm_samples_cmf = dm_samples_cmf, 
                                   color = color,showLabels, imgName = imgName, format = format, layoutOpt= layoutOpt, comparison = comparison, flag = flag){
  
  otu_dm <- as.data.frame(as(otu_table(dm), "matrix"),check.names=FALSE);
  tax_dm <- as.data.frame(as(tax_table(dm), "matrix"),check.names=FALSE);
  tax_dm[] <- lapply(tax_dm, as.character);#make sure characters in tax_dm;
  depth <- ncol(tax_dm)
  rank_dm <- c("r", "p", "c", "o", "f", "g", "s");
  names(tax_dm) <- rank_dm[1:depth];
  
  for (i in 1:ncol(tax_dm)){
    for (j in 1:nrow(tax_dm)){
      if (is.na(tax_dm[j, i])){
        tax_dm[j, i] <- "";
      } else {
        tax_dm[j, i] <- paste(names(tax_dm)[i],
                              tax_dm[j, i],
                              sep = "__");
      }
    }
  } #add __ to tax table
  
  if(taxalvl == "Phylum"){
    tax <- "p";
  } else if(taxalvl == "Class"){
    tax <- "c";
  } else if(taxalvl == "Order"){
    tax <- "o";
  } else if(taxalvl == "Family"){
    tax <- "f";
  } else if(taxalvl == "Genus"){
    tax <- "g"
  } else {
    tax <- "s";
  }; # get tax rank for heat tree
  
  tax_dm <- tax_dm[, 1:which(names(tax_dm) == tax)]; #subset tax table
  rank_dm_new <- rank_dm[1:which(rank_dm == tax)];
  tax_dm$lineage <- apply(tax_dm[, rank_dm_new], 1, paste, collapse = ";"); #collapse all tax ranks
  dm_otu <- cbind.data.frame("otu_id" = row.names(tax_dm),
                             "lineage" = tax_dm$lineage,
                             otu_dm); #make new otu table
  row.names(dm_otu) <- c();
  if(flag){
    names(dm_otu)[-c(1:2)] <- paste("sample", names(dm_otu)[-c(1:2)], sep = "_");
  };
  dm_otu$lineage <- gsub(";+$", "", dm_otu$lineage); #remove empty tax names
  
  dm_otu_cmf <- dm_otu[, c("otu_id", "lineage", dm_samples_cmf$sample_id)]; #make otu table ready for heat tree  
  dm_obj_cmf <- metacoder::parse_tax_data(dm_otu_cmf,
                                          class_cols = "lineage",
                                          class_sep = ";",
                                          class_regex = "^(.+)__(.+)$",
                                          class_key = c(tax_rank = "info",
                                                        tax_name = "taxon_name"));
  
  dm_obj_cmf$data$tax_data <- calc_obs_props(dm_obj_cmf, "tax_data"); # normalization
  
  dm_obj_cmf$data$tax_abund <- calc_taxon_abund(dm_obj_cmf, "tax_data",
                                                cols = dm_samples_cmf$sample_id); #calculate taxon abundance
  
  dm_obj_cmf$data$tax_occ <- calc_n_samples(dm_obj_cmf, "tax_abund", groups = dm_samples_cmf$meta_com)
  
  if(color == "ggr"){
    color_new <- c("#006B30", "gray", "#E21818");
  } else if(color == "dbgr") {
    color_new <- c("#2A4196", "gray", "#F71735");
  } else if(color == "tgr"){
    color_new <- c("#007777", "gray", "#F71735");
  }else if(color == "bgy"){
    color_new <- c("#2EAA9C", "gray", "#FCB932");
  }else if(color == "ggg"){
    color_new <- c("#007777", "gray", "#DAA520");
  }else if(color == "plasma") {
    color_new <- rev(c(viridis::plasma(10)[4:9], "lightgrey"))
  }else if(color == "viridis") {
    color_new <- rev(c(viridis::viridis(10)[4:9], "lightgrey"))
  }else if(color == "cividis") {
    color_new <- rev(c(viridis::cividis(10)[4:9], "lightgrey"))
  }else {
    color_new <- c("#764b93", "gray", "#F0C808");
  };
  
  
  set.seed(56784);
  
  Cairo::Cairo(file=paste0(imgName, ".", format), height = 875, width = 1000, type=format, bg="white", dpi=96);
  
  if(showLabels=="true"){
  if(layoutOpt == "reda"){# two layouts are provided
    box <- heat_tree(dm_obj_cmf,
                     node_label = taxon_names,
                     node_size = n_obs, # n_obs is a function that calculates, in this case, the number of OTUs per taxon
                     node_color = dm_obj_cmf$data$tax_occ[[2]],# A column from `obj$data$tax_occ`
                     node_color_range = color_new, # The color palette used
                     node_size_axis_label = "OTU count",
                     node_color_axis_label = "Samples with reads",
                     layout = "davidson-harel", # The primary layout algorithm
                     initial_layout = "reingold-tilford",
                     title = comparison,
                     title_size = 0.05,
                     node_label_size_range = c(0.02, 0.05),
                     output_file = NULL);
  } else {
    box <- heat_tree(dm_obj_cmf,
                     node_label = taxon_names,
                     node_size = n_obs, # n_obs is a function that calculates, in this case, the number of OTUs per taxon
                     node_color = dm_obj_cmf$data$tax_occ[[2]],# A column from `obj$data$tax_occ`
                     node_color_range = color_new, # The color palette used
                     node_size_axis_label = "OTU count",
                     node_color_axis_label = "Samples with reads",
                     title = comparison,
                     title_size = 0.05,
                     node_label_size_range = c(0.02, 0.05),
                     output_file = NULL);
  }
}else{
if(layoutOpt == "reda"){# two layouts are provided
    box <- heat_tree(dm_obj_cmf,
                     node_label = NA,
                     node_size = n_obs, # n_obs is a function that calculates, in this case, the number of OTUs per taxon
                     node_color = dm_obj_cmf$data$tax_occ[[2]],# A column from `obj$data$tax_occ`
                     node_color_range = color_new, # The color palette used
                     node_size_axis_label = "OTU count",
                     node_color_axis_label = "Samples with reads",
                     layout = "davidson-harel", # The primary layout algorithm
                     initial_layout = "reingold-tilford",
                     title = comparison,
                     title_size = 0.05,
                     node_label_size_range = c(0.02, 0.05),
                     output_file = NULL);
  } else {
    box <- heat_tree(dm_obj_cmf,
                     node_label = NA,
                     node_size = n_obs, # n_obs is a function that calculates, in this case, the number of OTUs per taxon
                     node_color = dm_obj_cmf$data$tax_occ[[2]],# A column from `obj$data$tax_occ`
                     node_color_range = color_new, # The color palette used
                     node_size_axis_label = "OTU count",
                     node_color_axis_label = "Samples with reads",
                     title = comparison,
                     title_size = 0.05,
                     node_label_size_range = c(0.02, 0.05),
                     output_file = NULL);
  }

}
   ;
  print(box);
  dev.off();
}

#######################################
###########Tax4Fun/PICRUSt ############
#######################################

#'Main function to perform 16S functional annotation
#'@description This is the main function to perform either PICRUSt or
#'SILVA for functional annotation on the mbSetObj.
#'@param mbSetObj Input the name of the mbSetObj.
#'@author Jeff Xia \email{jeff.xia@mcgill.ca}
#'McGill University, Canada
#'License: GNU GPL (>= 2)
#'@export
#'@import Tax4Fun

# use memoise tech to avoid repeating
Perform16FunAnot_mem <- function(mbSetObj, type, pipeline,ggversion){
  if(!exists("perform16funanot_mem")){
    require("memoise");
    perform16funanot_mem <<- memoise(Perform16FunAnot); 
  }
  
  res <- perform16funanot_mem(mbSetObj, type, pipeline,ggversion);
  return(res);
}


Perform16FunAnot<-function(mbSetObj, type, pipeline,ggversion) {
  if(!exists("my.16sfun.anot")){ # public web on same user dir
    .load.scripts.on.demand("utils_16s2fun.Rc");    
  }
  return(my.16sfun.anot(mbSetObj, type, pipeline,ggversion));
}

######################################
########## Getter Functions ##########
######################################

GetBetaDiversityStats<-function(mbSetObj){
  mbSetObj <- .get.mbSetObj(mbSetObj);
  return(mbSetObj$analSet$stat.info);
}

GetStressNMDS<-function(mbSetObj){
  mbSetObj <- .get.mbSetObj(mbSetObj);
  return(mbSetObj$analSet$beta.stress);
}

# getter
GetPieTaxaNames<- function(){
  res<-as.character(piedata$variable);
  return(res);
}

# getter
GetPieTaxaAbund<- function(){
  return(piedata$value);
}


######################################
######### Utility Functions ##########
######################################

# Get goods's coverage
ComputeGoods <-function(physeq_object){
  com <- t(get_sample(physeq_object))
  no.seqs <- rowSums(com)
  sing <- com==1
  no.singleton <- apply(sing, 1, sum)
  goods <- 100*(1-no.singleton/no.seqs)
  sample <- row.names(com)
  goods.sum <- cbind(sample, no.singleton, no.seqs, goods)
  goods.sum <- as.data.frame(goods.sum,check.names=FALSE)
  row.names(goods.sum) <- c()
  return(goods.sum)
}

# Utility function that performs rarefaction
ggrare2 <- function(physeq_object, data.src, label = NULL, color = NULL, plot = TRUE, linetype = NULL, se = FALSE, step=5) {
  
  x <- methods::as(otu_table(physeq_object), "matrix")
  
  if (taxa_are_rows(physeq_object)) { x <- t(x) }
  
  ## This script is adapted from vegan `rarecurve` function
  tot <- rowSums(x)
  S <- rowSums(x > 0)
  nr <- nrow(x)
  step_new = floor(max(tot)/as.integer(step))
  rarefun <- function(i) {
    #cat(paste("rarefying sample", rownames(x)[i]), sep = "\n")
    n <- seq(1, tot[i], by = step_new)
    if (n[length(n)] != tot[i]) {
      n <- c(n, tot[i])
    }

    y <- vegan::rarefy(x[i, ,drop = FALSE], n, se = se)
    
if(length(y)==1){
 current.msg <<- paste0("All the feature counts in sample ",rownames(x)[i]," is less than 5 which is necessary for rarefy. Please check the data or change the method for filteration.")
 return(0)
}
    if (nrow(y) != 1) {
      rownames(y) <- c(".S", ".se")
      return(data.frame(t(y), Size = n, Sample = rownames(x)[i],check.names=FALSE));
    } else {
      return(data.frame(.S = y[1, ], Size = n, Sample = rownames(x)[i],check.names=FALSE));
    }
  }
  
  f_n <- paste(data.src, step, "rds", sep = ".");
  print(nr)
  out <- lapply(seq_len(nr), rarefun)
  df <- do.call(rbind, out);
  
  # Get sample data
  if (!is.null(sample_data(physeq_object, FALSE))) {
    sdf <- methods::as(sample_data(physeq_object), "data.frame")
    sdf$Sample <- rownames(sdf);
    data <- merge(df, sdf, by = "Sample")
    labels <- data.frame(x = tot, y = S, Sample = rownames(x),check.names=FALSE)
    labels <- merge(labels, sdf, by = "Sample")
  }
  
  # Add, any custom-supplied plot-mapped variables
  if (length(color) > 1) {
    data$color <- color
    names(data)[names(data) == "color"] <- deparse(substitute(color))
    color <- deparse(substitute(color))
  }
  
  if (length(label) > 1) {
    labels$label <- label
    names(labels)[names(labels) == "label"] <- deparse(substitute(label))
    label <- deparse(substitute(label))
  }
  
  if (length(linetype) > 1) {
    data$linetype <- linetype
    names(data)[names(data) == "linetype"] <- deparse(substitute(linetype))
    linetype <- deparse(substitute(linetype))
  }
  
  p <- ggplot2::ggplot(data = data,
                       ggplot2::aes_string(x = "Size",
                                           y = ".S",
                                           group = "Sample",
                                           color = color,
                                           linetype = linetype))
  
  p <- p + ggplot2::labs(x = "Sequence Sample Size", y = "Species Richness")
  
  if (!is.null(label)) {
    p <- p + ggplot2::geom_text(data = labels,
                                ggplot2::aes_string(x = "x",
                                                    y = "y",
                                                    label = label,
                                                    color = color),
                                size = 4, hjust = 0) +
      scale_x_continuous(expand = c(0, 0, 0.3, 0))
  }
  
  p <- p + ggplot2::geom_line()
  if (se) { ## add standard error if available
    p <- p +
      ggplot2::geom_ribbon(ggplot2::aes_string(ymin = ".S - .se",
                                               ymax = ".S + .se",
                                               color = NULL,
                                               fill = color),
                           alpha = 0.2)
  }
  invisible(p)
}

generateColorArr <- function(grp.num, filenm=NULL) {
  grp.num <- as.numeric(grp.num);
  library(RColorBrewer);
  colArr<-brewer.pal(grp.num,"Dark2")
  if(is.null(filenm)){
    return(colArr);
  }else{
    sink(filenm);
    cat(rjson::toJSON(colArr));
    sink();
    return(filenm);
  }
}

PlotlyTaxaAbundance <- function(rdaName, type){
    load(rdaName);
    p <- plotly::ggplotly(box, width=1000, height=800);

    if(type=="area"){
       narm <- p[["x"]][["data"]]
       for(i in 1:length(narm)){
          narm[[i]]$y[is.na(narm[[i]]$y)]=0;
       }
       p[["x"]][["data"]] <- narm
    }
 p <- p %>% layout(note="stack")
    return(p);
}

PlotlyCompManht <- function(rdaName){
  
 load(rdaName)
p <- plotly::ggplotly(p, width=1000, height=800,tooltip = "text");
return(p)
}
xia-lab/MicrobiomeAnalystR documentation built on June 9, 2025, 11:51 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
xia-lab/MicrobiomeAnalystR
MicrobiomeAnalystR - A comprehensive R package for statistical, visual, and functional analysis of the microbiome.

R/mdp_utils.R
In xia-lab/MicrobiomeAnalystR: MicrobiomeAnalystR - A comprehensive R package for statistical, visual, and functional analysis of the microbiome.

R Package Documentation

Browse R Packages

We want your feedback!

xia-lab/MicrobiomeAnalystR MicrobiomeAnalystR - A comprehensive R package for statistical, visual, and functional analysis of the microbiome.

R/mdp_utils.R In xia-lab/MicrobiomeAnalystR: MicrobiomeAnalystR - A comprehensive R package for statistical, visual, and functional analysis of the microbiome.

R Package Documentation

Browse R Packages

We want your feedback!

xia-lab/MicrobiomeAnalystR
MicrobiomeAnalystR - A comprehensive R package for statistical, visual, and functional analysis of the microbiome.

R/mdp_utils.R
In xia-lab/MicrobiomeAnalystR: MicrobiomeAnalystR - A comprehensive R package for statistical, visual, and functional analysis of the microbiome.