R/metabolomics_computation.R

Defines functions met.test_normalization met.report_test_normalization met.report

Documented in met.report met.report_test_normalization met.test_normalization

#' Perform ANOVA analysis
#'
#' ANOVA analysis
#'
#' @param mSetObj Enter the name of the created mSet object. (see \code{\link[VisomX]{met.read_data}})
#' @param nonpar (Logical) Use a non-parametric test (\code{TRUE}) or not (\code{FALSE}).
#' @param thresh (Numeric) From 0 to 1, indicate the p-value threshold.
#' @param post.hoc (Character) Enter the name of the post-hoc test, \code{"fisher"} or \code{"tukey"}.
#' @param all_results (Logical) If \code{TRUE}, it the ANOVA results for all compounds with no post-hoc tests performed will be written as CSV file "anova_all_results.csv".
#' @param silent (Logical) Suppress message with number of significant features found (\code{TRUE}) or not (\code{FALSE}).
#' @return The input mSet object with ANOVA results added at mSetObj$analSet$aov.
#' @references adapted from \code{\link[MetaboAnalystR]{ANOVA.Anal}} (\url{https://github.com/xia-lab/MetaboAnalystR}).
#' @author Nicolas T. Wirth \email{mail.nicowirth@gmail.com}
#' Technical University of Denmark
#' License: GNU GPL (>= 2)
#' @export
met.ANOVA.Anal <- function (mSetObj = NA, nonpar = FALSE, thresh = 0.05, post.hoc = "fisher",
          all_results = FALSE, silent = FALSE)
{
  cut.off <- thresh
  sig.num <- 0
  if (nonpar) {
    aov.nm <- "Kruskal Wallis Test"
    anova.res <- apply(as.matrix(mSetObj$dataSet$norm), 2,
                       kruskal.test(x ~ cls), cls = mSetObj$dataSet$cls)
    res <- unlist(lapply(anova.res, function(x) {
      c(x$statistic, x$p.value)
    }))
    res <- data.frame(matrix(res, nrow = length(anova.res),
                             byrow = T), stringsAsFactors = FALSE)
    fstat <- res[, 1]
    p.value <- res[, 2]
    names(fstat) <- names(p.value) <- colnames(mSetObj$dataSet$norm)
    fdr.p <- stats::p.adjust(p.value, "fdr")
    inx.imp <- fdr.p <= thresh
    sig.num <- sum(inx.imp)
    if (sig.num > 0) {
      sig.f <- fstat[inx.imp]
      sig.p <- p.value[inx.imp]
      fdr.p <- fdr.p[inx.imp]
      sig.mat <- data.frame(signif(sig.f, 5), signif(sig.p,
                                                     5), signif(-log10(sig.p), 5), signif(fdr.p, 5),
                            "NA")
      rownames(sig.mat) <- names(sig.p)
      colnames(sig.mat) <- c("chi.squared", "p.value",
                             "-log10(p)", "FDR", "Post-Hoc")
      ord.inx <- order(sig.p, decreasing = FALSE)
      sig.mat <- sig.mat[ord.inx, , drop = F]
      fileName <- "kw_posthoc.csv"
      my.mat <- sig.mat[, 1:4]
      colnames(my.mat) <- c("chi_squared", "pval_KW",
                            "-log10(p)", "FDR")
    }
  }
  else {
    aov.nm <- "One-way ANOVA"
    aof <- function (x, cls) {
            stats::aov(x ~ cls)
          }
    aov.res <- apply(as.matrix(mSetObj$dataSet$norm),
                     2, aof, cls = mSetObj$dataSet$cls)
    anova.res <- lapply(aov.res,stats::anova)
    res <- unlist(lapply(anova.res, function(x) {
      c(x["F value"][1, ], x["Pr(>F)"][1,
      ])
    }))
    res <- data.frame(matrix(res, nrow = length(aov.res),
                             byrow = T), stringsAsFactors = FALSE)
    fstat <- res[, 1]
    p.value <- res[, 2]
    names(fstat) <- names(p.value) <- colnames(mSetObj$dataSet$norm)
    fdr.p <- stats::p.adjust(p.value, "fdr")
    if (all_results == TRUE) {
      all.mat <- data.frame(signif(p.value, 5), signif(-log10(p.value),
                                                       5), signif(fdr.p, 5))
      rownames(all.mat) <- names(p.value)
      colnames(all.mat) <- c("p.value", "-log10(p)",
                             "FDR")
      fast.write.csv(all.mat, "anova_all_results.csv")
    }
    inx.imp <- fdr.p <= thresh
    sig.num <- sum(inx.imp)
    if (sig.num > 0) {
      aov.imp <- aov.res[inx.imp]
      sig.f <- fstat[inx.imp]
      sig.p <- p.value[inx.imp]
      fdr.p <- fdr.p[inx.imp]
      cmp.res <- NULL
      post.nm <- NULL
      if (post.hoc == "tukey") {
        tukey.res.imp <- lapply(aov.imp, stats::TukeyHSD, conf.level = 1 -
                                  thresh)
        cmp.res.imp <-
          unlist(lapply(tukey.res.imp, function (tukey, cut.off) paste(rownames(tukey$cls)[tukey$cls[, "p adj"] <= cut.off], collapse = "; "),
                        cut.off = thresh))
        posthoc_p.adj.sig.imp <-
          unlist(lapply(tukey.res.imp, function (tukey, cut.off) {
            inx <- tukey$cls[, "p adj"] <= cut.off
            paste(signif(data.frame(tukey["cls"])[, 4][inx], 5), collapse = "; ")
          },
          cut.off = thresh))

        tukey.res.all <- lapply(aov.res, stats::TukeyHSD, conf.level = 1 -
                                  thresh)
        cmp.res.all <- unlist(lapply(tukey.res.all, function (tukey, cut.off) paste(rownames(tukey$cls)[tukey$cls[, "p adj"] <= cut.off], collapse = "; "),
                                     cut.off = thresh))
        posthoc_p.adj.all <- unlist(lapply(tukey.res.all, function (tukey)
          paste(signif(data.frame(tukey["cls"])[, 4], 5), collapse = "; ")))
        post.nm = "Tukey's HSD"
      }
      else {
        fisher.res <- lapply(aov.imp, LSD.test(aov.obj, "cls", alpha = thresh), thresh)
        cmp.res.imp <- unlist(lapply(fisher.res, paste(rownames(fisher)[fisher[, "pvalue"] <= cut.off], collapse = "; "),
                                     cut.off = thresh))
        posthoc_p.adj.sig.imp <-
          unlist(lapply(fisher.res, function (fisher, cut.off) {
            inx <- fisher[, "pvalue"] <= cut.off
            paste(signif(data.frame(fisher)[, 2][inx], 5), collapse = "; ")
          },
          cut.off = thresh))
        fisher.res.all <- lapply(aov.res, LSD.test(aov.obj, "cls", alpha = thresh), thresh)
        posthoc_p.adj.all <- unlist(lapply(fisher.res, function (fisher)
          paste(signif(data.frame(fisher)[, 2], 5), collapse = "; ")))
        post.nm = "Fisher's LSD"
      }
      sig.mat <- data.frame(
        signif(sig.f, 5),
        signif(sig.p, 5),
        signif(-log10(sig.p), 5),
        signif(fdr.p, 5),
        cmp.res.imp,
        posthoc_p.adj.sig.imp
      )
      rownames(sig.mat) <- names(sig.p)
      colnames(sig.mat) <- c("f.value", "p.value",
                             "-log10(p)", "FDR", post.nm, "posthoc_p.adj")
      ord.inx <- order(sig.p, decreasing = FALSE)
      sig.mat <- sig.mat[ord.inx, , drop = F]
      fileName <- "anova_posthoc.csv"
    }
  }
  if(!silent){
    print(paste(c("ANOVA analysis: A total of", sum(inx.imp), "significant features were found."),
                collapse = " "))
  }
  if (sig.num > 0) {
    res <- 1
    fast.write.csv(sig.mat, file = fileName)
    conditions <- levels(mSetObj$dataSet$cls)
    cntrst <- rev(apply(utils::combn(rev(conditions), 2), 2, paste,
                        collapse = "-"))
    aov <- list(aov.nm = aov.nm, sig.num = sig.num, sig.nm = fileName,
                raw.thresh = thresh, thresh = -log10(thresh), p.value = p.value, p.adj = stats::p.adjust(p.value, "fdr"),
                p.log = -log10(p.value), inx.imp = inx.imp, post.hoc = post.hoc,
                sig.mat = sig.mat, post.hoc.all = cbind(contrasts=paste(cntrst, collapse= "; "), data.frame(posthoc_signif=cmp.res.all, p.adj=posthoc_p.adj.all)))
  }
  else {
    res <- 0
    aov <- list(aov.nm = aov.nm, sig.num = sig.num, raw.thresh = thresh,
                thresh = -log10(thresh), p.value = p.value, p.log = -log10(p.value),
                inx.imp = inx.imp)
  }
  mSetObj$analSet$aov <- aov
  return(mSetObj)
}

#' Fold change analysis, unpaired
#'
#' Perform fold change analysis, method can be mean or median
#'
#' @param mSetObj Enter the name of the created mSet object. (see \code{\link[VisomX]{met.read_data}})
#' @param log2fc.thresh (Numeric) Log2(fold-change) threshold that is considered relevant.
#' @param grp1 (Character) Enter name of the first group for the contrast \code{grp1 vs. grp2}. If both group arguments are empty, the first two names in the list of groups are selected.
#' @param grp2 (Character) Enter name of the second group for the contrast \code{grp1 vs. grp2}. If both group arguments are empty, the first two names in the list of groups are selected.
#' @param paired (Logical) Are the data in both groups paired (\code{TRUE}) or not (\code{FALSE}).
#' @return The input mSet object with results of fold-change analysis added at mSetObj$analSet$fc$\code{grp1}_vs_\code{grp2}.
#' @references adapted from \code{\link[MetaboAnalystR]{FC.Anal}} (\url{https://github.com/xia-lab/MetaboAnalystR}).
#' @author Nicolas T. Wirth \email{mail.nicowirth@gmail.com}
#' Technical University of Denmark
#' License: GNU GPL (>= 2)
#' @export
met.FC.Anal <- function (mSetObj = NA, log2fc.thresh = 1, grp1 = NULL, grp2 = NULL, paired = FALSE)
{

  max.thresh = log2fc.thresh
  min.thresh = -log2fc.thresh
  if(is.null(grp1) && is.null(grp2)){
    grp1 <- unique(levels(mSetObj$dataSet$cls))[1]
    grp2 <- unique(levels(mSetObj$dataSet$cls))[2]
  }
  res <- met.GetFC(mSetObj, paired, grp1, grp2)
  fc.all <- res$fc.all
  fc.log <- res$fc.log
  inx.up <- fc.log > max.thresh
  inx.down <- fc.log < min.thresh
  names(inx.up) <- names(inx.down) <- names(fc.all)
  imp.inx <- inx.up | inx.down
  sig.mat <- cbind(fc.all[imp.inx, drop = F], fc.log[imp.inx,
                                                     drop = F])
  colnames(sig.mat) <- c("Fold Change", "log2(FC)")
  inx.ord <- order(abs(sig.mat[, 2]), decreasing = T)
  sig.mat <- sig.mat[inx.ord, , drop = F]
  fileName <- "fold_change.csv"
  fast.write.csv(sig.mat, file = fileName)
  if(!exists("fc", mSetObj$analSet)){
    mSetObj$analSet$fc <- list()
  }
  assign(paste0("fc_",grp1,"_vs_",grp2), list(paired = paired, raw.thresh = log2fc.thresh,
                                              max.thresh = max.thresh, min.thresh = min.thresh, fc.all = fc.all,
                                              fc.log = fc.log, inx.up = inx.up, inx.down = inx.down,
                                              inx.imp = imp.inx, sig.mat = sig.mat, grp1 = grp1, grp2 = grp2))
  mSetObj$analSet$fc[[paste0(grp1,"_vs_",grp2)]] <- get(paste0("fc_",grp1,"_vs_",grp2))

  return(mSetObj)
}

#' Used by higher functions to calculate fold change
#'
#' Utility method to calculate FC, used in higher function
#'
#' @param mSetObj Enter the name of the created mSet object. (see \code{\link[VisomX]{met.read_data}})
#' @param paired (Logical) Are the data in both groups paired (\code{TRUE}) or not (\code{FALSE}).
#' @param grp1 (Character) Enter name of the first group for the contrast \code{grp1 vs. grp2}. If both group arguments are empty, the first two names in the list of groups are selected.
#' @param grp2 (Character) Enter name of the second group for the contrast \code{grp1 vs. grp2}. If both group arguments are empty, the first two names in the list of groups are selected.
#' @return The input mSet object with results of fold-change analysis added at mSetObj$analSet$fc$\code{grp1}_vs_\code{grp2}.
#' @references adapted from \code{\link[MetaboAnalystR]{GetFC}} (\url{https://github.com/xia-lab/MetaboAnalystR}).
#' @author Nicolas T. Wirth \email{mail.nicowirth@gmail.com}
#' Technical University of Denmark
#' License: GNU GPL (>= 2)
#' @export
met.GetFC <- function (mSetObj = NA, paired = FALSE, grp1, grp2)
{
  assertthat::assert_that(grp1 %in% levels(mSetObj$dataSet$cls),
                          grp2 %in% levels(mSetObj$dataSet$cls),
                          msg = paste0("'",grp1, "' or '", grp2, "' are not valid conditions in the dataset. Valid conditions are:\n",
                                       paste(levels(mSetObj$dataSet$cls), collapse = ", ")))

  if (paired) {
    if (mSetObj$dataSet$combined.method) {
      data <- mSetObj$dataSet$norm
    }
    else {
      if(!is.null(mSetObj$dataSet$row_norm)){
        row.norm <- mSetObj$dataSet$row_norm
      } else {
        row.norm <-  qs::qread("row_norm.qs")
      }
      data <- log2(row.norm)
    }
    G1 <- data[which(mSetObj$dataSet$cls == levels(mSetObj$dataSet$cls)[charmatch(grp1, levels(mSetObj$dataSet$cls) ) ]), ]
    G2 <- data[which(mSetObj$dataSet$cls == levels(mSetObj$dataSet$cls)[charmatch(grp2, levels(mSetObj$dataSet$cls) )]), ]
    fc.mat <- G1 - G2
    fc.log <- apply(fc.mat, 2, mean)
    fc.all <- signif(2^fc.log, 5)
  }
  else {
    data <- NULL
    if (mSetObj$dataSet$combined.method) {
      data <- mSetObj$dataSet$norm
      m1 <- colMeans(data[which(mSetObj$dataSet$cls ==
                                  levels(mSetObj$dataSet$cls)[charmatch(grp1, levels(mSetObj$dataSet$cls) ) ]), ])
      m2 <- colMeans(data[which(mSetObj$dataSet$cls ==
                                  levels(mSetObj$dataSet$cls)[charmatch(grp2, levels(mSetObj$dataSet$cls) ) ]), ])
      fc.log <- signif(m1 - m2, 5)
      fc.all <- signif(2^fc.log, 5)
    }
    else {
      if(!is.null(mSetObj$dataSet$row_norm)){
        data <- mSetObj$dataSet$row_norm
      } else {
        data <-  qs::qread("row_norm.qs")
      }
      m1 <- colMeans(data[which(mSetObj$dataSet$cls ==
                                  levels(mSetObj$dataSet$cls)[charmatch(grp1, levels(mSetObj$dataSet$cls) ) ]), ])
      m2 <- colMeans(data[which(mSetObj$dataSet$cls ==
                                  levels(mSetObj$dataSet$cls)[charmatch(grp2, levels(mSetObj$dataSet$cls) ) ]), ])

      ratio <- m1/m2
      fc.all <- signif(ratio, 5)
      ratio[ratio < 0] <- 0
      fc.log <- signif(log2(ratio), 5)
      fc.log[is.infinite(fc.log) & fc.log < 0] <- -99
      fc.log[is.infinite(fc.log) & fc.log > 0] <- 99
    }
  }
  names(fc.all) <- names(fc.log) <- colnames(data)
  return(list(fc.all = fc.all, fc.log = fc.log))
}

#' Methods for non-specific filtering of variables
#'
#' \code{met.FilterVariable} filters non-informative variables (i.e., features with very small values, near-constant values, or low repeatability) from the dataset, dependent on the user-specified method for filtering. The function applies a filtering method, ranks the variables within the dataset, and removes variables based on its rank. The final dataset should contain no more than than 5000 variables for effective computing. If more features are present, the IQR filter will be applied to keep only a number of 5000, even if \code{filter = "none"}. Data filtering is performed as part of the data preparation workflow \code{\link[VisomX]{met.read_data}}.
#'
#' @param mSetObj Enter the name of the created mSet object (see \code{\link[MetaboAnalystR]{InitDataObjects}} and \code{\link[MetaboAnalystR]{Read.TextData}}).
#' @param filter (Character) Select an option for unspecific filtering based on the following ranking criteria:
#' \itemize{
#'  \item \code{"none"} apply no unspecific filtering.
#'  \item \code{"rsd"} filters features with low relative standard deviation across the dataset.
#'  \item \code{"nrsd"} is the non-parametric relative standard deviation.
#'  \item \code{"mean"} filters features with low mean intensity value across the dataset.
#'  \item \code{"median"} filters features with low median intensity value across the dataset.
#'  \item \code{"sd"} filters features with low absolute standard deviation across the dataset.
#'  \item \code{"mad"} filters features with low median absolute deviation across the dataset.
#'  \item \code{"iqr"} filters features with a low inter-quartile range across the dataset.
#'  }
#' @param remain.num (Numerical) Enter the number of variables to keep in your dataset. If \code{NULL}, the following empirical rules are applied during data filtering with the methods specified in \code{filter = ""}:
#' \itemize{
#'   \item \strong{Less than 250 variables:} 5% will be filtered
#'   \item \strong{250 - 500 variables:} 10% will be filtered
#'   \item \strong{500 - 1000 variables:} 25% will be filtered
#'   \item \strong{More than 1000 variables:} 40% will be filtered
#' }
#' @param qcFilter (Logical) Filter the variables based on the relative standard deviation of features in QC samples (\code{TRUE}), or not (\code{FALSE}). This filter can be applied in addition to other, unspecific filtering methods.
#' @param qc.rsd (Numeric) Define the relative standard deviation cut-off in %. Variables with a RSD greater than this number will be removed from the dataset. It is only necessary to specify this argument if \code{qcFilter} is \code{TRUE}. Otherwise, it will be ignored.
#' @param all.rsd (Numeric or \code{NULL}) Apply a filter based on the in-group relative standard deviation (RSD, in %) or not \code{NULL}. Therefore, the RSD of every feature is calculated for every group in the dataset. If the RSD of a variable in any group exceeds the indicated threshold, it is removed from the dataset. This filter can be applied in addition to other filtering methods and is especially useful to perform on data with technical replicates.
#' @return The input mSet object with filtered data added at mSetObj$dataSet$filt.
#' @references adapted from \code{\link[MetaboAnalystR]{FilterVariable}} (\url{https://github.com/xia-lab/MetaboAnalystR}).
#' @author Nicolas T. Wirth \email{mail.nicowirth@gmail.com}
#' Technical University of Denmark
#' License: GNU GPL (>= 2)
#' @export
met.FilterVariable <- function (mSetObj = NA, filter = "none", remain.num = NULL, qcFilter="F", qc.rsd=0.25, all.rsd = NULL)
{
  if(!(filter %in% c("none", "rsd", "nrsd", "mean", "median", "sd", "mad", "iqr"))){
    stop(paste0(filter, " is not a valid filtering method. Please run met.FilterVariable with any of the following methods:\n",
                "\"none\", \"rsd\", \"nrsd\", \"mean\", \"median\", \"sd\", \"mad\", \"iqr\""), call. = F)
  }
  mSetObj$dataSet$filt <- NULL
  if (is.null(mSetObj$dataSet$proc)) {
    int.mat <- as.matrix(mSetObj[["dataSet"]][["data_proc"]])
  }
  else {
    int.mat <- as.matrix(mSetObj$dataSet$proc)
  }
  cls <- mSetObj$dataSet$proc.cls
  mSetObj$dataSet$filt.cls <- cls
  if (substring(mSetObj$dataSet$format, 4, 5) == "ts") {
    mSetObj$dataSet$filt.facA <- mSetObj$dataSet$proc.facA
    mSetObj$dataSet$filt.facB <- mSetObj$dataSet$proc.facB
  }
  msg <- ""
  if (qcFilter == "T") {
    rsd <- qc.rsd/100
    qc.hits <- tolower(as.character(cls)) %in% "qc"
    if (sum(qc.hits) > 1) {
      qc.mat <- int.mat[qc.hits, ]
      sds <- apply(qc.mat, 2, stats::sd, na.rm = T)
      mns <- apply(qc.mat, 2, mean, na.rm = T)
      rsd.vals <- abs(sds/mns)
      gd.inx <- rsd.vals < rsd
      int.mat <- int.mat[, gd.inx]
      if (mSetObj$analSet$type == "mummichog") {
        msg <- paste("Removed ", sum(!gd.inx),
                     " features based on QC RSD values. QC samples are excluded from downstream functional analysis.")
      }
      else {
        msg <- paste("Removed ", sum(!gd.inx),
                     " features based on QC RSD values. QC samples are still kept. You can remove them later.")
      }
    }
    else if (sum(qc.hits) > 0) {
      AddErrMsg("RSD requires at least 2 QC samples, and only non-QC based filtering can be applied.")
      return(0)
    }
    else {
      AddErrMsg("No QC Samples (with class label: QC) found.  Please use non-QC based filtering.")
      return(0)
    }
  }
  filt.res <-
    met.PerformFeatureFilter(
      int.mat = int.mat,
      mSetObj = mSetObj,
      filter = filter,
      remain.num = remain.num,
      anal.type = mSetObj$analSet$type,
      all.rsd = all.rsd
    )
  mSetObj$dataSet$filt <- filt.res$data
  msg <- paste(msg, filt.res$msg)
  mSetObj$msgSet$filter.msg <- msg
  return(mSetObj)
}

#' Used by higher functions for non-specific filtering of variables
#'
#' Utility method to perform data filtering, used in higher function
#'
#' @param int.mat Data matrix with samples in rows and features in columns, generated by higher function.
#' @param mSetObj Enter the name of the created mSet object (see \code{\link[MetaboAnalystR]{InitDataObjects}} and \code{\link[MetaboAnalystR]{Read.TextData}}).
#' @param filter (Character) Select an option for unspecific filtering based on the following ranking criteria:
#' \itemize{
#'  \item \code{"none"} apply no unspecific filtering.
#'  \item \code{"rsd"} filters features with low relative standard deviation across the dataset.
#'  \item \code{"nrsd"} is the non-parametric relative standard deviation.
#'  \item \code{"mean"} filters features with low mean intensity value across the dataset.
#'  \item \code{"median"} filters features with low median intensity value across the dataset.
#'  \item \code{"sd"} filters features with low absolute standard deviation across the dataset.
#'  \item \code{"mad"} filters features with low median absolute deviation across the dataset.
#'  \item \code{"iqr"} filters features with a low inter-quartile range across the dataset.
#'  }
#' @param remain.num (Numerical) Enter the number of variables to keep in your dataset. If \code{NULL}, the following empirical rules are applied during data filtering with the methods specified in \code{filter = ""}:
#' \itemize{
#'   \item \strong{Less than 250 variables:} 5% will be filtered
#'   \item \strong{250 - 500 variables:} 10% will be filtered
#'   \item \strong{500 - 1000 variables:} 25% will be filtered
#'   \item \strong{More than 1000 variables:} 40% will be filtered
#' }
#' @param anal.type (Character) Type of analysis. Extracted from mSetObj by higher function at mSetObj$analSet$type.
#' @param all.rsd (Numeric or \code{NULL}) Apply a filter based on the in-group relative standard deviation (RSD, in %) or not \code{NULL}. Therefore, the RSD of every feature is calculated for every group in the dataset. If the RSD of a variable in any group exceeds the indicated threshold, it is removed from the dataset. This filter can be applied in addition to other filtering methods and is especially useful to perform on data with technical replicates.
#' @references adapted from \code{\link[MetaboAnalystR]{PerformFeatureFilter}} (\url{https://github.com/xia-lab/MetaboAnalystR}).
#' @return A list with filtered data and a message to inform about the chosen filtering conditions
#' @author Nicolas T. Wirth \email{mail.nicowirth@gmail.com}
#' Technical University of Denmark
#' License: GNU GPL (>= 2)
met.PerformFeatureFilter <- function (int.mat, mSetObj, filter = "none", remain.num = NULL, anal.type = NULL, all.rsd = NULL)
{
  feat.num <- ncol(int.mat)
  feat.nms <- colnames(int.mat)
  nm <- NULL
  msg <- ""
  remain <- c()
  if(!is.null(all.rsd)){
    all.rsd <- all.rsd/100
    # Define vector of compounds whose concentration is within the lower 5% quantile in at least one sample.
    smaller_0.05quant <- int.mat[, apply(int.mat < unname(stats::quantile(int.mat, probs = seq(0, 1, 0.05))[2]), 2, any)]
    # Assign a dummy RSD value of 0 to compounds within the lower 5% quantile
    vec.smaller_0.05quant <- replace(smaller_0.05quant[1,], !is.null(smaller_0.05quant[1,]), 0)
    # Define vector of compounds whose concentration is above the lower 5% quantile in all samples.
    greater_0.05quant <- int.mat[, apply(int.mat < unname(stats::quantile(int.mat, probs = seq(0, 1, 0.05))[2]), 2, any)!=TRUE]
    # Calculate the standard deviation for each compound in each group. Create a list of n vectors for n sample groups.
    ls.sd <-
      lapply(1:length(levels(mSetObj[["dataSet"]][["cls"]])), function (x)
        apply(greater_0.05quant[grep(levels(mSetObj[["dataSet"]][["cls"]])[x], as.character(mSetObj[["dataSet"]][["cls"]])),], 2, stats::sd, na.rm = T))
    # Calculate the average for each compound in each group. Create a list of n vectors for n sample groups.
    ls.mns <-
      lapply(1:length(levels(mSetObj[["dataSet"]][["cls"]])), function (x)
        apply(greater_0.05quant[grep(levels(mSetObj[["dataSet"]][["cls"]])[x], as.character(mSetObj[["dataSet"]][["cls"]])), ], 2, mean, na.rm = T))
    # Calculate the RSD for each compound in each group. Create a list of n vectors for n sample groups.
    ls.rsd <- mapply("/", ls.sd, ls.mns, SIMPLIFY = FALSE)
    # Combine group vectors in RSD list into matrix
    mat.rsd <- do.call(rbind, ls.rsd)
    # Create vector with the maximum RSD value among all groups for each compound.
    filter.val <-c(vec.smaller_0.05quant, apply(mat.rsd, 2, max, na.rm = T))
    # To restore the initial alphabetical order, leading Xs in compound names that were
    # automatically added in front of leading digits are removed. Then, the vector is
    # sorted and Xs are re-introduced.
    names(filter.val) <- str_replace(names(filter.val), "^X", "")
    filter.val<- filter.val[order(names(filter.val))]
    names(filter.val) <- gsub("^([0-9]+)", "X\\1", names(filter.val))
    # Check if each compound has an RSD value below the threshold.
    remain <- c(remain, filter.val <= all.rsd)
    nm <- paste0("Relative standard deviation with a defined upper threshold (", length(remain)-length(remain[remain]), " features with RSD > ", all.rsd, "). Only metabolites with values above the 5% quantile in all samples were considered for filtering.")
    msg <- paste0(msg, "-> Feature filtering based on: ",
                  nm, "\n")
    # Apply RSD filter to data matrix
    int.mat <- int.mat[, remain]
  }
  if (filter == "none" && feat.num < 5000) {
    remain <- rep(TRUE, feat.num)
    msg <- paste(msg, "No unspecific filtering was applied")
  } else {
    if (filter == "rsd") {
      sds <- apply(int.mat, 2, stats::sd, na.rm = T)
      mns <- apply(int.mat, 2, mean, na.rm = T)
      filter.val <- abs(sds/mns)
      nm <- "Low relative standard deviation"
    }
    else if (filter == "nrsd") {
      mads <- apply(int.mat, 2, stats::mad, na.rm = T)
      meds <- apply(int.mat, 2, stats::median, na.rm = T)
      filter.val <- abs(mads/meds)
      nm <- "Non-paramatric relative standard deviation"
    }
    else if (filter == "mean") {
      filter.val <- apply(int.mat, 2, mean, na.rm = T)
      nm <- "mean"
    }
    else if (filter == "sd") {
      filter.val <- apply(int.mat, 2, stats::sd, na.rm = T)
      nm <- "standard deviation"
    }
    else if (filter == "mad") {
      filter.val <- apply(int.mat, 2, stats::mad, na.rm = T)
      nm <- "Median absolute deviation"
    }
    else if (filter == "median") {
      filter.val <- apply(int.mat, 2, stats::median, na.rm = T)
      nm <- "median"
    }
    else if (filter == "iqr"){
      filter.val <- apply(int.mat, 2, stats::IQR, na.rm = T)
      nm <- "Interquantile Range"
    }
    else {
      stop(paste0("\"", filter, "\" is not a valid filtering condition. See ?met.FilterVariable for more information about suitable options."))
    }
    rk <- rank(-filter.val, ties.method = "random")
    if (is.null(remain.num)) {
      if (feat.num < 250) {
        remain <- rk < feat.num * 0.95
        msg <- paste0(msg, "-> Further feature filtering based on ",
                     nm, " (", round(feat.num*0.05), " features, i.e., 5% of all features).")
      }
      else if (feat.num < 500) {
        remain <- rk < feat.num * 0.9
        msg <- paste0(msg, "-> Further feature filtering based on ",
                      nm, " (", round(feat.num*0.1), " features, i.e., 10% of all features).")
      }
      else if (feat.num < 1000) {
        remain <- rk < feat.num * 0.75
        msg <- paste0(msg, "-> Further feature filtering based on ",
                      nm, " (", round(feat.num*0.25), " features, i.e., 25% of all features).")
      }
      else {
        remain <- rk < feat.num * 0.6
        msg <- paste0(msg, "-> Further feature filtering based on ",
                      nm, " (", round(feat.num*0.4), " features, i.e., 40% of all features).")
        if (anal.type == "mummichog") {
          max.allow <- 7500
        }
        else if (anal.type == "power" || anal.type ==
                 "ts") {
          max.allow <- 5000
        }
        else {
          max.allow <- 2500
        }
        if (sum(remain) > max.allow) {
          remain <- rk < max.allow
          msg <- paste(msg, paste("Reduced to",
                                  max.allow, "features based on", nm))
        }
      }
    }
    else {
      remain <- rk < remain.num
    }
  }
  cat(msg)
  return(list(data = int.mat[, remain], msg = msg))
}

#' Perform T-test analysis
#'
#' \code{met.Ttests.Anal} performs the Student's t-test on chosen sample groups. For large datasets (> 1000 variables), both the paired information and the group variance will be ignored, and the default parameters will be used for t-tests to save computational time. If you choose non-parametric tests (Wilcoxon rank-sum test), the group variance will be ignored.
#'
#' @param mSetObj Enter the name of the created mSet object (see \code{\link[VisomX]{met.read_data}}).
#' @param grp1 (Character) Enter name of the first group for the contrast \code{grp1 vs. grp2}. If both group arguments are empty, the first two names in the list of groups are selected.
#' @param grp2 (Character) Enter name of the second group for the contrast \code{grp1 vs. grp2}. If both group arguments are empty, the first two names in the list of groups are selected.
#' @param nonpar (Logical) Use a non-parametric test (\code{TRUE}) or not (\code{FALSE}).
#' @param threshp (Numerical) Enter the adjusted p-value (FDR) cutoff.
#' @param paired (Logical) Is the data paired (\code{TRUE}) or not (\code{FALSE}).
#' @param equal.var (Logical) Is the group variance equal (\code{TRUE}) or not (\code{FALSE}).
#' @param pvalType (Character) p value type used to apply significances based on the chosen threshold \code{threshp}. Can be \code{"fdr"} for adjusted p values or \code{"raw"} for raw p values.
#' @param all_results (Logical) Create a CSV file with T-Test results for all compounds (\code{TRUE}) or not (\code{FALSE}).
#' @param silent (Logical) Suppress message about the number of significant features found in the console (\code{TRUE}) or not (\code{FALSE}).
#' @return The input mSet object with T-Test results added at mSetObj$analSet$tt$grp1_vs_grp2.
#' @references adapted from \code{\link[MetaboAnalystR]{Ttests.Anal}} (\url{https://github.com/xia-lab/MetaboAnalystR}).
#' @author Nicolas T. Wirth \email{mail.nicowirth@gmail.com}
#' Technical University of Denmark
#' License: GNU GPL (>= 2)
#' @export
met.Ttests.Anal <- function (mSetObj = NA, grp1, grp2, nonpar = FALSE, threshp = 0.05, paired = FALSE,
          equal.var = TRUE, pvalType = "fdr", all_results = TRUE, silent = FALSE)
{
  if(!is.null(grp1) && !is.null(grp2)){
    assertthat::assert_that(grp1 %in% levels(mSetObj$dataSet$cls),
                            grp2 %in% levels(mSetObj$dataSet$cls),
                            msg = paste0("'",grp1, "' or '", grp2, "' are not valid conditions in the dataset. Valid conditions are:\n",
                                         paste(levels(mSetObj$dataSet$cls), collapse = ", ")))
  }
  if(is.null(grp1) && is.null(grp2)){
    grp1 <- unique(levels(mSetObj$dataSet$cls))[1]
    grp2 <- unique(levels(mSetObj$dataSet$cls))[2]
  }
  res <- met.GetTtestRes(mSetObj, grp1, grp2, paired, equal.var, nonpar)
  t.stat <- res[, 1]
  p.value <- res[, 2]
  names(t.stat) <- names(p.value) <- colnames(mSetObj$dataSet$norm)
  p.log <- -log10(p.value)
  fdr.p <- stats::p.adjust(p.value, "fdr")
  if (all_results == TRUE) {
    all.mat <- data.frame(signif(t.stat, 5), signif(p.value,
                                                    5), signif(p.log, 5), signif(fdr.p, 5))
    if (nonpar) {
      tt.nm = "Wilcoxon Rank Test"
      file.nm <- "wilcox_rank_all.csv"
      colnames(all.mat) <- c("V", "p.value",
                             "-log10(p)", "FDR")
    }
    else {
      tt.nm = "T-Tests"
      file.nm <- paste0("t_test_all_", grp1, "_vs_", grp2, ".csv")
      colnames(all.mat) <- c("t.stat", "p.value",
                             "-log10(p)", "FDR")
    }
    fast.write.csv(all.mat, file = file.nm)
  }
  if (pvalType == "fdr") {
    inx.imp <- fdr.p <= threshp
  }
  else {
    inx.imp <- p.value <= threshp
  }
  sig.num <- sum(inx.imp, na.rm = TRUE)
  if(silent==FALSE){
    print(paste("T-test: A total of", sig.num, "significant features were found for contrast:", grp1, "vs.", grp2))
  }
  if (sig.num > 0) {
    sig.t <- t.stat[inx.imp]
    sig.p <- p.value[inx.imp]
    lod <- -log10(sig.p)
    sig.q <- fdr.p[inx.imp]
    sig.mat <- cbind(sig.t, sig.p, lod, sig.q)
    colnames(sig.mat) <- c("t.stat", "p.value",
                           "-log10(p)", "FDR")
    ord.inx <- order(sig.p)
    sig.mat <- sig.mat[ord.inx, , drop = F]
    sig.mat <- signif(sig.mat, 5)
    if (nonpar) {
      tt.nm = "Wilcoxon Rank Test"
      file.nm <- "wilcox_rank.csv"
      colnames(sig.mat) <- c("V", "p.value",
                             "-log10(p)", "FDR")
    }
    else {
      tt.nm = "T-Tests"
      file.nm <- paste0("t_test_", grp1, "_vs_", grp2, ".csv")
      colnames(sig.mat) <- c("t.stat", "p.value",
                             "-log10(p)", "FDR")
    }
    fast.write.csv(sig.mat, file = file.nm)
    tt <- list(tt.nm = tt.nm, sig.nm = file.nm, sig.num = sig.num,
               paired = paired, pval.type = pvalType, raw.thresh = threshp,
               t.score = t.stat, p.value = p.value, p.log = p.log,
               thresh = -log10(threshp), inx.imp = inx.imp, sig.mat = sig.mat)
  }
  else {
    tt <- list(sig.num = sig.num, paired = paired, pval.type = pvalType,
               raw.thresh = threshp, t.score = t.stat, p.value = p.value,
               p.log = p.log, thresh = -log10(threshp), inx.imp = inx.imp)
  }
  if(!exists("tt", mSetObj$analSet)){
    mSetObj$analSet$tt <- list()
  }
  mSetObj$analSet$tt[[paste0(grp1,"_vs_",grp2)]] <- tt
  return(mSetObj)
}

#' Used by higher functions to retrieve T-test p-values
#'
#' Utility method to to get p values via T-test.
#'
#' @param mSetObj Enter the name of the created mSet object (see \code{\link[VisomX]{met.read_data}}).
#' @param grp1 (Character) Enter name of the first group for the contrast \code{grp1 vs. grp2}.
#' @param grp2 (Character) Enter name of the second group for the contrast \code{grp1 vs. grp2}.
#' @param paired (Logical) Is the data paired (\code{TRUE}) or not (\code{FALSE}).
#' @param equal.var (Logical) Is the group variance equal (\code{TRUE}) or not (\code{FALSE}).
#' @param nonpar (Logical) Use a non-parametric test (\code{TRUE}) or not (\code{FALSE}).
#' @return A data frame with T-test results.
#' @references adapted from \code{\link[MetaboAnalystR]{GetTtestRes}} (\url{https://github.com/xia-lab/MetaboAnalystR}).
#' @author Nicolas T. Wirth \email{mail.nicowirth@gmail.com}
#' Technical University of Denmark
#' License: GNU GPL (>= 2)
met.GetTtestRes <- function (mSetObj = NA, grp1, grp2, paired = FALSE, equal.var = TRUE, nonpar = F)
{

  inx1 <- which(mSetObj$dataSet$cls == levels(mSetObj$dataSet$cls)[charmatch(grp1, levels(mSetObj$dataSet$cls) ) ])
  inx2 <- which(mSetObj$dataSet$cls == levels(mSetObj$dataSet$cls)[charmatch(grp2, levels(mSetObj$dataSet$cls) ) ])
  if (length(inx1) == 1 || length(inx2) == 1) {
    equal.var <- TRUE # overwrite option if one does not have enough replicates
  }
  data <- as.matrix(mSetObj$dataSet$norm)
  if (!exists("mem.tt")) {
    mem.tt <<- memoise::memoise(.get.ttest.res)
  }
  return(mem.tt(data, inx1, inx2, paired, equal.var, nonpar))
}

#' Impute missing variables
#'
#' Replace missing variables via a chosen method. Data need to be re-calibrated after this step, including \code{\link[VisomX]{met.PerformFeatureFilter}} as well as \code{\link[VisomX]{met.normalize}}. Data imputation is performed as part of the data preparation workflow \code{\link[VisomX]{met.read_data}}.
#'
#' @param mSetObj Enter the name of the created mSet object ((see \code{\link[VisomX]{met.initialize}} and \code{\link[MetaboAnalystR]{Read.TextData}}).
#' @param method (Character) Select the option to replace missing variables:
#' \itemize{
#'  \item \code{"lod"} replaces missing values with 1/5 of the minimum value for the respective variable.
#'  \item \code{"rowmin"} replaces missing values with the half sample minimum.
#'  \item \code{"colmin"} replaces missing values with the half feature minimum.
#'  \item \code{"mean"} replaces missing values with the mean value of the respective feature column.
#'  \item \code{"median"} replaces missing values with the median value of the respective feature column.
#'  \item \code{"knn_var"} imputes missing values by finding the features in the training set “closest” to it and averages these nearby points to fill in the value.
#'  \item \code{"knn_smp"} imputes missing values by finding the samples in the training set “closest” to it and averages these nearby points to fill in the value.
#'  \item \code{"bpca"} applies Bayesian PCA to impute missing values.
#'  \item \code{"ppca"} applies probabilistic PCA to impute missing values.
#'  \item \code{"svdImpute"} applies singular value decomposition to impute missing values.
#'  }
#' @return The input mSet object with imputed data at mSetObj$dataSet$data_proc.
#' @references adapted from \code{\link[MetaboAnalystR]{ImputeMissingVar}} (\url{https://github.com/xia-lab/MetaboAnalystR}).
#' @author Nicolas T. Wirth \email{mail.nicowirth@gmail.com}
#' Technical University of Denmark
#' License: GNU GPL (>= 2)
#' @export
met.impute <- function (mSetObj = NA, method = "min")
  {
  mSetObj$dataSet$filt <- mSetObj$dataSet$edit <- NULL
  if(!is.null(mSetObj$dataSet$preproc)){
    int.mat <- mSetObj$dataSet$preproc
  }
  else {
    int.mat <- qs::qread("preproc.qs")
  }
  new.mat <- NULL
  msg <- mSetObj$msgSet$replace.msg
  if (method == "exclude") {
    good.inx <- apply(is.na(int.mat), 2, sum) == 0
    new.mat <- int.mat[, good.inx, drop = FALSE]
    msg <- c(msg, "Variables with missing values were excluded.")
  }
  else if (method == "lod") {
    new.mat <- ReplaceMissingByLoD(int.mat)
    msg <- c(msg, "Missing variables were replaced by LoDs (1/5 of the min positive value for each variable)")
  }
  else if (method == "rowmin") {
    new.mat <- apply(int.mat, 1, function(x) {
      if (sum(is.na(x)) > 0) {
        x[is.na(x)] <- min(x, na.rm = T)/2
      }
      x
    })
    msg <- c(msg, "Missing variables were replaced by 1/2 of min values for each sample row.")
  }
  else if (method == "colmin") {
    new.mat <- apply(int.mat, 2, function(x) {
      if (sum(is.na(x)) > 0) {
        x[is.na(x)] <- min(x, na.rm = T)/2
      }
      x
    })
    msg <- c(msg, "Missing variables were replaced by 1/2 of min values for each feature column.")
  }
  else if (method == "mean") {
    new.mat <- apply(int.mat, 2, function(x) {
      if (sum(is.na(x)) > 0) {
        x[is.na(x)] <- mean(x, na.rm = T)
      }
      x
    })
    msg <- c(msg, "Missing variables were replaced with the mean value for each feature column.")
  }
  else if (method == "median") {
    new.mat <- apply(int.mat, 2, function(x) {
      if (sum(is.na(x)) > 0) {
        x[is.na(x)] <- median(x, na.rm = T)
      }
      x
    })
    msg <- c(msg, "Missing variables were replaced with the median for each feature column.")
  }
  else {
    if (method == "knn_var") {
      new.mat <- t(impute::impute.knn(t(int.mat))$data)
    }
    else if (method == "knn_smp") {
      new.mat <- impute::impute.knn(data.matrix(int.mat))$data
    }
    else {
      if (method == "bpca") {
        new.mat <- pcaMethods::pca(int.mat, nPcs = 5,
                                   method = "bpca", center = T)@completeObs
      }
      else if (method == "ppca") {
        new.mat <- pcaMethods::pca(int.mat, nPcs = 5,
                                   method = "ppca", center = T)@completeObs
      }
      else if (method == "svdImpute") {
        new.mat <- pcaMethods::pca(int.mat, nPcs = 5,
                                   method = "svdImpute", center = T)@completeObs
      }
    }
    msg <- c(msg, paste("Missing variables were imputated using",
                        toupper(method)))
  }
  mSetObj$dataSet$proc.feat.num <- ncol(new.mat)
  mSetObj$dataSet$data_proc <- as.data.frame(new.mat)
  message("Writing dataset with imputed values to 'data_proc.qs'")
  qs::qsave(as.data.frame(new.mat), file = "data_proc.qs")
  mSetObj$msgSet$replace.msg <- msg
  print(mSetObj$msgSet$replace.msg)
  return(mSetObj)
}

#' Constructs a dataSet object for storing metabolomics data
#'
#' This functions handles the construction of an mSetObj object for storing data for further processing and analysis. It is necessary to utilize this function to provide downstream functions with information about type of data and the type of analysis you will perform, as well as to provide the required data structure. This initialization is performed as part of the data preparation workflow \code{\link[VisomX]{met.read_data}}.
#'
#' @param data.type (Character) The type of data, either "list" (Compound lists), "conc" (Compound concentration data), "specbin" (Binned spectra data), "pktable" (Peak intensity table), "nmrpeak" (NMR peak lists), "mspeak" (MS peak lists), or "msspec" (MS spectra data).
#' @param anal.type (Character) Indicate the analysis module to be performed: "stat", "pathora", "pathqea", "msetora", "msetssp", "msetqea", "ts", "cmpdmap", "smpmap", or "pathinteg".
#' @param paired (Logical) Indicate if the data is paired (\code{TRUE}) or not (\code{FALSE}).
#' @return The input mSet object with imputed data at mSetObj$dataSet$data_proc.
#' @references adapted from \code{\link[MetaboAnalystR]{Initialize}} (\url{https://github.com/xia-lab/MetaboAnalystR}).
#' @author Nicolas T. Wirth \email{mail.nicowirth@gmail.com}
#' Technical University of Denmark
#' License: GNU GPL (>= 2)
#' @export
met.initialize <- function (data.type = "conc", anal.type = "stat", paired = FALSE)
{
  dataSet <- list()
  dataSet$type <- data.type
  dataSet$design.type <- "regular"
  dataSet$cls.type <- "disc"
  dataSet$format <- "rowu"
  dataSet$paired <- paired
  analSet <- list()
  analSet$type <- anal.type
  mSetObj <- list()
  mSetObj$dataSet <- dataSet
  mSetObj$analSet <- analSet
  mSetObj$imgSet <- list()
  mSetObj$msgSet <- list()
  mSetObj$msgSet$msg.vec <- vector(mode = "character")
  mSetObj$cmdSet <- vector(mode = "character")
  if (anal.type == "mummichog") {
    mSetObj$paramSet$mumRT <- NA
    mSetObj$paramSet$mumRT.type <- NA
    mSetObj$paramSet$version <- NA
    mSetObj$paramSet$mumDataContainsPval <- 1
    mSetObj$paramSet$mode <- NA
    mSetObj$paramSet$adducts <- NA
    mSetObj$paramSet$peakFormat <- "mpt"
  }
  else if (anal.type == "metapaths") {
    paramSet <- list()
    paramSet$mumRT <- NA
    paramSet$mumRT.type <- NA
    paramSet$version <- NA
    paramSet$mumDataContainsPval <- 1
    paramSet$mode <- NA
    paramSet$adducts <- NA
    paramSet$peakFormat <- "mpt"
    paramSet$metaNum <- 0
    mSetObj$paramSet <- paramSet
    dataNMs <- names(mSetObj)[grepl("MetaData", names(mSetObj))]
    if (length(dataNMs) > 0) {
      for (n in dataNMs) {
        mSetObj[[n]] <- NULL
      }
    }
  }
  module.count <<- 0
  smpdbpw.count <<- 0
  mdata.all <<- list()
  mdata.siggenes <<- vector("list")
  meta.selected <<- TRUE
  anal.type <<- anal.type
  print("MetaboAnalyst R objects initialized ...")
  return(mSetObj)
}

#' Data normalization
#'
#' \code{met.normalize} performs row-wise normalization, transformation, and scaling of metabolomics data. This step is performed as part of the \code{\link[VisomX]{met.workflow}} function. Additionally, the workflow \code{\link[VisomX]{met.test_normalization}} allows the simultaneous testing of different data processing conditions and helps with finding the most suitable options.
#'
#' @param mSetObj Enter the name of the created mSet object (see \code{\link[VisomX]{met.read_data}}).
#' @param rowNorm (Character) Select the option for row-wise normalization:
#' \itemize{
#'  \item \code{"GroupPQN"} for probabilistic quotient normalization by a reference group
#'  \item \code{"SamplePQN"} for probabilistic quotient normalization by a reference sample
#'  \item \code{"QuantileNorm"} for Quantile Normalization
#'  \item \code{"CompNorm"} for normalization by a reference feature
#'  \item \code{"SumNorm"} for normalization to constant sum of intensities
#'  \item \code{"MedianNorm"} for normalization to sample median
#'  \item \code{"SpecNorm"} for normalization by a sample-specific factor
#'  }
#' @param transNorm (Character) Select option to transform the data:
#' \itemize{
#'  \item \code{"LogNorm"} for Log10 normalization
#'  \item \code{"CrNorm"} Cubic Root Transformation
#'  }
#' @param scaleNorm (Character) Select option for scaling the data:
#' \itemize{
#'  \item \code{"MeanCenter"} for Mean Centering
#'  \item \code{"AutoNorm"} for Autoscaling
#'  \item \code{"ParetoNorm"} for Pareto Scaling
#'  \item \code{"RangeNorm"} for Range Scaling
#'  }
#' @param ref (Character) Enter the name of the reference sample or the reference feature (if \code{rowNorm = "GroupPQN"}, \code{"SamplePQN"}, or \code{"CompNorm"}.
#' @param norm.vec (Numeric vector) Vector with sample-specific scaling factors. Only applicable for \code{rowNorm = "SpecNorm"}.
#' @param ratio This option is only for biomarker analysis.
#' @param ratioNum Relevant only for biomarker analysis.
#' @return The input mSet object with normalized data at mSetObj$dataSet$norm.
#' @references adapted from \code{\link[MetaboAnalystR]{Normalization}} (\url{https://github.com/xia-lab/MetaboAnalystR}).
#' @author Nicolas T. Wirth \email{mail.nicowirth@gmail.com}
#' Technical University of Denmark
#' License: GNU GPL (>= 2)
#' @export
met.normalize <- function (mSetObj = NA, rowNorm = NULL, transNorm = NULL, scaleNorm = NULL, ref = NULL, norm.vec = NULL,
          ratio = FALSE, ratioNum = 20)
{
  if(!is.null(mSetObj$dataSet$prenorm)){
    data <- mSetObj$dataSet$prenorm
  } else {
    data <- qs::qread("prenorm.qs")
  }
  cls <- mSetObj$dataSet$prenorm.cls
  if (substring(mSetObj$dataSet$format, 4, 5) == "ts") {
    if (is.null(mSetObj$dataSet$prenorm.facA)) {
      nfacA <- mSetObj$dataSet$facA
      nfacB <- mSetObj$dataSet$facB
    }
    else {
      nfacA <- mSetObj$dataSet$prenorm.facA
      nfacB <- mSetObj$dataSet$prenorm.facB
    }
    mSetObj$dataSet$facA <- nfacA
    mSetObj$dataSet$facB <- nfacB
    if (mSetObj$dataSet$design.type == "time" | mSetObj$dataSet$design.type ==
        "time0") {
      if (tolower(mSetObj$dataSet$facA.lbl) == "time") {
        time.fac <- nfacA
        exp.fac <- nfacB
      }
      else {
        time.fac <- nfacB
        exp.fac <- nfacA
      }
      lvls <- levels(time.fac)
      time.points <- as.numeric(as.character(lvls))
      ord.lvls <- lvls[order(time.points)]
      time.fac <- ordered(time.fac, levels = ord.lvls)
      mSetObj$dataSet$time.fac <- time.fac
      mSetObj$dataSet$exp.fac <- exp.fac
    }
  }
  colNames <- colnames(data)
  rowNames <- rownames(data)
  if (rowNorm == "QuantileNorm") {
    data <- return(t(preprocessCore::normalize.quantiles(t(data), copy = FALSE)))
    varCol <- apply(data, 2, var, na.rm = T)
    constCol <- (varCol == 0 | is.na(varCol))
    constNum <- sum(constCol, na.rm = T)
    if (constNum > 0) {
      print(paste("After quantile normalization",
                  constNum, "features with a constant value were found and deleted."))
      data <- data[, !constCol, drop = FALSE]
      colNames <- colnames(data)
      rowNames <- rownames(data)
    }
    rownm <- "Quantile Normalization"
  }
  else if (rowNorm == "GroupPQN") {
    if(!(ref %in% levels(cls))){
      stop(paste0("'", ref, "' is not a valid reference sample group (condition). Valid conditions are:\n",
                  paste(levels(cls), collapse = ", ")), call. = FALSE)
    }
    grp.inx <- cls == ref
    ref.smpl <- apply(data[grp.inx, , drop = FALSE], 2, mean)
    data <- t(apply(data, 1, function (x, ref.smpl) x/median(as.numeric(x/ref.smpl), na.rm = T), ref.smpl))
    rownm <- "Probabilistic Quotient Normalization by a reference group"
  }
  else if (rowNorm == "SamplePQN") {
    if(!(ref %in% rownames(data))){
      stop(paste0("'", ref, "' is not a valid reference sample. Valid samples are, for example:\n",
                  paste0("'", sample(rownames(data), size = 5), collapse = "', "), "'"), call. = FALSE)
    }
    ref.smpl <- data[ref, , drop = FALSE]
    data <- t(apply(data, 1, function (x, ref.smpl) x/median(as.numeric(x/ref.smpl), na.rm = T), ref.smpl))
    rownm <- "Probabilistic Quotient Normalization by a reference sample"
  }
  else if (rowNorm == "CompNorm") {
    if(!(ref %in% colnames(data))){
      stop(paste0("'", ref, "' is not a valid reference feature. Valid features are, for example:\n",
                  paste0("'", sample(colnames(data), size = 5), collapse = "', "), "'"), call. = FALSE)
    }
    data <- t(apply(data, 1, function (x, ref) 1000 * x/x[ref], ref))
    rownm <- "Normalization by a reference feature"
  }
  else if (rowNorm == "SumNorm") {
    data <- t(apply(data, 1, function (x) 1000 * x/sum(x, na.rm = T)))
    rownm <- "Normalization to constant sum"
  }
  else if (rowNorm == "MedianNorm") {
    data <- t(apply(data, 1, function (x) x/median(x, na.rm = T)))
    rownm <- "Normalization to sample median"
  }
  else if (rowNorm == "SpecNorm") {
    if (is.null(norm.vec)) {
      norm.vec <- rep(1, nrow(data))
      print("No sample specific information were given, all set to 1.0")
    }
    rownm <- "Normalization by sample-specific factor"
    data <- data/norm.vec
  }
  else {
    rownm <- "N/A"
  }
  rownames(data) <- rowNames
  colnames(data) <- colNames
  if (rowNorm == "CompNorm" && !is.null(ref)) {
    inx <- match(ref, colnames(data))
    data <- data[, -inx, drop = FALSE]
    colNames <- colNames[-inx]
  }
  row.norm <- as.data.frame(CleanData(data, T, T))
  qs::qsave(row.norm, file = "row_norm.qs")
  mSetObj$dataSet$row_norm <- row.norm
  if (ratio) {
    min.val <- min(abs(data[data != 0]))/2
    norm.data <- log2((data + sqrt(data^2 + min.val))/2)
    transnm <- "Log2 Normalization"
    ratio.mat <- CalculatePairwiseDiff(norm.data)
    fstats <- Get.Fstat(ratio.mat, cls)
    hit.inx <- rank(-fstats) < ratioNum
    ratio.mat <- ratio.mat[, hit.inx, drop = FALSE]
    data <- cbind(norm.data, ratio.mat)
    colNames <- colnames(data)
    rowNames <- rownames(data)
    mSetObj$dataSet$use.ratio <- TRUE
    mSetObj$dataSet$proc.ratio <- data
  }
  else {
    mSetObj$dataSet$use.ratio <- FALSE
    if (transNorm == "LogNorm") {
      min.val <- min(abs(data[data != 0]))/10
      data <- apply(data, 2, function (x, min.val) log10((x + sqrt(x^2 + min.val^2))/2), min.val)
      transnm <- "Log10 Transformation"
    }
    else if (transNorm == "SrNorm") {
      min.val <- min(abs(data[data != 0]))/10
      data <- apply(data, 2, function (x, min.val) ((x + sqrt(x^2 + min.val^2))/2)^(1/2), min.val)
      transnm <- "Square Root Transformation"
    }
    else if (transNorm == "CrNorm") {
      norm.data <- abs(data)^(1/3)
      norm.data[data < 0] <- -norm.data[data < 0]
      data <- norm.data
      transnm <- "Cubic Root Transformation"
    }
    else {
      transnm <- "N/A"
    }
  }
  if (scaleNorm == "MeanCenter") {
    data <- apply(data, 2, function (x) x - mean(x))
    scalenm <- "Mean Centering"
  }
  else if (scaleNorm == "AutoNorm") {
    data <- apply(data, 2, function(x) (x - mean(x))/sd(x, na.rm = T))
    scalenm <- "Autoscaling"
  }
  else if (scaleNorm == "ParetoNorm") {
    data <- apply(data, 2, function(x) (x - mean(x))/sqrt(sd(x, na.rm = T)))
    scalenm <- "Pareto Scaling"
  }
  else if (scaleNorm == "RangeNorm") {
    data <- apply(data, 2, function (x) {
      if (max(x) == min(x)) {
        x
      }
      else {
        (x - mean(x))/(max(x) - min(x))
      }
    })
    scalenm <- "Range Scaling"
  }
  else {
    scalenm <- "N/A"
  }
  rownames(data) <- rowNames
  colnames(data) <- colNames
  data <- CleanData(data, T, F)
  if (ratio) {
    mSetObj$dataSet$ratio <- CleanData(ratio.mat, T, F)
  }
  mSetObj$dataSet$norm <- as.data.frame(data)
  if (substring(mSetObj$dataSet$format, 4, 5) == "ts") {
    if (rownames(mSetObj$dataSet$norm) != rownames(mSetObj$dataSet$meta.info)) {
      print("Metadata and data norm are not synchronized.")
    }
    mSetObj$dataSet$meta.info <- mSetObj$dataSet$meta.info[rownames(data),
    ]
  }
  qs::qsave(mSetObj$dataSet$norm, file = "complete_norm.qs")
  mSetObj$dataSet$cls <- cls
  mSetObj$dataSet$rownorm.method <- rownm
  mSetObj$dataSet$trans.method <- transnm
  mSetObj$dataSet$scale.method <- scalenm
  mSetObj$dataSet$combined.method <- FALSE
  mSetObj$dataSet$norm.all <- NULL
  return(mSetObj)
}


#' PLS-DA classification and feature selection
#'
#' \code{met.PLSDA.CV} performs group classification and feature selection on a PLS model.
#'
#' @param mSetObj Enter name of the created mSet object after PLS model creation (see \code{\link[VisomX]{met.PLSR.Anal}}).
#' @param methodName (Character) Enter one of two methods for PLS-DA model (cross) validation:
#'\itemize{
#'  \item \code{"LOOCV"} performs leave-one-out cross validation
#'  \item \code{"CV"} performs k-fold cross validation.
#'  }
#' @param k (Numeric) The number of (randomized) groups that the dataset is to be split into during cross validation if \code{methodName = "CV"}.
#' @param compNum Number of components used for model validation (detected automatically).
#' @param choice (Character) Choose the criterion used to estimate the predictive ability of the model, \code{"Q2"} or \code{"R2"}.
#' @param data (Character) Enter \code{"all"} to train the PLS(-DA) model on your whole (filtered and normalized) dataset or \code{"anova"} to use a subset of features defined as significant based on ANOVA analysis.
#' @return The input mSet object with the results of PLS-DA at mSetObj$analSet$plsda.
#' @references adapted from \code{\link[MetaboAnalystR]{PLSDA.CV}} (\url{https://github.com/xia-lab/MetaboAnalystR}).
#' @author Nicolas T. Wirth \email{mail.nicowirth@gmail.com}
#' Technical University of Denmark
#' License: GNU GPL (>= 2)
#' @export
#' @import pls
met.PLSDA.CV <- function (mSetObj = NA, methodName = "LOOCV", k = 5,compNum = GetDefaultPLSCVComp(mSetObj),
          choice = "Q2", data = "all")
{
  # get classification accuracy using caret
  if(methodName == "LOOCV"){
    traincontrol = caret::trainControl(method = "LOOCV")
  } else {
    traincontrol = caret::trainControl(method = "cv", number = 5)
  }
  if(data=="anova") {
    plsda.cls <-
      caret::train(
        mSetObj$dataSet$norm[, mSetObj$analSet$aov[["inx.imp"]]],
        mSetObj$dataSet$cls,
        "pls",
        trControl = traincontrol,
        tuneLength = compNum
      )
  } else {
    plsda.cls <-
      caret::train(
        mSetObj$dataSet$norm,
        mSetObj$dataSet$cls,
        "pls",
        trControl = traincontrol,
        tuneLength = compNum
      )
  }
  # note, for regression, use model matrix
  if (mSetObj$analSet$plsr$reg) {
    cls <- cls <- scale(as.numeric(mSetObj$dataSet$cls))[,
                                                         1]
  } else {
    cls <- stats::model.matrix(~mSetObj$dataSet$cls - 1)
  }
  datmat <- as.matrix(mSetObj$dataSet$norm)
  # use the classifical regression to get R2 and Q2 measure
  if(methodName == "LOOCV"){
    plsda.reg <- pls::plsr(cls ~ datmat, method = "oscorespls",
                           ncomp = compNum, validation = "LOO")
  } else {
    plsda.reg <- pls::plsr(cls ~ datmat, method = "oscorespls",
                            ncomp = compNum, validation = "CV", segments = k)
  }

  fit.info <- pls::R2(plsda.reg, estimate = "all")$val[,1,]
  # combine accuracy, R2 and Q2
  accu <- plsda.cls$results[, 2]
  all.info <- rbind(accu, fit.info[, -1])
  rownames(all.info) <- c("Accuracy", "R2", "Q2")
  if (choice == "Q2") {
    best.num <- which(all.info[3, ] == max(all.info[3, ]))
  }
  else if (choice == "R2") {
    best.num <- which(all.info[2, ] == max(all.info[2, ]))
  }
  else {
    best.num <- which(all.info[1, ] == max(all.info[1, ]))
  }
  coef.mat <- try(caret::varImp(plsda.cls, scale = T)$importance)
  if (class(coef.mat) == "try-error") {
    coef.mat <- NULL
  }
  else {
    if (mSetObj$dataSet$cls.num > 2) {
      coef.mean <- apply(coef.mat, 1, mean)
      coef.mat <- cbind(coef.mean = coef.mean, coef.mat)
    }
    inx.ord <- order(coef.mat[, 1], decreasing = T)
    coef.mat <- data.matrix(coef.mat[inx.ord, , drop = FALSE])
    fast.write.csv(signif(coef.mat, 5), file = "plsda_coef.csv")
  }
  pls <- mSetObj$analSet$plsr
  b <- c(pls$Yloadings)[1:compNum]
  T <- pls$scores[, 1:compNum, drop = FALSE]
  SS <- b^2 * colSums(T^2)
  W <- pls$loading.weights[, 1:compNum, drop = FALSE]
  Wnorm2 <- colSums(W^2)
  SSW <- sweep(W^2, 2, SS/Wnorm2, "*")
  vips <- sqrt(nrow(SSW) * apply(SSW, 1, cumsum)/cumsum(SS))
  if (compNum > 1) {
    vip.mat <- as.matrix(t(vips))
    ord.inx <- order(-abs(vip.mat[, 1]), -abs(vip.mat[, 2]))
  }
  else {
    vip.mat <- as.matrix(vips)
    ord.inx <- order(-abs(vip.mat[, 1]))
  }
  vip.mat <- vip.mat[ord.inx, ]
  colnames(vip.mat) <- paste("Comp.", 1:ncol(vip.mat))
  fast.write.csv(signif(vip.mat, 5), file = "plsda_vip.csv")

  if(data=="anova") {
    plsda.cls <-
      caret::train(
        mSetObj$dataSet$norm[, mSetObj$analSet$aov[["inx.imp"]]],
        mSetObj$dataSet$cls,
        "pls",
        trControl = caret::trainControl(method = ifelse(methodName ==
                                                          "LOOCV", "LOOCV", "CV")),
        tuneLength = compNum
      )
  } else {
    plsda.cls <-
      caret::train(
        mSetObj$dataSet$norm,
        mSetObj$dataSet$cls,
        "pls",
        trControl = caret::trainControl(method = ifelse(methodName ==
                                                          "LOOCV", "LOOCV", "CV")),
        tuneLength = compNum
      )
  }
  mSetObj$analSet$plsda <- list(best.num = best.num, choice = choice,
                                coef.mat = coef.mat, vip.mat = vip.mat, fit.info = all.info, data.type = data)
  return(mSetObj)
}

#' Partial least squares (PLS) analysis using \emph{oscorespls} (Orthogonal scores algorithm)
#'
#' \code{met.PLSR.Anal} performs PLS analysis using \emph{oscorespls}.
#'
#' @param mSetObj Enter name of the created mSet object (see \code{\link[VisomX]{met.read_data}}).
#' @param reg (Logical)
#' @param data (Character) Enter \code{"all"} to train the PLS(-DA) model on your whole (filtered and normalized) dataset or \code{"anova"} to use a subset of features defined as significant based on ANOVA analysis.
#' @return The input mSet object with the results of PLS-DA at mSetObj$analSet$plsr.
#' @references adapted from \code{\link[MetaboAnalystR]{PLSR.Anal}} (\url{https://github.com/xia-lab/MetaboAnalystR}).
#' @author Nicolas T. Wirth \email{mail.nicowirth@gmail.com}
#' Technical University of Denmark
#' License: GNU GPL (>= 2)
#' @export
met.PLSR.Anal <- function (mSetObj = NA, reg = FALSE, data = "all")
{
  comp.num <- dim(mSetObj$dataSet$norm)[1] - 1
  if (comp.num > 8) {
    comp.num <- min(dim(mSetObj$dataSet$norm)[2], 8)
  }
  else if (comp.num < 8) {
    AddMsg(paste("Too few features in your data to do PLS-DA analysis! "))
  }
  if (reg) {
    cls <- scale(as.numeric(mSetObj$dataSet$cls))[, 1]
  }
  else {
    cls <- stats::model.matrix(~mSetObj$dataSet$cls - 1)
  }
  if(data=="anova"&&is.null(mSetObj$analSet$aov)) {
    stop("Please perform 'met.ANOVA.Anal' on your mSet before running PLS analysis with 'data = \"anova\"'")
  }
  if(data=="anova") {
    datmat <- as.matrix(mSetObj$dataSet$norm[,mSetObj$analSet$aov[["inx.imp"]]])
  } else if(!is.null(mSetObj$dataSet$norm)){
    datmat <- as.matrix(mSetObj$dataSet$norm)
  } else {
    datmat <- as.matrix(mSetObj$dataSet$prenorm)
  }
  mSetObj$analSet$plsr <- pls::plsr(cls ~ datmat, method = "oscorespls",
                                    ncomp = comp.num)
  mSetObj$analSet$plsr$reg <- reg
  mSetObj$analSet$plsr$loading.type <- "all"
  mSetObj$analSet$plsr$data.type <- data
  mSetObj$custom.cmpds <- c()
  fast.write.csv(signif(mSetObj$analSet$plsr$scores, 5), row.names = rownames(mSetObj$dataSet$norm),
                 file = "plsda_score.csv")
  fast.write.csv(signif(mSetObj$analSet$plsr$loadings, 5),
                 file = "plsda_loadings.csv")
  return(mSetObj)
}

#' Prepare data for normalization
#'
#' \code{met.PreparePrenormData} checks for previous data editing and filtering, and defines adds a 'prenormalization' data table to the mSet object for further analysis.
#'
#' @param mSetObj Enter name of the created mSet object (see \code{\link[VisomX]{met.read_data}}).
#' @return The input mSet object with prenorm data table at mSetObj$dataSet$prenorm.
#' @references adapted from \code{\link[MetaboAnalystR]{PreparePrenormData}} (\url{https://github.com/xia-lab/MetaboAnalystR}).
#' @author Nicolas T. Wirth \email{mail.nicowirth@gmail.com}
#' Technical University of Denmark
#' License: GNU GPL (>= 2)
#' @export
met.PreparePrenormData <- function (mSetObj = NA)
{
  if (!is.null(mSetObj$dataSet$edit)) {
    mydata <- mSetObj$dataSet$edit
    if (!is.null(mSetObj$dataSet$filt)) {
      hit.inx <- colnames(mydata) %in% colnames(mSetObj$dataSet$filt)
      mydata <- mydata[, hit.inx, drop = FALSE]
    }
    prenorm <- mydata
    mSetObj$dataSet$prenorm.cls <- mSetObj$dataSet$edit.cls
    if (substring(mSetObj$dataSet$format, 4, 5) == "ts") {
      mSetObj$dataSet$prenorm.facA <- mSetObj$dataSet$edit.facA
      mSetObj$dataSet$prenorm.facB <- mSetObj$dataSet$edit.facB
    }
  } else if (!is.null(mSetObj$dataSet$filt)) {
    prenorm <- mSetObj$dataSet$filt
    mSetObj$dataSet$prenorm.cls <- mSetObj$dataSet$filt.cls
    if (substring(mSetObj$dataSet$format, 4, 5) == "ts") {
      mSetObj$dataSet$prenorm.facA <- mSetObj$dataSet$filt.facA
      mSetObj$dataSet$prenorm.facB <- mSetObj$dataSet$filt.facB
    }
  } else {
    prenorm <- mSetObj$dataSet$data_proc
    mSetObj$dataSet$prenorm.cls <- mSetObj$dataSet$proc.cls
    if (substring(mSetObj$dataSet$format, 4, 5) == "ts") {
      mSetObj$dataSet$prenorm.facA <- mSetObj$dataSet$proc.facA
      mSetObj$dataSet$prenorm.facB <- mSetObj$dataSet$proc.facB
    }
  }
  mSetObj$dataSet$prenorm <- prenorm
  qs::qsave(prenorm, "prenorm.qs")
  mSetObj$dataSet$prenorm.smpl.nms <- rownames(prenorm)
  mSetObj$dataSet$prenorm.feat.nms <- colnames(prenorm)
  return(mSetObj)
}

#' Construct mSet data container, read metabolomics data, filter data, and impute missing values
#'
#' \code{met.read_data} is a wrapper function that constructs an mSet object, adds data from a table file or R dataframe object, applies unspecific and user-defined data filters, and imputes missing values.
#'
#' @param data Enter name of an R dataframe object or the "path name" (in quotes) of the CSV/TSV/XLS/XLSX/TXT file to read.
#' @param data.type (Character) The type of data, either "list" (Compound lists), \code{"conc"} (Compound concentration data), \code{"specbin"} (Binned spectra data), \code{"pktable"} (Peak intensity table), \code{"nmrpeak"} (NMR peak lists), \code{"mspeak"} (MS peak lists), or \code{"msspec"} (MS spectra data).
#' @param anal.type (Character) Indicate the analysis module to be performed: \code{"stat", "pathora", "pathqea", "msetora", "msetssp", "msetqea", "ts", "cmpdmap", "smpmap",} or \code{"pathinteg"}.
#' @param paired (Logical) Indicate if the data is paired (\code{TRUE}) or not (\code{FALSE}).
#' @param csvsep (Character) Enter the separator used in the CSV file (only applicable if reading a ".csv" file).
#' @param sheet (Integer or Character string) Number or name of the sheet with proteomics data in XLS or XLSX files (_optional_).
#' @param data.format (Character) Specify if samples are paired and in rows (\code{"rowp"}), unpaired and in rows (\code{"rowu"}), in columns and paired (\code{"colp"}), or in columns and unpaired (\code{"colu"}).
#' @param lbl.type (Character) Specify the group label type, either categorical (\code{"disc"}) or continuous (\code{"cont"}).
#' @param filt.feat (Character Vector) Enter the names of features to remove from the dataset.
#' @param filt.smpl (Character Vector) Enter the names of samples to remove from the dataset.
#' @param filt.grp (Character Vector) Enter the names of groups to remove from the dataset.
#' @param filt.method (Character) Select an option for unspecific filtering based on the following ranking criteria:
#' \itemize{
#'  \item \code{"none"} apply no unspecific filtering.
#'  \item \code{"rsd"} filters features with low relative standard deviation across the dataset.
#'  \item \code{"nrsd"} is the non-parametric relative standard deviation.
#'  \item \code{"mean"} filters features with low mean intensity value across the dataset.
#'  \item \code{"median"} filters features with low median intensity value across the dataset.
#'  \item \code{"sd"} filters features with low absolute standard deviation across the dataset.
#'  \item \code{"mad"} filters features with low median absolute deviation across the dataset.
#'  \item \code{"iqr"} filters features with a low inter-quartile range across the dataset.
#'  }
#' @param remain.num (Numerical) Enter the number of variables to keep in your dataset. If \code{NULL}, the following empirical rules are applied during data filtering with the methods specified in \code{filter = ""}:
#' \itemize{
#'   \item \strong{Less than 250 variables:} 5% will be filtered
#'   \item \strong{250 - 500 variables:} 10% will be filtered
#'   \item \strong{500 - 1000 variables:} 25% will be filtered
#'   \item \strong{More than 1000 variables:} 40% will be filtered
#' }
#' @param qcFilter (Logical) Filter the variables based on the relative standard deviation of features in quality control (QC) samples (\code{TRUE}), or not (\code{FALSE}). This filter can be applied in addition to other, unspecific filtering methods.
#' @param qc.rsd (Numeric) Define the relative standard deviation cut-off in %. Variables with a RSD greater than this number in the QC samples will be removed from the dataset. It is only necessary to specify this argument if \code{qcFilter} is \code{TRUE}. Otherwise, it will be ignored.
#' @param all.rsd (Numeric or \code{NULL}) Apply a filter based on the in-group relative standard deviation (RSD, in %) or not \code{NULL}. Therefore, the RSD of every feature is calculated for every group in the dataset. If the RSD of a variable in any group exceeds the indicated threshold, it is removed from the dataset. This filter can be applied in addition to other filtering methods and is especially useful to perform on data with technical replicates.
#' @param imp.method (Character) Select the option to replace missing variables:
#' \itemize{
#'  \item \code{"lod"} replaces missing values with 1/5 of the minimum value for the respective variable.
#'  \item \code{"rowmin"} replaces missing values with the half sample minimum.
#'  \item \code{"colmin"} replaces missing values with the half feature minimum.
#'  \item \code{"mean"} replaces missing values with the mean value of the respective feature column.
#'  \item \code{"median"} replaces missing values with the median value of the respective feature column.
#'  \item \code{"knn_var"} imputes missing values by finding the features in the training set “closest” to it and averages these nearby points to fill in the value.
#'  \item \code{"knn_smp"} imputes missing values by finding the samples in the training set “closest” to it and averages these nearby points to fill in the value.
#'  \item \code{"bpca"} applies Bayesian PCA to impute missing values.
#'  \item \code{"ppca"} applies probabilistic PCA to impute missing values.
#'  \item \code{"svdImpute"} applies singular value decomposition to impute missing values.
#'  }
#' @param export (Logical, \code{TRUE} or \code{FALSE}) Shall the missing value detection plots be exported as PDF or PNG file?
#' @param dpi (Numeric) The resolution of exported PNG and PDF images.
#' @param img.format (Character, \code{"png"} or \code{"pdf"}) image file format (if \code{export = TRUE}).
#' @param dec (Character) decimal separator used in CSV, TSV and TXT files.
#'
#' @return An mSet object with (built in ascending order):
#' \itemize{
#'  \item original data at \code{mSetObj$dataSet$data_orig}.
#'  \item data with manually filtered out features/samples/groups at \code{mSetObj$dataSet$edit}.
#'  \item data with unspecifically filtered data at  \code{mSetObj$dataSet$filt}.
#'  \item data with imputed missing values at \code{mSetObj$dataSet$data_proc}.
#'  \item missing value heatmap at \code{mSetObj$imgSet$missval_heatmap.plot} (see \code{\link[VisomX]{met.plot_missval}}).
#'  \item Density and CumSum plots of intensities of proteins with and without missing values at \code{mSetObj$imgSet$missval_density.plot} (see \code{\link[VisomX]{met.plot_detect}}).
#' }
#' @author Nicolas T. Wirth \email{mail.nicowirth@gmail.com}
#' Technical University of Denmark
#' License: GNU GPL (>= 2)
#' @export
met.read_data <- function (data,
                           data.type = "conc",
                           anal.type = "stat",
                           paired = FALSE, # Parameters used to initialize dataSet object
                           csvsep = ";", # optional: delimiter if reading CSV file
                           dec = ".",
                           sheet = 1,
                           data.format = "rowu", lbl.type = "disc",
                           filt.feat = c(""),
                           filt.smpl = c(""),
                           filt.grp = c(""),
                           filt.method = "none",
                           remain.num = NULL,
                           qcFilter = "F",
                           qc.rsd = 25,
                           all.rsd = NULL,
                           imp.method = "lod",
                           export = FALSE,
                           img.format = "pdf",
                           dpi = dpi
)
{
  mSetObj <- suppressWarnings(met.initialize(data.type = data.type, anal.type = anal.type, paired = paired))
  mSetObj$dataSet$cls.type <- lbl.type
  mSetObj$dataSet$format <- data.format
  # Read data
  if(!is.null(data)){
    if (is.character(data)) {
      # Read table file
      dat <- read_file(data, csvsep = csvsep, dec = dec, sheet = sheet)
    } #if (is.character(data))
    else if(exists(paste(quote(data)))){
      dat <- data
    }
  }

  if (class(dat) == "try-error" || ncol(dat) == 1) {
    AddErrMsg("Data format error. Failed to read in the data!")
    AddErrMsg("Make sure the data table is saved as comma separated values (.csv), XLS/XLSX, or tab separated values (.txt) format!")
    AddErrMsg("Please also check the followings: ")
    AddErrMsg("Either sample or feature names must in UTF-8 encoding; Latin, Greek letters are not allowed.")
    AddErrMsg("We recommend to use a combination of English letters, underscore, and numbers for naming purpose.")
    AddErrMsg("Make sure sample names and feature (peak, compound) names are unique.")
    AddErrMsg("Missing values should be blank or NA without quote.")
    AddErrMsg("Make sure the file delimeters are indicated correctly (e.g., csvsep = \",\")")
    return(0)
  }
  msg <- NULL

  if (substring(data.format, 1, 3) == "row") {
    msg <- c(msg, "Samples are in rows and features in columns")
    smpl.nms <- dat[, 1]
    dat[, 1] <- NULL
    if (lbl.type == "qc") {
      rownames(dat) <- smpl.nms
      qs::qsave(dat, file = "data_orig.qs")
      mSetObj$dataSet$data_orig <- dat
      mSetObj$dataSet$cmpd <- colnames(dat)
      return(1)
    }
    cls.lbl <- dat[, 1]
    conc <- dat[, -1, drop = FALSE]
    var.nms <- colnames(conc)
    if (lbl.type == "no") {
      cls.lbl <- rep(1, nrow(dat))
      conc <- dat[, , drop = FALSE]
      var.nms <- colnames(conc)
    }
  } else {
    msg <- c(msg, "Samples are in columns and features in rows.")
    if (lbl.type == "no") {
      cls.lbl <- rep(1, ncol(dat))
      conc <- t(dat[, -1])
      var.nms <- dat[, 1]
      smpl.nms <- colnames(dat[, -1])
    } else {
      var.nms <- dat[-1, 1]
      dat[, 1] <- NULL
      smpl.nms <- colnames(dat)
      cls.lbl <- dat[1, ]
      conc <- t(dat[-1, ])
    }
  }
  conc <- conc[,order(colnames(conc))]
  colnames(conc) <- make.names(colnames(conc))
  var.nms <- make.names(var.nms[order(var.nms)])
  mSetObj$dataSet$type.cls.lbl <- class(cls.lbl)
  empty.inx <- is.na(smpl.nms) | smpl.nms == ""
  if (sum(empty.inx) > 0) {
    msg <- c(msg, paste("<font color=\"red\">", sum(empty.inx),
                        "empty rows</font> were detected and excluded from your data."))
    smpl.nms <- smpl.nms[!empty.inx]
    cls.lbl <- cls.lbl[!empty.inx]
    conc <- conc[!empty.inx, ]
  }
  empty.inx <- is.na(cls.lbl) | cls.lbl == ""
  if (sum(empty.inx) > 0) {
    if (mSetObj$analSet$type != "roc") {
      msg <- c(msg, paste("<font color=\"red\">",
                          sum(empty.inx), "empty labels</font> were detected and excluded from your data."))
      smpl.nms <- smpl.nms[!empty.inx]
      cls.lbl <- cls.lbl[!empty.inx]
      conc <- conc[!empty.inx, ]
    }
    else {
      cls.lbl[is.na(cls.lbl)] <- ""
      msg <- c(msg, paste("<font color=\"orange\">",
                          sum(empty.inx), "new samples</font> were detected from your data."))
    }
  }
  if (anal.type == "roc") {
    if (length(unique(cls.lbl[!empty.inx])) > 2) {
      AddErrMsg("ROC analysis is only defined for two-group comparisions!")
      return(0)
    }
  }
  if (length(unique(smpl.nms)) != length(smpl.nms)) {
    dup.nm <- paste(smpl.nms[duplicated(smpl.nms)], collapse = " ")
    AddErrMsg("Duplicate sample names are not allowed!")
    AddErrMsg(dup.nm)
    return(0)
  }
  empty.inx <- is.na(var.nms) | var.nms == ""
  if (sum(empty.inx) > 0) {
    msg <- c(msg, paste("<font color=\"red\">", sum(empty.inx),
                        "empty features</font> were detected and excluded from your data."))
    var.nms <- var.nms[!empty.inx]
    conc <- conc[, !empty.inx]
  }
  if (length(unique(var.nms)) != length(var.nms)) {
    dup.nm <- paste(var.nms[duplicated(var.nms)], collapse = " ")
    AddErrMsg("Duplicate feature names are not allowed!")
    AddErrMsg(dup.nm)
    return(0)
  }
  if (anal.type == "mummichog") {
    is.rt <- mSetObj$paramSet$mumRT
    if (!is.rt) {
      mzs <- as.numeric(var.nms)
      if (sum(is.na(mzs) > 0)) {
        AddErrMsg("Make sure that feature names are numeric values (mass or m/z)!")
        return(0)
      }
    }
  }
  if (sum(is.na(iconv(smpl.nms))) > 0) {
    na.inx <- is.na(iconv(smpl.nms))
    nms <- paste(smpl.nms[na.inx], collapse = "; ")
    AddErrMsg(paste("No special letters (i.e. Latin, Greek) are allowed in sample names!",
                    nms, collapse = " "))
    return(0)
  }
  if (sum(is.na(iconv(var.nms))) > 0) {
    na.inx <- is.na(iconv(var.nms))
    nms <- paste(var.nms[na.inx], collapse = "; ")
    AddErrMsg(paste("No special letters (i.e. Latin, Greek) are allowed in feature names!",
                    nms, collapse = " "))
    return(0)
  }
  smpl.nms <- gsub("\\\\", "-", smpl.nms)
  url.smp.nms <- make.unique(gsub("[^[:alnum:].@_-]", "", smpl.nms))
  names(url.smp.nms) <- smpl.nms
  var.nms <- gsub("\\\\", "-", var.nms)
  url.var.nms <- make.unique(gsub("[^[:alnum:].@_-]", "", var.nms))
  names(url.var.nms) <- var.nms
  cls.lbl <- gsub(" +", " ", as.vector(cls.lbl))
  cls.lbl <- gsub("\\\\", "-", cls.lbl)
  cls.lbl <- sub("^[[:space:]]*(.*?)[[:space:]]*$", "\\1", cls.lbl,
               perl = TRUE)
  rownames(conc) <- smpl.nms
  colnames(conc) <- var.nms
  if (mSetObj$dataSet$paired) {
    mSetObj$dataSet$orig.cls <- mSetObj$dataSet$pairs <- cls.lbl
  } else {
    if (lbl.type == "disc") {
      mSetObj$dataSet$orig.cls <- mSetObj$dataSet$cls <- as.factor(as.character(cls.lbl))
      if (substring(data.format, 4, 5) == "ts") {
        mSetObj$dataSet$facA.type <- is.numeric(facA)
        mSetObj$dataSet$orig.facA <- mSetObj$dataSet$facA <- as.factor(as.character(facA))
        mSetObj$dataSet$facA.lbl <- facA.lbl
        mSetObj$dataSet$facB.type <- is.numeric(facB)
        mSetObj$dataSet$orig.facB <- mSetObj$dataSet$facB <- as.factor(as.character(facB))
        mSetObj$dataSet$facB.lbl <- facB.lbl
      }
    }
    else {
      mSetObj$dataSet$orig.cls <- mSetObj$dataSet$cls <- tryCatch({
        as.numeric(cls.lbl)
      }, warning = function(na) {
        print("Class labels must be numeric and continuous!")
        return(0)
      })
      if (mSetObj$dataSet$cls == 0) {
        AddErrMsg("Class labels must be numeric and continuous!")
        return(0)
      }
    }
  }
  if (mSetObj$dataSet$type == "conc") {
    mSetObj$dataSet$cmpd <- var.nms
  }
  mSetObj$dataSet$mumType <- "table"
  mSetObj$dataSet$url.var.nms <- url.var.nms
  mSetObj$dataSet$url.smp.nms <- url.smp.nms
  mSetObj$dataSet$data_orig <- conc
  qs::qsave(conc, file = "data_orig.qs")
  var_label <- if (mSetObj$dataSet$type == "conc") {
   "Compounds"
  } else if (mSetObj$dataSet$type == "specbin") {
    "Spectra Bins"
  } else if (mSetObj$dataSet$type == "nmrpeak") {
    "Peaks (ppm)"
  } else if (mSetObj$dataSet$type == "mspeak") {
   "Peaks (mass)"
  } else {
    "Peaks(mz/rt)"
  }
  mSetObj$msgSet$read.msg <- c(msg, paste("The uploaded data file contains a ",
                                          nrow(conc), " (samples) by ", ncol(conc), " (",
                                          tolower(var_label), ") data matrix.",
                                          sep = ""))

  mSetObj <- suppressWarnings(met.SanityCheck(mSetObj))
  mSetObj <- met.impute(mSetObj, method = imp.method)
  mSetObj <- met.PreparePrenormData(mSetObj)

  if(!filt.feat== "" || !filt.smpl == "" || !filt.grp == "") {
    mSetObj <- GetGroupNames(mSetObj, "")
    mSetObj <- met.UpdateData(mSetObj, filt.feat = filt.feat,
                              filt.smpl = filt.smpl,
                              filt.grp = filt.grp)
    mSetObj<- met.PreparePrenormData(mSetObj)
  }
  if(!is.null(filt.method) | !is.null(all.rsd)){
    mSetObj <-
      met.FilterVariable(
        mSetObj = mSetObj,
        filter = filt.method,
        remain.num = remain.num,
        qcFilter = qcFilter,
        qc.rsd = qc.rsd,
        all.rsd = all.rsd
      )
    mSetObj<- met.PreparePrenormData(mSetObj)
  }
  dir.create(paste0(getwd(), "/met.ProcessedData"), showWarnings = F)
  mSetObj <- met.plot_missval(mSetObj, plot=F, export=export, format = img.format)
  mSetObj <- met.plot_detect(mSetObj, plot=F,  export=export, format = img.format)
  return(mSetObj)
}

#' Generate a markdown report for metabolomics data analysis
#'
#' \code{met.report} generates a report of the analysis performed by the \code{met.workflow} wrapper function.
#'
#' @param mSetObj Enter name of the created mSet object after a full analysis workflow (see \code{\link[VisomX]{met.workflow}}).
#' @param report.dir (Character) Enter the name or location of the folder in which the report files are generated. If \code{NULL} (the default), a new folder "Report_\emph{date-time}" is created in your working directory.
#' @param ... Further arguments passed from other methods.
#' @return In the specified folder, this function creates a PDF report, an HTML report, and an .RData file including the entire mSet object.
#' @author Nicolas T. Wirth \email{mail.nicowirth@gmail.com}
#' Technical University of Denmark
#' License: GNU GPL (>= 2)
#' @export
#' @import rgl
#' @import knitr
met.report <- function(mSetObj, report.dir = NULL, ...)
  {
  args <- list(...)
  for(i in 1:length(args)){
    assign(names(args)[i], args[[i]])
  }
  message("Render reports...")
  if(!is.null(report.dir)){
    wd <- paste0(getwd(), "/", report.dir)
  } else {
    wd <- paste(getwd(), "/Report.met_", format(Sys.time(),
                                            "%Y%m%d_%H%M%S"), sep = "")
  }
  dir.create(wd, showWarnings = F)
  for(i in 1:length(.libPaths())){
    VisomX.ndx <- grep("VisomX", list.files(.libPaths()[i]))
    if(length(VisomX.ndx)>0){
      Report.wd <- paste0(.libPaths()[i], "/VisomX")
    }
  }
  file <- paste0(Report.wd, "/Report_Met.Rmd")
  rmarkdown::render(file, output_format = "all", output_dir = wd,
                    quiet = TRUE)
  message("Save RData object...")
  save(mSetObj, file = paste(wd, "results.RData", sep = "/"))
  message(paste0("Files saved in: '", wd, "'"))
  unlink(paste0(tempdir(), "/Plots"), recursive = TRUE)
}

#' Generate a markdown report for the screening of metabolomics data pre-processing methods.
#'
#' \code{met.report_test_normalization} generates a report of the analysis performed by the \code{met.test_normalization} wrapper function.
#'
#' @param mSetObj Enter name of the created mSet object after a full analysis workflow (see \code{\link[VisomX]{met.workflow}}).
#' @param report.dir (Character) Enter the name or location of the folder in which the report files are generated. If \code{NULL} (the default), a new folder "Report_\emph{date-time}" is created in your working directory.
#' @param ... Further arguments passed from other methods.
#' @return In the specified folder, this function creates a PDF report, an HTML report, and an .RData file including the entire mSet object.
#' @author Nicolas T. Wirth \email{mail.nicowirth@gmail.com}
#' Technical University of Denmark
#' License: GNU GPL (>= 2)
#' @export
#' @import kableExtra
#' @import knitr
#' @import rgl
met.report_test_normalization <- function(mSet_list, report.dir = NULL, ...)
  {
  assertthat::assert_that(is.list(mSet_list))
  args <- list(...)
  for(i in 1:length(args)){
    assign(names(args)[i], args[[i]])
  }
  message("Render reports...")
  if(!is.null(report.dir)){
    wd <- paste0(getwd(), "/", report.dir)
  } else {
    wd <- paste(getwd(), "/met.Test_Normalization/Report_", format(Sys.time(),
                                                                   "%Y%m%d_%H%M%S"), sep = "")
  }
  dir.create(wd, showWarnings = F)
  file <- paste("C:/Users/nicwir/Documents/DTU_Biosustain/Scripts_and_Modelling/fluctuator/220111/R_package/VisomX", "/Reports/Report_Met_TestNorm.Rmd",
                sep = "")
  rmarkdown::render(file, output_format = "all", output_dir = wd,
                    quiet = TRUE)
  message("Save RData object...")
  save(mSet_list, file = paste(wd, "results_test_normalizaton.RData", sep = "/"))
  message(paste0("Files saved in: '", wd, "'"))
  unlink(paste0(str_replace_all(tempdir(), "\\\\", "/"), "/Plots"), recursive = TRUE)
}

#' Perform sanity check on metabolomics data
#'
#' @description \code{met.SanityCheckData} is used for data processing, and performs a basic sanity
#' check of the uploaded content, ensuring that the data is suitable for further analysis.
#' The function will return a message if the data has successfully passed the check
#' and is deemed suitable for further analysis. If it fails, the function will return a 0.
#' The function will perform the check directly onto the mSet$dataSet object, and must
#' be performed immediately after reading in data.
#' The sanity check function evaluates the accuracy of sample and class labels, data structure,
#' deals with non-numeric values, and removes columns that are constant across all samples (variance = 0).
#' This function is performed automatically as part of \code{\link[VisomX]{met.read_data}}
#' @usage met.SanityCheck(mSetObj = NA)
#' @param mSetObj Enter the name of the created mSetObj (see \code{\link[VisomX]{met.initialize}})
#' @references adapted from \code{\link[MetaboAnalystR]{SanityCheckData}} (\url{https://github.com/xia-lab/MetaboAnalystR}).
#' @author Nicolas T. Wirth \email{mail.nicowirth@gmail.com}
#' Technical University of Denmark
#' License: GNU GPL (>= 2)
#' @export
#'
met.SanityCheck <- function (mSetObj = NA)
  {
  if(!is.null(mSetObj$dataSet$data_orig)){
    orig.data <- mSetObj$dataSet$data_orig
  } else if (file.exists("data_orig.qs")) {
    orig.data <- qs::qread("data_orig.qs")
  }
  else {
    return(0)
  }
  msg <- NULL
  cls <- mSetObj$dataSet$orig.cls
  mSetObj$dataSet$small.smpl.size <- 0
  if (mSetObj$dataSet$cls.type == "disc") {
    if (substring(mSetObj$dataSet$format, 4, 5) == "ts") {
      metadata <- mSetObj$dataSet$meta.info
      if (mSetObj$dataSet$design.type == "time") {
        msg <- c(msg, "The data is time-series data.")
      }
      else if (mSetObj$dataSet$design.type == "time0") {
        msg <- c(msg, "The data is time-series only data.")
      }
      else {
        msg <- c(msg, "The data is not time-series data.")
      }
      clsA.num <- length(levels(metadata[, 1]))
      clsB.num <- length(levels(metadata[, 2]))
      if (ncol(metadata) == 2) {
        msg <- c(msg, paste(clsA.num, "groups were detected in samples for factor",
                            colnames(metadata)[1]))
        msg <- c(msg, paste(clsB.num, "groups were detected in samples for factor",
                            colnames(metadata)[2]))
      }
      else {
        msg <- c(msg, paste0(clsA.num, " groups were detected from primary meta-data factor: ",
                             colnames(mSetObj$dataSet$meta.info)[1], "."))
      }
    }
    else {
      if (mSetObj$dataSet$paired) {
        msg <- c(msg, "Samples are paired.")
        if (!(mSetObj$dataSet$type == "conc" |
              mSetObj$dataSet$type == "specbin" | mSetObj$dataSet$type ==
              "pktable")) {
          pairs <- ReadPairFile()
          if (length(pairs) != length(mSetObj$dataSet$url.smp.nms)) {
            AddErrMsg("Error: the total paired names are not equal to sample names.")
            return(0)
          }
          else {
            inx <- match(rownames(orig.data), names(pairs))
            if (sum(is.na(inx)) > 0) {
              AddErrMsg("Error: some paired names not match the sample names.")
              return(0)
            }
            else {
              mSetObj$dataSet$pairs <- pairs[inx]
            }
          }
        }
        pairs <- mSetObj$dataSet$pairs
        qc.hits <- tolower(as.character(cls)) %in% "qc"
        if (sum(qc.hits) > 0) {
          AddErrMsg("<font color='red'>Error: QC samples not supported in paired analysis mode.</font>")
          AddErrMsg("You can perform QC filtering using regular two-group labels.")
          AddErrMsg("Then re-upload your data (without QC samples) for paired analysis.")
          return(0)
        }
        else {
          pairs <- as.numeric(pairs)
        }
        label <- as.numeric(pairs)
        cls <- as.factor(ifelse(label > 0, 1, 0))
        mSetObj$dataSet$pairs <- label
        lev <- unique(pairs)
        uni.cl <- length(lev)
        uni.cl.abs <- uni.cl/2
        sorted.pairs <- sort(pairs, index = TRUE)
        if (!all(sorted.pairs$x == c(-uni.cl.abs:-1,
                                     1:uni.cl.abs))) {
          AddErrMsg("There are some problems in paired sample labels! ")
          if (uni.cl.abs != round(uni.cl.abs)) {
            duplicates <- pairs[duplicated(pairs)]
            dup.msg <- paste0("Duplicated labels:",
                              duplicates)
            AddErrMsg(paste("The total samples must be of even number!",
                            dup.msg))
          }
          else {
            AddErrMsg(paste("And class labels between ",
                            -uni.cl.abs, " and 1, and between 1 and ",
                            uni.cl.abs, ".", sep = ""))
          }
          return(0)
        }
        else {
          msg <- c(msg, "The labels of paired samples passed sanity check.")
          msg <- c(msg, paste("A total of", uni.cl.abs,
                              "pairs were detected."))
          x <- sorted.pairs$ix[(uni.cl.abs + 1):uni.cl]
          y <- sorted.pairs$ix[uni.cl.abs:1]
          index <- as.vector(cbind(x, y))
          cls <- cls[index]
          pairs <- pairs[index]
          mSetObj$dataSet$pairs <- pairs
          mSetObj$dataSet$orig.cls <- cls
          orig.data <- orig.data[index, ]
          qs::qsave(orig.data, file = "data_orig.qs")
          mSetObj$dataSet$data_orig <- orig.data
        }
      }
      else {
        cls.lbl <- mSetObj$dataSet$orig.cls
        qb.inx <- tolower(cls.lbl) %in% c("qc",
                                          "blank")
        if (sum(qb.inx) > 0) {
          cls.Clean <- as.factor(as.character(cls.lbl[!qb.inx]))
        }
        else {
          cls.Clean <- cls.lbl
        }
        if (anal.type != "network") {
          if (min(table(cls.Clean)) < 3 | length(levels(cls.Clean)) <
              2) {
            AddErrMsg(paste("A total of", length(levels(cls.Clean)),
                            "groups found with", length(cls.Clean),
                            "samples."))
            AddErrMsg("<font color='red'>At least <b>two</b> groups and <b>three replicates</b> per group are required for analysis</font>!")
            AddErrMsg("You can click the <b>Edit Groups</b> button below to see the group labels for each sample and make corrections.")
            return(-1)
          }
        }
        msg <- c(msg, "Samples are not paired.")
      }
      cls.lbl <- mSetObj$dataSet$orig.cls
      qb.inx <- tolower(cls.lbl) %in% c("qc", "blank")
      if (sum(qb.inx) > 0) {
        cls.lbl <- as.factor(as.character(cls.lbl[!qb.inx]))
      }
      min.grp.size <- min(table(cls.lbl))
      cls.num <- length(levels(cls.lbl))
      if (cls.num/min.grp.size > 3) {
        mSetObj$dataSet$small.smpl.size <- 1
        msg <- c(msg, "<font color='red'>Too many groups with very small number of replicates!</font>")
        msg <- c(msg, "<font color='red'>Only a subset of methods will be available for analysis!</font>")
      }
      if (mSetObj$analSet$type == "ts") {
        msg <- c(msg, paste0(cls.num, " groups were detected from primary meta-data factor: ",
                             colnames(mSetObj$dataSet$meta.info)[1], "."))
        cls.vec <- vector()
        meta.info <- mSetObj$dataSet$meta.info
        meta.types <- mSetObj$dataSet$meta.types
        for (i in 1:ncol(meta.info)) {
          if (meta.types[i] == "disc") {
            cls.lbl <- meta.info[, i]
            qb.inx <- tolower(cls.lbl) %in% c("qc",
                                              "blank")
            if (sum(qb.inx) > 0) {
              cls.Clean <- as.factor(as.character(cls.lbl[!qb.inx]))
            }
            else {
              cls.Clean <- cls.lbl
            }
            meta.name <- colnames(meta.info)[i]
            if (min(table(cls.Clean)) < 3 | length(levels(cls.Clean)) <
                2) {
              cls.vec <- c(cls.vec, meta.name)
            }
          }
        }
      }
      else {
        msg <- c(msg, paste0(cls.num, " groups were detected in samples: ",
                             paste(levels(mSetObj$dataSet$cls), collapse = ", ")))
      }
      mSetObj$dataSet$cls.num <- cls.num
      mSetObj$dataSet$min.grp.size <- min.grp.size
    }
    if (substring(mSetObj$dataSet$format, 4, 5) == "ts") {
      metadata <- mSetObj$dataSet$meta.info
      nfacA <- metadata[, 1]
      nfacB <- metadata[, 2]
      if (mSetObj$dataSet$design.type == "time" |
          mSetObj$dataSet$design.type == "time0") {
        if (tolower(colnames(metadata)[1]) == "time") {
          time.fac <- nfacA
          exp.fac <- nfacB
        }
        else {
          time.fac <- nfacB
          exp.fac <- nfacA
        }
        ord.inx <- order(exp.fac)
      }
      else {
        ord.inx <- order(nfacA)
      }
      mSetObj$dataSet$orig.cls <- mSetObj$dataSet$orig.cls[ord.inx]
      mSetObj$dataSet$url.smp.nms <- mSetObj$dataSet$url.smp.nms[ord.inx]
      mSetObj$dataSet$facA <- mSetObj$dataSet$orig.facA <- metadata[,
                                                                    1][ord.inx]
      mSetObj$dataSet$facB <- mSetObj$dataSet$orig.facB <- metadata[,
                                                                    2][ord.inx]
      orig.data <- orig.data[ord.inx, ]
      mSetObj$dataSet$meta.info <- mSetObj$dataSet$meta.info[rownames(orig.data),
      ]
      qs::qsave(orig.data, file = "data_orig.qs")
      mSetObj$dataSet$data_orig <- orig.data
    }
    else {
      ord.inx <- order(mSetObj$dataSet$orig.cls)
      mSetObj$dataSet$orig.cls <- cls[ord.inx]
      mSetObj$dataSet$url.smp.nms <- mSetObj$dataSet$url.smp.nms[ord.inx]
      orig.data <- orig.data[ord.inx, , drop = FALSE]
      mSetObj$dataSet$data_orig <- orig.data
      qs::qsave(orig.data, file = "data_orig.qs")
      if (mSetObj$dataSet$paired) {
        mSetObj$dataSet$pairs <- mSetObj$dataSet$pairs[ord.inx]
      }
    }
  }
  msg <- c(msg, "Only English letters, numbers, underscore, hyphen and forward slash (/) are allowed.")
  msg <- c(msg, "Other special characters or punctuations (if any) will be stripped off.")
  int.mat <- orig.data
  if (ncol(int.mat) == 1) {
    if (anal.type == "roc") {
      mSetObj$dataSet$roc_cols <- 1
    }
    else {
      AddErrMsg("One-column data is only supported for biomarker analysis.")
      return(0)
    }
  }
  else {
    mSetObj$dataSet$roc_cols <- 2
  }
  rowNms <- rownames(int.mat)
  colNms <- colnames(int.mat)
  naNms <- sum(is.na(int.mat))
  for (c in 1:ncol(int.mat)) {
    if (class(int.mat[, c]) == "integer64") {
      int.mat[, c] <- as.double(int.mat[, c])
    }
  }
  num.mat <- apply(int.mat, 2, as.numeric)
  if (sum(is.na(num.mat)) > naNms) {
    num.mat <- apply(int.mat, 2, function(x) as.numeric(gsub(",",
                                                             "", x)))
    if (sum(is.na(num.mat)) > naNms) {
      msg <- c(msg, "Non-numeric values were found and replaced by NA.")
    }
    else {
      msg <- c(msg, "All data values are numeric.")
    }
  }
  else {
    msg <- c(msg, "All data values are numeric.")
  }
  int.mat <- num.mat
  rownames(int.mat) <- rowNms
  colnames(int.mat) <- colNms
  varCol <- apply(int.mat, 2, var, na.rm = T)
  constCol <- (varCol == 0 | is.na(varCol))
  constNum <- sum(constCol, na.rm = T)
  if (constNum > 0) {
    msg <- c(msg, paste("<font color=\"red\">", constNum,
                        "features with a constant or single value across samples were found and deleted.</font>"))
    int.mat <- int.mat[, !constCol, drop = FALSE]
  }
  totalCount <- nrow(int.mat) * ncol(int.mat)
  naCount <- sum(is.na(int.mat)|int.mat==0)
  naPercent <- round(100 * naCount/totalCount, 1)
  mSetObj$dataSet$missingCount <- naCount
  msg <- c(msg, paste("A total of ", naCount, " (",
                      naPercent, "%) missing or zero values were detected.",
                      sep = ""))
  mSetObj$msgSet$missing.msg <- msg[length(msg)]

  qs::qsave(as.data.frame(int.mat), "preproc.qs")
  mSetObj$dataSet$preproc <- as.data.frame(int.mat)
  mSetObj$dataSet$proc.cls <- mSetObj$dataSet$cls <- mSetObj$dataSet$orig.cls
  if (is.null(mSetObj$dataSet$meta.info)) {
    mSetObj$dataSet$meta.info <- data.frame(mSetObj$dataSet$cls)
    colnames(mSetObj$dataSet$meta.info) = "Class"
  }
  if (substring(mSetObj$dataSet$format, 4, 5) == "ts") {
    mSetObj$dataSet$proc.facA <- mSetObj$dataSet$orig.facA
    mSetObj$dataSet$proc.facB <- mSetObj$dataSet$orig.facB
  }
  mSetObj$msgSet$check.msg <- c(mSetObj$msgSet$read.msg, msg)

  print(mSetObj$msgSet$check.msg)

  return(mSetObj)
}

#' Remove defined samples and features from the metabolomics dataset.
#'
#' @description \code{met.UpdateData} updates the mSet object after removing features or samples. This step is included in the data preparation workflow \code{\link[VisomX]{met.read_data}}.
#'
#' @param mSetObj Enter the name of the created mSet object (see \code{\link[VisomX]{met.read_data}}).
#' @param filt.feat (Character vector) Enter the names of features (compounds) to remove from the dataset.
#' @param filt.smpl (Character vector) Enter the names of samples to remove from the dataset.
#' @param filt.grp (Character vector) Enter the names of groups (conditions) to remove from the dataset.
#' @references adapted from \code{\link[MetaboAnalystR]{UpdateData}} (\url{https://github.com/xia-lab/MetaboAnalystR}).
#' @author Nicolas T. Wirth \email{mail.nicowirth@gmail.com}
#' Technical University of Denmark
#' License: GNU GPL (>= 2)
#' @export
#'
met.UpdateData <- function (mSetObj = NA,
          filt.feat = c(""),
          filt.smpl = c(""),
          filt.grp = c(""))
{

  mSetObj$dataSet$edit <- NULL
  if (is.null(mSetObj$dataSet$filt) && !is.null(mSetObj$dataSet$data_proc)){
    data <- mSetObj$dataSet$data_proc
    cls <- mSetObj$dataSet$proc.cls
    if (substring(mSetObj$dataSet$format, 4, 5) == "ts") {
      facA <- mSetObj$dataSet$proc.facA
      facB <- mSetObj$dataSet$proc.facB
    }
  } else if (is.null(mSetObj$dataSet$filt) && !is.null(mSetObj$dataSet$preproc)) {
    data <- mSetObj$dataSet$preproc
    cls <- mSetObj$dataSet$proc.cls
    if (substring(mSetObj$dataSet$format, 4, 5) == "ts") {
      facA <- mSetObj$dataSet$proc.facA
      facB <- mSetObj$dataSet$proc.facB
    }
  } else {
    data <- mSetObj$dataSet$filt
    cls <- mSetObj$dataSet$filt.cls
    if (substring(mSetObj$dataSet$format, 4, 5) == "ts") {
      facA <- mSetObj$dataSet$filt.facA
      facB <- mSetObj$dataSet$filt.facB
    }
  }
  if(!all(filt.feat %in% colnames(data))){
    no_hit.idx <- (filt.feat %in% colnames(data)) == FALSE
    no_hit.feat <- filt.feat[no_hit.idx]
    stop(paste0("'", no_hit.feat, "' is/are no valid feature names in the dataset."), call. = FALSE)
  }
  if(!all(filt.smpl %in% rownames(data))){
    no_hit.idx <- (filt.smpl %in% rownames(data)) == FALSE
    no_hit.smpl <- filt.smpl[no_hit.idx]
    stop(paste0(paste0("'", no_hit.smpl, collapse = "', "), "' is/are no valid sample names in the dataset. Valid samples are, for example:\n",
                paste0("'", sample(rownames(data), size = 5), collapse = "', "), "'"), call. = FALSE)
  }
  if(!all(filt.grp %in% levels(mSetObj$dataSet$cls))){
    no_hit.idx <- (filt.grp %in% levels(mSetObj$dataSet$cls)) == FALSE
    no_hit.grp <- filt.grp[no_hit.idx]
    stop(paste0(paste0("'", no_hit.grp, collapse = "', "), "' is/are no valid group names in the dataset. Valid groups (conditions) are, for example:\n",
                paste0("'", sample(levels(mSetObj$dataSet$cls), size = 3), collapse = "', "), "'"), call. = FALSE)
  }
  feat.hit.inx <- colnames(data) %in% filt.feat
  data <- CleanDataMatrix(data[, !feat.hit.inx, drop = FALSE])
  smpl.hit.inx <- rownames(data) %in% filt.smpl
  data <- CleanDataMatrix(data[!smpl.hit.inx, , drop = FALSE])
  cls <- as.factor(as.character(cls[!smpl.hit.inx]))
  if (substring(mSetObj$dataSet$format, 4, 5) == "ts") {
    facA <- as.factor(as.character(facA[!smpl.hit.inx]))
    facB <- as.factor(as.character(facB[!smpl.hit.inx]))
  }
  grp.hit.inx <- cls %in% filt.grp
  data <- CleanDataMatrix(data[!grp.hit.inx, , drop = FALSE])
  cls <- droplevels(factor(cls[!grp.hit.inx]))
  if (substring(mSetObj$dataSet$format, 4, 5) == "ts") {
    facA <- droplevels(factor(facA[!grp.hit.inx]))
    facB <- droplevels(factor(facB[!grp.hit.inx]))
  }
  print("Successfully updated the data!")
  mSetObj$dataSet$edit <- data
  mSetObj$dataSet$edit.cls <- cls
  if (substring(mSetObj$dataSet$format, 4, 5) == "ts") {
    mSetObj$dataSet$edit.facA <- facA
    mSetObj$dataSet$edit.facB <- facB
  }
  mSetObj <- met.PreparePrenormData(mSetObj)
  return(mSetObj)
}

#' Test for optimal data processing conditions
#'
#' @description \code{met.test_normalization} runs a metabolomics analysis workflow with
#'   several normalization/transformation/scaling combinations in parallel. The workflow
#'   includes univariate analysis (ANOVA) and multivariate analyses (PCA and PLS-DA) and
#'   creates a report with various performance indicators that help with deciding for
#'   optimal data preprocessing conditions.
#' @param mSetObj Enter the name of the created mSetObj (see \code{\link[VisomX]{met.read_data}})
#' @param test_conditions (Character vector) Enter the combinations of rowNorm, transNorm,
#'   and scaleNorm separated by "/" to be tested in the workflow. See
#'   \code{\link[VisomX]{met.normalize}} for suitable options. If \code{NA}, the
#'   normalization test workflow will automatically choose \code{c("NULL", "MeanCenter",
#'   "LogNorm", "LogNorm/AutoNorm", "CrNorm", "CrNorm/AutoNorm", "AutoNorm", "RangeNorm",
#'   "ParetoNorm")} as default conditions. \emph{Please note}: rowNorm option "SpecNorm" is not supported by this workflow.
#' @param ref (Character vector) Enter the name(s) of the reference sample(s) or the
#'   reference feature(s) for \code{rowNorm = "GroupPQN"}, \code{"SamplePQN"}, or
#'   \code{"CompNorm"} in their respective order in \code{test_conditions}. Add "NULL" if the \code{ref} argument is not applicable to the respective test_condition.
#' @param class_order (Logical, \code{TRUE} or \code{FALSE}) Class order matters (i.e. implying time points, disease severity, etc.)
#' @param alpha (Numeric) Enter significance threshold for adjusted p values (false discovery rate - FDR; for ANOVA with post-hoc analyses).
#' @param lfc (Numeric) Enter relevance threshold for log2 fold changes in pair-wise comparisons (for ANOVA with post-hoc analyses).
#' @param posthoc_method (Character) Enter the name of the ANOVA post-hoc test, \code{"fisher"} or \code{"tukey"}.
#' @param nonpar (Logical) Use a non-parametric ANOVA test (\code{TRUE}) or not (\code{FALSE}).
#' @param pls.cv.method (Character) Enter one of two methods for PLS-DA model (cross) validation:
#'\itemize{
#'  \item \code{"LOOCV"} performs leave-one-out cross validation
#'  \item \code{"CV"} performs k-fold cross validation.
#'  }
#' @param pls.cv.k (Numeric) The number of (randomized) groups that the dataset is to be split into during cross validation if \code{methodName = "CV"}. This value must be equal to or smaller than the number of samples.
#' @param dpi (Numeric) Resolution of PNG images (default is 300 dpi).
#' @param pls.data (Character) Enter \code{"all"} to train the PLS(-DA) model on your whole (filtered and normalized) dataset or \code{"anova"} to use a subset of features defined as significant based on ANOVA analysis.
#' @param permut.num (Numeric) Number of permutations in PLS-DA permutation tests.
#' @param vip.thresh (Numeric) Enter a chosen relevance threshold for PLS-DA VIP scores.
#' @param report (Logical) Generate a report with results of this workflow (\code{TRUE}) or not (\code{FALSE}).
#' @param report.dir (Character) Enter the name or location of the folder in which the
#'   report files are generated if \code{report = TRUE}. If \code{NULL} (the default), a
#'   new folder "Report_\emph{date-time}" is created in your working directory.
#' @param export (Logical) Export generated plots as PDF or PNG files (\code{TRUE}) or not (\code{FALSE}).
#' @param export.format (Character, \code{"png"} or \code{"pdf"}) image file format (if \code{export = TRUE}).
#' @param export.dir (Character) Enter the name or location of a folder in which all generated files are saved.
#' @author Nicolas T. Wirth \email{mail.nicowirth@gmail.com}
#' Technical University of Denmark
#' License: GNU GPL (>= 2)
#' @export
met.test_normalization <- function(mSetObj,
                                   test_conditions = NULL,
                                   ref = NULL,
                                   class_order = FALSE,
                                   alpha = 0.05,
                                   lfc = 2,
                                   posthoc_method = "tukey",
                                   nonpar = FALSE,
                                   pls.cv.method = "LOOCV",
                                   pls.cv.k = 10,
                                   dpi = 300,
                                   pls.data = "all",
                                   permut.num = 500,
                                   vip.thresh = 1,
                                   report = FALSE,
                                   report.dir = NULL,
                                   export = FALSE,
                                   export.format = "pdf",
                                   export.dir = "met.Test_Normalization")
{

  dir.create(paste0(getwd(), "/", export.dir), showWarnings = F)
  wd <- getwd()
  setwd(paste0(wd, "/", export.dir))
  if(is.null(ref)){
    ref <- rep("NULL", length(test_conditions))
  } else if(length(ref) != length(test_conditions)){
    stop("Please provide a character vector for 'ref = c()' with the length as 'test_conditions' (", length(test_conditions), "). Indicate 'NULL' if no reference sample/group/feature is required for the respective set of normalization conditions.",call. = FALSE)
  }
  if (export == TRUE){
    dir.create(paste0(getwd(), "/Plots"), showWarnings = F)
  }
  conditions <- levels(mSetObj$dataSet$cls)
  if(!is.null(mSetObj$dataSet$prenorm)){
    proc.data <- data.frame(mSetObj$dataSet$prenorm)
  } else if(!is.null(mSetObj$dataSet$data_proc)){
    proc.data <- mSetObj$dataSet$data_proc
  } else {
    proc.data <- qs::qread("data_proc.qs")
  }
  cntrst <- apply(utils::combn(conditions, 2), 2, paste,
                  collapse = " - ")
  test.cond_tmp <- list()
  test.cond_list <- list()
  if(is.null(test_conditions)){
    test_conditions <- c("NULL", "MeanCenter", "LogNorm", "LogNorm/AutoNorm", "CrNorm", "CrNorm/AutoNorm", "AutoNorm", "RangeNorm", "ParetoNorm")
  } else if (!(any(test_conditions == "NULL") | any(test_conditions == "NULL/NULL") | any(test_conditions == "NULL/NULL/NULL"))){
    test_conditions <- append(test_conditions, "NULL", after = 0)
    if(!is.null(ref)){
      ref <- append(ref, "NULL", after = 0)
    }
  }

  for(i in 1:length(test_conditions)){
    test.cond_tmp[i] <- str_split(test_conditions[i], "/")
    test.cond_list[i] <- test.cond_tmp[i]
    for(j in 1:length(test.cond_tmp[[i]])){
      if(!any(is.element(c("NULL", "MeanCenter", "AutoNorm", "ParetoNorm", "RangeNorm",
                           "LogNorm", "CrNorm",
                           "GroupPQN", "SamplePQN", "QuantileNorm", "CompNorm", "SumNorm", "MedianNorm"), test.cond_tmp[[i]][j]))){
        stop(paste0("'", test.cond_tmp[[i]][j], "' is not a valid option for 'rowNorm', 'transNorm', or 'scaleNorm'. Valid options are:\n",
                    "rowNorm: 'NULL', 'GroupPQN', 'SamplePQN', 'QuantileNorm', 'CompNorm', 'SumNorm', 'MedianNorm'\n",
                    "transNorm: 'NULL', 'LogNorm', 'CrNorm'\n",
                    "scaleNorm: 'NULL', 'MeanCenter', 'AutoNorm', 'ParetoNorm', 'RangeNorm'"), call. = F)
      }
    }
    test.cond_list[[i]][1] <- "NULL"
    test.cond_list[[i]][2] <- "NULL"
    test.cond_list[[i]][3] <- "NULL"

    if(any(is.element(c("GroupPQN", "SamplePQN", "CompNorm", "SumNorm", "MedianNorm"), test.cond_tmp[[i]][1]))){
      test.cond_list[[i]][1] <- test.cond_tmp[[i]][1]
    }
    if(any(is.element(c("GroupPQN", "SamplePQN", "CompNorm", "SumNorm", "MedianNorm"), test.cond_tmp[[i]][2]))){
      test.cond_list[[i]][1] <- test.cond_tmp[[i]][2]
    }
    if(any(is.element(c("GroupPQN", "SamplePQN", "CompNorm", "SumNorm", "MedianNorm"), test.cond_tmp[[i]][3]))){
      test.cond_list[[i]][1] <- test.cond_tmp[[i]][3]
    }
    if(any(is.element(c("LogNorm", "CrNorm"), test.cond_tmp[[i]][1]))){
      test.cond_list[[i]][2] <- test.cond_tmp[[i]][1]
    }
    if(any(is.element(c("LogNorm", "CrNorm"), test.cond_tmp[[i]][2]))){
      test.cond_list[[i]][2] <- test.cond_tmp[[i]][2]
    }
    if(any(is.element(c("LogNorm", "CrNorm"), test.cond_tmp[[i]][3]))){
      test.cond_list[[i]][2] <- test.cond_tmp[[i]][3]
    }
    if(any(is.element(c("NULL", "MeanCenter", "AutoNorm", "ParetoNorm", "RangeNorm"), test.cond_tmp[[i]][1]))){
      test.cond_list[[i]][3] <- test.cond_tmp[[i]][1]
    }
    if(any(is.element(c("NULL", "MeanCenter", "AutoNorm", "ParetoNorm", "RangeNorm"), test.cond_tmp[[i]][2]))){
      test.cond_list[[i]][3] <- test.cond_tmp[[i]][2]
    }
    if(any(is.element(c("NULL", "MeanCenter", "AutoNorm", "ParetoNorm", "RangeNorm"), test.cond_tmp[[i]][3]))){
      test.cond_list[[i]][3] <- test.cond_tmp[[i]][3]
    }

    names(test.cond_list)[i] <- paste0(if(test.cond_list[[i]][1]=="NULL" &
                                          test.cond_list[[i]][2]=="NULL" &
                                          test.cond_list[[i]][3]=="NULL") {"untransformed"},
                                       if_else(test.cond_list[[i]][1] != "NULL", test.cond_list[[i]][1],""),
                                       if_else(test.cond_list[[i]][1] != "NULL" & (test.cond_list[[i]][2]!="NULL" | test.cond_list[[i]][3]!="NULL"), "_", ""),
                                       if_else(test.cond_list[[i]][2] != "NULL", test.cond_list[[i]][2],""),
                                       if_else(test.cond_list[[i]][2] != "NULL" & test.cond_list[[i]][3]!="NULL", "_", ""),
                                       if_else(test.cond_list[[i]][3] != "NULL", test.cond_list[[i]][3],"")
    )
  }
  ref_list <- list()
  for(i in 1:length(ref)){
    ref_list[[i]] <- ref[i]
  }
  list_names <- names(test.cond_list)
  mSet_list <- list()
  for(i in 1:length(test.cond_list)){
    mSet_list[[i]] <- met.normalize(mSetObj, rowNorm = test.cond_list[[i]][1],
                                    transNorm = test.cond_list[[i]][2],
                                    scaleNorm = test.cond_list[[i]][3],
                                    ref = ref_list[i][1])
  }
  names(mSet_list) <- list_names
  mSet_list <- sapply(1:length(mSet_list), function(x) met.ANOVA.Anal(mSet_list[[x]], nonpar = nonpar, thresh = alpha, post.hoc = posthoc_method, all_results = TRUE, silent = TRUE), simplify = FALSE,
                      USE.NAMES = TRUE )
  mSet_list <- sapply(1:length(mSet_list), function(x) met.PCA.Anal(mSet_list[[x]]), simplify = FALSE, USE.NAMES = TRUE )
  mSet_list <- sapply(1:length(mSet_list), function(x) met.PLSR.Anal(mSet_list[[x]], reg = class_order, data = pls.data), simplify = FALSE, USE.NAMES = TRUE )
  options(warn=1)
  for (i in 1:length(mSet_list)){
    cat(paste0("Performing cross-validation test for PLS-DA model with condition: ", names(test.cond_list)[i], "\n"))
    mSet_list[[i]] <-
      met.PLSDA.CV(
        mSet_list[[i]],
        methodName = pls.cv.method,
        compNum = GetDefaultPLSCVComp(mSet_list[[i]]),
        data = pls.data, k = pls.cv.k
      )

  }
  options(warn=0)
  names(mSet_list) <- list_names

  anova_significant <- list()
  for (i in 1:length(mSet_list)){
    mSet_list[[i]][["analSet"]][["plsda"]][["vip_sig_comp1"]] <- mSet_list[[i]][["analSet"]][["plsda"]][["vip.mat"]][mSet_list[[i]][["analSet"]][["plsda"]][["vip.mat"]][,1]>1, 1]
    mSet_list[[i]][["analSet"]][["plsda"]][["class_order"]] <- class_order
    mSet_list[[i]][["analSet"]][["plsda"]][["vip_sig_comp2"]] <- mSet_list[[i]][["analSet"]][["plsda"]][["vip.mat"]][mSet_list[[i]][["analSet"]][["plsda"]][["vip.mat"]][,2]>1, 2]
    mSet_list[[i]][["analSet"]][["aov"]][["significant"]] <- mSet_list[[i]][["analSet"]][["aov"]][["inx.imp"]][mSet_list[[i]][["analSet"]][["aov"]][["inx.imp"]]==TRUE]
    mSet_list[[i]][["analSet"]][["contrasts"]] <- cntrst

  }
  names(mSet_list) <- list_names

  mSet_list$vip_sig_comp1 <- lapply(1:length(list_names), function(x) names(mSet_list[[x]][["analSet"]][["plsda"]][["vip_sig_comp1"]]) )
  mSet_list$vip_sig_comp2 <- lapply(1:length(list_names), function(x) names(mSet_list[[x]][["analSet"]][["plsda"]][["vip_sig_comp2"]]) )
  mSet_list$anova_significant <- lapply(1:length(list_names), function(x) names(mSet_list[[x]][["analSet"]][["aov"]][["significant"]]) )
  names(mSet_list$vip_sig_comp1) <- names(mSet_list$vip_sig_comp2) <- names(mSet_list$anova_significant) <- list_names
  mSet_list$vip_sig_comp1 <- mSet_list$vip_sig_comp1[!sapply(mSet_list$vip_sig_comp1,is.null)]
  mSet_list$vip_sig_comp2 <- mSet_list$vip_sig_comp2[!sapply(mSet_list$vip_sig_comp2,is.null)]
  mSet_list$anova_significant <- mSet_list$anova_significant[!sapply(mSet_list$anova_significant,is.null)]

  f_venn <- function(list, title, export = T, format = "pdf", imgName = "Venn") {
    venn <- ggVennDiagram::ggVennDiagram(
      list,
      color = "black",
      lwd = 0.8,
      lty = 1,
      label_alpha = 0,
      label_size = 4.5,
      set_size = 5.5) +
      scale_fill_gradient(low = "#fcfeff", high = "#4981BF") +
      scale_x_continuous(expand = expansion(mult = 0.3)) + ggtitle(title) +
      theme(plot.title = element_text(
        size = 14,
        face = "bold",
        hjust = 0.5),
        legend.position = c(0.9, 0.5),
        legend.key.size = unit(1, "cm"),
        legend.title = element_text(size=16),
        legend.text = element_text(size=14))
    if (export == TRUE) {
      w = 10
      h = 8
      if (format == "pdf") {
        grDevices::pdf(
          file = imgName,
          width = w,
          height = h,
          bg = "white",
          onefile = FALSE)
      }
      else {
        grDevices::png(
          filename = imgName,
          units = "in",
          res = dpi,
          width = w,
          height = h,
          bg = "white")
      }
      print(venn)
      grDevices::dev.off()
    }
    return(venn)
  }
  dir.create("Plots/Venn Diagrams", showWarnings = F)
  # Venn Diagram of features with VIP1 > 1
  if(length(mSet_list$vip_sig_comp1)<5){
    mSet_list$plot.venn_VIP1 <- f_venn(mSet_list$vip_sig_comp1, "PLS-DA Component 1 \u2013 VIP score > 1", export = export, format = export.format, imgName= paste0("Plots/Venn Diagrams/Venn_PLSDA_vip_comp1.", export.format))
  } else if(length(mSet_list$vip_sig_comp1)<7){
    mSet_list$plot.venn_VIP1 <- list()
    mSet_list$plot.venn_VIP1[[1]] <-   f_venn(mSet_list$vip_sig_comp1[1:4], paste0("PLS-DA Component 1 \u2013 VIP score > " ,vip.thresh), export = export, format = export.format, imgName= paste0("Plots/Venn Diagrams/Venn_PLSDA_vip_comp1_A.", export.format))
    mSet_list$plot.venn_VIP1[[2]] <-   f_venn(mSet_list$vip_sig_comp1[c(1,5:length(mSet_list$vip_sig_comp1))], paste0("PLS-DA Component 1 \u2013 VIP score > " ,vip.thresh), export = export, format = export.format, imgName= paste0("Plots/Venn Diagrams/Venn_PLSDA_vip_comp1_B.", export.format))
  } else if(length(mSet_list$vip_sig_comp1)<10){
    mSet_list$plot.venn_VIP1 <- list()
    mSet_list$plot.venn_VIP1[[1]] <- f_venn(mSet_list$vip_sig_comp1[1:5], paste0("PLS-DA Component 1 \u2013 VIP score > " ,vip.thresh), export = export, format = export.format, imgName= paste0("Plots/Venn Diagrams/Venn_PLSDA_vip_comp1_A.", export.format))
    mSet_list$plot.venn_VIP1[[2]] <- f_venn(mSet_list$vip_sig_comp1[c(1,6:length(mSet_list$vip_sig_comp1))], paste0("PLS-DA Component 1 \u2013 VIP score > " ,vip.thresh), export = export, format = export.format, imgName= paste0("Plots/Venn Diagrams/Venn_PLSDA_vip_comp1_B.", export.format))
  } else {
    mSet_list$plot.venn_VIP1 <- list()
    mSet_list$plot.venn_VIP1[[1]] <- f_venn(mSet_list$vip_sig_comp1[1:5], paste0("PLS-DA Component 1 \u2013 VIP score > " ,vip.thresh), export = export, format = export.format, imgName= paste0("Plots/Venn Diagrams/Venn_PLSDA_vip_comp1_A.", export.format))
    mSet_list$plot.venn_VIP1[[2]] <- f_venn(mSet_list$vip_sig_comp1[c(1,6:10)], paste0("PLS-DA Component 1 \u2013 VIP score > " ,vip.thresh), export = export, format = export.format, imgName= paste0("Plots/Venn Diagrams/Venn_PLSDA_vip_comp1_B.", export.format))
    mSet_list$plot.venn_VIP1[[3]] <- f_venn(mSet_list$vip_sig_comp1[c(1,11:length(mSet_list$vip_sig_comp1))], paste0("PLS-DA Component 1 \u2013 VIP score > " ,vip.thresh), export = export, format = export.format, imgName= paste0("Plots/Venn Diagrams/Venn_PLSDA_vip_comp1_C.", export.format))
  }

  # Venn Diagram of features with VIP2 > 1
  if(length(mSet_list$vip_sig_comp2)<5){
    mSet_list$plot.venn_VIP2 <- f_venn(mSet_list$vip_sig_comp2, paste0("PLS-DA Component 2 \u2013 VIP score > " ,vip.thresh), export = export, format = export.format, imgName= paste0("Plots/Venn Diagrams/Venn_PLSDA_vip_comp2.", export.format))

  } else if(length(mSet_list$vip_sig_comp2)<7){
    mSet_list$plot.venn_VIP2 <- list()
    mSet_list$plot.venn_VIP2[[1]] <- f_venn(mSet_list$vip_sig_comp2[1:4], paste0("PLS-DA Component 2 \u2013 VIP score > " ,vip.thresh), export = export, format = export.format, imgName= paste0("Plots/Venn Diagrams/Venn_PLSDA_vip_comp2_A.", export.format))
    mSet_list$plot.venn_VIP2[[2]] <- f_venn(mSet_list$vip_sig_comp2[c(1,5:length(mSet_list$vip_sig_comp2))], paste0("PLS-DA Component 2 \u2013 VIP score > " ,vip.thresh), export = export, format = export.format, imgName= paste0("Plots/Venn Diagrams/Venn_PLSDA_vip_comp2_B.", export.format))

  } else if(length(mSet_list$vip_sig_comp2)<10){
    mSet_list$plot.venn_VIP2 <- list()
    mSet_list$plot.venn_VIP2[[1]] <- f_venn(mSet_list$vip_sig_comp2[1:5], paste0("PLS-DA Component 2 \u2013 VIP score > " ,vip.thresh), export = export, format = export.format, imgName= paste0("Plots/Venn Diagrams/Venn_PLSDA_vip_comp2_A.", export.format))
    mSet_list$plot.venn_VIP2[[2]] <- f_venn(mSet_list$vip_sig_comp2[c(1,6:length(mSet_list$vip_sig_comp2))], paste0("PLS-DA Component 2 \u2013 VIP score > " ,vip.thresh), export = export, format = export.format, imgName= paste0("Plots/Venn Diagrams/Venn_PLSDA_vip_comp2_B.", export.format))
  } else {
    mSet_list$plot.venn_VIP2 <- list()
    mSet_list$plot.venn_VIP2[[1]] <- f_venn(mSet_list$vip_sig_comp2[1:5], paste0("PLS-DA Component 2 \u2013 VIP score > " ,vip.thresh), export = export, format = export.format, imgName= paste0("Plots/Venn Diagrams/Venn_PLSDA_vip_comp2_A.", export.format))
    mSet_list$plot.venn_VIP2[[2]] <- f_venn(mSet_list$vip_sig_comp2[c(1,6:10)], paste0("PLS-DA Component 2 \u2013 VIP score > " ,vip.thresh), export = export, format = export.format, imgName= paste0("Plots/Venn Diagrams/Venn_PLSDA_vip_comp2_B.", export.format))
    mSet_list$plot.venn_VIP2[[3]] <- f_venn(mSet_list$vip_sig_comp2[c(1,11:length(mSet_list$vip_sig_comp2))], paste0("PLS-DA Component 2 \u2013 VIP score > " ,vip.thresh), export = export, format = export.format, imgName= paste0("Plots/Venn Diagrams/Venn_PLSDA_vip_comp2_C.", export.format))
  }

  # Venn Diagram of significant features based on ANOVA test
  if(length(mSet_list$anova_significant)<5){
    mSet_list$plot.venn_anova_signif <- f_venn(mSet_list$anova_significant, paste0("Features with FDR ", "\u2264 ", alpha), export = export, format = export.format, imgName= paste0("Plots/Venn Diagrams/Venn_ANOVA_signif.", export.format))
  } else if(length(mSet_list$anova_significant)<7){
    mSet_list$plot.venn_anova_signif <- list()
    mSet_list$plot.venn_anova_signif[[1]] <- f_venn(mSet_list$anova_significant[1:4], paste0("Features with FDR ", "\u2264 ", alpha), export = export, format = export.format, imgName= paste0("Plots/Venn Diagrams/Venn_ANOVA_signif_A.", export.format))
    mSet_list$plot.venn_anova_signif[[2]] <- f_venn(mSet_list$anova_significant[c(1,5:length(mSet_list$anova_significant))], paste0("Features with FDR ", "\u2264 ", alpha), export = export, format = export.format, imgName= paste0("Plots/Venn Diagrams/Venn_ANOVA_signif_B.", export.format))

  } else if(length(mSet_list$anova_significant)<10){
    mSet_list$plot.venn_anova_signif <- list()
    mSet_list$plot.venn_anova_signif[[1]] <- f_venn(mSet_list$anova_significant[1:5], paste0("Features with FDR ", "\u2264 ", alpha), export = export, format = export.format, imgName= paste0("Plots/Venn Diagrams/Venn_ANOVA_signif_A.", export.format))
    mSet_list$plot.venn_anova_signif[[2]] <- f_venn(mSet_list$anova_significant[c(1,6:length(mSet_list$anova_significant))], paste0("Features with FDR ", "\u2264 ", alpha), export = export, format = export.format, imgName= paste0("Plots/Venn Diagrams/Venn_ANOVA_signif_B.", export.format))
  } else {
    mSet_list$plot.venn_anova_signif <- list()
    mSet_list$plot.venn_anova_signif[[1]] <- f_venn(mSet_list$anova_significant[1:5], paste0("Features with FDR ", "\u2264 ", alpha), export = export, format = export.format, imgName= paste0("Plots/Venn Diagrams/Venn_ANOVA_signif_A.", export.format))
    mSet_list$plot.venn_anova_signif[[2]] <- f_venn(mSet_list$anova_significant[c(1,6:10)], paste0("Features with FDR ", "\u2264 ", alpha), export = export, format = export.format, imgName= paste0("Plots/Venn Diagrams/Venn_ANOVA_signif_B.", export.format))
    mSet_list$plot.venn_anova_signif[[3]] <- f_venn(mSet_list$anova_significant[c(1,11:length(mSet_list$anova_significant))], paste0("Features with FDR ", "\u2264 ", alpha), export = export, format = export.format, imgName= paste0("Plots/Venn Diagrams/Venn_ANOVA_signif_C.", export.format))
  }

  # Create Venn Diagram of significant features and VIP>vip.thresh components
  anova_vip1_list <- list()
  names_common <- intersect(names(mSet_list$anova_significant), names(mSet_list$vip_sig_comp1))
  for (i in 1:length(names_common)){
    anova_vip1_list[[i]] <- list(mSet_list$anova_significant[[match(names_common[i], names(mSet_list$anova_significant))]], mSet_list$vip_sig_comp1[[match(names_common[i], names(mSet_list$vip_sig_comp1))]])
    names(anova_vip1_list[[i]]) <- c("ANOVA_signif", paste0("PLS-DA_VIP1 > ", vip.thresh))
  }
  names(anova_vip1_list) <- names_common

  mSet_list$plot.venn_anova_vs_vip1 <- list()
  only_vip1_list <- list()

  for(i in 1:length(names(anova_vip1_list))){
    if(!is.null(anova_vip1_list[[i]][[1]]) && !is.null(anova_vip1_list[[i]][[2]])){
      mSet_list$plot.venn_anova_vs_vip1[[i]] <- ggVennDiagram::ggVennDiagram(anova_vip1_list[[i]], color = "black", lwd = 0.8, lty = 1,label_alpha = 0, label_size = 4.5, set_size = 5.5) +
        scale_fill_gradient(low = "#fcfeff", high = "#4981BF") + ggtitle(names(anova_vip1_list[i])) +
        theme(plot.title = element_text(hjust=0.5, vjust = 5, size = 20, face = "bold"),
              legend.key.size = unit(1, "cm"),
              legend.title = element_text(size=16),
              legend.text = element_text(size=14))
    } else {
      mSet_list$plot.venn_anova_vs_vip1[[i]] <- ggplot()+geom_blank()+theme_bw()
    }
    df_i = if (nrow(data.frame(
      "VIP1 - ANOVA" = Setdiff(anova_vip1_list[[i]][2], anova_vip1_list[[i]][1]),
      check.names = F
    )) != 0) {
      data.frame(
        "VIP1 - ANOVA" = Setdiff(anova_vip1_list[[i]][2], anova_vip1_list[[i]][1]),
        check.names = F
      )
    } else {
      df_i = ""
    }
    only_vip1_list[[i]] <- ggpubr::ggtexttable(df_i, rows = NULL, theme = ggpubr::ttheme(base_style = "light", base_size = 12)) %>%
      {if(!is.null(nrow(df_i))) ggpubr::tab_add_hline(tab = ., at.row = 1:2, row.side = "top", linewidth = 2) else .}

    mSet_list$plot.venn_anova_vs_vip1[[i]] <- ggpubr::ggarrange(mSet_list$plot.venn_anova_vs_vip1[[i]], only_vip1_list[[i]],
                                                                ncol = 2, nrow = 1,
                                                                widths = c(10,3))
  }
  names(mSet_list$plot.venn_anova_vs_vip1) <- names_common

  # Extract elements of Venn Diagram VIP1
  mSet_list$vip_sig_comp1[["combs"]] <-
    unlist(lapply(1:length(mSet_list$vip_sig_comp1), function(j)
      utils::combn(names(mSet_list$vip_sig_comp1), j, simplify = FALSE)),
      recursive = FALSE)
  names(mSet_list$vip_sig_comp1[["combs"]]) <-
    sapply(mSet_list$vip_sig_comp1[["combs"]], function(i)
      paste0(i, collapse = " & "))
  mSet_list$vip_sig_comp1[["combs>0"]] <-
    lapply(mSet_list$vip_sig_comp1[["combs"]], function(i)
      Setdiff(mSet_list$vip_sig_comp1[i], mSet_list$vip_sig_comp1[setdiff(names(mSet_list$vip_sig_comp1), i)])) %>%
    Filter(function(x)
      any(length(x) != 0), .)

  # Extract elements of Venn Diagram VIP2
  mSet_list$vip_sig_comp2[["combs"]] <-
    unlist(lapply(1:length(mSet_list$vip_sig_comp2), function(j)
      utils::combn(names(mSet_list$vip_sig_comp2), j, simplify = FALSE)),
      recursive = FALSE)
  names(mSet_list$vip_sig_comp2[["combs"]]) <-
    sapply(mSet_list$vip_sig_comp2[["combs"]], function(i)
      paste0(i, collapse = " & "))
  mSet_list$vip_sig_comp2[["combs>0"]] <-
    lapply(mSet_list$vip_sig_comp2[["combs"]], function(i)
      Setdiff(mSet_list$vip_sig_comp2[i], mSet_list$vip_sig_comp2[setdiff(names(mSet_list$vip_sig_comp2), i)])) %>%
    Filter(function(x)
      any(length(x) != 0), .)

  # Extract elements of Venn ANOVA significant
  mSet_list$anova_significant[["combs"]] <-
    unlist(lapply(1:length(mSet_list$anova_significant), function(j)
      utils::combn(names(mSet_list$anova_significant), j, simplify = FALSE)),
      recursive = FALSE)
  names(mSet_list$anova_significant[["combs"]]) <-
    sapply(mSet_list$anova_significant[["combs"]], function(i)
      paste0(i, collapse = " & "))
  mSet_list$anova_significant[["combs>0"]] <-
    lapply(mSet_list$anova_significant[["combs"]], function(i)
      Setdiff(mSet_list$anova_significant[i], mSet_list$anova_significant[setdiff(names(mSet_list$anova_significant), i)])) %>%
    Filter(function(x)
      any(length(x) != 0), .)
  if (export == TRUE) {
    w = 10
    h = 8
    for(i in 1:length(mSet_list$plot.venn_anova_vs_vip1)){
      imgName = paste0("Plots/Venn Diagrams/Venn_ANOVA-signif_vs_PLSDA-VIP1_", names(mSet_list$plot.venn_anova_vs_vip1)[i], ".", export.format)
      if (export.format == "pdf") {
        grDevices::pdf(
          file = imgName,
          width = w,
          height = h,
          bg = "white",
          onefile = FALSE
        )
      }
      else {
        grDevices::png(
          filename = imgName,
          units = "in",
          res = dpi,
          width = w,
          height = h,
          bg = "white"
        )
        print(mSet_list$plot.venn_anova_vs_vip1[[i]])
        grDevices::dev.off()
      }
    }
  }
  names(mSet_list) <- c(list_names, names(mSet_list[(length(list_names)+1):length(mSet_list)]))

  mSet_list <- c(lapply(1:length(list_names), function(x) met.plot_FeatureNormSummary(mSetObj = mSet_list[[x]], imgName = paste0("Plots/FeatureNormSummary_", names(mSet_list)[x]),
                                                                                      plot = F, show_prenorm = F, export = export, format = export.format)),
                 mSet_list[(length(list_names)+1):length(mSet_list)])

  names(mSet_list) <- c(list_names, names(mSet_list[(length(list_names)+1):length(mSet_list)]))

  mSet_list <- c(lapply(1:length(list_names), function(x) met.plot_SampleNormSummary(mSetObj = mSet_list[[x]], imgName = paste0("Plots/SampleNormSummary_", names(mSet_list)[x]),
                                                                                     plot = F, show_prenorm = F, export = export, format = export.format)),
                 mSet_list[(length(list_names)+1):length(mSet_list)])


  names(mSet_list) <- c(list_names, names(mSet_list[(length(list_names)+1):length(mSet_list)]))

  dir.create("Plots/ANOVA", showWarnings = F)

  mSet_list <- c(lapply(1:length(list_names), function(x) met.plot_ANOVA(mSetObj = mSet_list[[x]], imgName = paste0("Plots/ANOVA/ANOVA-Plot_", names(mSet_list)[x]),
                                                                         format = export.format, subtitle = TRUE,
                                                                         plot = F, export = export, dpi = 300)),
                 mSet_list[(length(list_names)+1):length(mSet_list)])
  names(mSet_list) <- c(list_names, names(mSet_list[(length(list_names)+1):length(mSet_list)]))

  dir.create("Plots/PCA", showWarnings = F)

  mSet_list <- c(lapply(1:length(list_names), function(x) met.plot_PCA2DScore(mSetObj = mSet_list[[x]], imgName = paste0("Plots/PCA/PCA-2DScore_", names(mSet_list)[x]),
                                                                              pcx = 1, pcy = 2, reg = 0.95, show = 0, format = export.format,
                                                                              plot = F, export = export, dpi = 300, subtitle = TRUE)),
                 mSet_list[(length(list_names)+1):length(mSet_list)])
  names(mSet_list) <- c(list_names, names(mSet_list[(length(list_names)+1):length(mSet_list)]))

  mSet_list <- c(lapply(1:length(list_names), function(x) met.plot_PCA2DLoading(mSetObj = mSet_list[[x]], imgName = paste0("Plots/PLSDA/PCA-Loadings_", names(mSet_list)[x]),
                                                                                inx1 = 1, inx2 = 2, plot = F, export = export, format = export.format, subtitle = TRUE)),
                 mSet_list[(length(list_names)+1):length(mSet_list)])

  names(mSet_list) <- c(list_names, names(mSet_list[(length(list_names)+1):length(mSet_list)]))

  dir.create("Plots/PLSDA", showWarnings = F)

  mSet_list <- c(lapply(1:length(list_names), function(x) met.plot_PLS2DScore(mSetObj = mSet_list[[x]], imgName = paste0("Plots/PLSDA/PLS-2DScore_", names(mSet_list)[x]),
                                                                              inx1 = 1, inx2 = 2, reg = 0.95, show = 0, format = export.format, subtitle = TRUE,
                                                                              plot = F, export = export, dpi = 300)),
                 mSet_list[(length(list_names)+1):length(mSet_list)])
  names(mSet_list) <- c(list_names, names(mSet_list[(length(list_names)+1):length(mSet_list)]))

  mSet_list <- c(lapply(1:length(list_names), function(x) met.plot_PLS2DLoading(mSetObj = mSet_list[[x]], imgName = paste0("Plots/PLSDA/PLS-Loadings_", names(mSet_list)[x]),
                                                                                inx1 = 1, inx2 = 2, plot = F, export = export, format = export.format, subtitle = TRUE)),
                 mSet_list[(length(list_names)+1):length(mSet_list)])

  names(mSet_list) <- c(list_names, names(mSet_list[(length(list_names)+1):length(mSet_list)]))

  mSet_list <- c(lapply(1:length(list_names), function(x) suppressWarnings(met.plot_PLS_Imp(mSetObj = mSet_list[[x]], imgName = paste0("Plots/PLSDA/PLSDA-coef_coef.mean_", names(mSet_list)[x]),
                                                                                            type = "coef", feat.nm = "coef.mean", feat.num = 15,
                                                                                            color.BW = FALSE, plot = F, export = export, format = export.format))),
                 mSet_list[(length(list_names)+1):length(mSet_list)])

  names(mSet_list) <- c(list_names, names(mSet_list[(length(list_names)+1):length(mSet_list)]))

  mSet_list <- c(lapply(1:length(list_names), function(x) suppressWarnings(met.plot_PLS_Imp(mSetObj = mSet_list[[x]], imgName = paste0("Plots/PLSDA/PLSDA-VIP1_", names(mSet_list)[x]),
                                                                                            type = "vip", feat.nm = "Comp. 1", feat.num = 15,
                                                                                            color.BW = FALSE, plot = F, export = export, format = export.format))),
                 mSet_list[(length(list_names)+1):length(mSet_list)])

  names(mSet_list) <- c(list_names, names(mSet_list[(length(list_names)+1):length(mSet_list)]))

  for(i in levels(mSet_list[[1]][["dataSet"]][["cls"]])){
    names(mSet_list) <- c(list_names, names(mSet_list[(length(list_names)+1):length(mSet_list)]))
    mSet_list <- c(lapply(1:length(list_names), function(x) suppressWarnings(met.plot_PLS_Imp(mSetObj = mSet_list[[x]], imgName = paste0("Plots/PLSDA/PLSDA-coef_", i, "_", names(mSet_list)[x]),
                                                                                              type = "coef", feat.nm = i, feat.num = 15,
                                                                                              color.BW = FALSE, plot = F, export = export, format = export.format))),
                   mSet_list[(length(list_names)+1):length(mSet_list)])
  }

  mSet_list <- c(lapply(1:length(list_names), function(x) met.PLSDA.Permut(mSetObj = mSet_list[[x]], permut.num, "bw")),
                 mSet_list[(length(list_names)+1):length(mSet_list)])

  names(mSet_list) <- c(list_names, names(mSet_list[(length(list_names)+1):length(mSet_list)]))

  mSet_list <- c(lapply(1:length(list_names), function(x) suppressWarnings(met.plot_PLSImpScatter(mSetObj = mSet_list[[x]], imgName = paste0("Plots/PLSDA/PLS_Scatter_VIP1+2__vs_coef.mean_", names(mSet_list)[x]),
                                                                                                  feat.nm = "coef.mean",  vip.nm=c("Comp. 1", "Comp. 2"),  dpi=72,
                                                                                                  plot = F, export = export, format = export.format, show.title = TRUE,
                                                                                                  title = paste0(
                                                                                                    "(",
                                                                                                    stringr::str_replace(mSet_list[[x]][["dataSet"]][["rownorm.method"]], "N/A", ""),
                                                                                                    "/",
                                                                                                    stringr::str_replace(mSet_list[[x]][["dataSet"]][["trans.method"]], "N/A", ""),
                                                                                                    "/",
                                                                                                    stringr::str_replace(mSet_list[[x]][["dataSet"]][["scale.method"]], "N/A", ""),
                                                                                                    ")"
                                                                                                  ) %>% str_replace_all(., "(?<![:alpha:])/", "") %>%
                                                                                                    str_replace_all(., "/(?![:alpha:])", "") %>%
                                                                                                    str_replace_all(., "\\(\\)", "(no normalization/transformation)") %>%
                                                                                                    stringr::str_wrap(., 53) ))),
                 mSet_list[(length(list_names)+1):length(mSet_list)])

  for(i in levels(mSet_list[[1]][["dataSet"]][["cls"]])){
    names(mSet_list) <- c(list_names, names(mSet_list[(length(list_names)+1):length(mSet_list)]))
    mSet_list <- c(lapply(1:length(list_names), function(x) suppressWarnings(met.plot_PLSImpScatter(mSetObj = mSet_list[[x]], imgName = paste0("Plots/PLSDA/PLS_Scatter_VIP1+2_vs_coef-", i, names(mSet_list)[x]),
                                                                                                    feat.nm = i,  vip.nm=c("Comp. 1", "Comp. 2"), dpi=72, vip.thresh = vip.thresh,
                                                                                                    plot = F, export = export, format = export.format, show.title = TRUE,
                                                                                                    title = paste0(
                                                                                                      "(",
                                                                                                      stringr::str_replace(mSet_list[[x]][["dataSet"]][["rownorm.method"]], "N/A", ""),
                                                                                                      "/",
                                                                                                      stringr::str_replace(mSet_list[[x]][["dataSet"]][["trans.method"]], "N/A", ""),
                                                                                                      "/",
                                                                                                      stringr::str_replace(mSet_list[[x]][["dataSet"]][["scale.method"]], "N/A", ""),
                                                                                                      ")"
                                                                                                    ) %>% str_replace_all(., "(?<![:alpha:])/", "") %>%
                                                                                                      str_replace_all(., "/(?![:alpha:])", "") %>%
                                                                                                      str_replace_all(., "\\(\\)", "(no normalization/transformation)") %>%
                                                                                                      stringr::str_wrap(., 53)  ))),
                   mSet_list[(length(list_names)+1):length(mSet_list)])
  }

  names(mSet_list) <- c(list_names, names(mSet_list[(length(list_names)+1):length(mSet_list)]))

  try(mSet_list <- c(lapply(1:length(list_names), function(x) met.plot_PLS.Crossvalidation(mSetObj = mSet_list[[x]],
                                                                                           imgName = paste0("Plots/PLSDA/PLSDA-CrossValidation_", names(mSet_list)[x]),
                                                                                           export = export, format = export.format, title = TRUE)),
                     mSet_list[(length(list_names)+1):length(mSet_list)]))

  mSet_list <- c(lapply(1:length(list_names), function(x) met.plot_PLS.Permutation(mSetObj = mSet_list[[x]],
                                                                                   imgName = paste0("Plots/PLSDA/PLSDA-Permutation_", names(mSet_list)[x]),
                                                                                   plot=F, export = export, format = export.format, title = TRUE)),
                 mSet_list[(length(list_names)+1):length(mSet_list)])

  names(mSet_list) <- c(list_names, names(mSet_list[(length(list_names)+1):length(mSet_list)]))

  if(!is.null(grDevices::dev.list())){
    grDevices::dev.off()
  }
  if(report==TRUE) {
    stack_size <- getOption("pandoc.stack.size", default = "512m")
    met.report_test_normalization(
      mSet_list,
      report.dir = report.dir,
      list_names = list_names,
      contrasts = cntrst,
      alpha = alpha,
      lfc = lfc,
      vip.thresh = vip.thresh,
      names_common = names_common,
      ref_list = ref_list
    )
  }
  setwd(wd)
  return(mSet_list)
}

#' Perform metabolomics data analysis workflow
#'
#' @description \code{met.workflow} runs a metabolomics analysis workflow, including
#'   univariate analysis (ANOVA) and multivariate analyses (PCA and PLS-DA) and creates a
#'   report with various graphical results.
#' @param mSetObj Enter the name of the created mSet object (see \code{\link[VisomX]{met.read_data}}).
#' @param rowNorm (Character) Select the option for row-wise normalization (see \code{\link[VisomX]{met.normalize}} for options).
#' @param transNorm (Character) Select option to transform the data (see \code{\link[VisomX]{met.normalize}} for options).
#' @param scaleNorm (Character) Select option for scaling the data (see \code{\link[VisomX]{met.normalize}} for options).
#' @param ref (Character) Enter the name of the reference sample or the reference feature (if \code{rowNorm = "GroupPQN"}, \code{"SamplePQN"}, or \code{"CompNorm"}.
#' @param norm.vec (Numeric vector) Vector with sample-specific scaling factors. Only applicable for \code{rowNorm = "SpecNorm"}.
#' @param contrast.type
#' @param control
#' @param contrast
#' @param class_order (Logical, \code{TRUE} or \code{FALSE}) Class order matters (i.e. implying time points, disease severity, etc.).
#' @param alpha (Numeric) Enter significance threshold for adjusted p values (false discovery rate - FDR; for ANOVA with post-hoc analyses).
#' @param lfc (Numeric) Enter relevance threshold for log2 fold changes in pair-wise comparisons (for ANOVA with post-hoc analyses).
#' @param posthoc_method (Character) Enter the name of the ANOVA post-hoc test, \code{"fisher"} or \code{"tukey"}.
#' @param nonpar (Logical) Use a non-parametric ANOVA test (\code{TRUE}) or not (\code{FALSE}).
#' @param permut.num (Numeric) Number of permutations in PLS-DA permutation tests.
#' @param vip.thresh (Numeric) Enter a chosen relevance threshold for PLS-DA VIP scores.
#' @param volcano.test (Character) Enter the name of the statistical, univariate analysis to perform, either \code{"anova"} or \code{"ttest"}.
#' @param volcano.add_names (Logical) Display compound labels next to dots within the volcano plot (\code{TRUE}) or not (\code{FALSE}).
#' @param volcano.label_size (Numerical) Enter the font size of compound labels within the volcano plot (if \code{volcano.add_names = TRUE})
#' @param volcano.adjusted (Logical) Display adjusted p-values on y axis of the volcano plot (\code{TRUE}) or not (\code{FALSE})? (only applicable to \code{volcano.test = "ttest"})
#' @param pls.data (Character) Enter \code{"all"} to train the PLS(-DA) model on your whole (filtered and normalized) dataset or \code{"anova"} to use a subset of features defined as significant based on ANOVA analysis.
#' @param pls.cv.method (Character) Enter one of two methods for PLS-DA model (cross) validation:
#'\itemize{
#'  \item \code{"LOOCV"} performs leave-one-out cross validation
#'  \item \code{"CV"} performs k-fold cross validation.
#'  }
#' @param pls.cv.k (Numeric) The number of (randomized) groups that the dataset is to be split into during cross validation if \code{methodName = "CV"}. This value must be equal to or smaller than the number of samples.
#' @param plot (Logical) Display the generated plots in the Plots pane in RStudio (\code{TRUE}) or not (\code{FALSE}).
#' @param export (Logical) Export generated plots as PDF or PNG files (\code{TRUE}) or not (\code{FALSE}).
#' @param export.format (Character, \code{"png"} or \code{"pdf"}) image file format (if \code{export = TRUE}).
#' @param dpi (Numeric) Resolution of PNG images (default is 300 dpi).
#' @param report (Logical) Generate a report with results of this workflow (\code{TRUE}) or not (\code{FALSE}).
#' @param report.dir (Character) Enter the name or location of the folder in which the report files are generated if \code{report = TRUE}. If \code{NULL} (the default), a new folder "Report_\emph{date-time}" is created in your working directory.
#' @param export.dir (Character) Enter the name of location of a folder in which all generated files are saved.
#' @export
#' @author Nicolas T. Wirth \email{mail.nicowirth@gmail.com}
#' Technical University of Denmark
#' License: GNU GPL (>= 2)


met.workflow <- function(mSetObj,
         rowNorm = "NULL", # Option for row-wise normalization
         transNorm = "NULL", # Option to transform the data, "LogNorm" for Log Normalization, and "CrNorm" for Cubic Root Transformation.
         scaleNorm = "NULL", # Option for scaling the data, "MeanCenter" for Mean Centering, "AutoNorm" for Autoscaling, "ParetoNorm" for Pareto Scaling, amd "RangeNorm" for Range Scaling.
         ref = NULL, # Enter the name of the reference sample or the reference feature, use " " around the name (for rowNorm="CompNorm" or "SpecNorm").
         norm.vec = NULL,
         contrast.type = "all",
         control = NULL,
         contrast = NULL,
         class_order = FALSE, # Class order matters (i.e. implying time points, disease severity, etc.)
         alpha = 0.05, # Significance threshold for adj. p values.
         lfc = 2, # Relevance threshold for log2(fold change) values.
         posthoc_method = "tukey",
         nonpar = FALSE,
         permut.num = 500,
         vip.thresh = 1,
         volcano.test = "anova",
         volcano.add_names = TRUE, # Display names next to symbols in volcano plot.
         volcano.label_size = 3, # Size of labels in volcano plot if
         volcano.adjusted = FALSE, # Display adjusted p-values on y axis of volcano plot? (only for volcano.test = "ttest"),
         pls.data = "all",
         pls.cv.method = "LOOCV",
         pls.cv.k = 5,
         plot = FALSE, # Shall plots be returned?
         export = FALSE, # Shall plots be exported as PDF and PNG files?
         export.format = "pdf",
         dpi = 300,
         report = FALSE, # Shall a report (HTML and PDF) be created?
         report.dir = NULL, # Folder name for created report (if report = TRUE)
         export.dir = "met.ProcessedData"
)
{
  # Show error if inputs are not the required classes
  if(is.integer(alpha)) alpha <- as.numeric(alpha)
  if(is.integer(lfc)) lfc <- as.numeric(lfc)
  assertthat::assert_that(is.numeric(alpha),
                          length(alpha) == 1,
                          is.numeric(lfc),
                          length(lfc) == 1)
  if(!(rowNorm %in% c('NULL', 'QuantileNorm', 'CompNorm', 'SumNorm', 'MedianNorm', 'SpecNorm'))){
    stop(paste0(rowNorm, " is not a valid option for 'rowNorm'. Valid options are:\n",
                "'NULL', 'QuantileNorm', 'CompNorm', 'SumNorm', 'MedianNorm', 'SpecNorm'"))
  }
  if(!(transNorm %in% c('NULL', 'LogNorm', 'CrNorm'))){
    stop(paste0(transNorm, " is not a valid option for 'transNorm'. Valid options are:\n",
                "'NULL', 'LogNorm', 'CrNorm'"))
  }
  if(!(scaleNorm %in% c('NULL', 'MeanCenter', 'AutoNorm', 'ParetoNorm', 'RangeNorm'))){
    stop(paste0(scaleNorm, " is not a valid option for 'scaleNorm'. Valid options are:\n",
                "'NULL', 'MeanCenter', 'AutoNorm', 'ParetoNorm', 'RangeNorm'"))
  }
  dir.create(paste0(getwd(), "/", export.dir), showWarnings = F)
  wd <- getwd()
  file.copy("data_orig.qs", paste0(wd, "/", export.dir), overwrite = T)
  file.copy("preproc.qs", paste0(wd, "/", export.dir), overwrite = T)
  file.copy("prenorm.qs", paste0(wd, "/", export.dir), overwrite = T)
  file.copy("data_proc.qs", paste0(wd, "/", export.dir), overwrite = T)

  setwd(paste0(wd, "/", export.dir))

  if (export == TRUE){
    dir.create(paste0(getwd(), "/Plots"), showWarnings = F)
    dir.create(paste0(getwd(), "/Plots/PLSDA"), showWarnings = F)
  }
  conditions <- levels(mSetObj$dataSet$cls)

  if (contrast.type == "all") {
    # Automatically gather all possible condition combinations.
    cntrst <- rev(apply(utils::combn(rev(conditions), 2), 2, paste,
                        collapse = "-"))
    if (!is.null(control)) {
      if ((!control %in% conditions)) {
        stop(
          paste0(
            "You chose '", control, "' as control, which is not a valid condition in your dataset. \nValid controls are: '",
            paste0(conditions, collapse = "', '"), "'."), call. = FALSE)
      }
      # If a control sample is indicated, get the indices of contrasts in which the
      # control condition is placed in the front ("control-sample") and revert the order.
      flip <- grep(paste("^", control, sep = ""),
                   cntrst)
      if (length(flip) >= 1) {
        cntrst[flip] <- cntrst[flip] %>% gsub(paste(control,
                                                    "-", sep = ""), "", .) %>%
          paste("-", control, sep = "")
      }
    }
  }
  if (contrast.type == "control") {
    if (is.null(control)){
      stop(
        paste0(
          "run test_diff(type = 'control') with a 'control' argument.\n",
          "Possible control conditions are: '",
          paste(conditions,
                collapse = "', '"),
          "'"
        )
      )
    }
    cntrst <- paste(conditions[!conditions %in% control],
                    control, sep = " - ")
  }
  if (contrast.type == "manual") {
    if (is.null(contrast)) {
      stop(
        paste0(
          "Run met.workflow(contrast.type = 'manual') with a 'contrast' argument.\n",
          "Valid contrasts should contain combinations of: '",
          paste(conditions,
                collapse = "', '"
          ),
          "', for example '",
          paste0(conditions[1],
                 "_vs_", conditions[2]),
          "'."
        ) ,
        call. = FALSE)
    }
    assertthat::assert_that(is.character(contrast))
    if (any(!unlist(strsplit(contrast, "_vs_")) %in% conditions)) {
      stop("Run met.workflow() with valid contrasts in 'contrast'",
           ".\nValid contrasts should contain combinations of: '",
           paste0(conditions, collapse = "', '"),
           "', for example '", paste0(conditions[1],
                                      "_vs_", conditions[2]), "'.", call. = FALSE)
    }
    cntrst <- gsub("_vs_", " - ", contrast)
  }
  message("Tested contrasts: ", paste(gsub(" - ",
                                           "_vs_", cntrst), collapse = ", "))

  mSetObj[["analSet"]][["contrasts"]] <- cntrst

  mSetObj <- met.normalize(mSetObj, rowNorm = rowNorm, transNorm = transNorm, scaleNorm = scaleNorm, ref = ref)

  mSetObj <- met.ANOVA.Anal(mSetObj, nonpar = nonpar, thresh = alpha, post.hoc = posthoc_method, all_results = TRUE)
  mSetObj <- met.PCA.Anal(mSetObj)
  mSetObj <- met.PLSR.Anal(mSetObj, reg = class_order, data = pls.data)
  options(warn=1)
  mSetObj <- met.PLSDA.CV(mSetObj, methodName = pls.cv.method, data = pls.data, k = pls.cv.k)
  options(warn=0)

  mSetObj[["analSet"]][["plsda"]][["class_order"]] <- class_order

  mSetObj <- met.plot_FeatureNormSummary(mSetObj = mSetObj, imgName = "Plots/norm_summary_features_", format = export.format,
                                         dpi = 300, width=NA, show_prenorm = TRUE,
                                         plot = plot, export = export)

  mSetObj <- met.plot_SampleNormSummary(mSetObj = mSetObj, imgName = "Plots/norm_summary_samples_", format = export.format,
                                        dpi = 300, width=NA, show_prenorm = TRUE,
                                        plot = plot, export = export)

  try(mSetObj <- met.plot_CorrHeatMap_Samples(mSetObj, imgName = "Plots/correlation_samples_", format = export.format,
                                              dpi = 300, plot = plot, export = export))

  mSetObj <- met.plot_CorrHeatMap_Features(mSetObj, imgName = "Plots/correlation_features_", format = export.format,
                                           dpi = 300, cor.method = "pearson",
                                           plot = plot, export = export)

  mSetObj <- met.plot_ANOVA(mSetObj, imgName = paste0("Plots/ANOVA-Plot"), format = export.format, subtitle = FALSE,
                            plot = F, export = export, dpi = 300)

  mSetObj <- met.plot_PCAScree(mSetObj, imgName = "Plots/PCA_ScreePlot_", format = export.format,
                               dpi = 300, width=NA, scree.num = 6,
                               plot = plot, export = export)

  mSetObj <- met.plot_PCA2DScore(mSetObj, imgName = "Plots/PCA_2DScores_PC1-PC2", format = export.format,
                                 dpi = 300, width=NA, pcx = 1, pcy = 2, reg = 0.95,
                                 show = 0, grey.scale = 0, plot = plot, export = export)

  mSetObj <- met.plot_PCA2DScore(mSetObj, imgName = "Plots/PCA_2DScores_PC2-PC3", format = export.format,
                                 dpi = 300, width=NA, pcx = 1, pcy = 3, reg = 0.95,
                                 show = 0, grey.scale = 0, plot = plot, export = export)

  mSetObj <- met.plot_PCA2DLoading(mSetObj, imgName = "Plots/PCA_LoadingPlot_", format = export.format,
                                 dpi = 300, width=NA, inx1 = 1, inx2 = 2,
                                 plot = plot, export = export)

  mSetObj <- met.plot_PCA2DLoading(mSetObj, imgName = "Plots/PCA_LoadingPlot_", format = export.format,
                                 dpi = 300, width=NA, inx1 = 1, inx2 = 3,
                                 plot = plot, export = export)

  mSetObj <- met.plot_PLS2DScore(mSetObj, "Plots/PLSDA/PLSDA_2DScores_C1-C2", format = export.format, dpi = 300,
                                 width = NA, inx1 = 1, inx2 = 2, reg = 0.95, show = 0, grey.scale = 0,
                                 plot = plot, export = export)

  mSetObj <- met.plot_PLS2DScore(mSetObj, "Plots/PLSDA/PLSDA_2DScores_C2-C3", format = export.format, dpi = 300,
                                 width = NA, inx1 = 1, inx2 = 3, reg = 0.95, show = 0, grey.scale = 0,
                                 plot = plot, export = export)

  mSetObj <- met.plot_PLS2DLoading(mSetObj, "Plots/PLSDA/PLSDA_LoadingPlot_", format = export.format, dpi = 300,
                                 width=NA, inx1 = 1, inx2 = 2, plot = plot, export = export)

  mSetObj <- met.plot_PLS2DLoading(mSetObj, "Plots/PLSDA/PLSDA_LoadingPlot_", format = export.format, dpi = 300,
                                 width=NA, inx1 = 1, inx2 = 3, plot = plot, export = export)

  mSetObj <- met.PLSDA.Permut(mSetObj, permut.num, "bw")

  mSetObj <- met.plot_PLS.Permutation(mSetObj, "Plots/PLSDA/PLS-DA_permutation", format = export.format, dpi = 300, width=NA,
                                      plot = plot, export = export)

  mSetObj <- met.plot_PLS.Crossvalidation(mSetObj, imgName = paste0("Plots/PLSDA/PLSDA-CrossValidation"),
                                         export = export, format = export.format, title = FALSE)

  mSetObj <- met.plot_PLS_Imp(mSetObj, paste0("Plots/PLSDA/PLSDA_imp_Coeff_", "coef.mean"), format = export.format, dpi = 300, width=NA,
                              type = "coef", feat.nm = "coef.mean", feat.num = 20, color.BW = FALSE, export = export)

  for(i in unique(mSetObj$dataSet$cls)){
    mSetObj <- met.plot_PLS_Imp(mSetObj, paste0("Plots/PLSDA/PLSDA_imp_Coeff_", i), format = export.format, dpi = 300, width=NA,
                                type = "coef", feat.nm = i, feat.num = 20, color.BW = FALSE, export = export)
  }

  mSetObj <- met.plot_PLS_Imp(mSetObj, "Plots/PLSDA/PLSDA_imp_VIP_Comp1", format = export.format, dpi = 300, width=NA,
                              type = "vip", feat.nm = "Comp. 1", feat.num = 20, color.BW = FALSE, export = export)

  mSetObj <- met.plot_PLS_Imp(mSetObj, "Plots/PLSDA/PLSDA_imp_VIP_Comp2", format = export.format, dpi = 300, width=NA,
                              type = "vip", feat.nm = "Comp. 2", feat.num = 20, color.BW = FALSE, export = export)

  mSetObj <- met.plot_PLSImpScatter(mSetObj, "Plots/PLSDA/PLSDA_Scatter_coef.mean_vs_VIP1",
                                feat.nm = "coef.mean",  vip.nm = c("Comp. 1", "Comp. 2"),  dpi=72,
                                plot = F, export = export, format = export.format)

  for (i in levels(mSetObj[["dataSet"]][["cls"]])) {
    mSetObj <-
      met.plot_PLSImpScatter(mSetObj,
                         paste0("Plots/PLSDA/PLSDA_Scatter_coef-", i, "_vs_VIP1_"), feat.nm = i,  vip.nm = c("Comp. 1", "Comp. 2"),
                         dpi = 72, vip.thresh = vip.thresh, plot = F, export = export, format = export.format)
  }

  mSetObj <- met.plot_PCA3DScore(mSetObj, "Plots/PLSDA/PLSDA_3DScores", format = "json", export=export)
  mSetObj <- met.plot_PCA3DLoading(mSetObj, "Plots/PLSDA/PLSDA_3DScores", format = "json", export=export)
  mSetObj <- met.plot_PLS3DScore(mSetObj, "Plots/PLSDA/PLSDA_3DScores", format = "json", export=export)
  mSetObj <- met.plot_PLS3DLoading(mSetObj, "Plots/PLSDA/PLSDA_3DScores", format = "json", export=export)



  for(i in 1:length(cntrst)){
    grp1 <- gsub("-.+", "", cntrst[i])
    grp2 <- gsub(".+-", "", cntrst[i])
    mSetObj <- met.FC.Anal(mSetObj, log2fc.thresh = lfc, grp1 = grp1, grp2 = grp2, paired= FALSE)
    mSetObj <- met.plot_volcano(mSetObj, grp1 = grp2, grp2 = grp1,  label_size = volcano.label_size, test = volcano.test,
                                plot = plot, export = export, threshp = alpha, dpi = 300,
                                pval.type = if_else(volcano.adjusted==TRUE, "fdr", "raw"), format = export.format )
  }

  setwd(wd)
  if(report==TRUE){
    stack_size <- getOption("pandoc.stack.size", default = "512m")
    met.report(mSetObj, report.dir=report.dir, contrasts=cntrst, alpha=alpha, lfc=lfc, vip.thresh=vip.thresh)
  }
  return(mSetObj)
}



#' Perform a Principal Component Analysis
#'
#' This function performs a Principal Component Analysis (PCA) on the normalized data set stored in the mSetObj object.
#'
#' @param mSetObj An mSet object containing normalized data
#'
#' @return An mSet object with the PCA results stored in the analSet element.
#'
#' @export
#'
#'
met.PCA.Anal <- function (mSetObj = NA)
{
  ...
}
met.PCA.Anal <- function (mSetObj = NA)
{
  pca <- stats::prcomp(mSetObj$dataSet$norm, center = TRUE, scale = F)
  sum.pca <- summary(pca)
  imp.pca <- sum.pca$importance
  std.pca <- imp.pca[1, ]
  var.pca <- imp.pca[2, ]
  cum.pca <- imp.pca[3, ]
  mSetObj$analSet$pca <- append(pca, list(std = std.pca, variance = var.pca,
                                          cum.var = cum.pca))
  fast.write.csv(signif(mSetObj$analSet$pca$x, 5), file = "pca_score.csv")
  fast.write.csv(signif(mSetObj$analSet$pca$rotation, 5), file = "pca_loadings.csv")
  mSetObj$analSet$pca$loading.type <- "all"
  mSetObj$custom.cmpds <- c()
  return(mSetObj)
}

#'Perform PLS-DA permutation
#'@description \code{met.read_data} performs PLS-DA permutation using training classification accuracy as
#'indicator, for two or more groups
#'@param mSetObj Enter name of the created mSet object.
#'@param num (Numeric) Enter the number of permutations to perform.
#'@param type (Character) Type of accuracy indicator. \code{"accu"} for prediction accuracy, \code{"sep"} for separation distance.
#'@references adapted from \code{\link[MetaboAnalystR]{PLSDA.Permut}} (\url{https://github.com/xia-lab/MetaboAnalystR}).
#'@export
met.PLSDA.Permut <- function (mSetObj = NA, num = 500, type = "accu")
{
  orig.cls <- cls <- as.numeric(mSetObj$dataSet$cls)
  datmat <- as.matrix(mSetObj$dataSet$norm)
  best.num <- mSetObj$analSet$plsda$best.num
  Get.pls.bw <- function(dummy) {
    cls <- cls[order(runif(length(cls)))]
    pls <- caret::plsda(datmat, as.factor(cls), ncomp = best.num)
    pred <- stats::predict(pls, datmat)
    Get.bwss(pred, cls)
  }
  Get.pls.accu <- function(dummy) {
    cls <- cls[order(runif(length(cls)))]
    pls <- caret::plsda(datmat, as.factor(cls), ncomp = best.num)
    pred <- stats::predict(pls, datmat)
    sum(pred == cls)/length(cls)
  }
  pls <- caret::plsda(datmat, as.factor(orig.cls), ncomp = best.num)
  pred.orig <- predict(pls, datmat)
  if (type == "accu") {
    perm.type = "prediction accuracy"
    res.orig <- sum(pred.orig == orig.cls)/length(orig.cls)
    res.perm <- Perform.permutation(num, Get.pls.accu)
  }
  else {
    perm.type = "separation distance"
    res.orig <- Get.bwss(pred.orig, orig.cls)
    res.perm <- Perform.permutation(num, Get.pls.bw)
  }
  perm.num <- res.perm$perm.num
  perm.res <- res.perm$perm.res
  perm.vec <- c(res.orig, unlist(perm.res, use.names = FALSE))
  inf.found = TRUE
  if (sum(is.finite(perm.vec)) == length(perm.vec)) {
    inf.found = FALSE
  }
  else {
    if (sum(is.finite(perm.vec)) == 0) {
      perm.vec <- rep(10, length(perm.vec))
    }
    else {
      perm.vec[!is.finite(perm.vec)] <- 10 * max(perm.vec[is.finite(perm.vec)])
    }
  }
  better.hits <- sum(perm.vec[-1] >= perm.vec[1])
  if (better.hits == 0) {
    p <- paste("p < ", 1/perm.num, " (", better.hits, "/",
               perm.num, ")", sep = "")
  }
  else {
    p <- better.hits/perm.num
    p <- paste("p = ", signif(p, digits = 5), " (", better.hits,
               "/", perm.num, ")", sep = "")
  }
  mSetObj$analSet$plsda$permut.p <- p
  mSetObj$analSet$plsda$permut.inf <- F
  mSetObj$analSet$plsda$permut.type <- perm.type
  mSetObj$analSet$plsda$permut <- perm.vec
  msg <- paste("Empirical p value:", p)
  print(msg)
  return(mSetObj)
}

#'Permutation
#'@description Perform permutation
#'@param perm.num (Numeric) Enter the number of permutations to perform.
#'@param fun Place holder for function.
#'@references adapted from \code{\link[MetaboAnalystR]{Perform.permutation}} (\url{https://github.com/xia-lab/MetaboAnalystR}).
#'@usage Perform.permutation(perm.num, fun)
Perform.permutation <- function (perm.num, fun)
{
  perm.res <- NULL
  print(paste("performing", perm.num, "permutations ..."))
  perm.res <- c(perm.res, lapply(1:perm.num, fun))
  return(list(perm.res = perm.res, perm.num = perm.num))
}
NicWir/VisomX documentation built on Dec. 8, 2024, 1:27 a.m.