midiv: MiDiv-lab bioinformatics

Documented in nonparDA

#' @name nonparDA
#' @title Non-parametric DA testing
#'
#' @description Testing for differential abundance using non-parametric tests.
#'
#' @usage nonparDA(ps.obj, group = NULL, contrast = NULL, p.adjust.method = "BH", verbose = TRUE)
#'
#' @param ps.obj A phyloseq object.
#' @param group Name of one column in sample_table(ps.obj) used to group the samples.
#' @param contrast Optional specification of which category levels to use, see below.
#' @param p.adjust.method The method used for multiple testing correction, see \code{\link{p.adjust}}.
#' @param verbose Logical to turn on/off output during computing.
#'
#' @details Performs a Kruskal-Wallis non-parametric test for differential abundance
#' for each OTU in a \code{\link{phyloseq}} object.
#'
#' The \code{group} must be the name of a column in \code{sample_table(ps.obj)}
#' that splits the samples into groups.
#'
#' If no \code{contrast} is specified, a Kruskal-Wallis test is performed, using all category levels,
#' i.e. it tests if the abundance for at least one level deviates from at least one other level.
#' If \code{contrast} contains one text it must one of the levels in \code{group}, and then
#' the test is contrasting this level against all the others (A versus not A). If \code{contrast} contains
#' two texts, both must be levels in \code{group}, and the test is contrasting the samples from
#' these two levels only (A versus B).
#'
#' @return A table with the columns
#' \itemize{
#'   \item{OTU}
#'   \item{statistic} The Kruskal-Wallis test statistic
#'   \item{p.value} Raw p-value.
#'   \item{p.adj} Adjusted p-value due to multiple testing
#'  }
#'
#' @author Lars Snipen.
#'
#' @examples
#'
#' @export nonparDA
#'
nonparDA <- function(ps.obj, group = NULL, contrast = NULL, p.adjust.method = "BH", verbose = TRUE){
  if(is.null(group)) stop("You must name a column in sample_data(ps.obj) for grouping the samples!")
  readcount.tbl <- as.data.frame(t(otu_table(ps.obj))) %>%
    mutate(SampleID = rownames(.))
  full.tbl <- as.data.frame(as.matrix(sample_data(ps.obj))) %>%
    mutate(SampleID = rownames(.)) %>%
    select(SampleID, all_of(group)) %>%
    left_join(readcount.tbl, by = "SampleID")
  if(!is.null(contrast)){
    if(length(contrast) == 2){
      full.tbl <- full.tbl %>%
        filter(.data[[group]] %in% contrast)
      tt <- table(full.tbl[,2])
      if(length(tt) != 2) stop("Cannot find both the levels: ", contrast[1], " and ", contrast[2], " in the column ", group)
    } else {
      full.tbl <- full.tbl %>%
        mutate(new_cat = if_else(.data[[group]] == contrast, as.character(contrast), str_c("not_", contrast))) %>%
        select(-all_of(group)) %>%
        select(SampleID, new_cat, everything())
    }
  }
  if(verbose) cat("nonparDA:\nHave", nrow(full.tbl), "samples and", ncol(full.tbl) - 2, "OTUs\n")
  result.tbl <- tibble(OTU = colnames(full.tbl)[-c(1,2)],
                       statistic = 0,
                       p.value = -1,
                       p.adj = -1)
  for(i in 1:nrow(result.tbl)){
    tst <- kruskal.test(x = full.tbl[,2+i], g = full.tbl[,2])
    result.tbl$statistic[i] <- tst$statistic
    result.tbl$p.value[i] <- tst$p.value
  }
  result.tbl$p.adj <- p.adjust(result.tbl$p.value, method = p.adjust.method)
  return(result.tbl)
}