R/micro_rank_sum.R

Defines functions micro_rank_sum

Documented in micro_rank_sum

#' @title Run rank sum tests for each taxa within an OTU table
#' @name micro_rank_sum
#' @description Runs a rank sum test for each taxa within an OTU table or each taxa that didn't converge in \code{\link{mods}}
#' @param micro_set A tidy_micro data set
#' @param table OTU table of interest
#' @param grp_var A factor variable for grouping
#' @param y A continuous response variable. Taxa relative abundance (ra) is recommended
#' @param mod The output from \code{\link{mods}} if desired
#' @param ... Options to be passed to \code{\link[stats]{wilcox.test}} or \code{\link[stats]{kruskal.test}}
#' @references \code{\link[stats]{kruskal.test}} and \code{\link[stats]{wilcox.test}}
#' @details The grp_var must have a least 2 levels. For a 2 level factor a Mann-Whitney test will be calculated through \code{\link[stats]{wilcox.test}}, and for 3 or more levels a Kruskal-Wallis test will be run throuh \code{\link[stats]{kruskal.test}}
#' @return A data frame containing the p-value for each taxa's rank sum test.
#' @examples
#' data(phy); data(cla); data(ord); data(fam); data(met)
#'
#' otu_tabs = list(Phylum = phy, Class = cla, Order = ord, Family = fam)
#' set <- tidy_micro(otu_tabs = otu_tabs, meta = met) %>%
#' filter(day == 7) \%>\% ## Only including the first week
#' mutate(bpd1 = factor(bpd1))
#'
#' ## Rank sum test on every taxa's relative abundance
#' set \%>\% micro_rank_sum(table = "Family", grp_var = bpd1)
#'
#' ## Rank sum test on every taxa whose model didn't converge
#' set \%>\%
#' nb_mods(table = "Family", bpd1) \%>\%
#' micro_rank_sum(table = "Family", grp_var = bpd1, mod = nb_fam)
#' @export
micro_rank_sum <- function(micro_set, table, grp_var, y = ra, mod = NULL, ...){

  if(table %nin% unique(micro_set$Table)) stop("Specified table is not in supplied micro_set")

  ## Kruskal-Wallis rank sum
  if(nlevels(micro_set %>% pull(!!rlang::enquo(grp_var))) > 2){

    cat("Running Kruskal-Wallis rank sum test.\n")
    ## Run on every taxa
    if(is.null(mod)){
      dat <- micro_set %>%
        dplyr::filter(Table == table) %>%
        plyr::ddply(plyr::.(Taxa), function(set){

          ## Pulling grouping variable and y
          grp <- set %>% pull(!!rlang::enquo(grp_var))
          rsp <- set %>% pull(!!rlang::enquo(y))

          if(length(unique(rsp)) == 1){
            X <- ifelse(all(rsp == 0), "All Absent", "All Present")

            dat <- data.frame(Taxa = unique(set$Taxa),
                              P_Val = X)
          } else{
            ##running kruskal-wallis rank sum test
            kwt <- stats::kruskal.test(rsp ~ grp)

            dat <- data.frame(Taxa = unique(set$Taxa),
                              P_Val = kwt$p.value)
          }

         dat
        } )
    } else { ## Running on taxa that didn't converge

      ## Pulling taxa that didn't converge
      if("FE_Converged" %in% names(mod$RA_Summary)){
        if(all(mod$RA_Summary$FE_Converged)) stop("No tests run. All taxa models converged")

        tax <- mod$RA_Summary %>% dplyr::filter(!FE_Converged) %>% dplyr::pull(Taxa)
      }
      if("RE_Converged" %in% names(mod$RA_Summary)){
        if(all(mod$RA_Summary$RE_Converged)) stop("No tests run. All taxa models converged")

        tax <- mod$RA_Summary %>% dplyr::filter(!RE_Converged) %>% dplyr::pull(Taxa)
      }

      dat <- micro_set %>%
        ## Filtering out rank and taxa that didn't converge
        dplyr::filter(Table == table, Taxa %in% tax) %>%
        plyr::ddply(plyr::.(Taxa), function(set){

          ## Pulling grouping variable and y
          grp <- set %>% dplyr::pull(!!rlang::enquo(grp_var))
          rsp <- set %>% dplyr::pull(!!rlang::enquo(y))

          if(length(unique(rsp)) == 1){
            X <- ifelse(all(rsp == 0), "All Absent", "All Present")

            dat <- data.frame(Taxa = unique(set$Taxa),
                              P_Val = X)
          } else{
            ##running kruskal-wallis rank sum test
            kwt <- stats::kruskal.test(rsp ~ grp)

            dat <- data.frame(Taxa = unique(set$Taxa),
                              P_Val = kwt$p.value)
          }

        } )
    }
    ## Wilcoxon for 2 groups
  } else if(nlevels(micro_set %>% pull(!!rlang::enquo(grp_var))) <= 2){

    cat("Running Wilcoxon rank sum test.\n")
    ## Run on every taxa
    if(is.null(mod)){
      dat <- micro_set %>%
        dplyr::filter(Table == table) %>%
        plyr::ddply(plyr::.(Taxa), function(set){

          ## Pulling grouping variable and splitting based on that variable
          grp <- set %>% dplyr::pull(!!rlang::enquo(grp_var))
          spl <- split(set, grp)

          rsp <- c( dplyr::pull(spl[[1]], !!rlang::enquo(y)),
                    dplyr::pull(spl[[2]], !!rlang::enquo(y)) )
          if(length(unique(rsp)) == 1){
            X <- ifelse(all(rsp == 0), "All Absent", "All Present")

            dat <- data.frame(Taxa = unique(set$Taxa),
                              P_Val = X)
          } else{
            ## Running Wilcoxon / Mann-Whitney test
            wct <- stats::wilcox.test(spl[[1]] %>% dplyr::pull(!!rlang::enquo(y)),

                               spl[[2]] %>% dplyr::pull(!!rlang::enquo(y)),

                               ...) %>% broom::tidy

            dat <- data.frame(Taxa = unique(set$Taxa),
                              P_Val = wct$p.value)
          }

        } )
    } else { ## Running on taxa that don't converge

      ## Pulling taxa that didn't converge
      if("FE_Converged" %in% names(mod$RA_Summary)){
        if(all(mod$Convergent_Summary$FE_Converged)) stop("No tests run. All taxa models converged")

        tax <- mod$RA_Summary %>% dplyr::filter(!FE_Converged) %>% dplyr::pull(Taxa)
      }
      if("RE_Converged" %in% names(mod$RA_Summary)){
        if(all(mod$Convergent_Summary$RE_Converged)) stop("No tests run. All taxa models converged")

        tax <- mod$RA_Summary %>% dplyr::filter(!RE_Converged) %>% dplyr::pull(Taxa)
      }

      dat <- micro_set %>%
        ## Filtering out table and taxa that didn't converge
        dplyr::filter(Table == table, Taxa %in% tax) %>%
        plyr::ddply(plyr::.(Taxa), function(set){

          ## Pulling grouping variable and splitting based on that variable
          grp <- set %>% dplyr::pull(!!rlang::enquo(grp_var))
          spl <- split(set, grp)

          if(length(unique(rsp)) == 1){
            X <- ifelse(all(rsp == 0), "All Absent", "All Present")

            dat <- data.frame(Taxa = unique(set$Taxa),
                              P_Val = X)
          } else{
            ## Running Wilcoxon / Mann-Whitney test
            wct <- wilcox.test(spl[[1]] %>% pull(!!rlang::enquo(y)),

                               spl[[2]] %>% pull(!!rlang::enquo(y)),

                               ...) %>% broom::tidy

            dat <- data.frame(Taxa = unique(set$Taxa),
                              P_Val = wct$p.value)
          }

        } )
    }
  } else stop("grp_var must have two or more levels")

  suppressWarnings( dat %>% dplyr::mutate(FDR_Pval = stats::p.adjust(P_Val, method = "BH")) )
}
CharlieCarpenter/tidy.micro documentation built on Jan. 19, 2020, 6:28 p.m.