#' @title Run rank sum tests for each taxa within an OTU table
#' @name micro_rank_sum
#' @description Runs a rank sum test for each taxa within an OTU table or each taxa that didn't converge in \code{\link{mods}}
#' @param micro_set A tidy_micro data set
#' @param table OTU table of interest
#' @param grp_var A factor variable for grouping
#' @param y A continuous response variable. Taxa relative abundance (ra) is recommended
#' @param mod The output from \code{\link{mods}} if desired
#' @param ... Options to be passed to \code{\link[stats]{wilcox.test}} or \code{\link[stats]{kruskal.test}}
#' @references \code{\link[stats]{kruskal.test}} and \code{\link[stats]{wilcox.test}}
#' @details The grp_var must have a least 2 levels. For a 2 level factor a Mann-Whitney test will be calculated through \code{\link[stats]{wilcox.test}}, and for 3 or more levels a Kruskal-Wallis test will be run throuh \code{\link[stats]{kruskal.test}}
#' @return A data frame containing the p-value for each taxa's rank sum test.
#' @examples
#' data(phy); data(cla); data(ord); data(fam); data(met)
#'
#' otu_tabs = list(Phylum = phy, Class = cla, Order = ord, Family = fam)
#' set <- tidy_micro(otu_tabs = otu_tabs, meta = met) %>%
#' filter(day == 7) \%>\% ## Only including the first week
#' mutate(bpd1 = factor(bpd1))
#'
#' ## Rank sum test on every taxa's relative abundance
#' set \%>\% micro_rank_sum(table = "Family", grp_var = bpd1)
#'
#' ## Rank sum test on every taxa whose model didn't converge
#' set \%>\%
#' nb_mods(table = "Family", bpd1) \%>\%
#' micro_rank_sum(table = "Family", grp_var = bpd1, mod = nb_fam)
#' @export
micro_rank_sum <- function(micro_set, table, grp_var, y = ra, mod = NULL, ...){
if(table %nin% unique(micro_set$Table)) stop("Specified table is not in supplied micro_set")
## Kruskal-Wallis rank sum
if(nlevels(micro_set %>% pull(!!rlang::enquo(grp_var))) > 2){
cat("Running Kruskal-Wallis rank sum test.\n")
## Run on every taxa
if(is.null(mod)){
dat <- micro_set %>%
dplyr::filter(Table == table) %>%
plyr::ddply(plyr::.(Taxa), function(set){
## Pulling grouping variable and y
grp <- set %>% pull(!!rlang::enquo(grp_var))
rsp <- set %>% pull(!!rlang::enquo(y))
if(length(unique(rsp)) == 1){
X <- ifelse(all(rsp == 0), "All Absent", "All Present")
dat <- data.frame(Taxa = unique(set$Taxa),
P_Val = X)
} else{
##running kruskal-wallis rank sum test
kwt <- stats::kruskal.test(rsp ~ grp)
dat <- data.frame(Taxa = unique(set$Taxa),
P_Val = kwt$p.value)
}
dat
} )
} else { ## Running on taxa that didn't converge
## Pulling taxa that didn't converge
if("FE_Converged" %in% names(mod$RA_Summary)){
if(all(mod$RA_Summary$FE_Converged)) stop("No tests run. All taxa models converged")
tax <- mod$RA_Summary %>% dplyr::filter(!FE_Converged) %>% dplyr::pull(Taxa)
}
if("RE_Converged" %in% names(mod$RA_Summary)){
if(all(mod$RA_Summary$RE_Converged)) stop("No tests run. All taxa models converged")
tax <- mod$RA_Summary %>% dplyr::filter(!RE_Converged) %>% dplyr::pull(Taxa)
}
dat <- micro_set %>%
## Filtering out rank and taxa that didn't converge
dplyr::filter(Table == table, Taxa %in% tax) %>%
plyr::ddply(plyr::.(Taxa), function(set){
## Pulling grouping variable and y
grp <- set %>% dplyr::pull(!!rlang::enquo(grp_var))
rsp <- set %>% dplyr::pull(!!rlang::enquo(y))
if(length(unique(rsp)) == 1){
X <- ifelse(all(rsp == 0), "All Absent", "All Present")
dat <- data.frame(Taxa = unique(set$Taxa),
P_Val = X)
} else{
##running kruskal-wallis rank sum test
kwt <- stats::kruskal.test(rsp ~ grp)
dat <- data.frame(Taxa = unique(set$Taxa),
P_Val = kwt$p.value)
}
} )
}
## Wilcoxon for 2 groups
} else if(nlevels(micro_set %>% pull(!!rlang::enquo(grp_var))) <= 2){
cat("Running Wilcoxon rank sum test.\n")
## Run on every taxa
if(is.null(mod)){
dat <- micro_set %>%
dplyr::filter(Table == table) %>%
plyr::ddply(plyr::.(Taxa), function(set){
## Pulling grouping variable and splitting based on that variable
grp <- set %>% dplyr::pull(!!rlang::enquo(grp_var))
spl <- split(set, grp)
rsp <- c( dplyr::pull(spl[[1]], !!rlang::enquo(y)),
dplyr::pull(spl[[2]], !!rlang::enquo(y)) )
if(length(unique(rsp)) == 1){
X <- ifelse(all(rsp == 0), "All Absent", "All Present")
dat <- data.frame(Taxa = unique(set$Taxa),
P_Val = X)
} else{
## Running Wilcoxon / Mann-Whitney test
wct <- stats::wilcox.test(spl[[1]] %>% dplyr::pull(!!rlang::enquo(y)),
spl[[2]] %>% dplyr::pull(!!rlang::enquo(y)),
...) %>% broom::tidy
dat <- data.frame(Taxa = unique(set$Taxa),
P_Val = wct$p.value)
}
} )
} else { ## Running on taxa that don't converge
## Pulling taxa that didn't converge
if("FE_Converged" %in% names(mod$RA_Summary)){
if(all(mod$Convergent_Summary$FE_Converged)) stop("No tests run. All taxa models converged")
tax <- mod$RA_Summary %>% dplyr::filter(!FE_Converged) %>% dplyr::pull(Taxa)
}
if("RE_Converged" %in% names(mod$RA_Summary)){
if(all(mod$Convergent_Summary$RE_Converged)) stop("No tests run. All taxa models converged")
tax <- mod$RA_Summary %>% dplyr::filter(!RE_Converged) %>% dplyr::pull(Taxa)
}
dat <- micro_set %>%
## Filtering out table and taxa that didn't converge
dplyr::filter(Table == table, Taxa %in% tax) %>%
plyr::ddply(plyr::.(Taxa), function(set){
## Pulling grouping variable and splitting based on that variable
grp <- set %>% dplyr::pull(!!rlang::enquo(grp_var))
spl <- split(set, grp)
if(length(unique(rsp)) == 1){
X <- ifelse(all(rsp == 0), "All Absent", "All Present")
dat <- data.frame(Taxa = unique(set$Taxa),
P_Val = X)
} else{
## Running Wilcoxon / Mann-Whitney test
wct <- wilcox.test(spl[[1]] %>% pull(!!rlang::enquo(y)),
spl[[2]] %>% pull(!!rlang::enquo(y)),
...) %>% broom::tidy
dat <- data.frame(Taxa = unique(set$Taxa),
P_Val = wct$p.value)
}
} )
}
} else stop("grp_var must have two or more levels")
suppressWarnings( dat %>% dplyr::mutate(FDR_Pval = stats::p.adjust(P_Val, method = "BH")) )
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.