Nothing
#' Run non-parametric statistics on a data.frame.
#'
#' A simple interface to lower-level statistics functions, including
#' [stats::wilcox.test()], [stats::kruskal.test()], [emmeans::emmeans()],
#' and [emmeans::emtrends()].
#'
#' @inherit documentation_default
#'
#' @family stats_tables
#'
#' @return A tibble data.frame with fields from the table below. This tibble
#' object provides the `$code` operator to print the R code used to generate
#' the statistics.
#'
#' | **Field** | **Description** |
#' | ------------ | ---------------------------------------------------- |
#' | .mean | Estimated marginal mean. See [emmeans::emmeans()]. |
#' | .mean.diff | Difference in means. |
#' | .slope | Trendline slope. See [emmeans::emtrends()]. |
#' | .slope.diff | Difference in slopes. |
#' | .h1 | Alternate hypothesis. |
#' | .p.val | Probability that null hypothesis is correct. |
#' | .adj.p | `.p.val` after adjusting for multiple comparisons. |
#' | .effect.size | Effect size. See [emmeans::eff_size()]. |
#' | .lower | Confidence interval lower bound. |
#' | .upper | Confidence interval upper bound. |
#' | .se | Standard error. |
#' | .n | Number of samples. |
#' | .df | Degrees of freedom. |
#' | .stat | Wilcoxon or Kruskal-Wallis rank sum statistic. |
#' | .t.ratio | `.mean` / `.se` |
#' | .r.sqr | Percent of variation explained by the model. |
#' | .adj.r | `.r.sqr`, taking degrees of freedom into account. |
#' | .aic | Akaike Information Criterion (predictive models). |
#' | .bic | Bayesian Information Criterion (descriptive models). |
#' | .loglik | Log-likelihood goodness-of-fit score. |
#' | .fit.p | P-value for observing this fit by chance. |
#'
#'
#' @export
#' @examples
#' library(rbiom)
#'
#' biom <- rarefy(hmp50)
#'
#' df <- taxa_table(biom, rank = "Family")
#' stats_table(df, stat.by = "Body Site")[,1:6]
#'
#' df <- adiv_table(biom)
#' stats_table(df, stat.by = "Sex", split.by = "Body Site")[,1:7]
stats_table <- function (
df, regr = NULL, resp = attr(df, 'response'),
stat.by = NULL, split.by = NULL,
test = "emmeans", fit = "gam", at = NULL,
level = 0.95, alt = "!=", mu = 0, p.adj = "fdr" ) {
#________________________________________________________
# See if this result is already in the cache.
#________________________________________________________
params <- list2env(slurp_env())
cache_file <- get_cache_file('stats_table', params)
if (isTRUE(attr(cache_file, 'exists', exact = TRUE)))
return (readRDS(cache_file))
with(params, {
#________________________________________________________
# Validate and restructure user's arguments.
#________________________________________________________
if (!inherits(df, 'data.frame'))
cli_abort("`df` must be a data.frame, not {.type {df}}.")
validate_df_field('regr', col_type = "num", null_ok = TRUE)
validate_df_field('resp', col_type = "num")
validate_df_field('stat.by', col_type = "cat", null_ok = TRUE)
validate_df_field('split.by', col_type = "cat", null_ok = TRUE, max = Inf)
validate_var_choices('test', c("wilcox", "kruskal", "emmeans", "emtrends"))
validate_var_choices('alt', c("!=", ">", "<"))
validate_var_choices('fit', c("lm", "log", "gam"))
validate_var_choices('p.adj', p.adjust.methods)
validate_var_range('at', null_ok = TRUE)
validate_var_range('level', n = 1, range = c(0.5, 1))
validate_var_range('mu', n = 1)
#________________________________________________________
# Warn about invalid combinations of parameters.
#________________________________________________________
if (as.logical(anyDuplicated(c(regr, resp, stat.by, split.by))))
cli_abort("`regr`, `resp`, `stat.by`, `split.by` must all be unique.")
if (is.null(regr) && !is.null(at))
cli_warn('`at` is ignored when `regr` = NULL.')
if (test %in% c('wilcox', 'kruskal')) {
if (!eq(fit, "gam")) cli_warn('`fit` is ignored when `test` = "{test}".')
if (!is.null(regr)) cli_warn('`regr` is ignored when `test` = "{test}".')
}
if (!is.null(stat.by)) {
if (!eq(level, 0.95)) cli_warn('`level` is ignored when using `stat.by`.')
if (!eq(alt, "!=")) cli_warn('`alt` must be "!=" when using `stat.by`.')
if (!eq(mu, 0)) cli_warn('`mu` must be 0 when using `stat.by`.')
}
})
#________________________________________________________
# Dispatch to relevant stats_* function.
#________________________________________________________
stats <- switch(
EXPR = params$test,
'wilcox' = do.call(stats_wilcox, fun_params(stats_wilcox, params)),
'kruskal' = do.call(stats_kruskal, fun_params(stats_kruskal, params)),
'emmeans' = do.call(stats_emmeans, fun_params(stats_emmeans, params)),
'emtrends' = do.call(stats_emtrends, fun_params(stats_emtrends, params)) )
attr(stats, 'cmd') <- current_cmd('stats_table')
set_cache_value(cache_file, stats)
return (stats)
}
#' Test alpha diversity for associations with metadata.
#'
#' A convenience wrapper for [adiv_table()] + [stats_table()].
#'
#' @inherit documentation_default
#' @inherit stats_table return
#'
#' @family alpha_diversity
#' @family stats_tables
#'
#' @export
#' @examples
#' library(rbiom)
#'
#' biom <- rarefy(hmp50)
#'
#' adiv_stats(biom, stat.by = "Sex")[,1:6]
#'
#' adiv_stats(biom, stat.by = "Sex", split.by = "Body Site")[,1:6]
#'
#' adiv_stats(biom, stat.by = "Body Site", test = "kruskal")
adiv_stats <- function (
biom, regr = NULL, stat.by = NULL, adiv = "Shannon",
split.by = NULL, transform = "none",
test = "emmeans", fit = "gam", at = NULL,
level = 0.95, alt = "!=", mu = 0, p.adj = "fdr" ) {
#________________________________________________________
# Compute alpha diversity values
#________________________________________________________
df <- adiv_table(
biom = biom,
adiv = adiv,
md = c(regr, stat.by, split.by),
transform = transform )
if (nlevels(df$.adiv) > 1)
split.by %<>% c('.adiv')
#________________________________________________________
# Run statistics
#________________________________________________________
stats <- stats_table(
df = df,
regr = regr,
stat.by = stat.by,
split.by = split.by,
test = test,
fit = fit,
at = at,
level = level,
alt = alt,
mu = mu,
p.adj = p.adj )
attr(stats, 'cmd') <- current_cmd('adiv_stats')
attr(stats, 'data') <- df
return (stats)
}
#' Test beta diversity for associations with metadata.
#'
#' A convenience wrapper for [bdiv_table()] + [stats_table()].
#'
#' @inherit documentation_default
#' @inherit stats_table return
#'
#' @family beta_diversity
#' @family stats_tables
#'
#' @export
#' @examples
#' library(rbiom)
#'
#' biom <- rarefy(hmp50)
#'
#' bdiv_stats(biom, stat.by = "Sex", bdiv = c("bray", "unifrac"))[,1:7]
#'
#' biom <- subset(biom, `Body Site` %in% c('Saliva', 'Stool', 'Buccal mucosa'))
#' bdiv_stats(biom, stat.by = "Body Site", split.by = "==Sex")[,1:6]
bdiv_stats <- function (
biom, regr = NULL, stat.by = NULL, bdiv = "Bray-Curtis",
weighted = TRUE, tree = NULL, within = NULL, between = NULL,
split.by = NULL, transform = "none",
test = "emmeans", fit = "gam", at = NULL,
level = 0.95, alt = "!=", mu = 0, p.adj = "fdr" ) {
#________________________________________________________
# Compute beta diversity values
#________________________________________________________
df <- bdiv_table(
biom = biom,
bdiv = bdiv,
weighted = weighted,
tree = tree,
md = c(regr, stat.by, split.by, within, between),
within = within,
between = between,
delta = regr,
transform = transform )
if (nlevels(df$.bdiv) > 1)
split.by %<>% c('.bdiv')
#________________________________________________________
# Run statistics
#________________________________________________________
stats <- stats_table(
df = df,
regr = regr,
stat.by = if (!is.null(stat.by)) sub("^(==|!=)", "", stat.by),
split.by = if (!is.null(split.by)) sub("^(==|!=)", "", split.by),
test = test,
fit = fit,
at = at,
level = level,
alt = alt,
mu = mu,
p.adj = p.adj )
attr(stats, 'cmd') <- current_cmd('bdiv_stats')
attr(stats, 'data') <- df
return (stats)
}
#' Test taxa abundances for associations with metadata.
#'
#' A convenience wrapper for [taxa_table()] + [stats_table()].
#'
#' @inherit documentation_default
#' @inherit stats_table return
#'
#' @family taxa_abundance
#' @family stats_tables
#'
#' @export
#' @examples
#' library(rbiom)
#'
#' biom <- rarefy(hmp50)
#'
#' taxa_stats(biom, stat.by = "Body Site", rank = "Family")[,1:6]
taxa_stats <- function (
biom, regr = NULL, stat.by = NULL, rank = -1, taxa = 6,
lineage = FALSE, unc = "singly", other = FALSE,
split.by = NULL, transform = "none",
test = "emmeans", fit = "gam", at = NULL,
level = 0.95, alt = "!=", mu = 0, p.adj = "fdr" ) {
#________________________________________________________
# Compute taxa abundance values
#________________________________________________________
df <- taxa_table(
biom = biom,
rank = rank,
taxa = taxa,
lineage = lineage,
md = setdiff(c(regr, stat.by, split.by), ".taxa"),
unc = unc,
other = other,
transform = transform )
if (nlevels(df$.rank) > 1) split.by %<>% c('.rank')
if (!eq(stat.by, ".taxa")) split.by %<>% c('.taxa')
#________________________________________________________
# Run statistics
#________________________________________________________
stats <- stats_table(
df = df,
regr = regr,
stat.by = stat.by,
split.by = split.by,
test = test,
fit = fit,
at = at,
level = level,
alt = alt,
mu = mu,
p.adj = p.adj )
attr(stats, 'cmd') <- current_cmd('taxa_stats')
attr(stats, 'data') <- df
return (stats)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.