#' @title Running Wilcox test across multiple grouping variables.
#' @description Function to run two-sample Wilcoxon tests on multiple variables
#'   across multiple grouping variables.
#' @name grouped_wilcox
#' @return A tibble dataframe with tidy results from the two-sample Wilcoxon
#'   tests, with one row per combination of grouping level and tested formula.
#'
#' @param data Dataframe from which variables are to be taken.
#' @param dep.vars List of dependent variables for the two-sample Wilcoxon
#'   tests (`y` in `y ~ x`).
#' @param indep.vars List of independent variables for the two-sample Wilcoxon
#'   tests (`x` in `y ~ x`). Within each group, each of these must have exactly
#'   two levels among the non-missing observations.
#' @param grouping.vars List of grouping variables (if `NULL`, the entire
#'   dataframe will be used).
#' @param paired A logical indicating whether you want a paired two-sample
#'   Wilcoxon test (Default: `paired = FALSE`). For a paired test, observations
#'   are matched by their order within the two levels of the independent
#'   variable.
#' @param correct A logical indicating whether to apply continuity correction in
#'   the normal approximation for the p-value (Default: `correct = TRUE`).
#'
#' @details Dependent and independent variables are matched element-wise: the
#'   first dependent variable is tested against the first independent variable,
#'   the second against the second, and so on. A single variable on either side
#'   is recycled across all variables on the other side.
#'
#' @importFrom glue glue
#' @importFrom purrr map pmap
#' @importFrom stats wilcox.test as.formula complete.cases
#' @importFrom tidyr nest
#'
#' @seealso \code{\link{grouped_tidy}}
#'
#' @examples
#' # for reproducibility
#' set.seed(123)
#'
#' # only with one grouping variable
#' groupedstats::grouped_wilcox(
#'   data = dplyr::filter(.data = ggplot2::diamonds, color == "E" | color == "J"),
#'   dep.vars = depth:table,
#'   indep.vars = color,
#'   grouping.vars = clarity,
#'   paired = FALSE
#' )
#' @export
# defining the function
grouped_wilcox <- function(data,
                           dep.vars,
                           indep.vars,
                           grouping.vars = NULL,
                           paired = FALSE,
                           correct = TRUE) {
  # ================== preparing dataframe ==================

  # Convert a captured argument into a list of variable symbols. A bare name
  # squashes to a single symbol (length 1) and is kept as is; anything else is
  # a call such as `c(a, b)` or `a:b`, whose first element is the function
  # being called and is therefore dropped. `NULL` yields an empty list.
  quo_to_vars <- function(quo) {
    vars <- as.list(rlang::quo_squash(quo))
    if (length(vars) == 1L) vars else vars[-1]
  }

  dep.vars <- quo_to_vars(rlang::enquo(dep.vars))
  indep.vars <- quo_to_vars(rlang::enquo(indep.vars))
  grouping.vars <- quo_to_vars(rlang::enquo(grouping.vars))

  # keep only the columns that take part in the analysis
  selected <-
    dplyr::select(.data = data, !!!grouping.vars, !!!dep.vars, !!!indep.vars)

  # one row per group, with that group's observations in list column `data`
  df <-
    if (length(grouping.vars) == 0L) {
      # no grouping requested: treat the entire dataframe as a single group
      tibble::tibble(data = list(selected))
    } else {
      selected %>%
        dplyr::group_by(.data = ., !!!grouping.vars) %>%
        tidyr::nest(.) %>%
        dplyr::filter(.data = ., !purrr::map_lgl(.x = data, .f = is.null)) %>%
        dplyr::ungroup(x = .)
    }

  # row index as a character key; it is matched against the `.id` column that
  # `purrr::map_dfr()` creates for the (unnamed) list of per-group dataframes
  df <- tibble::rownames_to_column(df, var = "..group")

  # ============== custom function ================

  # Runs a two-sample Wilcoxon test of `y_name` across the two levels of
  # `x_name` in every dataframe of `list.col` and returns the tidied results
  # as a single tibble with one row per dataframe.
  wilcox_listed <- function(list.col,
                            x_name,
                            y_name,
                            paired,
                            correct) {
    # plain version of the formula to return
    fx <- glue::glue("{y_name} ~ {x_name}")

    # Test for a single group. The outcome is split by hand and handed to the
    # default method of `wilcox.test()` because its formula method refuses a
    # `paired` argument in recent versions of R (4.4.0 onwards).
    run_test <- function(group_df) {
      outcome <- group_df[[y_name]]
      grouping <- group_df[[x_name]]

      # drop rows with a missing outcome or a missing group label
      keep <- stats::complete.cases(outcome, grouping)

      # `factor()` also discards levels that do not occur in this group
      grouping <- factor(grouping[keep])
      if (nlevels(grouping) != 2L) {
        stop("grouping factor must have exactly 2 levels", call. = FALSE)
      }

      samples <- split(outcome[keep], grouping)

      stats::wilcox.test(
        x = samples[[1L]],
        y = samples[[2L]],
        mu = 0,
        paired = paired,
        alternative = "two.sided",
        conf.level = 0.95,
        exact = NULL,
        correct = correct,
        conf.int = TRUE
      )
    }

    list.col %>% # two-sample Wilcoxon test on each individual group
      purrr::map(.x = ., .f = run_test) %>% # tidying up the output
      purrr::map_dfr(
        .x = .,
        .f = ~ broomExtra::tidy(x = .),
        .id = "..group"
      ) %>% # add formula as a character
      dplyr::mutate(.data = ., formula = as.character(fx)) %>% # rearrange
      dplyr::select(
        .data = .,
        `..group`,
        formula,
        method,
        statistic,
        estimate,
        conf.low,
        conf.high,
        p.value,
        alternative
      ) %>% # convert to a tibble dataframe
      tibble::as_tibble(x = .)
  }

  # ========= using custom function on entered dataframe =================

  # One call of `wilcox_listed()` per (independent, dependent) variable pair.
  # The list of group dataframes is wrapped in `list()` so that it counts as a
  # single element and is recycled across all variable pairs.
  purrr::pmap(
    .l = list(
      list.col = list(df$data),
      x_name = purrr::map(
        .x = indep.vars,
        .f = ~ rlang::quo_name(quo = .)
      ),
      y_name = purrr::map(
        .x = dep.vars,
        .f = ~ rlang::quo_name(quo = .)
      ),
      paired = paired,
      correct = correct
    ),
    .f = wilcox_listed
  ) %>%
    dplyr::bind_rows(.) %>% # attach the grouping columns back to the results
    dplyr::left_join(x = ., y = df, by = "..group") %>%
    dplyr::select(.data = ., !!!grouping.vars, dplyr::everything()) %>%
    dplyr::select(.data = ., -`..group`, -data, -alternative) %>%
    signif_column(data = ., p = `p.value`)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.