#' Perform normality test for multiple variables by groups
#'
#' @description This function performs a Shapiro-Wilk normality test on multiple
#' variables in a dataframe by groups.
#'
#' @param df A dataframe containing the data to be tested.
#' @param cols A vector of column names in the dataframe to be tested for
#' normality.
#' @param group A column name in the dataframe that defines the groups.
#'
#' @details The function first creates subsets of the data for each group. It
#' then performs a Shapiro-Wilk normality test (`shapiro.test()`) on each
#' group and returns the results in a tidy format. the function also checks
#' whether the number of observations for each variable and group lies within
#' the permissible range of the `shapiro.test()`. If not, the function will
#' provide diagnostics on why a test could not be performed base on which
#' groups are affected.
#'
#' @return A data frame with the results of the Shapiro-Wilk test for each group
#' and variable.
#' @seealso [ntest()] to run normality tests for ungrouped data
#' @examples
#' \dontrun{
#' # Perform a normality test for groups
#' ntest_by(df = mtcars,
#' cols = mpg,
#' group = cyl)
#'
#' # Perform a normality test for multiple variables by groups
#' ntest_by(df = starwars,
#' cols = c(mass, birth_year),
#' group = species)
#' }
#'
#' @importFrom tidyr pivot_longer nest unnest
#' @importFrom tibble tibble
#' @importFrom dplyr group_by summarise filter select mutate everything rowwise
#' ungroup
#' @importFrom purrr map
#' @importFrom broom tidy
#' @importFrom cli cli_abort
#'
#' @export
ntest_by <- function(df, cols, group){
# Create subsets of data as nested dfs
nested_group_data <-
df |>
select({{ group }}, {{ cols }}) |>
nest(.by = {{ group }})
# a check to make sure groups are: 3>= size >= 5000
check_group_size <-
nested_group_data |>
rowwise() |>
mutate(n = nrow(.data$data)) |>
ungroup()
# Group(s) too small
small_groups <-
check_group_size |>
filter(.data$n < 3)
small_group_names <- paste(small_groups[[1]], collapse = ", ")
# Group(s) too big
big_groups <-
check_group_size |>
filter(.data$n > 5000)
big_group_names <- paste(big_groups[[1]], collapse = ", ")
if (any(check_group_size$n < 3)) {
cli::cli_abort(c(
"Sample size of all groups needs to be >= 3.",
i = "Groups with n < 3: {.strong {small_group_names}}.")
)
}
if (any(check_group_size$n > 5000)) {
cli::cli_abort(c(
"Sample size of all groups needs to be <= 5000.",
i = "Groups with n > 5000: {.strong {big_group_names}}.")
)
}
# Perform normality test on each dataset
results <-
nested_group_data |>
mutate(test_results = map(.data$data, ~ map(., ~ shapiro.test(.)))
) |>
# Turn results into a tibble
mutate(test_results = map(.data$test_results,
~ tibble(variable = names(.x),
result = .x))
) |>
unnest(cols = .data$test_results) |>
mutate(result = map(.data$result, tidy)) |>
unnest(cols = .data$result) |>
select(-.data$data)
return(results)
}
utils::globalVariables(".data")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.