#' Vary hypothetical group sizes
#'
#' @description Some published studies only report a total sample size but no
#' group sizes. However, group sizes are crucial for consistency tests such as
#' GRIM. Call `disperse()` to generate possible group sizes that all add up to
#' the total sample size, if that total is even.
#'
#' `disperse2()` is a variant for odd totals. It takes two consecutive numbers
#' and generates decreasing values from the lower as well as increasing values
#' from the upper. In this way, all combinations still add up to the total.
#'
#' `disperse_total()` directly takes the total sample size, checks if it's
#' even or odd, splits it up accordingly, and applies `disperse()` or
#' `disperse2()`, respectively.
#'
#' These functions are primarily intended as helpers. They form the backbone
#' of [`grim_map_total_n()`] and all other functions created with
#' [`function_map_total_n()`].
#' @param n Numeric:
#' - In `disperse()`, single number from which to go up and down. This should be
#' half of an even total sample size.
#' - In `disperse2()`, the two consecutive numbers closest to half of an odd
#' total sample size (e.g., `c(25, 26)` for a total of 51).
#' - In `disperse_total()`, the total sample size.
#' @param dispersion Numeric. Vector that determines the steps up and down from
#' `n` (or, in `disperse_total()`, from half `n`). Default is `0:5`.
#' @param n_min Numeric. Minimal group size. Default is `1L`.
#' @param n_max Numeric. Maximal group size. Default is `NULL`, i.e., no
#' maximum.
#' @param constant Optionally, add a length-2 vector or a list of length-2
#' vectors (such as a data frame with exactly two rows) to accompany the pairs
#' of dispersed values. Default is `NULL`, i.e., no constant values.
#' @param constant_index Integer (length 1). Index of `constant` or the first
#' `constant` column in the output tibble. If `NULL` (the default), `constant`
#' will go to the right of `n_change`.
#'
#' @details If any group size is less than `n_min` or greater than `n_max`, it
#' is removed. The complementary size of the other group is also removed.
#'
#' `constant` values are pairwise repeated. That is why `constant` must be
#' a length-2 atomic vector or a list of such vectors. If `constant` is a data
#' frame or some other named list, the resulting columns will have the same
#' names as the list-element names. If the list is not named, the new column
#' names will be `"constant1"`, `"constant2"`, etc.; or just `"constant"` for
#' a single pair.
#' @return A tibble (data frame) with these columns:
#' - `n` includes the dispersed `n` values. Rows come in pairs (1 and 2, 3 and
#' 4, etc.), and the two `n` values within each pair add up to the total.
#' - `n_change` records how the input `n` was transformed to the output `n`. In
#' `disperse2()`, the `n_change` strings label the lower of the input `n`
#' values `n1` and the higher one `n2`.
#' @seealso [`function_map_total_n()`], [`grim_map_total_n()`], and
#' [`seq_distance_df()`].
#'
#' @include utils.R
#'
#' @export
#' @references Bauer, P. J., & Francis, G. (2021). Expression of Concern: Is It
#' Light or Dark? Recalling Moral Behavior Changes Perception of Brightness.
#' *Psychological Science*, 32(12), 2042–2043.
#' https://journals.sagepub.com/doi/10.1177/09567976211058727
#' @examples
#' # For a total sample size of 40,
#' # set `n` to `20`:
#' disperse(n = 20)
#'
#' # Specify `dispersion` to control
#' # the steps up and down from `n`:
#' disperse(n = 20, dispersion = c(3, 6, 10))
#'
#' # In `disperse2()`, specify `n` as two
#' # consecutive numbers -- i.e., group sizes:
#' disperse2(n = c(25, 26))
#'
#' # Use the total sample size directly
#' # with `disperse_total()`. An even total
#' # internally triggers `disperse()`...
#' disperse_total(n = 40)
#'
#' # ...whereas an odd total triggers `disperse2()`:
#' disperse_total(n = 51)
#'
#' # You may add values that repeat along with the
#' # dispersed ones but remain constant themselves.
#' # Such values can be stored in a length-2 vector
#' # for a single column...
#' disperse_total(37, constant = c("5.24", "3.80"))
#'
#' # ... or a list of length-2 vectors for multiple
#' # columns. This includes data frames with 2 rows:
#' df_constant <- tibble::tibble(
#' name = c("Paul", "Mathilda"), age = 27:28,
#' registered = c(TRUE, FALSE)
#' )
#' disperse_total(37, constant = df_constant)
# Basic function for halves of even totals --------------------------------
disperse <- function(n, dispersion = 0:5, n_min = 1L, n_max = NULL,
                     constant = NULL, constant_index = NULL) {

  # Builds pairs of hypothetical group sizes around `n`: for every step in
  # `dispersion`, one row holds `n - step` and the next one `n + step`, so the
  # two `n` values of each pair of rows sum to `2 * n`. Steps that would push a
  # group size below `n_min` or above `n_max` are dropped. Optional `constant`
  # values are repeated alongside each pair. Returns a tibble with the columns
  # `n` and `n_change`, plus any `constant` columns.

  # Checks ---

  check_length_disperse_n(n, "It must have length 1.")
  check_non_negative(dispersion)

  # Main part ---

  # (Note: The checks below count towards the main part because they may lead
  # to input transformations.)

  # Keep only those steps for which the lower group size stays at or above
  # `n_min`. A length-0 `n_min` is rejected as well: it would otherwise
  # silently empty `dispersion` via a zero-length comparison.
  if (!is.null(n_min)) {
    if (length(n_min) != 1L) {
      cli::cli_abort(c(
        "!" = "`n_min` must have length 1 or be `NULL`.",
        "x" = "It has length {length(n_min)}."
      ))
    }
    dispersion <- dispersion[(n - dispersion) >= n_min]
  }

  # Likewise, keep only those steps for which the upper group size stays at or
  # below `n_max`:
  if (!is.null(n_max)) {
    if (length(n_max) != 1L) {
      cli::cli_abort(c(
        "!" = "`n_max` must have length 1 or be `NULL`.",
        "x" = "It has length {length(n_max)}."
      ))
    }
    dispersion <- dispersion[(n + dispersion) <= n_max]
  }

  n_minus <- n - dispersion
  n_plus <- n + dispersion

  # Interleave the lower and upper values: pivoting the two columns to long
  # format yields one "minus" row followed by its "plus" partner for each step.
  out <- tidyr::pivot_longer(
    tibble::tibble(n_minus, n_plus),
    cols = everything(),
    names_to = "n_change",
    values_to = "n"
  )

  # Overwrite the pivoted column labels with the signed distance from the
  # input `n`:
  out$n_change <- out$n - n

  # For a non-whole `n`, round the differences to the number of digits that
  # `decimal_places_scalar()` reports for `n`:
  if (!is_whole_number(n)) {
    digits <- decimal_places_scalar(n)
    out$n_change <- round(out$n_change, digits)
  }

  # Reorder the columns and tag the tibble with the "scr_disperse" class (both
  # via internal helpers; the documented column order is `n`, `n_change`):
  out <- out %>%
    reverse_column_order() %>%
    add_class("scr_disperse")

  if (!is.null(constant)) {

    # Validate one length-2 `constant` vector and repeat it once per pair of
    # rows. With `list_input = TRUE`, the value is checked under the name
    # `constant_list_element` -- presumably so that the helper's error message
    # refers to a list element rather than to `constant` as a whole.
    repeat_constant <- function(constant, out, list_input = FALSE) {
      if (list_input) {
        constant_list_element <- constant
        check_length_or_null(constant_list_element, 2L)
      } else {
        check_length_or_null(constant, 2L)
      }
      rep(constant, times = nrow(out) / 2L)
    }

    if (is.list(constant)) {
      constant <- purrr::map(constant, repeat_constant, out, list_input = TRUE)
      constant_is_named_list <- !is.null(names(constant)) &&
        length(names(constant)) == length(constant)
      # Named lists (including data frames) keep their names as column names;
      # unnamed lists get "constant1", "constant2", etc.
      if (constant_is_named_list) {
        constant <- tibble::as_tibble(
          constant, .name_repair = function(x) names(constant)
        )
      } else {
        constant <- tibble::as_tibble(constant, .name_repair = function(x) {
          paste0("constant", seq_along(constant))
        })
      }
    } else {
      constant <- repeat_constant(constant, out)
    }

    # By default, place `constant` directly to the right of `n_change`:
    if (is.null(constant_index)) {
      constant_index <- match("n_change", colnames(out)) + 1L
    }

    out <- dplyr::mutate(out, constant, .before = constant_index)
  }

  # Store the changes as integers (`as.integer()` drops any fractional part):
  out$n_change <- as.integer(out$n_change)

  out
}
# Variant for halves of odd totals ----------------------------------------
#' @rdname disperse
#' @export
disperse2 <- function(n, dispersion = 0:5, n_min = 1L, n_max = NULL,
                      constant = NULL, constant_index = NULL) {

  # Variant of `disperse()` for the two group sizes closest to half of an odd
  # total. `n` must have length 2; a warning (not an error) is raised if the
  # second value is not the first plus 1. Returns the tibble produced by
  # `disperse()` with its `n` column shifted as described below.

  # Checks ---

  check_length(n, 2L)

  if (!dplyr::near(n[2L] - n[1L], 1)) {
    cli::cli_warn(c(
      "`n` was given as `{n[1L]}` and `{n[2L]}`.",
      "!" = "It should be two consecutive numbers.",
      ">" = "(The second value in `n` should be the first plus 1.)"
    ))
  }

  # Main part ---

  # Take the mean of the two `n` values and disperse from there:
  out <- disperse(
    n = mean(n), dispersion = dispersion, n_min = n_min, n_max = n_max,
    constant = constant, constant_index = constant_index
  )

  # Determine which row numbers in the output tibble have an `n` that must be
  # decreased or increased, using an internal helper function from utils.R
  # (presumably it picks every second index, starting at `from`):
  seq_rows <- seq_len(nrow(out))
  locations1 <- parcel_nth_elements(seq_rows, n = 2, from = 1L)
  locations2 <- parcel_nth_elements(seq_rows, n = 2, from = 2L)

  # Shift the dispersed values by half a unit so that the lower values decrease
  # from the first of the two `n` values and the higher values increase from
  # the second one. Plain vectorized arithmetic is used here; there is no need
  # to map a scalar function over the individual elements.
  out$n[locations1] <- out$n[locations1] - 0.5
  out$n[locations2] <- out$n[locations2] + 0.5

  # Return the resulting tibble:
  out
}
# Variant for totals; with even / odd splitting ---------------------------
#' @rdname disperse
#' @export
disperse_total <- function(n, dispersion = 0:5, n_min = 1L, n_max = NULL,
                           constant = NULL, constant_index = NULL) {

  # Entry point that takes the total sample size itself: halves it and
  # delegates to `disperse()` for an even total or to `disperse2()` for an odd
  # one, passing all other arguments through unchanged.

  # Checks ---

  check_length_disperse_n(
    n, "It must have length 1; `n` is supposed \\
to be a *single*, total sample size."
  )

  # Main part ---

  half <- n / 2

  # Odd total: hand the two numbers half a unit below and above the midpoint
  # to `disperse2()` and return early.
  if (!is_even(n)) {
    return(disperse2(
      n = c(half - 0.5, half + 0.5),
      dispersion = dispersion,
      n_min = n_min,
      n_max = n_max,
      constant = constant,
      constant_index = constant_index
    ))
  }

  # Even total: disperse directly from the midpoint.
  disperse(
    n = half,
    dispersion = dispersion,
    n_min = n_min,
    n_max = n_max,
    constant = constant,
    constant_index = constant_index
  )
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.