Nothing
# __________________ #< 4dfc02f9f16d691fdde16cc9a0024e9f ># __________________
# Combine predictors ####
#' @title Generate model formulas by combining predictors
#' @description
#' \Sexpr[results=rd, stage=render]{lifecycle::badge("maturing")}
#'
#' Create model formulas with every combination
#' of your fixed effects, along with the dependent variable and random effects.
#' \code{259,358} formulas have been precomputed with two- and three-way interactions
#' for up to \code{8} fixed effects, with up to \code{5} included effects per formula.
#' Uses the \code{+} and \code{*} operators, so lower order interactions are
#' automatically included.
#' @return
#' \code{list} of model formulas.
#'
#' E.g.:
#'
#' \code{c("y ~ x1 + (1|z)", "y ~ x2 + (1|z)",
#' "y ~ x1 + x2 + (1|z)", "y ~ x1 * x2 + (1|z)")}.
#' @author Ludvig Renbo Olsen, \email{r-pkgs@@ludvigolsen.dk}
#' @export
#' @aliases generate_formulas
#' @param dependent Name of dependent variable. (Character)
#' @param fixed_effects \code{list} of fixed effects. (Character)
#'
#' Max. limit of \code{8} effects \strong{when interactions are included}!
#'
#' A fixed effect name cannot contain: white spaces, \code{"*"} or \code{"+"}.
#'
#' Effects in sublists will be interchanged. This can be useful, when
#' we have multiple versions of a predictor (e.g. \code{x1} and \code{log(x1)}) that we
#' do not wish to have in the same formula.
#'
#' Example of interchangeable effects:
#'
#' \code{list( list( "x1", "log_x1" ), "x2", "x3" )}
#' @param random_effects The random effects structure. (Character)
#'
#' Is appended to the model formulas.
#' @param max_fixed_effects Maximum number of fixed effects in a model formula. (Integer)
#'
#' Max. limit of \code{5} \strong{when interactions are included}!
#' @param max_interaction_size Maximum number of effects in an interaction. (Integer)
#'
#' Max. limit of \code{3}.
#'
#' Use this to limit the \code{n}-way interactions allowed.
#' \code{0} or \code{1} excludes interactions all together.
#'
#' A model formula can contain multiple interactions.
#' @param max_effect_frequency Maximum number of times an effect is included in a formula string.
#' @examples
#' # Attach packages
#' library(cvms)
#'
#' # Create effect names
#' dependent <- "y"
#' fixed_effects <- c("a", "b", "c")
#' random_effects <- "(1|e)"
#' \donttest{
#' # Create model formulas
#' combine_predictors(
#' dependent, fixed_effects,
#' random_effects
#' )
#' }
#' # Create effect names with interchangeable effects in sublists
#' fixed_effects <- list("a", list("b", "log_b"), "c")
#' \donttest{
#' # Create model formulas
#' combine_predictors(
#' dependent, fixed_effects,
#' random_effects
#' )
#' }
#' @importFrom purrr pmap_dbl pmap_df
#' @importFrom rlang .data
#' @importFrom utils combn head capture.output tail packageVersion
#' @importFrom stats setNames formula terms.formula
#' @import data.table
combine_predictors <- function(dependent,
fixed_effects,
random_effects = NULL,
max_fixed_effects = 5,
max_interaction_size = 3,
max_effect_frequency = NULL) {
# Check arguments ####
assert_collection <- checkmate::makeAssertCollection()
checkmate::assert_string(
x = dependent,
min.chars = 1,
add = assert_collection
)
checkmate::assert_string(
x = random_effects,
min.chars = 1, # lmer will fail if ""
null.ok = TRUE,
add = assert_collection
)
checkmate::assert_count(
x = max_effect_frequency,
null.ok = TRUE,
add = assert_collection
)
# Ror range
checkmate::assert_number(
x = max_interaction_size,
lower = 0, upper = 3,
null.ok = TRUE,
add = assert_collection
)
# We need max_interaction_size for the next asserts
checkmate::reportAssertions(assert_collection)
# For integer
checkmate::assert_count(
x = max_interaction_size,
null.ok = TRUE,
add = assert_collection
)
arg_max__max_interaction_size <- ifelse(max_interaction_size <= 1, 256, 8)
checkmate::assert(
checkmate::check_list(
x = fixed_effects,
types = c("character", "list"),
any.missing = FALSE,
min.len = 2,
max.len = arg_max__max_interaction_size
),
checkmate::check_character(
x = fixed_effects,
any.missing = FALSE,
min.len = 2,
max.len = arg_max__max_interaction_size
)
)
checkmate::assert_number(
x = max_fixed_effects,
lower = 2,
upper = ifelse(max_interaction_size <= 1, 256, 5),
null.ok = TRUE,
add = assert_collection
)
# We don't want the same assertions twice
checkmate::reportAssertions(assert_collection)
checkmate::assert_count(
x = max_fixed_effects,
positive = TRUE,
null.ok = TRUE,
add = assert_collection
)
checkmate::reportAssertions(assert_collection)
# End of argument checks ####
args_ <- combine_predictors_prepare_args(
dependent = dependent,
fixed_effects = fixed_effects,
random_effects = random_effects,
max_fixed_effects = max_fixed_effects,
max_interaction_size = max_interaction_size,
max_effect_frequency = max_effect_frequency
)
dependent <- args_[["dependent"]]
fixed_effects <- args_[["fixed_effects"]]
random_effects <- args_[["random_effects"]]
max_fixed_effects <- args_[["max_fixed_effects"]]
max_interaction_size <- args_[["max_interaction_size"]]
max_effect_frequency <- args_[["max_effect_frequency"]]
n_fixed_effects <- args_[["n_fixed_effects"]]
should_contain_interactions <- args_[["should_contain_interactions"]]
interchangeable_effects_combinations <- args_[["interchangeable_effects_combinations"]]
# Generate / fetch formulas
if (!should_contain_interactions) {
## Effect combinations without interactions
# Without interactions
effect_combinations <- plyr::ldply(1:max_fixed_effects, function(i) {
as.data.frame(t(combn(fixed_effects, i)), stringsAsFactors = FALSE)
}) %>%
dplyr::as_tibble()
# Create formulas without interactions
formulas <- effect_combinations %>%
dplyr::mutate(formula_ = purrr::pmap_chr(., paste_columns, collapse = " + ")) %>%
base_select(cols = "formula_")
} else {
# Create the map between each fixed effect and its letter ("A"="efx1", etc.)
pattern_replacement <- setNames(
paste0(" ", fixed_effects),
paste0("(\\s|^)", LETTERS[1:n_fixed_effects], "(?!\\S)")
)
# Fetch the precomputed formulas and replace names of the effects
formulas <- fetch_formulas(
n_fixed_effects = n_fixed_effects,
max_interaction_size_ = max_interaction_size,
max_fixed_effects_ = max_fixed_effects,
max_effect_frequency_ = max_effect_frequency
) %>%
dplyr::mutate(
formula_ = stringr::str_replace_all(
.data$formula_,
pattern_replacement
),
formula_ = trimws(.data$formula_)
)
}
# Add formula versions with the interchangeable effects
if (!is.null(interchangeable_effects_combinations)) {
formulas <- tidyr::crossing(
formulas,
interchangeable_effects_combinations
)
# Replace with the interchangeable effects
for (column in 2:ncol(formulas)) {
formulas <- formulas %>%
dplyr::mutate_at(
.vars = 1,
.funs = list(~ stringr::str_replace_all(
., colnames(formulas)[[column]],
as.character(!!as.name(colnames(formulas)[[column]]))
))
)
}
formulas <- formulas %>%
base_select(cols = "formula_") %>%
dplyr::distinct()
}
formulas[["n_efxs"]] <- stringr::str_count(formulas[["formula_"]], "\\+|\\*")
formulas <- formulas[
order(formulas$n_efxs,
formulas$formula_,
method = "radix"),][["formula_"]]
if (is.null(random_effects)) {
return(paste0(dependent, " ~ ", formulas))
} else {
return(paste0(dependent, " ~ ", formulas, " + ", random_effects))
}
}
counts_as_row <- function(...) {
dplyr::bind_rows(table(...))
}
sort_rowwise <- function(data, vars, decreasing = FALSE, na.last = TRUE) {
data[vars] <- t(apply(data[vars], 1,
FUN = function(x) sort(x, decreasing = decreasing, na.last = na.last)
))
data
}
# Create data frame row with values as columns.
# Fill with the values or a specified value.
# @param ... Values.
#
# Intended to be used with
# purrr::pmap, where ... is a row of values.
# @param fill_with Value to fill cells with.
# If \code{NULL}, the values in \code{...} are used.
vals_to_cols <- function(..., fill_with = NULL) {
vals <- unname(c(...))
if (is.null(fill_with)) {
fill_with <- vals
}
setNames(
data.frame(
matrix(data = fill_with, ncol = length(vals), nrow = 1),
stringsAsFactors = FALSE
),
c(vals)
)
}
# Paste effects as interactions.
# Collapse with ".|_|." instead of " * ".
# @param ... Values.
#
# Intended to be used with
# purrr::pmap, where ... is a row of values.
paste_columns <- function(..., collapse = " * ") {
effects_ <- unname(c(...))
effects_ <- effects_[effects_ != "__NA__"]
effects_ <- effects_[!is.na(effects_)]
effects_ <- sort(effects_, decreasing = FALSE)
paste0(effects_, collapse = collapse)
}
# Detect if a row has a "__NA__" value left
# of a non-"__NA__" value.
# @param ... Values.
#
# Intended to be used with
# purrr::pmap, where ... is a row of values.
contains_NA_left_of_value <- function(...) {
r <- unname(c(...))
rle_ <- rle(r)
total_NAs <- sum(stringr::str_count(rle_$values, "__NA__"))
if (total_NAs == 0) {
return(FALSE)
} else if (total_NAs > 1) {
return(TRUE)
}
if (rle_$values[length(rle_$values)] != "__NA__") {
return(TRUE)
}
return(FALSE)
}
# Creates map of interchangeable effects and their combinations.
# Removes all but the first interchangeable effects from fixed_effects.
# With this, we can create our formulas with one combination of effects
# and add versions with the combinations of interchangeable effects afterwards.
# @param fixed_effects \code{vector} of fixed effects. (Character)
create_interchangeable_effects_combinations <- function(fixed_effects) {
# Check if any element is a list with more than one element
contains_interchangeable_effects <- plyr::llply(fixed_effects, function(x) {
is.list(x) && length(x) > 1
}) %>%
unlist()
if (any(contains_interchangeable_effects)) {
map_of_interchangeable_effects <- plyr::llply(fixed_effects[contains_interchangeable_effects], function(x) {
key <- x[[1]]
values <- setNames(list(x), key)
values
}) %>%
unlist(recursive = FALSE)
fixed_effects <- c(
unlist(fixed_effects[!contains_interchangeable_effects]),
names(map_of_interchangeable_effects)
)
interchangeable_effects_combinations <- setNames(
expand.grid(map_of_interchangeable_effects),
names(map_of_interchangeable_effects)
)
} else {
fixed_effects <- unlist(fixed_effects)
interchangeable_effects_combinations <- NULL
}
return(list(
"fixed_effects" = fixed_effects,
"interchangeable_effects_combinations" = interchangeable_effects_combinations
))
}
# Get effects and all possible interactions
get_terms_matrix <- function(fixed_effects) {
interacting_effects <- paste0(fixed_effects, collapse = "*")
interaction_formula <- formula(paste0("1 ~ ", interacting_effects))
terms_matrix <- t(attr(terms.formula(interaction_formula), "factors"))
terms <- rownames(terms_matrix)
terms_matrix <- dplyr::as_tibble(terms_matrix)
terms_matrix[["1"]] <- NULL
terms_matrix$terms <- stringr::str_replace_all(terms, ":", " \\* ")
# Place terms as first column and calculate number of terms
terms_matrix %>%
position_first(col = "terms") %>%
dplyr::mutate(num_terms = rowSums(.[2:(length(fixed_effects) + 1)]))
}
# Note DO NOT DELETE #############################
# Also used by data-raw/combine_predictors_table()
get_min_n_fixed_effects <- function(..., fixed_effects) {
n_fixed_effects <- length(fixed_effects)
r <- c(...)[rev(fixed_effects)]
n_fixed_effects + 1 - match(1, r, nomatch = NA)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.