Nothing
#' Generates B-many bootstrap samples
#'
#' Ensures that each group in a bootstrap sample has the same number of rows as
#' that group did in the original sample data.
#'
#' @param samp The sample data
#' @param pop The bootstrap population data
#' @param dom_nm Character string of the domain identifier name as it appears in samp and pop
#' @param B Integer. The number of bootstrap samples to produce
#'
#' @return A list of length B where each item is a unique bootstrap sample
#' @noRd
#'
samp_by_grp <- function(samp, pop, dom_nm, B) {
num_plots <- dplyr::count(samp, !!rlang::sym(dom_nm))
diff <- unique(pop$domain)[!unique(pop$domain) %in% unique(samp[[dom_nm]])]
if (length(diff) != 0) {
to_append <- data.frame(
doms = diff,
n = rep(0, length(diff))
)
colnames(to_append) <- c(dom_nm, "n")
num_plots <- rbind(num_plots, to_append)
}
# our boot_pop_data has column name domain as its group variable
setup <- dplyr::count(pop, !!rlang::sym(dom_nm)) |>
dplyr::left_join(num_plots, by = dom_nm) |>
dplyr::mutate(add_to = dplyr::lag(cumsum(n.x), default = 0)) |>
dplyr::rowwise() |>
dplyr::mutate(map_args = list(list(n.x, n.y, add_to)))
all_samps <- vector("list", length = B)
pop_ordered <- pop[order(pop[[dom_nm]]), ]
for (i in 1:B) {
ids <- setup |>
dplyr::mutate(samps = purrr::pmap(.l = map_args, .f = \ (x, y, z) {
sample(1:x, size = y, replace = TRUE) + z
})) |>
dplyr::pull(samps) |>
unlist()
out <- pop_ordered[ids, ]
all_samps[[i]] <- out
}
return(all_samps)
}
#' Fits the two models in a zi-estimator
#'
#' @param samp_dat Sample data
#' @param lin_formula Formula to be used in the linear regression model
#' @param log_formula Formula to be used in the logistic regression model
#' @param domain_level Character. Domain identifier name.
#'
#' @return A list containing the two model objects
#' @noRd
#'
fit_zi <- function(samp_dat,
lin_formula,
log_formula,
domain_level) {
Y <- deparse(lin_formula[[2]])
# creating nonzero version of our sample data set
nz <- samp_dat[samp_dat[[Y]] > 0, ]
# fit linear mixed model on nonzero data
lmer_nz <- suppressMessages(
lme4::lmer(lin_formula, data = nz)
)
# Fit logistic mixed effects on ALL data
glmer_z <- suppressMessages(
lme4::glmer(log_formula, data = samp_dat, family = 'binomial')
)
return(list(lmer = lmer_nz, glmer = glmer_z))
}
#' Generates mse estimates
#'
#' @param .data The bootstrap population data
#' @param truth A data.frame containing the true domain level values from the bootstrap population data
#' @param domain_level Character. Domain identifier name
#' @param beta_lm_mat A matrix containing the fixed effects coefficients resulting from fitting the linear model to each bootstrap sample
#' @param beta_glm_mat A matrix containing the fixed effects coefficients resulting from fitting the logistic model to each bootstrap sample
#' @param u_lm A matrix containing the random effects values for each domain resulting from fitting the linear model to each bootstrap sample
#' @param u_glm A matrix containing the random effects values for each domain resulting from fitting the linear model to each bootstrap sample
#' @param lin_X A character vector with the names of the predictor variables used in the linear model
#' @param log_X A character vector with the names of the predictor variables used in the logistic model
#' @param estimand A string specifying whether the estimates should be 'totals' or 'means'
#'
#' @return A data.frame with the mse estimate for every domain
#' @noRd
generate_mse <- function(.data,
truth,
domain_level,
beta_lm_mat,
beta_glm_mat,
u_lm,
u_glm,
lin_X,
log_X,
estimand) {
boot_pop_by_dom <- split(.data, f = .data[[domain_level]])
design_mat_ls <- boot_pop_by_dom |>
map(.f = function(.x) {
dmat_lm <- model.matrix(~., .x[ ,lin_X, drop = FALSE])
dmat_glm <- model.matrix(~., .x[ ,log_X, drop = FALSE])
return(list(design_mat_lm = dmat_lm,
design_mat_glm = dmat_glm))
})
n_doms <- length(colnames(u_glm))
dom_order <- names(boot_pop_by_dom)
u_lm <- u_lm[, order(match(colnames(u_lm), dom_order))]
u_glm <- u_glm[, order(match(colnames(u_glm), dom_order))]
if (ncol(u_lm) != ncol(u_glm)) {
diff <- base::setdiff(colnames(u_glm), colnames(u_lm))
to_append <- matrix(rep(NA, times = nrow(u_lm) * length(diff)), ncol = length(diff))
colnames(to_append) <- diff
u_lm <- cbind(u_lm, to_append)
}
dom_res_wide <- generate_preds(beta_lm = beta_lm_mat,
beta_glm = beta_glm_mat,
u_lm = u_lm,
u_glm = u_glm,
design_mats = design_mat_ls,
J = n_doms,
estimand = estimand)
truth_ordered <- truth[order(match(truth[[domain_level]], dom_order)), ]
truth_vec <- truth_ordered$domain_est
mse <- (dom_res_wide - truth_vec)^2 |>
rowMeans(na.rm = TRUE)
res_doms <- data.frame(
domain = truth_ordered[[domain_level]],
mse = mse
)
return(res_doms)
}
#' Generate the Bootstrap population data
#'
#' @param original_out List containing original model objects
#' @param pop_dat The population data frame
#' @param domain_level Character. The domain column names in pop_dat
#' @param log_X Vector of characters containing logistic model predictor names
#' @param all_preds Vector of characters containing all predictor names
#'
#' @return The population bootstrap data
#' @noRd
generate_boot_pop <- function(original_out,
pop_dat,
domain_level,
log_X,
all_preds) {
zi_mod_coefs <- mse_coefs(original_out$lmer, original_out$glmer)
params_and_domain <- setNames(
zi_mod_coefs$b_i,
zi_mod_coefs$domain_levels
)
pop_b_i <- data.frame(
dom = pop_dat[ , domain_level, drop = TRUE],
b_i = params_and_domain[pop_dat[ , domain_level, drop = TRUE]]
)
pop_b_i[is.na(pop_b_i$b_i), "b_i"] <- 0
x_matrix <- model.matrix(
as.formula(paste0(" ~ ", paste(all_preds, collapse = " + "))),
data = pop_dat[ , all_preds, drop = FALSE]
)
indv_re <- data.frame(
dom = pop_dat[ , domain_level, drop = TRUE],
eps_ij = rnorm(nrow(pop_dat), 0, sqrt(zi_mod_coefs$sig2_eps_hat))
)
# tweak for allowing new levels
pop_doms <- unique(pop_dat[[domain_level]])
all_doms <- unique(pop_doms, zi_mod_coefs$domain_levels)
area_re_lkp <- setNames(
rnorm(length(all_doms), 0, sqrt(zi_mod_coefs$sig2_mu_hat)),
all_doms
)
rand_effs <- data.frame(
indv_re,
u_j = area_re_lkp[pop_dat[ , domain_level, drop = TRUE]]
)
x <- x_matrix[ , c("(Intercept)", log_X)] %*% zi_mod_coefs$alpha_1 + pop_b_i$b_i
p_hat_i <- binomial()$linkinv(x)
delta_i_star <- rbinom(length(p_hat_i), 1, p_hat_i)
boot_dat_params <- list(
random_effects = rand_effs,
p_hat_i = p_hat_i,
delta_i_star = delta_i_star
)
linear_preds <- (x_matrix[, colnames(model.matrix(original_out$lmer))] %*% zi_mod_coefs$beta_hat) +
boot_dat_params$random_effects$u_j + boot_dat_params$random_effects$eps_ij
boot_pop_data <- data.frame(
pop_dat[ , c(domain_level, all_preds)],
response = linear_preds * boot_dat_params$delta_i_star
)
return(boot_pop_data)
}
#' Bootstrap procedure for the parallel option
#'
#' @param x The vector 1:B where B is the number of total bootstraps
#' @param boot_lst A list where each element contains a bootstrap sample
#' @param domain_level Character. Domain identifier name
#' @param boot_lin_formula The formula to be used for the linear model
#' @param boot_log_formula The formula to be used for the logistic model
#' @param boot_pop_data The bootstrap population data
#' @param boot_truth A data.frame containing the true domain level values from the bootstrap population data
#' @param estimand A string specifying whether the estimates should be 'totals' or 'means'
#' @param lin_X A character vector with the names of the predictor variables used in the linear model
#' @param log_X A character vector with the names of the predictor variables used in the logistic model
#'
#' @return A list containing the mse estimates data.frame.
#' @noRd
#'
boot_rep_par <- function(x,
boot_lst,
domain_level,
boot_lin_formula,
boot_log_formula,
boot_pop_data,
boot_truth,
estimand,
lin_X,
log_X) {
p <- progressor(steps = length(x))
res <-
furrr::future_map(.x = boot_lst,
.f = \(.x) {
p()
boot_rep(boot_samp = .x,
domain_level,
boot_lin_formula,
boot_log_formula)
},
.options = furrr_options(seed = TRUE))
beta_lm_mat <- res |>
map_dfr(.f = ~ .x$beta_lm) |>
as.matrix()
beta_glm_mat <- res |>
map_dfr(.f = ~ .x$beta_glm) |>
as.matrix()
u_lm <- res |>
map_dfr(.f = ~ .x$u_lm) |>
as.matrix()
u_glm <- res |>
map_dfr(.f = ~ .x$u_glm) |>
as.matrix()
# sometimes u_lm will have fewer domains once it is filtered
# down to positive response values
u_lm[is.na(u_lm)] <- 0
preds_full <- generate_mse(.data = boot_pop_data,
truth = boot_truth,
domain_level = domain_level,
beta_lm_mat = beta_lm_mat,
beta_glm_mat = beta_glm_mat,
u_lm = u_lm,
u_glm = u_glm,
lin_X = lin_X,
log_X = log_X,
estimand = estimand)
return(list(preds = preds_full))
}
#' Sample n rows from a data.frame
#'
#' @param .data The data.frame to sample from
#' @param n The number of rows to sample
#' @param replace Logical. Should selected rows be added back to the data.frame for the next row selection?
#'
#' @return A data.frame with n randomly chosen rows
#' @noRd
#'
slice_samp <- function(.data, n, replace = TRUE) {
.data[sample(nrow(.data), n, replace = replace),]
}
#' Extract all matches from a string
#'
#' @param string String to extract matches from
#' @param pattern Regex pattern to use for extractions
#'
#' @return A vector of the matches
#' @noRd
str_extract_all_base <- function(string, pattern) {
regmatches(string, gregexpr(pattern, string))
}
#' Format model parameters for mse estimation
#'
#' @param .fit A custom list containing model fits
#' @param ref A list containing names to be used to fill in when a model fit failed
#'
#' @return A list containing all of the properly formated parameters
#' @noRd
#'
mod_param_fmt <- function(.fit, ref = NULL) {
if (!is.null(.fit)) {
.lmer <- .fit$lmer
.glmer <- .fit$glmer
beta_lm <- lme4::fixef(.lmer)
beta_glm <- lme4::fixef(.glmer)
ref_lm <- lme4::ranef(.lmer)[[1]]
ref_glm <- lme4::ranef(.glmer)[[1]]
u_lm <- setNames(
ref_lm[ ,1],
rownames(ref_lm)
)
u_glm <- setNames(
ref_glm[ ,1],
rownames(ref_glm)
)
} else {
lm_terms <- ref$.lm[!grepl("\\|", ref$.lm)]
glm_terms <- ref$.glm[!grepl("\\|", ref$.glm)]
beta_lm <- setNames(
rep(NA, times = length(lm_terms) + 1),
c('(Intercept)', lm_terms)
)
beta_glm <- setNames(
rep(NA, times = length(glm_terms) + 1),
c('(Intercept)', glm_terms)
)
u_lm <- setNames(
rep(NA, times = length(ref$d)),
ref$d
)
u_glm <- u_lm
}
list(beta_lm = beta_lm,
beta_glm = beta_glm,
u_lm = u_lm,
u_glm = u_glm)
}
#' Perform a single bootstrap repetition
#'
#' @param boot_samp data.frame, An individual bootstrap sample.
#' @param domain_level Character. Domain identifier name
#' @param boot_lin_formula The formula to be used for the linear model
#' @param boot_log_formula The formula to be used for the logistic model
#'
#' @return A list containing the properly formated model parameters from fitting the two models to the sample data.
#' @noRd
#'
boot_rep <- function(boot_samp,
domain_level,
boot_lin_formula,
boot_log_formula) {
boot_samp_fit <- tryCatch(
{
out <- fit_zi(boot_samp,
boot_lin_formula,
boot_log_formula,
domain_level)
ps <- mod_param_fmt(out)
return(ps)
},
error = function(cond) {
doms <- unique(boot_samp[[domain_level]])
lm_cfs <- labels(terms(boot_lin_formula))
glm_cfs <- labels(terms(boot_lin_formula))
ps <- mod_param_fmt(.fit = NULL,
ref = list(d = doms,
.lm = lm_cfs,
.glm = glm_cfs))
return(ps)
}
)
return(boot_samp_fit)
}
#' Format model coefficients
#'
#' @param lmer_model lme4::lmer model object
#' @param glmer_model lme4::glmer model object
#'
#' @return A list with all of the necessary model parameters
#' @noRd
#'
mse_coefs <- function(lmer_model, glmer_model) {
# from lmer model
beta_hat <- lmer_model@beta # linear model coefficients
model_summary_df <- data.frame(summary(lmer_model)$varcor)
sig2_mu_hat <- model_summary_df[1, ]$vcov
sig2_eps_hat <- subset(model_summary_df, grp == "Residual")$vcov
# from glmer model
alpha_1 <- glmer_model@beta
b_i <- lme4::ranef(glmer_model)[[1]][,1]
b_domain_levels <- rownames(lme4::ranef(glmer_model)[[1]])
return(list(
beta_hat = beta_hat, sig2_mu_hat = sig2_mu_hat,
sig2_eps_hat = sig2_eps_hat, alpha_1 = alpha_1,
b_i = b_i, domain_levels = b_domain_levels))
}
#' A function factory that causes a function to capture messages, warnings, or errors
#'
#' @param .f The function
#'
#' @return A function that works just like .f but also returns any messages, warnings, or errors that result from using the function
#' @noRd
capture_all <- function(.f){
.f <- purrr::as_mapper(.f)
function(...){
try_out <- suppressMessages(suppressWarnings(
try(.f(...), silent = TRUE)
))
res <- rlang::try_fetch(
.f(...),
error = function(err) rlang::abort("Failed.", parent = err),
warning = function(warn) warn,
message = function(message) message,
)
out <- list(
result = NULL,
log = NULL
)
if("error" %in% class(res)) {
stop(res$message)
} else if (!any(c("warning", "message") %in% class(res))){
out$result <- try_out
out$log <- NA
} else {
out$result <- try_out
out$log <- res$message
}
return(out)
}
}
#' Checking if a param inherits a class
#'
#' @param what What class to check if the parameter input inherits
#' @param ... The parameter input(s) to check
#'
#' @return Nothing if the check is passed, but an error if the check fails
#' @noRd
check_inherits <- function(what, ...) {
opts <- list(...)
for (i in seq_along(opts)) {
if (!is.null(opts[[i]])) {
if (!inherits(opts[[i]], what)) {
stop(paste0(opts[[i]], " needs to be of class ", what))
}
}
} # i
invisible(opts)
}
#' Checking if parallel functionality is properly set up
#'
#' @param x The parameter input to check
#' @param call The caller environment to check in
#'
#' @return Nothing if the check is passed, but an error if the check fails
#' @noRd
check_parallel <- function(x, call = rlang::caller_env()) {
if (x) {
if (eval(inherits(future::plan(), "sequential"), envir = call)) {
message("In order for the internal processes to be run in parallel a `future::plan()` must be specified by the user")
message("See <https://future.futureverse.org/reference/plan.html> for reference on how to use `future::plan()`")
}
}
if (x && future::nbrOfWorkers() == 1) {
warning("Argument `parallel` is set to true, but only one core is being used")
}
invisible(x)
}
#' Checking random effect column
#'
#' @param pop_dat The population dataset to check
#' @param domain_level Character. The domain level identifier.
#'
#' @return Nothing if the check is passed, but and error if it fails
#' @noRd
check_re <- function(pop_dat, samp_dat, domain_level) {
if (!(domain_level %in% names(pop_dat))) {
stop(paste0("Column ", domain_level, " does not exist in pop_dat"))
}
if (!(domain_level %in% names(samp_dat))) {
stop(paste0("Column ", domain_level, " does not exist in samp_dat"))
}
if (!inherits(pop_dat[[domain_level]], "character") || !inherits(samp_dat[[domain_level]], "character")) {
stop(paste0("Column ", domain_level, " must be of type `character` in both pop_dat and samp_dat"))
}
}
#' Fast aggregation
#'
#' @noRd
agg_stat <- function(vals, nms, .f) {
agg <- tapply(vals, nms, .f)
out <- data.frame(nms = names(agg), vals = agg)
out
}
#' Predict with both models and return result
#'
#' @param mod1 Linear model object
#' @param mod2 Logistic model object
#' @param estimand Character, either "means" or "totals"
#' @param .data The data to predict on
#' @param domain_level Character name of domain variable in .data
#'
#' @returns A data frame of results
#' @noRd
collect_preds <- function(mod1, mod2, estimand, .data, domain_level) {
lin_pred <- predict(mod1, newdata = .data, allow.new.levels = TRUE)
log_pred <- predict(mod2, newdata = .data, type = "response", allow.new.levels = TRUE)
unit_preds <- lin_pred * log_pred
out <- switch(estimand,
"means" = agg_stat(unit_preds, .data[[domain_level]], mean),
"totals" = agg_stat(unit_preds, .data[[domain_level]], sum),
stop())
names(out) <- c(domain_level, "est")
out
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.