################ Uncertainty ##############
boot_mat <- function(nrow, iters, weight = NULL){
# For zero iterations, return the original row indices
if(iters == 0){
out <- matrix(1:nrow)
colnames(out) <- "original"
} else {
out <-
replicate(iters,
sample.int(nrow, replace = TRUE,
prob = weight))
# unique, sortable tags for each resample
iter_tags <- formatC(1:iters, width = 8, format = "d", flag = "0")
colnames(out) <- sprintf("resample-%s", iter_tags)
}
return(out)
}
#' Constructor for vbdf summaries
#'
#' @param x data.frame of uncertainty summary
#' @param bloc_var string, the name of the variable that defines the voting blocs
#' @param var_type string, the type of variable, discrete or continuous
#' @param summary_type string, the type of variable, discrete or continuous
#' @param resamples numeric, the number of bootstrap resamples
#'
#' @return A \code{vbsum} object
new_vbsum <- function(x, bloc_var, var_type, summary_type, resamples){
stopifnot(is.data.frame(x))
# tibble protects attributes from dplyr verbs
out <-
tibble::new_tibble(x, nrow = nrow(x), class = "vbsum",
bloc_var = bloc_var, var_type = var_type,
summary_type = summary_type, resamples = resamples)
tibble::validate_tibble(out)
return(out)
}
#' Summarize uncertainty for a vbdf objects
#'
#' Summarize uncertainty for a vbdf objects. Analysis must have run with bootstrap iterations.
#' \code{vb_uncertainty} is just an alias for \code{vb_summary}.
#'
#' @param object a \code{vbdf} object, usually the output of [vb_discrete], [vb_continuous], or [vb_difference].
#' @param type a string naming the type of independent variable summary. Use
#' \code{"binned"} when using the output of [vb_continuous] plus a binned version of the continuous bloc variable.
#' @param estimates character vector naming columns for which to calculate
#' uncertainty estimates.
#' @param na.rm logical indicating whether to remove \code{NA} values in
#' \code{estimates}.
#' @param funcs character vector of summary functions to apply to
#' \code{estimates}. Alternatively, supply your own list of functions, which
#' should accept a numeric vector input and return a scalar.
#' @param low_ci numeric. If you include the string \code{"low"} in \code{funcs}, then use this argument to control the lower bound of the confidence interval.
#' @param high_ci numeric. If you include the string \code{"high"} in \code{funcs}, then use this argument to control the upper bound of the confidence interval.
#' @param bin_col character vector naming the column(s) that define the bins. Used only when \code{type} is \code{"binned"}.
#' @return A summary object with additional columns for each combination
#' of \code{estimates} and \code{funcs}.
#' @param tolerance tolerance used when checking range of probability estimates
#'
#' @export vb_summary
vb_summary <-
function(object, type = c("discrete", "continuous", "binned"),
estimates = grep("prob|pr_turnout|pr_votedem|pr_voterep|cond_rep|net_rep",
names(object), value = TRUE),
na.rm = FALSE,
funcs = c("mean", "median", "low", "high"),
low_ci = 0.025, high_ci = 0.975,
bin_col, tolerance = sqrt(.Machine$double.eps)){
check_vbdf(object, tolerance = tolerance)
if(dplyr::is.grouped_df(object)) stop("Summarizing uncertainty by group is not supported yet. Please use split-apply-combine.")
if(identical(type, c("discrete", "continuous", "binned")))
type <- get_var_type(object)
else type <- match.arg(type, c("discrete", "binned", "continuous"))
if(!all(rlang::has_name(object, estimates))){
miss_estim <- paste(setdiff(estimates, names(object)), collapse = ", ")
stop(sprintf("%s column not found in data\n", miss_estim))
}
bloc_var <- get_bloc_var(object)
if(is.character(funcs))
funcs <-
list(
mean = ~ mean(.x, na.rm = na.rm),
median = ~ median(.x, na.rm = na.rm),
low = ~ quantile(.x, prob = low_ci, na.rm = na.rm),
high = ~ quantile(.x, prob = high_ci, na.rm = na.rm)
)[funcs]
switch(type,
discrete =
{
uncertainty_summary <-
# For each subgroup, calculate summary stats across iterations
object %>%
dplyr::group_by(
dplyr::across(dplyr::all_of(
c(dplyr::group_vars(object), bloc_var)))
) %>%
dplyr::summarize(
dplyr::across(dplyr::all_of(estimates),
.fns = funcs
)
)
},
binned =
{
if(missing(bin_col)) stop("Missing required argument bin_col")
uncertainty_summary <-
object %>%
# Begin by integrating estimates within bin and iteration
dplyr::group_by(dplyr::across(dplyr::all_of(c("resample", dplyr::group_vars(object), bin_col)))) %>%
dplyr::summarize(
dplyr::across(dplyr::all_of(estimates),
sum),
) %>%
# For each subgroup, calculate summary stats across iterations
dplyr::group_by(dplyr::across(dplyr::all_of(c(dplyr::group_vars(object), bin_col)))) %>%
dplyr::summarize(
dplyr::across(dplyr::all_of(estimates),
.fns = funcs
)
)
},
continuous =
{
uncertainty_summary <-
object %>%
# Across iterations, calculate summary stats
dplyr::group_by(dplyr::across(dplyr::all_of(c(dplyr::group_vars(object), bloc_var)))) %>%
dplyr::summarize(
dplyr::across(dplyr::all_of(estimates),
.fns = funcs
)
)
}
)
n_resamples <- length(unique(object$resample))
# Use custom class to protect attributes from dplyr verbs
out <- new_vbsum(uncertainty_summary,
bloc_var = bloc_var, var_type = get_var_type(object),
summary_type = type, resamples = n_resamples)
return(out)
}
#' @rdname vb_summary
#' @export
vb_uncertainty <- vb_summary
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.