Nothing
################ Uncertainty ##############
boot_mat <- function(nrow, iters, weight = NULL){
# For zero iterations, return the original row indices
if(iters == 0){
out <- matrix(1:nrow)
colnames(out) <- "original"
} else {
out <-
replicate(iters,
sample.int(nrow, replace = TRUE,
prob = weight))
# unique, sortable tags for each resample
iter_tags <- formatC(1:iters, width = 8, format = "d", flag = "0")
colnames(out) <- sprintf("resample-%s", iter_tags)
}
return(out)
}
#' Constructor for vbdf summaries
#'
#' @param x data.frame of uncertainty summary
#' @param bloc_var string, the name of the variable that defines the voting blocs
#' @param var_type string, the type of variable, discrete or continuous
#' @param summary_type string, the type of variable, discrete or continuous
#' @param resamples numeric, the number of bootstrap resamples
#'
#' @return A \code{vbsum} object
new_vbsum <- function(x, bloc_var, var_type, summary_type, resamples){
stopifnot(is.data.frame(x))
# tibble protects attributes from dplyr verbs
out <-
tibble::new_tibble(x, nrow = nrow(x), class = "vbsum",
bloc_var = bloc_var, var_type = var_type,
summary_type = summary_type, resamples = resamples)
tibble::validate_tibble(out)
return(out)
}
#' Summarize uncertainty for a vbdf objects
#'
#' Summarize uncertainty for a vbdf objects. Analysis must have run with bootstrap iterations.
#' \code{vb_uncertainty} is just an alias for \code{vb_summary}.
#'
#' @param object a \code{vbdf} object, usually the output of [vb_discrete], [vb_continuous], or [vb_difference].
#' @param type a string naming the type of independent variable summary. Use
#' \code{"binned"} when using the output of [vb_continuous] plus a binned version of the continuous bloc variable.
#' @param estimates character vector naming columns for which to calculate
#' uncertainty estimates.
#' @param na.rm logical indicating whether to remove \code{NA} values in
#' \code{estimates}.
#' @param funcs character vector of summary functions to apply to
#' \code{estimates}. Alternatively, supply your own list of functions, which
#' should accept a numeric vector input and return a scalar.
#' @param low_ci numeric. If you include the string \code{"low"} in \code{funcs}, then use this argument to control the lower bound of the confidence interval.
#' @param high_ci numeric. If you include the string \code{"high"} in \code{funcs}, then use this argument to control the upper bound of the confidence interval.
#' @param bin_col character vector naming the column(s) that define the bins. Used only when \code{type} is \code{"binned"}.
#' @return A summary object with additional columns for each combination
#' of \code{estimates} and \code{funcs}.
#' @param tolerance tolerance used when checking range of probability estimates
#'
#' @export vb_summary
vb_summary <-
function(object, type = c("discrete", "continuous", "binned"),
estimates = grep("prob|pr_turnout|pr_votedem|pr_voterep|cond_rep|net_rep",
names(object), value = TRUE),
na.rm = FALSE,
funcs = c("mean", "median", "low", "high"),
low_ci = 0.025, high_ci = 0.975,
bin_col, tolerance = sqrt(.Machine$double.eps)){
check_vbdf(object, tolerance = tolerance)
if(dplyr::is.grouped_df(object)) stop("Summarizing uncertainty by group is not supported yet. Please use split-apply-combine.")
if(identical(type, c("discrete", "continuous", "binned")))
type <- get_var_type(object)
else type <- match.arg(type, c("discrete", "binned", "continuous"))
if(!all(rlang::has_name(object, estimates))){
miss_estim <- paste(setdiff(estimates, names(object)), collapse = ", ")
stop(sprintf("%s column not found in data\n", miss_estim))
}
bloc_var <- get_bloc_var(object)
if(is.character(funcs))
funcs <-
list(
mean = ~ mean(.x, na.rm = na.rm),
median = ~ median(.x, na.rm = na.rm),
low = ~ quantile(.x, prob = low_ci, na.rm = na.rm),
high = ~ quantile(.x, prob = high_ci, na.rm = na.rm)
)[funcs]
switch(type,
discrete =
{
uncertainty_summary <-
# For each subgroup, calculate summary stats across iterations
object %>%
dplyr::group_by(
dplyr::across(dplyr::all_of(
c(dplyr::group_vars(object), bloc_var)))
) %>%
dplyr::summarize(
dplyr::across(dplyr::all_of(estimates),
.fns = funcs
)
)
},
binned =
{
if(missing(bin_col)) stop("Missing required argument bin_col")
uncertainty_summary <-
object %>%
# Begin by integrating estimates within bin and iteration
dplyr::group_by(dplyr::across(dplyr::all_of(c("resample", dplyr::group_vars(object), bin_col)))) %>%
dplyr::summarize(
dplyr::across(dplyr::all_of(estimates),
sum),
) %>%
# For each subgroup, calculate summary stats across iterations
dplyr::group_by(dplyr::across(dplyr::all_of(c(dplyr::group_vars(object), bin_col)))) %>%
dplyr::summarize(
dplyr::across(dplyr::all_of(estimates),
.fns = funcs
)
)
},
continuous =
{
uncertainty_summary <-
object %>%
# Across iterations, calculate summary stats
dplyr::group_by(dplyr::across(dplyr::all_of(c(dplyr::group_vars(object), bloc_var)))) %>%
dplyr::summarize(
dplyr::across(dplyr::all_of(estimates),
.fns = funcs
)
)
}
)
n_resamples <- length(unique(object$resample))
# Use custom class to protect attributes from dplyr verbs
out <- new_vbsum(uncertainty_summary,
bloc_var = bloc_var, var_type = get_var_type(object),
summary_type = type, resamples = n_resamples)
return(out)
}
#' @rdname vb_summary
#' @export
vb_uncertainty <- vb_summary
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.