#' Hierarchical Bayes Shrinkage
#'
#' This is a function that takes a "success" and a "total" variable (often something like "number of devices staying home" and "total number of devices") and shrinks them to the full data set using shrinkage methods for proportions.
#'
#' This is usually called by group, either with \code{dplyr::group_by} or with the \code{by} argument in a \code{data.table}, so that individual observations can be shrunk to the group level.
#'
#' @param success A numeric integer variable containing the number of successes.
#' @param total A numeric integer variable containing the total sample size.
#' @examples
#' \dontrun{
#' # The directory distdat is the folder we have downloaded the distancing data to from AWS.
#' # Read and compile all distancing data from May 1 to May 7
#' distancing <- read_distancing(
#' start = lubridate::ymd('2020-05-01'),
#' end = lubridate::ymd('2020-05-07'),
#' dir = distdat
#' )
#'
#' # Shrink county to state
#' distancing <- distancing[,.(county_fips = county_fips,
#' unshrunk_stay_home = completely_home_device_count/device_count,
#' shrunk_stay_home = hb_shrink(completely_home_device_count, device_count)),
#' by = .(state_fips, date)]
#'
#' }
#' @export
hb_shrink <- function(success,total) {
expected_theta <- mean(success/total)
var_theta <- var(success/total)
# Get the beta dist alpha+beta
alpha_plus_beta <- (expected_theta*(1-expected_theta)/var_theta) - 1
# and separate them out
alpha <- alpha_plus_beta*expected_theta
beta <- alpha_plus_beta*(1-expected_theta)
# Posteriors!
posterior_alpha <- alpha + success
posterior_beta <- beta + (total - success)
# Finally, estimate the mean of the beta distribution
return(posterior_alpha/(posterior_alpha+posterior_beta))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.