#' @title Calculate replicate weights and summary statistics
#' @description Takes the output of `cluster_gen` to calculate the replicate weights as well as some summary statistics
#' @param data list of background questionnaire data (typically generated by `cluster_gen`)
#' @param method replication method. Can be "Jackknife", "BRR" or "BRR Fay"
#' @param k deflating weight factor (used only when `method = "BRR Fay")
#' @details Replicate weights can be calculated using the Jackknife for unstratified two-stage sample designs or Balanced Repeated Replication (BRR) with or without Fay's modification.
#' According to OECD (2015), PISA uses the Fay method with a factor of 0.5. This is why `k = .5` by default.
#' @seealso cluster_gen jackknife, jackknife_var
#' @references
#' OECD (2015). Pisa Data Analysis Manual.
#' Rust, K. F., & Rao, J. N. K. (1996). Variance estimation for complex surveys using replication techniques. Statistical methods in medical research, 5(3), 283-310.
#' @return list with data and, if requested, some statistics
#' @note This function is essentially a big wrapper for `replicate_var`, applying that function on each element of an output of `cluster_gen`.
#' @examples
#' data <- cluster_gen(c(3, 50))
#' calc_replicate_weights(data, "Jackknife")
#' calc_replicate_weights(data, "BRR")
#' calc_replicate_weights(data, "BRR Fay")
#' @export
calc_replicate_weights <- function(data, method, k = 0) {
# Verification =============================================================
if (k < 0 | k > 1) stop ("k must be between 0 and 1")
if (method != "BRR Fay" & k != .5 & k != 0) {
warning(method, " ignores k. Use 'BRR Fay' instead.")
}
# Basic variable creation ==================================================
num_lvl <- length(data)
new_bg <- list()
replicate_stats <- list()
# Analysing each dataset separately ========================================
for (lvl in seq(num_lvl)) {
lvl_name <- names(data)[lvl]
num_sublvl <- length(data[[lvl]])
for (sublvl in seq(num_sublvl)) {
bg <- data[[lvl_name]][[sublvl]]
w_cols <- names(bg)[grep("weight", names(bg))]
# Remove uninsteresting columns ------------------------------------
data_cols <- grep("subject|ID|weight", names(bg), invert = TRUE)
data_cols <- names(bg)[data_cols]
# Calculating replicate weights ------------------------------------
if (method == "Jackknife") {
replicates <- jackknife(bg, drop = FALSE)
} else if (method == "BRR") {
replicates <- brr(bg, drop = FALSE)
} else if (method == "BRR Fay") {
k <- ifelse(k != 0, k, .5)
replicates <- brr(bg, k, drop = FALSE)
} else {
stop("Invalid replication method.")
}
rep_stats <- replicate_var(data_whole = bg,
data_rep = replicates,
method = method,
k = k,
weight_var = "replicate_weight",
vars = data_cols,
full_output = TRUE)
# Organizing statistics --------------------------------------------
means <- rep_stats$theta_whole
vars <- rep_stats$sigma2
stats <- rbind(means, sqrt(vars))
rownames(stats) <- c("mean", "SE")
# Putting data and stats into common list for output ---------------
bg -> new_bg[[lvl_name]][[sublvl]]
stats -> replicate_stats[[lvl_name]][[sublvl]]
}
}
# Assembling output ========================================================
out <- replicate_stats
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.