################################################################################
#'
#' Blocked Weighted Bootstrap
#'
#' The **blocked weighted bootstrap (BBW)** is an estimation technique for
#' use with data from two-stage cluster sampled surveys in which either prior
#' weighting (e.g. **population proportional sampling** or **PPS** as
#' used in **SMART** surveys) or posterior weighting (e.g. as used in
#' **RAM** and **S3M** surveys).
#'
#' @param x A data frame with primary sampling unit (PSU) in column named `psu`
#' @param w A data frame with primary sampling unit (PSU) in column named `psu`
#' and survey weight (i.e. PSU population) in column named `pop`
#' @param statistic A function operating on data in `x` (see example)
#' @param params Parameters (named columns in `x`) passed to the function
#' specified in `statistic`
#' @param outputColumns Names of columns in output data frame
#' @param replicates Number of bootstrap replicates
#'
#' @return A data frame with:
#' * ncol = length(outputColumns)
#' * nrow = replicates
#' * names = outputColumns
#'
#' @examples
#' # Example function - estimate a proportion for a binary (0/1) variable):
#'
#' oneP <- function(x, params) {
#' v1 <- params[1]
#' v1Data <- x[[v1]]
#' oneP <- mean(v1Data, na.rm = TRUE)
#' return(oneP)
#' }
#'
#' # Example call to bootBW function using RAM-OP test data:
#'
#' bootP <- bootBW(x = indicatorsHH,
#' w = villageData,
#' statistic = oneP,
#' params = "anc1",
#' outputColumns = "anc1",
#' replicates = 9)
#'
#' # Example estimate with 95% CI:
#'
#' quantile(bootP, probs = c(0.500, 0.025, 0.975), na.rm = TRUE)
#'
#' @export
#'
#
################################################################################
bootBW <- function(x, w, statistic, params, outputColumns, replicates = 400) {
## Scale weights and accumulate weights
w$weight <- w$pop / sum(w$pop)
w$cumWeight <- cumsum(w$weight)
## Create data.frame with named columns for output
boot <- data.frame(matrix(ncol = length(outputColumns), nrow = replicates))
names(boot) <- outputColumns
## Create an empty data.frame with same structure of 'x' with sufficient rows
## to hold the largest possible survey replicates (i.e. number of clusters
## multiplied by the size of the largest cluster)
nClusters <- nrow(w)
maxRows <- nClusters * max(table(x$psu))
emptyDF <- rbind(as.data.frame(lapply(x, function(x) rep.int(NA, maxRows))))
## Vector to hold clusters to be included in a survey replicate
sampledClusters <- vector(mode = mode(x$psu), length = nClusters)
## And now ... resample!
for(i in seq_len(replicates)) {
## Create a dataframe to hold a survey replicate
xBW <- emptyDF
## Blocking Bootstrap from 'x' (blocking on x$psu = cluster identifier)
for(j in seq_len(nClusters)) {
## "Roulette Wheel" algorithm (to select a weighted sample of clusters)
sampledClusters[j] <- w$psu[which.max(w$cumWeight >= runif(n = 1, min = 0, max = 1))]
}
## Pointer for inserting selected clusters into the survey replicate
rowIndex <- 1
## Build a (blocking weighted) bootstrap replicate from the selected clusters
for(k in seq_len(nClusters)) {
## Extract data for cluster and resample within the cluster
y <- subset(x, psu == sampledClusters[k])
clusterN <- nrow(y)
y <- y[sample(1:clusterN, replace = TRUE), ]
## Insert cluster replicate into survey replicate
endRow <- rowIndex + clusterN
xBW[rowIndex:(endRow - 1), ] <- y
## Update pointer
rowIndex <- endRow
}
## Select data for analysis
xBW <- xBW[1:(rowIndex - 1), ]
## Apply statistic
boot[i, ] <- statistic(xBW, params)
}
return(boot)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.