Nothing
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## ----setup, message=FALSE, warning=FALSE, echo=FALSE--------------------------
library(dplyr) # For data manipulation
library(survey) # For complex survey analysis
library(srvyr) # For complex survey analysis with dplyr syntax
library(svrep)
## -----------------------------------------------------------------------------
library(survey) # For complex survey analysis
library(svrep)
set.seed(2022)
# Load an example dataset from a multistage sample, with two stages of SRSWOR
data("mu284", package = 'survey')
multistage_srswor_design <- svydesign(data = mu284,
ids = ~ id1 + id2,
fpc = ~ n1 + n2)
bootstrap_rep_design <- as_bootstrap_design(multistage_srswor_design,
type = "Rao-Wu-Yue-Beaumont",
replicates = 500)
svytotal(x = ~ y1, design = multistage_srswor_design)
svytotal(x = ~ y1, design = bootstrap_rep_design)
## ----eval=FALSE---------------------------------------------------------------
# # Load example dataset of U.S. counties and states with 2004 Presidential vote counts
# data("election", package = 'survey')
# pps_wor_design <- svydesign(data = election_pps,
# pps = HR(),
# fpc = ~ p, # Inclusion probabilities
# ids = ~ 1)
# bootstrap_rep_design <- as_bootstrap_design(pps_wor_design,
# type = "Rao-Wu-Yue-Beaumont",
# replicates = 100)
#
# svytotal(x = ~ Bush + Kerry, design = pps_wor_design)
# svytotal(x = ~ Bush + Kerry, design = bootstrap_rep_design)
## -----------------------------------------------------------------------------
# Declare a multistage design
# where first-stage probabilities are PPSWOR sampling
# and second-stage probabilities are based on SRSWOR
multistage_design <- svydesign(
data = library_multistage_sample,
ids = ~ PSU_ID + SSU_ID,
probs = ~ PSU_SAMPLING_PROB + SSU_SAMPLING_PROB,
pps = "brewer"
)
# Convert to a bootstrap replicate design
boot_design <- as_bootstrap_design(
design = multistage_design,
type = "Rao-Wu-Yue-Beaumont",
samp_method_by_stage = c("PPSWOR", "SRSWOR"),
replicates = 1000
)
# Compare variance estimates
svytotal(x = ~ TOTCIR, na.rm = TRUE, design = multistage_design)
svytotal(x = ~ TOTCIR, na.rm = TRUE, design = boot_design)
## -----------------------------------------------------------------------------
make_quad_form_matrix(
variance_estimator = "SD2",
cluster_ids = c(1,2,3,4,5) |> data.frame(),
strata_ids = c(1,1,1,1,1) |> data.frame(),
sort_order = c(1,2,3,4,5)
)
## ----eval=TRUE----------------------------------------------------------------
# Load an example dataset of a stratified systematic sample
data('library_stsys_sample', package = 'svrep')
# First, sort the rows in the order used in sampling
library_stsys_sample <- library_stsys_sample |>
dplyr::arrange(SAMPLING_SORT_ORDER)
# Create a survey design object
survey_design <- svydesign(
data = library_stsys_sample,
ids = ~ 1,
strata = ~ SAMPLING_STRATUM,
fpc = ~ STRATUM_POP_SIZE
)
# Obtain the quadratic form for the target estimator
sd2_quad_form <- get_design_quad_form(
design = survey_design,
variance_estimator = "SD2"
)
class(sd2_quad_form)
dim(sd2_quad_form)
## -----------------------------------------------------------------------------
# Obtain weighted values
wtd_y <- as.matrix(library_stsys_sample[['LIBRARIA']] /
library_stsys_sample[['SAMPLING_PROB']])
wtd_y[is.na(wtd_y)] <- 0
# Obtain point estimate for a population total
point_estimate <- sum(wtd_y)
# Obtain the variance estimate using the quadratic form
variance_estimate <- t(wtd_y) %*% sd2_quad_form %*% wtd_y
std_error <- sqrt(variance_estimate[1,1])
# Summarize results
sprintf("Estimate: %s", round(point_estimate))
sprintf("Standard Error: %s", round(std_error))
## -----------------------------------------------------------------------------
# Load example data from stratified systematic sample
data('library_stsys_sample', package = 'svrep')
# First, ensure data are sorted in same order as was used in sampling
library_stsys_sample <- library_stsys_sample[
order(library_stsys_sample$SAMPLING_SORT_ORDER),
]
# Create a survey design object
design_obj <- svydesign(
data = library_stsys_sample,
strata = ~ SAMPLING_STRATUM,
ids = ~ 1,
fpc = ~ STRATUM_POP_SIZE
)
## -----------------------------------------------------------------------------
# Convert to generalized bootstrap replicate design
gen_boot_design_sd2 <- as_gen_boot_design(
design = design_obj,
variance_estimator = "SD2",
replicates = 2000
)
# Estimate sampling variances
svymean(x = ~ TOTSTAFF, na.rm = TRUE, design = gen_boot_design_sd2)
## ----eval=FALSE---------------------------------------------------------------
# # Load example data of a PPS survey of counties and states
# data('election', package = 'survey')
#
# # Create survey design object
# pps_design_ht <- svydesign(
# data = election_pps,
# id = ~1, fpc = ~p,
# pps = ppsmat(election_jointprob),
# variance = "HT"
# )
#
#
# # Convert to generalized bootstrap replicate design
# gen_boot_design_ht <- pps_design_ht |>
# as_gen_boot_design(variance_estimator = "Horvitz-Thompson",
# replicates = 5000, tau = "auto")
#
# # Compare sampling variances from bootstrap vs. Horvitz-Thompson estimator
# svytotal(x = ~ Bush + Kerry, design = pps_design_ht)
# svytotal(x = ~ Bush + Kerry, design = gen_boot_design_ht)
## -----------------------------------------------------------------------------
library(dplyr) # For data manipulation
# Create a multistage survey design
multistage_design <- svydesign(
data = library_multistage_sample |>
mutate(Weight = 1/SAMPLING_PROB),
ids = ~ PSU_ID + SSU_ID,
fpc = ~ PSU_POP_SIZE + SSU_POP_SIZE,
weights = ~ Weight
)
# Convert to a generalized bootstrap design
multistage_boot_design <- as_gen_boot_design(
design = multistage_design,
variance_estimator = "Stratified Multistage SRS"
)
# Compare variance estimates
svytotal(x = ~ TOTCIR, na.rm = TRUE, design = multistage_design)
svytotal(x = ~ TOTCIR, na.rm = TRUE, design = multistage_boot_design)
## -----------------------------------------------------------------------------
# View overall scale factor
overall_scale_factor <- multistage_boot_design$scale
print(overall_scale_factor)
# Check that the scale factor was calculated correctly
tau <- multistage_boot_design$tau
print(tau)
B <- ncol(multistage_boot_design$repweights)
print(B)
print( (tau^2) / B )
## -----------------------------------------------------------------------------
# Load an example dataset of a stratified systematic sample
data('library_stsys_sample', package = 'svrep')
# Represent the SD2 successive-difference estimator as a quadratic form,
# and obtain the matrix of that quadratic form
sd2_quad_form <- make_quad_form_matrix(
variance_estimator = 'SD2',
cluster_ids = library_stsys_sample |> select(FSCSKEY),
strata_ids = library_stsys_sample |> select(SAMPLING_STRATUM),
strata_pop_sizes = library_stsys_sample |> select(STRATUM_POP_SIZE),
sort_order = library_stsys_sample |> pull("SAMPLING_SORT_ORDER")
)
## -----------------------------------------------------------------------------
rep_adj_factors <- make_gen_boot_factors(
Sigma = sd2_quad_form,
num_replicates = 500,
tau = "auto"
)
## -----------------------------------------------------------------------------
tau <- attr(rep_adj_factors, 'tau')
B <- ncol(rep_adj_factors)
## -----------------------------------------------------------------------------
# Retrieve value of 'scale'
rep_adj_factors |>
attr('scale')
# Compare to manually-calculated value
(tau^2) / B
# Retrieve value of 'rscales'
rep_adj_factors |>
attr('rscales') |>
head() # Only show first 5 values
## -----------------------------------------------------------------------------
gen_boot_design <- svrepdesign(
data = library_stsys_sample |>
mutate(SAMPLING_WEIGHT = 1/SAMPLING_PROB),
repweights = rep_adj_factors,
weights = ~ SAMPLING_WEIGHT,
combined.weights = FALSE,
type = "other",
scale = attr(rep_adj_factors, 'scale'),
rscales = attr(rep_adj_factors, 'rscales')
)
## -----------------------------------------------------------------------------
gen_boot_design |>
svymean(x = ~ TOTSTAFF,
na.rm = TRUE, deff = TRUE)
## -----------------------------------------------------------------------------
library(survey)
data('api', package = 'survey')
# Declare a bootstrap survey design object ----
boot_design <- svydesign(
data = apistrat,
weights = ~pw,
id = ~1,
strata = ~stype,
fpc = ~fpc
) |>
svrep::as_bootstrap_design(replicates = 5000)
# Produce estimates of interest, and save the estimate from each replicate ----
estimated_means_and_proportions <- svymean(x = ~ api00 + api99 + stype,
design = boot_design,
return.replicates = TRUE)
# Estimate the number of replicates needed to obtain a target simulation CV ----
estimate_boot_reps_for_target_cv(
svrepstat = estimated_means_and_proportions,
target_cv = c(0.01, 0.05, 0.10)
)
## -----------------------------------------------------------------------------
estimate_boot_sim_cv(estimated_means_and_proportions)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.