Nothing
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## ----setup--------------------------------------------------------------------
# Load necessary libraries
library(auxvecLASSO)
library(survey)
library(sampling)
library(dplyr)
# Set seed for reproducibility
set.seed(123)
## ----create_pop_file----------------------------------------------------------
# Load the population data
data(api)
# Load the population data file and add binary variables
api_pop <- apipop
api_pop$api00_bin <- as.factor(ifelse(api_pop$api00 > 650, 1, 0))
api_pop$growth_bin <- as.factor(ifelse(api_pop$growth > 25, 1, 0))
api_pop$meals_bin <- as.factor(ifelse(api_pop$meals > 40, 1, 0))
api_pop$ell_bin <- as.factor(ifelse(api_pop$ell > 15, 1, 0))
api_pop$hsg_bin <- as.factor(ifelse(api_pop$hsg > 20, 1, 0))
api_pop$full_bin <- as.factor(ifelse(api_pop$full > 90, 1, 0))
api_pop$sch.wide_bin <- as.factor(ifelse(api_pop$sch.wide == "Yes", 1, 0))
api_pop$awards_bin <- as.factor(ifelse(api_pop$awards == "Yes", 1, 0))
api_pop$comp.imp_bin <- as.factor(ifelse(api_pop$comp.imp == "Yes", 1, 0))
api_pop$stype <- as.factor(api_pop$stype)
# Keep only relevant variables
api_pop <- api_pop |>
dplyr::select("cds", "stype", "enroll", ends_with("_bin"))
## ----create_sample------------------------------------------------------------
strata_sample <- sampling::strata(
api_pop, # The population dataset
stratanames = c("stype"), # Stratification variable
size = c(150, 200, 175), # Desired sample sizes per stratum
method = "srswor" # Sampling without replacement
)
api_sample <- getdata(api_pop, strata_sample)
api_sample$SamplingWeight <- 1 / api_sample$Prob
## ----dataimport---------------------------------------------------------------
# Artificial logistic regression coefficients (for both main effects and interactions)
coefficients <- c(
"api00_bin" = 0.5,
"growth_bin" = -0.3,
"ell_bin" = -0.6,
"interaction1" = 0.9, # Interaction term api00_bin:growth_bin
"interaction2" = 1.2 # Interaction term api00_bin:ell_bin
)
# Create interaction terms for logistic model
api_sample$interaction1 <- as.numeric(api_sample$api00_bin) * as.numeric(api_sample$growth_bin)
api_sample$interaction2 <- as.numeric(api_sample$api00_bin) * as.numeric(api_sample$ell_bin)
# Calculate the logit (log-odds) based on the artificial coefficients and interaction terms
logit <- -1.2 +
coefficients["api00_bin"] * as.numeric(api_sample$api00_bin) +
coefficients["growth_bin"] * as.numeric(api_sample$growth_bin) +
coefficients["ell_bin"] * as.numeric(api_sample$ell_bin) +
coefficients["interaction1"] * api_sample$interaction1 +
coefficients["interaction2"] * api_sample$interaction2
# Compute the logistic probabilities
api_sample$response_prob <- 1 / (1 + exp(-logit)) # Logistic function
# Simulate the response variable (1 for response, 0 for non-response)
api_sample$response <- as.factor(rbinom(n = nrow(api_sample), size = 1, prob = api_sample$response_prob))
# --- Check the summary of the sample ---
summary(api_sample)
## ----lassomodeling------------------------------------------------------------
# Apply LASSO for selecting auxiliary variables
lasso_result <- select_auxiliary_variables_lasso_cv(
df = api_sample,
outcome_vars = c("response", "enroll"),
auxiliary_vars = c(
"api00_bin", "growth_bin", "comp.imp_bin", "meals_bin",
"meals_bin", "ell_bin", "hsg_bin", "full_bin", "stype"
),
must_have_vars = "stype", # Include the domain variable (stype) in the model
check_twoway_int = TRUE, # Include two-way interactions
nfolds = 5, # Use 5-fold cross-validation
verbose = FALSE, # Don't print progress messages
standardize = TRUE, # Standardize the predictors
return_models = FALSE, # Don't return the models, only the selection results
parallel = FALSE # Run without parallel processing
)
# Display the results
lasso_result
## ----create_resp-set----------------------------------------------------------
api_resp <- api_sample[api_sample$response == 1, ]
## ----svydesign----------------------------------------------------------------
design <- survey::svydesign(
ids = ~1,
data = api_resp,
strata = ~stype,
weights = ~SamplingWeight
)
## ----cal_reg_means------------------------------------------------------------
register_pop_means <- list(
total = list(sch.wide_bin = mean(api_pop$sch.wide_bin == "1")),
by_domain = list(
stype = tapply(
api_pop$sch.wide_bin, api_pop$stype,
function(x) mean(x == "1")
),
awards_bin = tapply(
api_pop$sch.wide_bin, api_pop$awards_bin,
function(x) mean(x == "1")
)
)
)
## ----calib_totals-------------------------------------------------------------
## --- Define the calibration formula ---
calibration_formula <- ~ stype + growth_bin + api00_bin + ell_bin + comp.imp_bin + hsg_bin + api00_bin:stype + hsg_bin:stype + comp.imp_bin:stype + api00_bin:growth_bin
## --- Calculate population totals with a single terms object built on the POPULATION ---
pop_totals <- generate_population_totals(
population_df = api_pop,
calibration_formula
)
## ----assess_vec---------------------------------------------------------------
result_diagnostics <- assess_aux_vector(
design = design,
df = api_resp,
already_calibrated = FALSE,
calibration_formula = calibration_formula,
calibration_pop_totals = pop_totals$population_totals,
register_vars = c("sch.wide_bin"),
register_pop_means = register_pop_means,
survey_vars = c("enroll", "full_bin"),
domain_vars = c("stype", "awards_bin"),
diagnostics = c(
"weight_variation",
"register_diagnostics",
"survey_diagnostics"
),
verbose = FALSE
)
## --- Display output ---
result_diagnostics
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.