intro-to-auxvecLASSO.R
In auxvecLASSO: LASSO Auxiliary Variable Selection and Auxiliary Vector Diagnostics

## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup--------------------------------------------------------------------
# Load necessary libraries
library(auxvecLASSO)
library(survey)
library(sampling)
library(dplyr)

# Set seed for reproducibility
set.seed(123)

## ----create_pop_file----------------------------------------------------------
# Load the population data
data(api)

# Load the population data file and add binary variables
api_pop <- apipop
api_pop$api00_bin <- as.factor(ifelse(api_pop$api00 > 650, 1, 0))
api_pop$growth_bin <- as.factor(ifelse(api_pop$growth > 25, 1, 0))
api_pop$meals_bin <- as.factor(ifelse(api_pop$meals > 40, 1, 0))
api_pop$ell_bin <- as.factor(ifelse(api_pop$ell > 15, 1, 0))
api_pop$hsg_bin <- as.factor(ifelse(api_pop$hsg > 20, 1, 0))
api_pop$full_bin <- as.factor(ifelse(api_pop$full > 90, 1, 0))
api_pop$sch.wide_bin <- as.factor(ifelse(api_pop$sch.wide == "Yes", 1, 0))
api_pop$awards_bin <- as.factor(ifelse(api_pop$awards == "Yes", 1, 0))
api_pop$comp.imp_bin <- as.factor(ifelse(api_pop$comp.imp == "Yes", 1, 0))
api_pop$stype <- as.factor(api_pop$stype)

# Keep only relevant variables
api_pop <- api_pop |>
  dplyr::select("cds", "stype", "enroll", ends_with("_bin"))

## ----create_sample------------------------------------------------------------
strata_sample <- sampling::strata(
  api_pop, # The population dataset
  stratanames = c("stype"), # Stratification variable
  size = c(150, 200, 175), # Desired sample sizes per stratum
  method = "srswor" # Sampling without replacement
)

api_sample <- getdata(api_pop, strata_sample)
api_sample$SamplingWeight <- 1 / api_sample$Prob

## ----dataimport---------------------------------------------------------------
# Artificial logistic regression coefficients (for both main effects and interactions)
coefficients <- c(
  "api00_bin" = 0.5,
  "growth_bin" = -0.3,
  "ell_bin" = -0.6,
  "interaction1" = 0.9, # Interaction term api00_bin:growth_bin
  "interaction2" = 1.2 # Interaction term api00_bin:ell_bin
)

# Create interaction terms for logistic model
api_sample$interaction1 <- as.numeric(api_sample$api00_bin) * as.numeric(api_sample$growth_bin)
api_sample$interaction2 <- as.numeric(api_sample$api00_bin) * as.numeric(api_sample$ell_bin)

# Calculate the logit (log-odds) based on the artificial coefficients and interaction terms
logit <- -1.2 +
  coefficients["api00_bin"] * as.numeric(api_sample$api00_bin) +
  coefficients["growth_bin"] * as.numeric(api_sample$growth_bin) +
  coefficients["ell_bin"] * as.numeric(api_sample$ell_bin) +
  coefficients["interaction1"] * api_sample$interaction1 +
  coefficients["interaction2"] * api_sample$interaction2

# Compute the logistic probabilities
api_sample$response_prob <- 1 / (1 + exp(-logit)) # Logistic function

# Simulate the response variable (1 for response, 0 for non-response)
api_sample$response <- as.factor(rbinom(n = nrow(api_sample), size = 1, prob = api_sample$response_prob))

# --- Check the summary of the sample ---
summary(api_sample)

## ----lassomodeling------------------------------------------------------------
# Apply LASSO for selecting auxiliary variables
lasso_result <- select_auxiliary_variables_lasso_cv(
  df = api_sample,
  outcome_vars = c("response", "enroll"),
  auxiliary_vars = c(
    "api00_bin", "growth_bin", "comp.imp_bin", "meals_bin",
    "meals_bin", "ell_bin", "hsg_bin", "full_bin", "stype"
  ),
  must_have_vars = "stype", # Include the domain variable (stype) in the model
  check_twoway_int = TRUE, # Include two-way interactions
  nfolds = 5, # Use 5-fold cross-validation
  verbose = FALSE, # Don't print progress messages
  standardize = TRUE, # Standardize the predictors
  return_models = FALSE, # Don't return the models, only the selection results
  parallel = FALSE # Run without parallel processing
)

# Display the results
lasso_result

## ----create_resp-set----------------------------------------------------------
api_resp <- api_sample[api_sample$response == 1, ]

## ----svydesign----------------------------------------------------------------
design <- survey::svydesign(
  ids     = ~1,
  data    = api_resp,
  strata  = ~stype,
  weights = ~SamplingWeight
)

## ----cal_reg_means------------------------------------------------------------
register_pop_means <- list(
  total = list(sch.wide_bin = mean(api_pop$sch.wide_bin == "1")),
  by_domain = list(
    stype = tapply(
      api_pop$sch.wide_bin, api_pop$stype,
      function(x) mean(x == "1")
    ),
    awards_bin = tapply(
      api_pop$sch.wide_bin, api_pop$awards_bin,
      function(x) mean(x == "1")
    )
  )
)

## ----calib_totals-------------------------------------------------------------
## --- Define the calibration formula ---
calibration_formula <- ~ stype + growth_bin + api00_bin + ell_bin + comp.imp_bin + hsg_bin + api00_bin:stype + hsg_bin:stype + comp.imp_bin:stype + api00_bin:growth_bin

## --- Calculate population totals with a single terms object built on the POPULATION ---
pop_totals <- generate_population_totals(
  population_df = api_pop,
  calibration_formula
)

## ----assess_vec---------------------------------------------------------------
result_diagnostics <- assess_aux_vector(
  design = design,
  df = api_resp,
  already_calibrated = FALSE,
  calibration_formula = calibration_formula,
  calibration_pop_totals = pop_totals$population_totals,
  register_vars = c("sch.wide_bin"),
  register_pop_means = register_pop_means,
  survey_vars = c("enroll", "full_bin"),
  domain_vars = c("stype", "awards_bin"),
  diagnostics = c(
    "weight_variation",
    "register_diagnostics",
    "survey_diagnostics"
  ),
  verbose = FALSE
)

## --- Display output ---
result_diagnostics

Any scripts or data that you put into this service are public.

auxvecLASSO documentation built on Aug. 28, 2025, 9:09 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

auxvecLASSO
LASSO Auxiliary Variable Selection and Auxiliary Vector Diagnostics

inst/doc/intro-to-auxvecLASSO.R
In auxvecLASSO: LASSO Auxiliary Variable Selection and Auxiliary Vector Diagnostics

Try the auxvecLASSO package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

auxvecLASSO LASSO Auxiliary Variable Selection and Auxiliary Vector Diagnostics

inst/doc/intro-to-auxvecLASSO.R In auxvecLASSO: LASSO Auxiliary Variable Selection and Auxiliary Vector Diagnostics

Try the auxvecLASSO package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

auxvecLASSO
LASSO Auxiliary Variable Selection and Auxiliary Vector Diagnostics

inst/doc/intro-to-auxvecLASSO.R
In auxvecLASSO: LASSO Auxiliary Variable Selection and Auxiliary Vector Diagnostics