We present csSampling, an R package for estimation of Bayesian models for data collected from complex survey samples. csSampling combines functionality from the probabilistic programming language Stan (via the rstan and brms R packages) and the survey R package. Under the proposed approach, the user creates a survey weighted model in brms or provides custom weighted model via rstan. Survey design information is provided via svydesign function of the survey package. The cs_sampling function of csSampling estimates the weighted stan model and provides an asymptotic covariance correction for model mis-specification due to using survey sampling weights as plug in values in the likelihood. This is often known as a design effect which is the ratio between the variance from complex survey sample and a simple random sample.
knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
library(csSampling) library(rstan) library(brms) library(survey) rstan_options(auto_write = TRUE)
Survey Design Information
data(api) apistrat$wt <- apistrat$pw /mean(apistrat$pw) dstrat <- svydesign(id=~1,strata=~stype, weights=~wt, data=apistrat, fpc=~fpc)
Define and Run the Stan Model Via BRMS Wrapper
set.seed(12345) model_formula <- formula("api00|weights(wt) ~ ell + meals + mobility") mod.brms <- cs_sampling_brms(svydes = dstrat, brmsmod = brmsformula(model_formula, center = F), data = apistrat, family = gaussian())
Plot the Results
plot(mod.brms)
plot(mod.brms) plot(mod.brms, varnames = paste("b", 1:4, sep =""))
data(api) apiclus1$wt <- apiclus1$pw/mean(apiclus1$pw) dclus1<-svydesign(id=~dnum, weights=~wt, data=apiclus1, fpc=~fpc) rclus1<-as.svrepdesign(dclus1) svymean(~stype, rclus1)
Construct a Stan mode
mod_dm <- stan_model(model_code = load_wt_multi_model())
Prepare data for Stan modelling
#Set the Data for Stan y <- as.factor(rclus1$variables$stype) yM <- model.matrix(~y -1) n <- dim(yM)[1] K <- dim(yM)[2] alpha<-rep(1,K) weights <- rclus1$pweights data_stan<-list("y"=yM,"alpha"=alpha,"K"=K, "n" = n, "weights" = weights) ctrl_stan<-list("chains"=1,"iter"=2000,"warmup"=1000,"thin"=1)
Run the Stan mode
mod1 <- cs_sampling(svydes = rclus1, mod_stan = mod_dm, data_stan = data_stan, ctrl_stan = ctrl_stan, rep_design = T)
Plot the results
plot(mod1) plot(mod1, varnames = paste("theta",1:3, sep = ""))
Compare Summary Statistics
#hybrid variance adjustedM2 cbind(colMeans(mod1$adjusted_parms[,4:6]), sqrt(diag(cov(mod1$adjusted_parms[,4:6])))) #Replication svymean(~stype, rclus1)
dat14 <- csSampling::dat14 #subset to adults dat14 <- dat14[as.numeric(dat14$CATAG6) > 1,] #normalize weights to sum to sample size dat14$WTS <- dat14$ANALWT_C/mean(dat14$ANALWT_C) svy14 <- svydesign(ids = ~VEREP, strata = ~VESTR, weights = ~WTS, data = dat14, nest = T)
Construct a Stan model
model_formula <- formula("CIGMON|weights(WTS) ~ AMDEY2_U") stancode <- make_stancode(brmsformula(model_formula,center = F), data = dat14, family = bernoulli(), save_model = "brms_wt_log.stan") mod_brms <- stan_model('brms_wt_log.stan')
Prepare data for Stan modelling
data_brms <- make_standata(brmsformula(CIGMON|weights(WTS) ~ AMDEY2_U,center = FALSE), data = dat14, family = bernoulli())
Run the Stan model
set.seed(12345) #set seed to fix output for comparison mod.brms <- cs_sampling(svydes = svy14, mod_stan = mod_brms, data_stan = data_brms)
Plot the results
plot(mod.brms, varnames = paste("b", 1:2, sep =""))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.