tests/multistage-rcpp.R

## temporarily disable while it's being fixed
## q("no")

## 
## Check that multistage() and multistage_rcpp() give same results
## for different options.
##
library(survey)

# Check for a simple random sample ----

  data('api', package = 'survey')
  api_srs_design <- svydesign(
    data = apisrs,
    ids = ~ 1,
    weights = ~ 1
  )
  
  x <- as.matrix(api_srs_design$variables[,c('api00','api99')] / api_srs_design$prob)
  
  base_r_result <- survey:::multistage(x = x,
                                       clusters = api_srs_design$cluster,
                                       stratas = api_srs_design$strata,
                                       nPSUs = api_srs_design$fpc$sampsize, fpcs = api_srs_design$fpc$popsize,
                                       lonely.psu=getOption("survey.lonely.psu"),
                                       one.stage=TRUE, stage = 1, cal = NULL)
  
  rcpp_result <- survey:::multistage_rcpp(x = x,
                                          clusters = api_srs_design$cluster,
                                          stratas = api_srs_design$strata,
                                          nPSUs = api_srs_design$fpc$sampsize, fpcs = api_srs_design$fpc$popsize,
                                          lonely.psu=getOption("survey.lonely.psu"),
                                          one.stage=TRUE, stage = 1, cal = NULL)
  
  if (!isTRUE(all.equal(base_r_result, rcpp_result))) {
    stop("Differences between `multistage()` and `multistage_rcpp()` for SRS")
  }
  
# Check for a stratified simple random sample ----
  
  apistrat_design <- svydesign(
    data = apistrat,
    id =~ 1,
    strata =~ stype,
    weights =~ pw,
    fpc =~ fpc
  )
  
  x <- as.matrix(apistrat_design$variables[,c('api00','api99')] / apistrat_design$prob)
  
  base_r_result <- survey:::multistage(x = x,
                                       clusters = apistrat_design$cluster,
                                       stratas = apistrat_design$strata,
                                       nPSUs = apistrat_design$fpc$sampsize, fpcs = apistrat_design$fpc$popsize,
                                       lonely.psu=getOption("survey.lonely.psu"),
                                       one.stage=TRUE, stage = 1, cal = NULL)
  
  rcpp_result <- survey:::multistage_rcpp(x = x,
                                          clusters = apistrat_design$cluster,
                                          stratas = apistrat_design$strata,
                                          nPSUs = apistrat_design$fpc$sampsize, fpcs = apistrat_design$fpc$popsize,
                                          lonely.psu=getOption("survey.lonely.psu"),
                                          one.stage=TRUE, stage = 1, cal = NULL)
  
  if (!isTRUE(all.equal(base_r_result, rcpp_result))) {
    stop("Differences between `multistage()` and `multistage_rcpp()` for stratified sample")
  }
  
  ##_ Check whether expected true zeroes are actually computed as zeroes ----
  
  x <- as.matrix(model.matrix(~ -1 + stype, data = apistrat_design$variables) / (sum(apistrat_design$prob)/apistrat_design$prob))
  
  base_r_result <- survey:::multistage(x = x,
                                       clusters = apistrat_design$cluster,
                                       stratas = apistrat_design$strata,
                                       nPSUs = apistrat_design$fpc$sampsize, fpcs = apistrat_design$fpc$popsize,
                                       lonely.psu=getOption("survey.lonely.psu"),
                                       one.stage=TRUE, stage = 1, cal = NULL)
  
  rcpp_result <- survey:::multistage_rcpp(x = x,
                                          clusters = apistrat_design$cluster,
                                          stratas = apistrat_design$strata,
                                          nPSUs = apistrat_design$fpc$sampsize, fpcs = apistrat_design$fpc$popsize,
                                          lonely.psu=getOption("survey.lonely.psu"),
                                          one.stage=TRUE, stage = 1, cal = NULL)
  
  if (!all(rcpp_result == 0) | !all(diag(rcpp_result) >= 0)) {
    stop("Computations which should equal zero do not.")
  }

# Create a stratified, multistage design ----
  
  data(mu284)
  
  ## Create three strata, the third of which has only one PSU
  mu284_stratified <- rbind(
    transform(mu284, stratum = 1,
              y2 = y1 + c(0, 2, 1, 0, -1, 2, 0, 3, 0, 0, -1, -1, 1, -1, 0)),
    transform(mu284, stratum = 2,
              y2 = y1 + c(-1, 0, -1, 0, -1, 0, 0, 1, 2, -1, 1, 1, 0, 1, 0)),
    transform(mu284[1,], stratum = 3,
              y2 = y1 + 2)
  )
  
  ## Create domain variables which yield a lonely PSU or a lonely SSU
  mu284_stratified[['DOMAIN_W_LONELY_PSU']] <- mu284_stratified$stratum == 1 | (mu284_stratified$stratum == 2 & mu284_stratified$id1 == 19)
  mu284_stratified[['DOMAIN_W_LONELY_SSU']] <- mu284_stratified$stratum == 1 | (mu284_stratified$stratum == 2 & mu284_stratified$id1 == 19 & mu284_stratified$id2 == 1)
  
  ## Create a survey design object with no lonely PSUs
  dmu284_strat <- svydesign(data = subset(mu284_stratified, stratum != 3),
                            strata = ~ stratum, nest = TRUE,
                            id = ~ id1 + id2, fpc = ~ n1 + n2)
  ## Create a survey design with one lonely PSU
  dmu284_strat_w_lonely <- svydesign(data = mu284_stratified,
                                     strata = ~ stratum, nest = TRUE,
                                     id = ~ id1 + id2, fpc = ~ n1 + n2)
  ## Create subsetted design objects, subsetted to domain with a lonely PSU or SSU
  dmu284_strat_domain_lonely_psu <- subset(dmu284_strat, DOMAIN_W_LONELY_PSU)
  dmu284_strat_domain_lonely_ssu <- subset(dmu284_strat, DOMAIN_W_LONELY_SSU)
  
  
# Check same results for different values of 'lonely PSU' options ----
  
  lonely_psu_options <- c(certainty = 'certainty',
                          remove    = 'remove',
                          average   = 'average',
                          adjust    = 'adjust')
  one_stage_options <- c(TRUE, FALSE)
  
  design_lonely_psu_comparisons <- expand.grid('survey.lonely.psu' = lonely_psu_options,
                                               'one.stage' = one_stage_options,
                                               stringsAsFactors = FALSE)
  design_lonely_psu_comparisons[['results_match']] <- NA
  
  ##_ Check for lonely PSUs caused by design ----
  for (i in seq_len(nrow(design_lonely_psu_comparisons))) {
    survey.lonely.psu <- design_lonely_psu_comparisons[['survey.lonely.psu']][i]
    one.stage <- design_lonely_psu_comparisons[['one.stage']][i]
    options('survey.lonely.psu' = survey.lonely.psu)
    
    x <- as.matrix(dmu284_strat_w_lonely$variables[,c('y1','y2')] / dmu284_strat_w_lonely$prob)
    
    base_r_result <- survey:::multistage(x = x,
                                         clusters = dmu284_strat_w_lonely$cluster,
                                         stratas = dmu284_strat_w_lonely$strata,
                                         nPSUs = dmu284_strat_w_lonely$fpc$sampsize, fpcs = dmu284_strat_w_lonely$fpc$popsize,
                                         lonely.psu=getOption("survey.lonely.psu"),
                                         one.stage=one.stage, stage = 1, cal = NULL)
    
    rcpp_result <- survey:::multistage_rcpp(x = x,
                                            clusters = dmu284_strat_w_lonely$cluster,
                                            stratas = dmu284_strat_w_lonely$strata,
                                            nPSUs = dmu284_strat_w_lonely$fpc$sampsize, fpcs = dmu284_strat_w_lonely$fpc$popsize,
                                            lonely.psu=getOption("survey.lonely.psu"),
                                            one.stage=one.stage, stage = 1, cal = NULL)
    
      design_lonely_psu_comparisons[['results_match']][i] <- isTRUE(all.equal(base_r_result, rcpp_result))
  }
  
  print(design_lonely_psu_comparisons)
  if (!all(design_lonely_psu_comparisons$results_match)) {
    stop("Results for design lonely PSUs differ between base R and Rcpp implementations.")
  }

  ##_ Check for domain lonely PSUs caused by subsetting ----
  
  domain_lonely_psu_comparisons <- expand.grid('survey.lonely.psu' = lonely_psu_options,
                                               'survey.adjust.domain.lonely' = c(FALSE, TRUE),
                                               'one.stage' = one_stage_options,
                                               stringsAsFactors = FALSE)
  domain_lonely_psu_comparisons[['results_match']] <- NA
  
  for (i in seq_len(nrow(domain_lonely_psu_comparisons))) {
      
    options('survey.lonely.psu' = domain_lonely_psu_comparisons[['survey.lonely.psu']][i])
    options('survey.adjust.domain.lonely' = domain_lonely_psu_comparisons[['survey.adjust.domain.lonely']][i])
    one.stage <- domain_lonely_psu_comparisons[['one.stage']][i]
    
    
    x <- as.matrix(dmu284_strat_domain_lonely_psu$variables[,c('y1','y2')] / dmu284_strat_domain_lonely_psu$prob)
    suppressWarnings({
      base_r_result <- survey:::multistage(x = x,
                                           clusters = dmu284_strat_domain_lonely_psu$cluster,
                                           stratas = dmu284_strat_domain_lonely_psu$strata,
                                           nPSUs = dmu284_strat_domain_lonely_psu$fpc$sampsize,
                                           fpcs = dmu284_strat_domain_lonely_psu$fpc$popsize,
                                           lonely.psu=getOption("survey.lonely.psu"),
                                           one.stage=one.stage, stage = 1, cal = NULL)
    })
    
    rcpp_result <- survey:::multistage_rcpp(x = x,
                                            clusters = dmu284_strat_domain_lonely_psu$cluster,
                                            stratas = dmu284_strat_domain_lonely_psu$strata,
                                            nPSUs = dmu284_strat_domain_lonely_psu$fpc$sampsize,
                                            fpcs = dmu284_strat_domain_lonely_psu$fpc$popsize,
                                            lonely.psu=getOption("survey.lonely.psu"),
                                            one.stage=one.stage, stage = 1, cal = NULL)
    
    domain_lonely_psu_comparisons[['results_match']][i] <- isTRUE(all.equal(base_r_result, rcpp_result))
  }
  
  print(domain_lonely_psu_comparisons)
  if (!all(domain_lonely_psu_comparisons$results_match)) {
    stop("Results for domain lonely PSUs differ between base R and Rcpp implementations.")
  }
  
# Checks for edge cases ----
  
  ## Zero records ----
  
  zero_row_design <- dmu284_strat |> subset(id1 == "Nonexistent")
  
  zero_row_result <- survey:::multistage_rcpp(
    x = as.matrix(zero_row_design$variables[,c('y1', 'y2')]),
    clusters = zero_row_design$cluster,
    stratas = zero_row_design$strata,
    nPSUs = zero_row_design$fpc$sampsize, 
    fpcs = zero_row_design$fpc$popsize,
    lonely.psu=getOption("survey.lonely.psu"),
    one.stage=one.stage, stage = 1, cal = NULL
  )
  
  if (any(is.na(zero_row_result)) || any(zero_row_result != 0)) {
    stop("`multistage_rcpp()` should return zeros for zero-row inputs.")
  }
  
bschneidr/fastsurvey documentation built on March 13, 2024, 11:12 a.m.