tests/testthat/test-smoothSurvey.R

##
## These tests cover all @examples of smoothSurvey()
##

test_that("smoothSurvey: Area-level model works", {

   skip_on_cran()
   library(INLA)
   # make devtools::check() happy with single process
   inla.setOption( num.threads = 1 )

   data(DemoData2)
   data(DemoMap2)
   fit0 <- smoothSurvey(data=DemoData2,  
    Amat=DemoMap2$Amat, response.type="binary", 
    responseVar="tobacco.use", strataVar="strata", 
    weightVar="weights", regionVar="region", 
    clusterVar = "~clustid+id", CI = 0.95)
   expect_equal(class(fit0), "SUMMERmodel.svy")
   expect_equal(dim(fit0$direct), c(8, 6))
   expect_equal(dim(fit0$smooth), c(8, 11))

   # if only direct estimates without smoothing is of interest
   fit0.dir <- smoothSurvey(data=DemoData2,  
      Amat=DemoMap2$Amat, response.type="binary", 
      responseVar="tobacco.use", strataVar="strata", 
      weightVar="weights", regionVar="region", 
      clusterVar = "~clustid+id", CI = 0.95, smooth = FALSE)

   expect_equal(class(fit0.dir), "SUMMERmodel.svy")
   expect_equal(dim(fit0.dir$direct), c(8, 6))
   expect_equal(sum(is.na(fit0.dir$smooth$mean)), 8)


   # posterior draws can be returned with save.draws = TRUE
   fit0.draws <- smoothSurvey(data=DemoData2,  
      Amat=DemoMap2$Amat, response.type="binary", 
      responseVar="tobacco.use", strataVar="strata", 
      weightVar="weights", regionVar="region", 
      clusterVar = "~clustid+id", CI = 0.95, 
      save.draws = TRUE, nsim = 100)
   # notice the posterior draws are on the latent scale
   expect_equal(dim(fit0.draws$draws.est), c(8, 102))
   # check no NAs except in the time column
   expect_equal(sum(is.na(fit0.draws$draws.est[, -2])), 0)

   # Example with region-level covariates
   Xmat <- aggregate(age~region, data = DemoData2, 
                FUN = function(x) mean(x))
   fit1 <- smoothSurvey(data=DemoData2,  
      Amat=DemoMap2$Amat, response.type="binary", 
      X = Xmat,
      responseVar="tobacco.use", strataVar="strata", 
      weightVar="weights", regionVar="region", 
      clusterVar = "~clustid+id", CI = 0.95)
   expect_equal(class(fit1), "SUMMERmodel.svy")
   expect_equal(dim(fit1$direct), c(8, 6+1))
   expect_equal(dim(fit1$smooth), c(8, 11))
   expect_equal(sum(is.na(fit1$smooth$mean)), 0)




   # Example with using only direct estimates as input instead of the full data
   direct <- fit0$direct[, c("region", "direct.est", "direct.var")]
   fit2 <- smoothSurvey(data=NULL, direct.est = direct, 
                  Amat=DemoMap2$Amat, regionVar="region",
                  responseVar="direct.est", direct.est.var = "direct.var", 
                  response.type = "binary")

   expect_equal(class(fit2), "SUMMERmodel.svy")
   # check they are the same as using raw input
   expect_equal(fit2$smooth$mean, fit0$smooth$mean, tolerance = 0.01)
   expect_equal(fit2$smooth$var, fit0$smooth$var, tolerance = 0.01)

 

   # Example with using only direct estimates as input, 
   #   and after transformation into a Gaussian smoothing model
   # Notice: the output are on the same scale as the input 
   #   and in this case, the logit estimates.    
   direct.logit <- fit0$direct[, c("region", "direct.logit.est", "direct.logit.var")]
   fit3 <- smoothSurvey(data=NULL, direct.est = direct.logit, 
             Amat=DemoMap2$Amat, regionVar="region",
             responseVar="direct.logit.est", direct.est.var = "direct.logit.var",
             response.type = "gaussian")
   expect_equal(fit3$smooth$mean, fit0$smooth$logit.mean, tolerance = 0.01)
   expect_equal(fit3$smooth$var, fit0$smooth$logit.var, tolerance = 0.01)

   # Example with non-spatial smoothing using IID random effects
   fit4 <- smoothSurvey(data=DemoData2, response.type="binary", 
     responseVar="tobacco.use", strataVar="strata", 
     weightVar="weights", regionVar="region", 
     clusterVar = "~clustid+id", CI = 0.95)
   expect_equal(class(fit4), "SUMMERmodel.svy")
   expect_equal(dim(fit4$direct), c(8, 6))
   expect_equal(dim(fit4$smooth), c(8, 11))

})



test_that("smoothSurvey: Area-level model works with missing data", {

   skip_on_cran()
   library(INLA)
   # make devtools::check() happy with single process
   inla.setOption( num.threads = 1 )

   # Example with missing regions in the raw input
   # When no Amat is given, the missing area is completely gone
   DemoData2.sub <- subset(DemoData2, region != "central")
   fit.without.central <- smoothSurvey(data=DemoData2.sub,  
                       Amat=NULL, response.type="binary", 
                       responseVar="tobacco.use", strataVar="strata", 
                       weightVar="weights", regionVar="region", 
                       clusterVar = "~clustid+id", CI = 0.95)
   expect_equal(class(fit.without.central), "SUMMERmodel.svy")
   expect_equal(dim(fit.without.central$direct), c(7, 6))
   expect_equal(dim(fit.without.central$smooth), c(7, 11))


   # Example with missing regions in the raw input
   # When no Amat is given, but region.list is provided
   # the missing area is filled in
   fit.with.central <- smoothSurvey(data=DemoData2.sub,  
                       Amat=NULL, region.list = unique(DemoData2$region),
                       response.type="binary", 
                       responseVar="tobacco.use", strataVar="strata", 
                       weightVar="weights", regionVar="region", 
                       clusterVar = "~clustid+id", CI = 0.95)
   expect_equal(class(fit.with.central), "SUMMERmodel.svy")
   expect_equal(dim(fit.with.central$direct), c(8, 6))
   expect_equal(is.na(fit.with.central$direct$direct.est[fit.with.central$direct$region == "central"]), TRUE)
   expect_equal(dim(fit.with.central$smooth), c(8, 11))

   # Example with missing regions in the raw input
   # When Amat is given,
   fit.with.central.spatial <- smoothSurvey(data=DemoData2.sub,  
                       Amat=DemoMap2$Amat,
                       response.type="binary", 
                       responseVar="tobacco.use", strataVar="strata", 
                       weightVar="weights", regionVar="region", 
                       clusterVar = "~clustid+id", CI = 0.95)
   expect_equal(class(fit.with.central.spatial), "SUMMERmodel.svy")
   expect_equal(dim(fit.with.central.spatial$direct), c(8, 6))
   expect_equal(is.na(fit.with.central.spatial$direct$direct.est[fit.with.central.spatial$direct$region == "central"]), TRUE)
   expect_equal(dim(fit.with.central.spatial$smooth), c(8, 11))

})



test_that("smoothSurvey: Area-level model works with customized formula", {

   skip_on_cran()
   library(INLA)
   # make devtools::check() happy with single process
   inla.setOption( num.threads = 1 )


   # Using the formula argument, further customizations can be added to the 
   #  model fitted. For example, we can fit the Fay-Harriot model with 
   #  IID effect instead of the BYM2 random effect as follows.
   #  The "region.struct" and "hyperpc1" are picked to match internal object 
   #  names. Other object names can be inspected from the source of smoothSurvey.
   fit5 <- smoothSurvey(data=DemoData2,  
        Amat=DemoMap2$Amat, response.type="binary", 
        formula = "f(region.struct, model = 'iid', hyper = hyperpc1)",
        pc.u = 1, pc.alpha = 0.01,
        responseVar="tobacco.use", strataVar="strata", 
        weightVar="weights", regionVar="region", 
        clusterVar = "~clustid+id", CI = 0.95)
   # Check BYM2 is replaced by IID
   expect_equal(class(fit5), "SUMMERmodel.svy")
   expect_equal(fit5$fit$model.random, "IID model")



})

test_that("smoothSurvey: Cluster-level model works", {

   skip_on_cran()
   library(INLA)
   # make devtools::check() happy with single process
   inla.setOption( num.threads = 1 )


   # For unit-level models, we need to create stratification variable within regions
   data <- DemoData2
   data$urbanicity <- "rural"
   data$urbanicity[grep("urban", data$strata)] <- "urban"
   
   Xmat <- aggregate(age~region, data = DemoData2, 
                FUN = function(x) mean(x))
   # Beta-binomial likelihood is used in this model
   fit6 <- smoothSurvey(data=data, 
         Amat=DemoMap2$Amat, response.type="binary", 
         X = Xmat, is.unit.level = TRUE,
         responseVar="tobacco.use", strataVar.within = "urbanicity", 
         regionVar="region", clusterVar = "clustid", CI = 0.95)

   expect_equal(class(fit6), "SUMMERmodel.svy")
   expect_equal(dim(fit6$smooth), c(8*2, 13))


   # We may use aggregated PSU-level counts as input as well
   #    in the case of modeling a binary outcome 
   data.agg <- aggregate(tobacco.use~region + urbanicity + clustid, 
                    data = data, FUN = sum)
   data.agg.total <- aggregate(tobacco.use~region + urbanicity + clustid, 
                    data = data, FUN = length)
   colnames(data.agg.total)[4] <- "total"
   data.agg <- merge(data.agg, data.agg.total)

   fit7 <- smoothSurvey(data=data.agg, 
         Amat=DemoMap2$Amat, response.type="binary", 
         X = Xmat, is.unit.level = TRUE, is.agg = TRUE,
         responseVar = "tobacco.use", strataVar.within = "urbanicity", 
         totalVar = "total", regionVar="region", clusterVar = "clustid", CI = 0.95)

   expect_equal(fit6$smooth, fit7$smooth, tolerance = 0.01)


})



   
test_that("smoothSurvey: Cluster-level model works with Gaussian likelihood", {

   skip_on_cran()
   skip_on_ci()
   library(INLA)
   # make devtools::check() happy with single process
   inla.setOption( num.threads = 1 )


  # For unit-level models, we need to create stratification variable within regions
   data <- DemoData2
   data$urbanicity <- "rural"
   data$urbanicity[grep("urban", data$strata)] <- "urban"
  
   ##
   ## 4. Unit-level model with continuous response  
   ##    (or nested error models)

   # The unit-level model assumes for each of the i-th unit,
   #    Y_{i} ~ intercept + region_effect + IID_i
   #    where IID_i is the error term specific to i-th unit

   # When more than one level of cluster sampling is carried out, 
   #   they are ignored here. Only the input unit is considered.
   #   So here we do not need to specify clusterVar any more. 
   fit9 <- smoothSurvey(data= data, 
            Amat=DemoMap2$Amat, response.type="gaussian", 
            is.unit.level = TRUE, responseVar="age", strataVar.within = NULL,
            regionVar="region", clusterVar = NULL, CI = 0.95)
   expect_equal(class(fit9), "SUMMERmodel.svy")
   expect_equal(dim(fit9$smooth), c(8, 13))

   # To compare, we may also model PSU-level responses. As an illustration, 
   data.median <- aggregate(age~region + urbanicity + clustid, 
                    data = data, FUN = median)

   fit10 <- smoothSurvey(data= data.median, 
      Amat=DemoMap2$Amat, response.type="gaussian", 
      is.unit.level = TRUE, responseVar="age", strataVar.within = NULL,
      regionVar="region", clusterVar = "clustid", CI = 0.95)
   # Notice these two should not be the same, but should not be very far either
   expect_equal(fit9$smooth, fit10$smooth, tolerance = 1)




   # For unit-level models, we need to create stratification variable within regions
   data <- DemoData2
   data$urbanicity <- "rural"
   data$urbanicity[grep("urban", data$strata)] <- "urban"


   fit11 <- smoothSurvey(data = data, 
         Amat = DemoMap2$Amat, response.type = "gaussian", 
         is.unit.level = TRUE, responseVar="age", strataVar.within = "urbanicity",
         regionVar = "region", clusterVar = NULL, CI = 0.95)  
   expect_equal(class(fit11), "SUMMERmodel.svy")
   expect_equal(dim(fit11$smooth), c(8*2, 13))


   # Notice the usual output is now stratified within each region
   # The aggregated estimates require strata proportions for each region
   # For illustration, we set strata population proportions below
   prop <- data.frame(region = unique(data$region), 
                          urban = 0.3, 
                          rural = 0.7)
   fit12 <- smoothSurvey(data=data, 
               Amat=DemoMap2$Amat, response.type="gaussian", 
               is.unit.level = TRUE, responseVar="age", strataVar.within = "urbanicity",
               regionVar="region", clusterVar = NULL, CI = 0.95,
               weight.strata = prop)  
   expect_equal(class(fit12), "SUMMERmodel.svy")
   expect_equal(dim(fit12$smooth), c(8*2, 13))
   expect_equal(dim(fit12$smooth.overall), c(8, 13 - 1))


   # aggregated outcome
   head(fit12$smooth.overall)

   # Compare aggregated outcome with direct aggregating posterior means. 
   # There could be some differences due to the approximating nature, so only
   # check that the correlation is greater than 0.9.
   est.urb <- subset(fit11$smooth, strata == "urban")
   est.rural <- subset(fit11$smooth, strata == "rural")
   est.mean.post <- est.urb$mean * 0.3 + est.rural$mean * 0.7
   cor <- cor(fit12$smooth.overall$mean, est.mean.post)
   expect_equal(cor, 1, tolerance = 0.1)


   ##
   ## 6. Unit-level model with continuous response and unit-level covariate 
   ## 

   # For area-level prediction, area-level covariate mean needs to be  
   #   specified in X argument. And unit-level covariate names are specified
   #   in X.unit argument.

   set.seed(1)
   sim <- data.frame(region = rep(c(1, 2, 3, 4), 1000),
                 X1 = rnorm(4000), X2 = rnorm(4000))
   Xmean <- aggregate(.~region, data = sim, FUN = sum)
   sim$Y <- rnorm(4000, mean = sim$X1 + 0.3 * sim$X2 + sim$region)
   samp <- sim[sample(1:4000, 20), ]
   fit.sim <- smoothSurvey(data=samp , 
                X.unit = c("X1", "X2"),
                X = Xmean, Amat=NULL, response.type="gaussian", 
                is.unit.level = TRUE, responseVar="Y", regionVar = "region",  
                pc.u = 1, pc.alpha = 0.01, CI = 0.95) 
   expect_equal(class(fit.sim), "SUMMERmodel.svy")
   expect_equal(dim(fit.sim$smooth), c(4, 13))

})

Try the SUMMER package in your browser

Any scripts or data that you put into this service are public.

SUMMER documentation built on April 4, 2025, 3:11 a.m.