tests/testthat/test-extract_svygpg.R

context("Gpg output survey.design and svyrep.design")

skip_on_cran()

library(laeken)
library(survey)


data(api)
dstrat1<-convey_prep(svydesign(id=~1,data=apistrat))
dstrat1 <- update( dstrat1 , sex = ifelse( both == 'Yes' , 'male' , 'female' ) )
test_that("svygpg works on unweighted designs",{
	svygpg(~api00, design=dstrat1, ~sex)
})


data(ses) ; names( ses ) <- gsub( "size" , "size_" , tolower( names( ses ) ) )
des_ses <- svydesign(id=~1, weights=~weights, data=ses)
des_ses <- convey_prep(des_ses)
des_ses_rep <- as.svrepdesign(des_ses, type = "bootstrap")
des_ses_rep <- convey_prep(des_ses_rep)


a1 <- svygpg(~earningshour, des_ses, ~sex)

a2 <- svyby(~earningshour, by = ~location, design = des_ses, FUN = svygpg, sex=~sex, deff = FALSE)

b1 <- svygpg(~earningshour, design = des_ses_rep, ~sex)

b2 <- svyby(~earningshour, by = ~location, design = des_ses_rep,
  FUN = svygpg, sex=~sex, deff = FALSE)

cv_dif1 <- abs(cv(a1)-cv(b1))
pos_est <- coef(a2)> 0
cv_diff2 <- max(abs(SE(a2)[pos_est]/coef(a2)[pos_est]-SE(b2)[pos_est]/coef(b2)[pos_est]))

test_that("output svygpg",{
  expect_is(coef(a1),"numeric")
  expect_is(coef(a2), "numeric")
  expect_is(coef(b1),"numeric")
  expect_is(coef(b2),"numeric")
  expect_equal(coef(a1), coef(b1))
  expect_equal(coef(a2), coef(b2))
  expect_lte(cv_dif1, coef(a1) * 0.05 ) # the difference between CVs should be less than 5% of the coefficient, otherwise manually set it
  expect_lte(cv_diff2, max( coef(a2) ) * 0.1 ) # the difference between CVs should be less than 10% of the maximum coefficient, otherwise manually set it
  expect_is(SE(a1),"matrix")
  expect_is(SE(a2), "numeric")
  expect_is(SE(b1),"numeric")
  expect_is(SE(b2),"numeric")
  expect_lte(confint(a1)[1], coef(a1))
  expect_gte(confint(a1)[2],coef(a1))
  expect_lte(confint(b1)[,1], coef(b1))
  expect_gte(confint(b1)[2], coef(b1))
  expect_equal(sum(confint(a2)[,1]<= coef(a2)),length(coef(a2)))
  expect_equal(sum(confint(a2)[,2]>= coef(a2)),length(coef(a2)))
  expect_equal(sum(confint(b2)[,1]<= coef(b2)),length(coef(b2)))
  expect_equal(sum(confint(b2)[,2]>= coef(b2)),length(coef(b2)))
})



	# database-backed design
	library(RSQLite)
	library(DBI)
	dbfile <- tempfile()
	conn <- dbConnect( RSQLite::SQLite() , dbfile )
	dbWriteTable( conn , 'ses' , ses )
	dbd_ses <- svydesign(id=~1, weights=~weights, data="ses", dbname=dbfile, dbtype="SQLite")
	dbd_ses <- convey_prep( dbd_ses )

	c1 <-  svygpg(formula=~earningshour, design=dbd_ses, sex= ~sex)
	c2 <- svyby(~earningshour, by = ~location, design = dbd_ses, FUN = svygpg, sex=~sex, deff = FALSE)

	dbRemoveTable( conn , 'ses' )

	test_that("database svygpg",{
	  expect_equal(coef(a1), coef(c1))
	  expect_equal(coef(a2), coef(c2))
	  expect_equal(SE(a1), SE(c1))
	  expect_equal(SE(a2), SE(c2))
	})






# compare subsetted objects to svyby objects
sub_des <- svygpg( ~earningshour , sex=~sex, design = subset( des_ses , location == "AT1" ) )
sby_des <- svyby( ~earningshour, sex=~sex, by = ~location, design = des_ses, FUN = svygpg)
sub_rep <- svygpg( ~earningshour, sex=~sex , design = subset( des_ses_rep , location == "AT1" ) )
sby_rep <- svyby( ~earningshour, sex=~sex, by = ~location, design = des_ses_rep, FUN = svygpg)

test_that("subsets equal svyby",{
	expect_equal(as.numeric(coef(sub_des)), as.numeric(coef(sby_des))[1])
	expect_equal(as.numeric(coef(sub_rep)), as.numeric(coef(sby_rep))[1])
	expect_equal(as.numeric(SE(sub_des)), as.numeric(SE(sby_des))[1])
	expect_equal(as.numeric(SE(sub_rep)), as.numeric(SE(sby_rep))[1])

	# coefficients should match across svydesign & svrepdesign
	expect_equal(as.numeric(coef(sub_des)), as.numeric(coef(sby_rep))[1])

	# coefficients of variation should be within five percent
	cv_dif <- abs(cv(sub_des)-cv(sby_rep)[1])
	expect_lte(cv_dif,5)
})




# second run of database-backed designs #

	# database-backed design
	library(RSQLite)
	library(DBI)
	dbfile <- tempfile()
	conn <- dbConnect( RSQLite::SQLite() , dbfile )
	dbWriteTable( conn , 'ses' , ses )

	dbd_ses <- svydesign(id=~1, weights=~weights, data="ses", dbname=dbfile, dbtype="SQLite")

	dbd_ses <- convey_prep( dbd_ses )

	# create a hacky database-backed svrepdesign object
	# mirroring des_ses_rep
	dbd_ses_rep <-
		svrepdesign(
			weights = ~ weights,
			repweights = des_ses_rep$repweights ,
			scale = des_ses_rep$scale ,
			rscales = des_ses_rep$rscales ,
			type = "bootstrap" ,
			data = "ses" ,
			dbtype="SQLite" ,
			dbname = dbfile ,
			combined.weights = FALSE
		)

	dbd_ses_rep <- convey_prep( dbd_ses_rep )

	sub_dbd <- svygpg( ~earningshour, sex=~sex , design = subset( dbd_ses , location == "AT1" ) )
	sby_dbd <- svyby( ~earningshour, sex=~sex, by = ~location, design = dbd_ses, FUN = svygpg)
	sub_dbr <- svygpg( ~earningshour, sex=~sex , design = subset( dbd_ses_rep , location == "AT1" ) )
	sby_dbr <- svyby( ~earningshour, sex=~sex, by = ~location, design = dbd_ses_rep, FUN = svygpg)

	dbRemoveTable( conn , 'ses' )


	# compare database-backed designs to non-database-backed designs
	test_that("dbi subsets equal non-dbi subsets",{
		expect_equal(coef(sub_des), coef(sub_dbd))
		expect_equal(coef(sub_rep), coef(sub_dbr))
		expect_equal(SE(sub_des), SE(sub_dbd))
		expect_equal(SE(sub_rep), SE(sub_dbr))
	})


	# compare database-backed subsetted objects to database-backed svyby objects
	test_that("dbi subsets equal dbi svyby",{
		expect_equal(as.numeric(coef(sub_dbd)), as.numeric(coef(sby_dbd))[1])
		expect_equal(as.numeric(coef(sub_dbr)), as.numeric(coef(sby_dbr))[1])
		expect_equal(as.numeric(SE(sub_dbd)), as.numeric(SE(sby_dbd))[1])
		expect_equal(as.numeric(SE(sub_dbr)), as.numeric(SE(sby_dbr))[1])
	})

Try the convey package in your browser

Any scripts or data that you put into this service are public.

convey documentation built on April 28, 2022, 1:06 a.m.