testthat::skip_on_cran()
context("Testing Frequency Tables")
set.seed(120, sample.kind = "Reject")
dat <- ck_create_testdata()
seed <- ck_create_seed_from_hash(dat)
test_that("check seed from hash generation", {
expect_equal(seed, 1474937517)
})
dim_sex <- hier_create(root = "Total", nodes = c("male", "female"))
dim_age <- hier_create(root = "Total", paste0("age_group", 1:6))
dims <- list(sex = dim_sex, age = dim_age)
test_that("dims-hash is ok", {
expect_identical(class(dims), "list")
expect_identical(nrow(dim_sex), 3L)
expect_identical(max(dim_sex$level), 2)
expect_identical(nrow(dim_age), 7L)
expect_identical(max(dim_age$level), 2)
})
## test generation of destatis rkeys
rk1 <- ck_generate_rkeys(dat = dat, nr_digits = 5)
rk2 <- ck_generate_rkeys(dat = dat, nr_digits = 5)
test_that("check rkey generation and seed is ok", {
expect_identical(rk1, rk2)
expect_identical(round(mean(rk1), digits = 3), 0.501)
})
dat$rec_key <- rk1
test_that("checking dimension and structure of generated testdata is ok", {
expect_true(is.data.table(dat))
expect_identical(round(mean(dat$sampling_weight), digits = 3), 59.719)
expect_identical(round(mean(dat$household_weights), digits = 3), 21.834)
expect_identical(nrow(dat), 4580L)
expect_identical(ncol(dat), 16L)
expect_identical(sum(dat$sex == "male"), 2296L)
})
## perturbation parameters for count variables
set.seed(120, sample.kind = "Reject")
suppressMessages(para <- ptable::create_cnt_ptable(
D = 5,
V = 3,
js = 2,
pstay = 0.5,
optim = 1,
mono = TRUE,
create = FALSE))
params_cnts <- ck_params_cnts(ptab = ptable::create_ptable(params = para))
ptab <- params_cnts$params$ptable
test_that("ck_params_cnts() is ok", {
expect_is(params_cnts, "ck_params")
expect_identical(params_cnts$type, "cnts")
expect_identical(dim(ptab), c(66L, 7L))
expect_identical(max(ptab$i), 8)
})
# we need to load a custom data set because otherwise, the
# digest based checks don't work
test_that("checking perturbation parameters for counts", {
expect_is(params_cnts, "ck_params")
expect_equal(params_cnts$type, "cnts")
dt <- params_cnts$params$ptable
expect_is(dt, "data.table")
expect_identical(dim(dt), c(66L, 7L))
expect_identical(round(mean(dt$p), digits = 3), 0.136)
expect_identical(round(mean(dt$lb), digits = 3), 0.562)
expect_identical(round(mean(dt$ub), digits = 3), 0.698)
expect_identical(round(mean(dt$v), digits = 3), 1.045)
})
countvars <- NULL
w <- "sampling_weight"
rkey <- "rec_key"
tab <- ck_setup(
x = dat,
rkey = "rec_key",
dims = dims,
w = w,
countvars = countvars
)
# set perturbation parameters for all variables
tab$params_cnts_set(val = params_cnts, v = NULL)
expect_message(tab$perturb("total"))
expect_message(tab$perturb("total"), "Variable 'total' was already perturbed!")
res_freqtab <- tab$freqtab("total")
test_that("check ck_define_table() with already existing rec-keys", {
expect_is(tab, "cellkey_obj")
expect_identical(dim(res_freqtab), c(21L, 7L))
expect_identical(res_freqtab$uwc[3], 1143)
expect_identical(res_freqtab$puwc[3], 1147)
expect_identical(round(mean(res_freqtab$pwc), digits = 3), 52096.24)
expect_identical(round(mean(res_freqtab$puwc), digits = 3), 872.333)
})
dat$rec_key <- NULL
tab <- ck_setup(
x = dat,
rkey = 7,
dims = dims,
w = w,
countvars = countvars)
expect_error(tab$perturb("total"))
tab$params_cnts_set(val = params_cnts, v = "total")
expect_message(tab$perturb("total"), "Count variable 'total' was perturbed.")
expect_message(tab$perturb("total"), "Variable 'total' was already perturbed!")
test_that("ck_define_table() with new record keys is ok", {
expect_is(tab, "cellkey_obj")
dt <- tab$freqtab("total")
expect_identical(dt$uwc[3], 1143)
expect_identical(dt$puwc[3], 1147)
expect_identical(round(mean(dt$pwc), digits = 3), 52096.24)
expect_identical(round(mean(dt$puwc), digits = 3), 872.333)
})
freqtab <- tab$freqtab("total")
test_that("weighted version of ck_perturb() is ok", {
expect_identical(freqtab$uwc[3], 1143)
expect_identical(freqtab$puwc[3], 1147)
expect_identical(round(mean(freqtab$pwc), digits = 3), 52096.24)
expect_identical(round(mean(freqtab$puwc), digits = 3), 872.333)
dt <- tab$mod_cnts()
expect_identical(dim(dt), c(21L, 6L))
expect_identical(round(mean(dt$ckey), digits = 3), 0.456)
expect_identical(round(mean(dt$pert), digits = 3), -0.048)
expect_identical(range(dt$row_nr), c(40, 65))
})
mm <- tab$measures_cnts("Total")
test_that("ck_cnt_measures() [exclude_zeros = TRUE] is ok", {
expect_identical(range(as.numeric(mm$overview$noise)), c(1, 7))
expect_identical(range(as.numeric(mm$overview$cnt)), c(1, 9))
expect_identical(round(mean(as.numeric(mm$overview$pct)), digits = 3), 0.143)
expect_identical(range(as.numeric(mm$measures$d1)), c(0, 4))
expect_identical(range(as.numeric(mm$measures$d2)), c(0, 0.429))
expect_identical(range(as.numeric(mm$measures$d3)), c(0, 0.517))
expect_identical(range(mm$cumdistr_d1$cnt), c(9L, 21L))
expect_identical(round(range(mm$cumdistr_d1$pct), digits = 3), c(0.429, 1))
expect_identical(range(mm$cumdistr_d2$cnt), c(19L, 21L))
expect_identical(round(range(mm$cumdistr_d2$pct), digits = 3), c(0.905, 1))
expect_identical(range(mm$cumdistr_d3$cnt), c(12L, 21L))
expect_identical(round(range(mm$cumdistr_d3$pct), digits = 3), c(0.571, 1))
expect_identical(mm$false_nonzero, 0L)
expect_identical(mm$false_zero, 0L)
expect_identical(mm$exclude_zeros, TRUE)
})
mm <- tab$measures_cnts("Total", exclude_zeros = FALSE)
test_that("ck_cnt_measures() [exclude_zeros = FALSE] is ok", {
expect_identical(range(as.numeric(mm$overview$noise)), c(1, 7))
expect_identical(range(as.numeric(mm$overview$cnt)), c(1, 9))
expect_identical(round(mean(as.numeric(mm$overview$pct)), digits = 3), 0.143)
expect_identical(range(as.numeric(mm$measures$d1)), c(0, 4))
expect_identical(range(as.numeric(mm$measures$d2)), c(0, 0.429))
expect_identical(range(as.numeric(mm$measures$d3)), c(0, 0.517))
expect_identical(range(mm$cumdistr_d1$cnt), c(9L, 21L))
expect_identical(round(range(mm$cumdistr_d1$pct), digits = 3), c(0.429, 1))
expect_identical(range(mm$cumdistr_d2$cnt), c(19L, 21L))
expect_identical(round(range(mm$cumdistr_d2$pct), digits = 3), c(0.905, 1))
expect_identical(range(mm$cumdistr_d3$cnt), c(12L, 21L))
expect_identical(round(range(mm$cumdistr_d3$pct), digits = 3), c(0.571, 1))
expect_identical(mm$false_nonzero, 0L)
expect_identical(mm$false_zero, 0L)
expect_identical(mm$exclude_zeros, FALSE)
})
# no weights
tab <- ck_setup(
x = dat,
rkey = 7,
dims = dims,
w = NULL,
countvars = countvars)
# set params
tab$params_cnts_set(params_cnts, v = NULL)
tab$perturb("total")
freqtab <- tab$freqtab("total")
test_that("checking unweighted version of perturb()", {
expect_identical(dim(freqtab), c(21L, 7L))
expect_identical(freqtab$uwc[3], 1143)
expect_identical(freqtab$puwc[3], 1147)
expect_identical(freqtab$pwc, freqtab$puwc)
expect_identical(round(mean(freqtab$pwc), digits = 3), 872.333)
dt <- tab$mod_cnts()
expect_identical(dim(dt), c(21L, 6L))
expect_identical(round(mean(dt$ckey), digits = 3), 0.456)
expect_identical(round(mean(dt$pert), digits = 3), -0.048)
expect_identical(range(dt$row_nr), c(40, 65))
})
context("Testing multiple countvars")
dat[, cnt_males := ifelse(sex == "male", 1, 0)]
dat[, cnt_highincome := ifelse(income >= 9000, 1, 0)]
countvars <- c("cnt_males", "cnt_highincome")
tab <- ck_setup(
x = dat,
rkey = 7,
dims = dims,
w = w,
countvars = countvars)
# set params
tab$params_cnts_set(params_cnts, v = NULL)
tab$perturb(c("total", "cnt_males", "cnt_highincome"))
test_that("check tabulation of cnt_males is ok", {
dt <- tab$freqtab("cnt_males")
expect_identical(dt$uwc[3], 571)
expect_identical(dt$puwc[3], 571)
expect_identical(round(mean(dt$pwc), digits = 3), 25757.65)
expect_identical(round(mean(dt$puwc), digits = 3), 437.048)
expect_identical(round(range(dt$puwc), digits = 3), c(0, 2297))
expect_identical(round(range(dt$pwc), digits = 3), c(0, 135387.941))
mm <- tab$measures_cnts("cnt_males")
expect_identical(round(range(mm$overview$pct), digits = 3), c(0.095, 0.810))
expect_identical(round(range(mm$measures$d1), digits = 3), c(0, 4))
expect_identical(round(range(mm$measures$d2), digits = 3), c(0, 0.048))
expect_identical(round(range(mm$measures$d3), digits = 3), c(0, 0.221))
})
test_that("check tabulation of cnt_highincome is ok", {
dt <- tab$freqtab("cnt_highincome")
expect_identical(dt$uwc[3], 123)
expect_identical(dt$puwc[3], 123)
expect_identical(round(mean(dt$pwc), digits = 3), 5063.68)
expect_identical(round(mean(dt$puwc), digits = 3), 84.286)
expect_identical(round(range(dt$puwc), digits = 3), c(0, 444))
expect_identical(round(range(dt$pwc), digits = 3), c(0, 26671.928))
mm <- tab$measures_cnts("cnt_highincome")
expect_identical(round(range(mm$overview$pct), digits = 3), c(0.048, 0.667))
expect_identical(round(range(mm$measures$d1), digits = 3), c(0, 3))
expect_identical(round(range(mm$measures$d2), digits = 3), c(0, 0.158))
expect_identical(round(range(mm$measures$d3), digits = 3), c(0, 0.359))
})
test_that("check tabulation of multiple count variables is ok", {
dt <- tab$freqtab(c("total", "cnt_males", "cnt_highincome"))
expect_identical(dim(dt), c(63L, 7L))
expect_identical(dt$uwc[3], 1143)
expect_identical(dt$puwc[3], 1148)
expect_identical(round(mean(dt$pwc), digits = 3), 27640.21)
expect_identical(round(mean(dt$puwc), digits = 3), 464.587)
expect_identical(round(range(dt$puwc), digits = 3), c(0, 4582))
expect_identical(round(range(dt$pwc), digits = 3), c(0, 273633.438))
mm <- tab$measures_cnts("cnt_highincome")
expect_identical(round(range(mm$overview$pct), digits = 3), c(0.048, 0.667))
expect_identical(round(range(mm$measures$d1), digits = 3), c(0, 3))
expect_identical(round(range(mm$measures$d2), digits = 3), c(0, 0.158))
expect_identical(round(range(mm$measures$d3), digits = 3), c(0, 0.359))
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.