Nothing
# test setup ####
library(data.table)
# simple cases ----
distinct_ids_ref_1 <- structure(
data.table(distinct_ids = 10L),
class = c("sdc_distinct_ids", "data.table", "data.frame")
)
dominance_ref_1 <- structure(
data.table(value_share = 0.7413301),
class = c("sdc_dominance", "data.table", "data.frame")
)
descriptives_ref_1 <- structure(
list(
options = sdcLog:::list_options(),
settings = sdcLog:::list_arguments("id", "val_1", zero_as_NA = FALSE),
distinct_ids = distinct_ids_ref_1,
dominance = dominance_ref_1
),
class = c("sdc_descriptives", "list")
)
test_that("sdc_descriptives works in simple cases", {
data("sdc_descriptives_DT")
expect_equal(
sdc_descriptives(sdc_descriptives_DT, "id", "val_1"),
descriptives_ref_1,
ignore_attr = TRUE
)
expect_equal(
sdc_descriptives(as.data.frame(sdc_descriptives_DT), "id", "val_1"),
descriptives_ref_1,
ignore_attr = TRUE
)
if (requireNamespace("tibble", quietly = TRUE)) {
expect_equal(
sdc_descriptives(tibble::as_tibble(sdc_descriptives_DT), "id", "val_1"),
descriptives_ref_1,
ignore_attr = TRUE
)
}
})
# medium cases ----
test_that("sdc_descriptives works in medium cases", {
data("sdc_descriptives_DT")
distinct_ids_ref_2 <- structure(
data.table(
sector = factor(c("S1", "S2")),
distinct_ids = 4L:5L,
key = "sector"
),
class = c("sdc_distinct_ids", "data.table", "data.frame")
)
dominance_ref_2 <- structure(
data.table(
sector = factor(c("S2", "S1")),
value_share = c(0.84650585, 0.60400376)
),
class = c("sdc_dominance", "data.table", "data.frame")
)
descriptives_ref_2 <- structure(
list(
options = list_options(),
settings = list_arguments(id_var = "id", val_var = "val_1", by = "sector", zero_as_NA = FALSE),
distinct_ids = distinct_ids_ref_2,
dominance = dominance_ref_2
),
class = c("sdc_descriptives", "list")
)
expect_warning(
expect_equal(
sdc_descriptives(sdc_descriptives_DT[!(id == "A")], "id", "val_1", by = "sector"),
descriptives_ref_2,
ignore_attr = TRUE
),
paste(
cli::style_bold("DISCLOSURE PROBLEM:"), "Not enough distinct entities."
),
fixed = TRUE
)
})
# complex cases ----
test_that("sdc_descriptives works in complex cases", {
data("sdc_descriptives_DT")
distinct_ids_ref_3 <- structure(
data.table(
sector = factor(c("S1", "S1", "S2", "S2")),
year = rep(2019L:2020L, 2L),
distinct_ids = c(4L, rep(5L, 3L)),
key = c("sector", "year")
),
class = c("sdc_distinct_ids", "data.table", "data.frame")
)
dominance_ref_3 <- structure(
data.table(
sector = factor(c("S2", "S1", "S1", "S2")),
year = c(rep(2020L, 2L), rep(2019L, 2L)),
value_share = c(0.90563139, 0.87768517, 0.6815010551185098, 0.5506964573607419)
),
class = c("sdc_dominance", "data.table", "data.frame")
)
descriptives_ref_3 <- structure(
list(
options = sdcLog:::list_options(),
settings = list_arguments(id_var = "id", val_var = "val_1", by = c("sector", "year"), zero_as_NA = FALSE),
distinct_ids = distinct_ids_ref_3,
dominance = dominance_ref_3
),
class = c("sdc_descriptives", "list")
)
warnings <- capture_warnings(
expect_equal(
sdc_descriptives(sdc_descriptives_DT, "id", "val_1", by = c("sector", "year")),
descriptives_ref_3,
ignore_attr = TRUE
)
)
expect_match(
warnings,
"DISCLOSURE PROBLEM:.*(Not enough distinct entities|Dominant entities)\\."
)
})
# no val_var ----
test_that("sdc_descriptives() works without 'val_var'", {
data("sdc_descriptives_DT")
options(sdc.info_level = 2)
descriptives_ref_4 <- structure(
list(
options = sdcLog:::list_options(),
settings = sdcLog:::list_arguments("id"),
distinct_ids = distinct_ids_ref_1,
dominance = structure(
data.table(value_share = NA_real_),
class = c("sdc_dominance", "data.table", "data.frame")
)
),
class = c("sdc_descriptives", "list")
)
expect_equal(
sdc_descriptives(sdc_descriptives_DT, "id"),
descriptives_ref_4,
ignore_attr = TRUE
)
})
# handling zeros ----
test_that("zeros are handles correctly", {
data("sdc_descriptives_DT")
sdc_descriptives_DT_copy <- sdc_descriptives_DT
descriptives_ref_5 <- structure(
list(
options = sdcLog:::list_options(),
settings = sdcLog:::list_arguments(
"id", "val_2", zero_as_NA = TRUE
),
distinct_ids = distinct_ids_ref_1,
dominance = structure(
data.table(value_share = 0.35958732),
class = c("sdc_dominance", "data.table", "data.frame")
)
),
class = c("sdc_descriptives", "list")
)
expect_message(
expect_equal(
sdc_descriptives(sdc_descriptives_DT, "id", "val_2"),
descriptives_ref_5,
ignore_attr = TRUE
),
"A share of 0.2 of 'val_var' are zero. These will be treated as 'NA'.",
fixed = TRUE
)
expect_silent(
expect_equal(
sdc_descriptives(sdc_descriptives_DT, "id", "val_2", zero_as_NA = TRUE),
descriptives_ref_5,
ignore_attr = TRUE
))
# assert that input data remains unchanged
expect_identical(sdc_descriptives_DT, sdc_descriptives_DT_copy)
})
# errors ----
test_that("sdc_descriptives() returns appropriate error", {
data("sdc_descriptives_DT")
# throw error if data is not a data.frame
expect_error(
sdc_descriptives(wrong_test_dt, "id", "val_1"),
"object 'wrong_test_dt' not found",
fixed = TRUE
)
expect_error(
sdc_descriptives("wrong_test_dt", "id", "val_1"),
"Assertion on 'data' failed: Must be of type 'data.frame', not 'character'.",
fixed = TRUE
)
# throw error if specified variables are not in data
expect_error(
sdc_descriptives(sdc_descriptives_DT, "wrong_id", "val_1"),
"'id_var'.*subset"
)
expect_error(
sdc_descriptives(sdc_descriptives_DT, "id", "wrong_val"),
"'val_var'.*subset"
)
expect_error(
sdc_descriptives(sdc_descriptives_DT, "id", "val_1", "wrong_by"),
"'by'.*subset"
)
# error for elements unquoted
expect_error(
sdc_descriptives(sdc_descriptives_DT, id, "val_1"),
"object 'id' not found"
)
expect_error(
sdc_descriptives(sdc_descriptives_DT, "id", val_1),
"object 'val_1' not found"
)
# error for missing arguments
expect_error(
sdc_descriptives(id_var = "id", val_var = "val_1"),
'argument "data" is missing, with no default',
fixed = TRUE
)
expect_silent(
sdc_descriptives(sdc_descriptives_DT, "id")
)
options(sdc.id_var = NULL)
expect_error(
sdc_descriptives(sdc_descriptives_DT, val_var = "val_1"),
"Assertion on 'id_var' failed: Must be of type 'string', not 'NULL'.",
fixed = TRUE
)
})
# missing ID's ----
test_that("missing ID's are handled correctly (simple case)", {
data("sdc_descriptives_DT")
options(sdc.info_level = NULL)
distinct_ids_ref_6 <- structure(
data.table(distinct_ids = 8L),
class = c("sdc_distinct_ids", "data.table", "data.frame")
)
dominance_ref_6 <- structure(
data.table(value_share = 0.054822016086913394),
class = c("sdc_dominance", "data.table", "data.frame")
)
descriptives_ref_6 <- structure(
list(
options = sdcLog:::list_options(),
settings = sdcLog:::list_arguments("id_na", "val_1", zero_as_NA = FALSE),
distinct_ids = distinct_ids_ref_6,
dominance = dominance_ref_6
),
class = c("sdc_descriptives", "list")
)
expect_equal(
sdc_descriptives(sdc_descriptives_DT[seq(2, 20, 2)], "id_na", "val_1"),
descriptives_ref_6,
ignore_attr = TRUE
)
distinct_ids_ref_7 <- structure(
data.table(distinct_ids = 9L),
class = c("sdc_distinct_ids", "data.table", "data.frame")
)
dominance_ref_7 <- structure(
data.table(value_share = 0.8714379273090671),
class = c("sdc_dominance", "data.table", "data.frame")
)
descriptives_ref_7 <- structure(
list(
options = sdcLog:::list_options(),
settings = sdcLog:::list_arguments("id_na", "val_1", zero_as_NA = FALSE),
distinct_ids = distinct_ids_ref_7,
dominance = dominance_ref_7
),
class = c("sdc_descriptives", "list")
)
DT_filled <- copy(sdc_descriptives_DT)
DT_filled[is.na(id_na), id_na := "X"]
expect_warning(
expect_equal(
sdc_descriptives(DT_filled[seq(2, 20, 2)], "id_na", "val_1"),
descriptives_ref_7,
ignore_attr = TRUE
),
paste(cli::style_bold("DISCLOSURE PROBLEM:"), "Dominant entities."),
fixed = TRUE
)
})
test_that("missing ID's are handled correctly (by case)", {
data("sdc_descriptives_DT")
options(sdc.info_level = 2)
distinct_ids_ref_8 <- structure(
data.table(
sector = factor(c("S1", "S2")),
distinct_ids = rep(4L, 2L)
),
class = c("sdc_distinct_ids", "data.table", "data.frame")
)
dominance_ref_8 <- structure(
data.table(
sector = factor(c("S1", "S2")),
value_share = c(0.12990197140157855, 0.08510201158264474)
),
class = c("sdc_dominance", "data.table", "data.frame")
)
descriptives_ref_8 <- structure(
list(
options = sdcLog:::list_options(),
settings = sdcLog:::list_arguments(
"id_na", "val_1", by = "sector", zero_as_NA = FALSE
),
distinct_ids = distinct_ids_ref_8,
dominance = dominance_ref_8
),
class = c("sdc_descriptives", "list")
)
expect_warning(
expect_equal(
sdc_descriptives(
sdc_descriptives_DT[seq(2, 20, 2)],
"id_na",
"val_1",
by = "sector"
),
descriptives_ref_8,
ignore_attr = TRUE
),
paste(
cli::style_bold("DISCLOSURE PROBLEM:"), "Not enough distinct entities."
),
fixed = TRUE
)
distinct_ids_ref_9 <- structure(
data.table(
sector = factor(c("S1", "S2")),
distinct_ids = rep(5L, 2L)
),
class = c("sdc_distinct_ids", "data.table", "data.frame")
)
dominance_ref_9 <- structure(
data.table(
sector = factor(c("S2", "S1")),
value_share = c(0.905631390, 0.877685169)
),
class = c("sdc_dominance", "data.table", "data.frame")
)
descriptives_ref_9 <- structure(
list(
options = sdcLog:::list_options(),
settings = sdcLog:::list_arguments(
"id_na", "val_1", by = "sector", zero_as_NA = FALSE
),
distinct_ids = distinct_ids_ref_9,
dominance = dominance_ref_9
),
class = c("sdc_descriptives", "list")
)
DT_filled <- copy(sdc_descriptives_DT)
DT_filled[is.na(id_na), id_na := "X"]
expect_warning(
expect_equal(
sdc_descriptives(DT_filled[seq(2, 20, 2)], "id_na", "val_1", by = "sector"),
descriptives_ref_9,
ignore_attr = TRUE
),
paste(cli::style_bold("DISCLOSURE PROBLEM:"), "Dominant entities."),
fixed = TRUE
)
})
# no id's ----
test_that("all ID's NA are handled correctly", {
data("sdc_descriptives_DT")
sdc_descriptives_DT[, id_all_na := NA_character_]
distinct_ids_ref_10 <- structure(
data.table(
sector = factor(character(), levels = c("S1", "S2")),
distinct_ids = integer()
),
class = c("sdc_distinct_ids", "data.table", "data.frame")
)
dominance_ref_10 <- structure(
data.table(
sector = factor(character(), levels = c("S1", "S2")),
value_share = double()
),
class = c("sdc_dominance", "data.table", "data.frame")
)
descriptives_ref_10 <- structure(
list(
options = sdcLog:::list_options(),
settings = sdcLog:::list_arguments(
"id_all_na", "val_1", by = "sector", zero_as_NA = FALSE
),
distinct_ids = distinct_ids_ref_10,
dominance = dominance_ref_10
),
class = c("sdc_descriptives", "list")
)
expect_equal(
sdc_descriptives(sdc_descriptives_DT, "id_all_na", "val_1", by = "sector"),
descriptives_ref_10,
ignore_attr = TRUE
)
})
# some id's ----
test_that("argument fill_id_var works", {
data("sdc_descriptives_DT")
id_na <- sdc_descriptives_DT[["id_na"]]
expect_warning(
sdc_descriptives(sdc_descriptives_DT, "id_na", val_var = "val_1", by = "sector"),
paste(
cli::style_bold("DISCLOSURE PROBLEM:"), "Not enough distinct entities."
),
fixed = TRUE
)
expect_silent(
sdc_descriptives(sdc_descriptives_DT, "id_na", val_var = "val_1", by = "sector", fill_id_var = TRUE)
)
expect_identical(sdc_descriptives_DT[["id_na"]], id_na)
})
test_that("#77 is fixed", {
options(sdc.info_level = 2)
df <- data.table(id = "A", val = 1:4)
descriptives_ref_issue_77_a <- structure(
list(
options = sdcLog:::list_options(),
settings = sdcLog:::list_arguments(
"id", "val", zero_as_NA = FALSE
),
distinct_ids = structure(
class = c("sdc_distinct_ids", "data.table", "data.frame"),
data.table(distinct_ids = 1L)
),
dominance = structure(
class = c("sdc_dominance", "data.table", "data.frame"),
data.table(value_share = 1L)
)
),
class = c("sdc_descriptives", "list")
)
warnings <- capture_warnings(
expect_equal(
sdc_descriptives(df, "id", val_var = "val"),
descriptives_ref_issue_77_a,
ignore_attr = TRUE
)
)
expect_match(
warnings,
"DISCLOSURE PROBLEM:.*(Not enough distinct entities|Dominant entities)\\."
)
df[, id := NA_character_]
descriptives_ref_issue_77_b <- structure(
list(
options = sdcLog:::list_options(),
settings = sdcLog:::list_arguments(
"id", "val", zero_as_NA = FALSE
),
distinct_ids = structure(
class = c("sdc_distinct_ids", "data.table", "data.frame"),
data.table(distinct_ids = 0L)
),
dominance = structure(
class = c("sdc_dominance", "data.table", "data.frame"),
data.table(value_share = double())
)
),
class = c("sdc_descriptives", "list")
)
expect_equal(
sdc_descriptives(df, "id", val_var = "val"),
descriptives_ref_issue_77_b,
ignore_attr = TRUE
)
})
test_that("#83 is fixed", {
df <- data.table(
id = c("N", NA, NA, NA, "N", "N"),
by_var = factor(c("U", "U", "N", "M", "M", "N"), levels = c("U", "M", "N")),
val = c(7, 2, 500, 3000, 4, 1)
)
expect_warning(
{res <- sdc_descriptives(df, "id", "val", "by_var")},
paste(
cli::style_bold("DISCLOSURE PROBLEM:"), "Not enough distinct entities."
),
fixed = TRUE
)
expect_equal(
as.data.table(res[[4]]),
data.table(
by_var = factor(c("U", "N", "M"), levels = c("U", "M", "N")),
value_share = c(0.555555556, 0.001996008, 0.001331558)
)
)
})
test_that("preventing val_var = 'val_var' works", {
df <- data.table(id_var = "A", val_var = 1L)
expect_error(
sdc_descriptives(df, id_var = "id_var", val_var = "val_var"),
"Assertion on 'val_var' failed: Must not equal \"val_var\".",
fixed = TRUE
)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.