Nothing
library(data.table)
set.seed(1L)
# small test dt
extreme_test_dt <- data.table(
id = LETTERS[1L:10L],
val_1 = 10L:1L,
val_2 = c(100L, 90L, 8L:1L),
val_3 = c(NA_integer_, 9L:1L),
key = "id"
)
# test sdc_min_max ----
# simple case ----
# calculate extreme values for val_var = val_1
extreme_ref_1 <- structure(
list(
options = sdcLog:::list_options(),
settings = sdcLog:::list_arguments("id", "val_1"),
min_max = data.table(
val_var = "val_1",
min = extreme_test_dt[6L:10L, mean(val_1)],
distinct_ids_min = 5L,
max = extreme_test_dt[1L:5L, mean(val_1)],
distinct_ids_max = 5L
)
),
class = c("sdc_min_max", "list")
)
test_that("sdc_min_max() works in simple case", {
expect_identical(
sdc_min_max(as.data.frame(extreme_test_dt, stringsAsFactors = FALSE), "id", "val_1"),
extreme_ref_1
)
})
# problematic case 1 ----
# problem with dominance for 1:5, so 1:9 necessary but would lead to overlap, so
# all extreme values NA
extreme_ref_2 <- structure(
list(
options = sdcLog:::list_options(),
settings = sdcLog:::list_arguments("id", "val_2"),
min_max = data.table(
val_var = "val_2",
min = NA_real_,
distinct_ids_min = NA_integer_,
max = NA_real_,
distinct_ids_max = NA_integer_
)
),
class = c("sdc_min_max", "list")
)
test_that("sdc_min_max() produces no result in case of sd_overlap due to dominance", {
expect_equal(
sdc_min_max(extreme_test_dt, "id", "val_2"),
extreme_ref_2
)
})
# problematic case 2 ----
# problem with NA in (1:5), so 2:6 necessary but would lead to overlap, so all
# extreme values NA
extreme_ref_3 <- structure(
list(
options = sdcLog:::list_options(),
settings = sdcLog:::list_arguments("id", "val_3"),
min_max = data.table(
val_var = "val_3",
min = NA_real_,
distinct_ids_min = NA_integer_,
max = NA_real_,
distinct_ids_max = NA_integer_
)
),
class = c("sdc_min_max", "list")
)
# actual test
test_that("sdc_min_max() produces no result in case of sd_overlap", {
expect_identical(
sdc_min_max(extreme_test_dt, "id", "val_3"),
extreme_ref_3
)
})
# create test dt für by argument ----
n <- 20L
extreme_test_dt_by <- data.table(
id = rep_len(LETTERS[1L:10L], n),
sector = sort(rep_len(paste0("S", 1L:2L), n)),
val_1 = 20L:1L,
val_2 = c(200L, 190L, 18L:1L),
val_3 = c(NA_integer_, 19L:1L),
val_4 = c(20L:7L, rep(NA_integer_, 6L)),
key = "id"
)
setorder(extreme_test_dt_by, -val_1)
# simple by case ----
# setup test extreme values for val_var = val, by = sector
extreme_ref_4 <- structure(
list(
options = sdcLog:::list_options(),
settings = sdcLog:::list_arguments("id", "val_1", "sector"),
min_max = data.table(
val_var = "val_1",
sector = c("S1", "S2"),
min = c(
extreme_test_dt_by[6L:10L, mean(val_1)],
extreme_test_dt_by[16L:20L, mean(val_1)]
),
distinct_ids_min = 5L,
max = c(
extreme_test_dt_by[1L:5L, mean(val_1)],
extreme_test_dt_by[11L:15L, mean(val_1)]
),
distinct_ids_max = 5L,
key = "sector"
)
),
class = c("sdc_min_max", "list")
)
# actual tests
test_that("sdc_min_max() works in simple cases with by", {
# expect_identical(
# sdc_min_max(extreme_test_dt_by, "id", "val_1", by = sector),
# extreme_ref_4
# )
expect_identical(
sdc_min_max(extreme_test_dt_by, "id", "val_1", by = "sector"),
extreme_ref_4
)
# expect_identical(
# sdc_min_max(extreme_test_dt_by, "id", "val_1", by = .(sector)),
# extreme_ref_4
# )
})
# problematic by case 1 ----
# setup test extreme values for val_var = val_2, by = sector
extreme_ref_5 <- structure(
list(
options = sdcLog:::list_options(),
settings = sdcLog:::list_arguments("id", "val_2", "sector"),
min_max = data.table(
val_var = "val_2",
sector = c("S1", "S2"),
min = NA_real_,
distinct_ids_min = NA_integer_,
max = NA_real_,
distinct_ids_max = NA_integer_,
key = "sector"
)
),
class = c("sdc_min_max", "list")
)
# actual tests
test_that("sdc_min_max() gives no result in by cases", {
expect_identical(
sdc_min_max(extreme_test_dt_by, "id", "val_2", by = "sector"),
extreme_ref_5
)
})
# tests for internal functions (removed, since not necessary) ----
# test arguments in sdc_min_max ----
# test that sdc_min_max returns appropriate error
test_that("sdc_min_max() returns appropriate error", {
# throw error if data is not a data.frame
expect_error(
sdc_min_max(wrong_test_dt, "id", "val_1"),
"object 'wrong_test_dt' not found",
fixed = TRUE
)
expect_error(
sdc_min_max("wrong_test_dt", "id", "val_1"),
"Assertion on 'data' failed: Must be of type 'data.frame', not 'character'.",
fixed = TRUE
)
# throw error if specified variables are not in data
expect_error(
sdc_min_max(extreme_test_dt, "wrong_id", "val_1"),
"'id_var'.*subset"
)
expect_error(
sdc_min_max(extreme_test_dt, "id", "wrong_val"),
"'val_var'.*subset"
)
expect_error(
sdc_min_max(extreme_test_dt_by, "id", "val_1", "wrong_by"),
"'by'.*subset"
)
# error for elements unquoted
expect_error(
sdc_min_max(extreme_test_dt, id, "val_1"),
"object 'id' not found"
)
expect_error(
sdc_min_max(extreme_test_dt, "id", val_1),
"object 'val_1' not found"
)
# error for missing arguments
expect_error(
sdc_min_max(id_var = "id", val_var = "val_1"),
'argument "data" is missing, with no default',
fixed = TRUE
)
expect_error(
sdc_min_max(extreme_test_dt, "id"),
'argument "val_var" is missing, with no default',
fixed = TRUE
)
expect_error(
sdc_min_max(extreme_test_dt, val_var = "val_1"),
"Assertion on 'id_var' failed: Must be of type 'string', not 'NULL'.",
fixed = TRUE
)
expect_error(
sdc_min_max(extreme_test_dt, "id_var", val_var = "val_1"),
"'id_var'.*subset",
)
expect_error(
sdc_min_max(extreme_test_dt, "id", val_var = "val_"),
"'val_var'.*subset",
)
expect_error(
sdc_min_max(extreme_test_dt, "id", val_var = "val_1", by = "by_var"),
"'by'.*subset",
)
})
# no infinite loop ----
test_that("sdc_min_max() does not enter an infinite loop", {
expect_identical(
sdc_min_max(extreme_test_dt[1:5], "id", "val_2"),
extreme_ref_2
)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.