set.seed(1234)
data("occup", package = "cat2cat")
data("trans", package = "cat2cat")
occup_old <- occup[occup$year == 2008, ]
occup_new <- occup[occup$year == 2010, ]
testthat::test_that("incorect input", {
testthat::expect_error(cat2cat())
testthat::expect_error(
cat2cat(data = list(
old = occup_old, new = occup_new, cat_var = "code", time_var = "year"
))
)
})
testthat::test_that("no mappings so empty dataset in one direction", {
data <- list(
old = occup_old, new = occup_new, cat_var = "code", time_var = "year"
)
mappings <- list(
trans = data.frame(old = 1, new = 1)[NULL, ],
direction = "backward"
)
testthat::expect_warning(
cat2cat(data = data, mappings = mappings),
"trans table does not cover some levels "
)
res <- suppressWarnings(cat2cat(data = data, mappings = mappings))
testthat::expect_identical(nrow(res$old), 0L)
})
data_simple <- list(
old = occup_old, new = occup_new, cat_var = "code", time_var = "year"
)
mappings_simple_back <- list(trans = trans, direction = "backward")
mappings_simple_for <- list(trans = trans, direction = "forward")
ml <- list(
data = occup_new,
cat_var = "code",
method = c("knn", "rf", "lda"),
features = c("age", "sex", "edu", "exp", "parttime", "salary"),
args = list(k = 10, ntree = 30)
)
testthat::test_that("Simple backward cat2cat with 2 periods and no ml", {
occup_1a <- cat2cat(data = data_simple, mappings = mappings_simple_back)
expect_true(all(occup_1a$old$wei_freq_c2c <= 1 &
occup_1a$old$wei_freq_c2c >= 0))
expect_true(all(occup_1a$old$wei_naive_c2c <= 1 &
occup_1a$old$wei_naive_c2c >= 0))
expect_equal(sum(occup_1a$old$wei_naive_c2c), nrow(occup_old))
expect_equal(sum(occup_1a$old$wei_freq_c2c), nrow(occup_old))
})
testthat::test_that("Simple backward cat2cat with 2 periods and ml", {
occup_2 <- cat2cat(
data = data_simple,
mappings = mappings_simple_back,
ml = ml
)
expect_true(!identical(occup_2$old$wei_freq_c2c, occup_2$old$wei_rf_c2c))
expect_true(!identical(occup_2$old$wei_freq_c2c, occup_2$old$wei_knn_c2c))
expect_true(!identical(occup_2$old$wei_freq_c2c, occup_2$old$wei_lda_c2c))
expect_true(!identical(occup_2$old$wei_freq_c2c, occup_2$old$wei_naive_c2c))
expect_equal(sum(occup_2$old$wei_freq_c2c), nrow(occup_old))
expect_equal(sum(occup_2$old$wei_knn_c2c), nrow(occup_old))
expect_equal(sum(occup_2$old$wei_rf_c2c), nrow(occup_old))
expect_equal(sum(occup_2$old$wei_lda_c2c), nrow(occup_old))
expect_equal(sum(occup_2$old$wei_naive_c2c), nrow(occup_old))
expect_true(
all(occup_2$old$wei_freq_c2c <= 1 & occup_2$old$wei_freq_c2c >= 0)
)
expect_true(
all(occup_2$old$wei_freq_c2c <= 1 & occup_2$old$wei_freq_c2c >= 0)
)
expect_true(
all(occup_2$old$wei_knn_c2c <= 1 & occup_2$old$wei_knn_c2c >= 0)
)
expect_true(
all(occup_2$old$wei_rf_c2c <= 1 & occup_2$old$wei_rf_c2c >= 0)
)
expect_true(
all(occup_2$old$wei_lda_c2c <= 1 & occup_2$old$wei_lda_c2c >= 0)
)
expect_equal(
occup_2$old %>%
cross_c2c(., c("wei_freq_c2c", "wei_knn_c2c"), c(1 / 2, 1 / 2)) %>%
pull("wei_cross_c2c"),
(occup_2$old$wei_knn_c2c + occup_2$old$wei_freq_c2c) / 2
)
})
testthat::test_that(
"Simple backward cat2cat with 2 periods and ml, not all mappings",
{
expect_warning(
cat2cat(
data = data_simple,
mappings = list(trans = head(trans, -50), direction = "backward"),
ml = ml
),
"9321, 9312, 9311, 9331, 9313"
)
}
)
# handling NAs
occup_3a <- cat2cat(
data = list(
old = occup_old,
new = occup_new,
cat_var = "code",
time_var = "year",
multiplier_var = "multiplier"
),
mappings = list(
trans = trans,
direction = "backward"
),
ml = list(
data = occup_new,
cat_var = "code",
method = c("knn"),
features = c("age", "sex", "edu", "exp", "parttime", "salary"),
args = list(k = 10)
)
)
na_row <- occup_old[1, ]
na_row$code <- NA
na_row2 <- occup_new[1, ]
na_row2$code <- NA
occup_3b <- cat2cat(
data = list(
old = rbind(occup_old, na_row),
new = rbind(occup_new, na_row2),
cat_var = "code",
time_var = "year",
multiplier_var = "multiplier"
),
mappings = list(
trans = do.call(rbind, list(trans, c(NA, NA), c(NA, "432190"))),
direction = "backward"
),
ml = list(
data = rbind(occup_new, na_row2),
cat_var = "code",
method = c("knn"),
features = c("age", "sex", "edu", "exp", "parttime", "salary"),
args = list(k = 10)
)
)
expect_identical(nrow(occup_3b$old) - 2L, nrow(occup_3a$old))
expect_true(all(occup_3b$old$wei_freq_c2c <= 1 &
occup_3b$old$wei_freq_c2c >= 0))
expect_true(all(occup_3b$old$wei_knn_c2c <= 1 & occup_3b$old$wei_knn_c2c >= 0))
na_row <- occup_old[1, ]
na_row$code <- "NA"
occup_3c <- cat2cat(
data = list(
old = rbind(occup_old, na_row),
new = occup_new,
cat_var = "code",
time_var = "year",
multiplier_var = "multiplier"
),
mappings = list(trans = rbind(trans, c("NA", "NA")), direction = "backward"),
ml = list(
method = c("knn"),
features = c("age", "sex", "edu", "exp", "parttime", "salary"),
args = list(k = 10)
)
)
expect_identical(nrow(occup_3b$old), nrow(occup_3c$old) + 1L)
expect_true(all(occup_3c$old$wei_freq_c2c <= 1 &
occup_3c$old$wei_freq_c2c >= 0))
expect_true(all(occup_3c$old$wei_knn_c2c <= 1 & occup_3c$old$wei_knn_c2c >= 0))
# forward and not all mappings
testthat::test_that(
"Simple forward cat2cat with 2 periods and no ml, not all mappings",
{
expect_warning(
cat2cat(
data = data_simple,
mappings = mappings_simple_for
),
paste(
"741103, 712604, 732201, 818116, 732301,",
"816003, 741201, 713201, 818115, 815204"
)
)
}
)
testthat::test_that(
"Simple forward cat2cat with 2 periods and no ml - details",
{
occup_4 <- suppressWarnings(cat2cat(
data = data_simple,
mappings = mappings_simple_for
))
expect_equal(
sum(occup_4$new$wei_freq_c2c) +
sum(occup_new$code %in% setdiff(occup_new$code, trans$new)),
nrow(occup_new)
)
expect_true(all(occup_4$new$wei_freq_c2c <= 1 &
occup_4$new$wei_freq_c2c >= 0))
occup_4b <- cat2cat(
data = list(
old = occup_old, new = occup_new, cat_var = "code", time_var = "year"
),
mappings = list(
trans = rbind(
trans,
data.frame(old = "no_cat", new = setdiff(occup_new$code, trans$new))
),
direction = "forward"
)
)
expect_identical(nrow(occup_4b$old), nrow(occup_4b$old))
expect_true(all(occup_4b$old$wei_freq_c2c <= 1 &
occup_4b$old$wei_freq_c2c >= 0))
}
)
# automatic mapping table
# the ean variable is an unique identifier
data("verticals2", package = "cat2cat")
vert_old <- verticals2[verticals2$v_date == "2020-04-01", ]
vert_new <- verticals2[verticals2$v_date == "2020-05-01", ]
## get mapping (transition) table
trans_v <- vert_old %>%
inner_join(vert_new, by = "ean") %>%
select(vertical.x, vertical.y) %>%
distinct()
# as then we merging categories 1 to 1 for this identifiers
testthat::test_that(
"cat2cat - set id_var",
{
verts <- cat2cat(
data = list(
old = vert_old, new = vert_new, id_var = "ean",
cat_var = "vertical", time_var = "v_date"
),
mappings = list(trans = trans_v, direction = "backward")
)
expect_true(all(verts$old$wei_freq_c2c <= 1 & verts$old$wei_freq_c2c >= 0))
expect_true(all(verts$new$wei_freq_c2c <= 1 & verts$new$wei_freq_c2c >= 0))
expect_equal(sum(verts$old$wei_naive_c2c), nrow(vert_old))
expect_equal(sum(verts$old$wei_freq_c2c), nrow(vert_old))
}
)
testthat::test_that(
"set id_var, backward and ml",
{
verts2 <- cat2cat(
data = list(
old = vert_old, new = vert_new, id_var = "ean",
cat_var = "vertical", time_var = "v_date"
),
mappings = list(trans = trans_v, direction = "backward"),
ml = list(
data = vert_new,
cat_var = "vertical",
method = c("knn", "rf", "lda"),
features = c("sales"),
args = list(k = 10, ntree = 30)
)
)
expect_true(!identical(verts2$old$wei_freq_c2c, verts2$old$wei_naive_c2c))
expect_true(!identical(verts2$old$wei_freq_c2c, verts2$old$wei_rf_c2c))
expect_true(!identical(verts2$old$wei_freq_c2c, verts2$old$wei_knn_c2c))
expect_true(!identical(verts2$old$wei_freq_c2c, verts2$old$wei_lda_c2c))
expect_equal(sum(verts2$old$wei_freq_c2c), nrow(vert_old))
expect_equal(sum(verts2$old$wei_knn_c2c), nrow(vert_old))
expect_equal(sum(verts2$old$wei_rf_c2c), nrow(vert_old))
expect_equal(sum(verts2$old$wei_lda_c2c), nrow(vert_old))
expect_true(
(all(verts2$old$wei_knn_c2c <= 1 & verts2$old$wei_knn_c2c >= 0))
)
expect_true(
(all(verts2$old$wei_rf_c2c <= 1 & verts2$old$wei_rf_c2c >= 0))
)
expect_true(
(all(verts2$old$wei_lda_c2c <= 1 & verts2$old$wei_lda_c2c >= 0))
)
expect_true(all(unique(verts2[["old"]][["g_new_c2c"]]) %in% trans_v[[2]]))
}
)
testthat::test_that(
"set id_var, forward and ml",
{
verts3 <- cat2cat(
data = list(
old = vert_old, new = vert_new, id_var = "ean",
cat_var = "vertical", time_var = "v_date"
),
mappings = list(trans = trans_v, direction = "forward"),
ml = list(
data = vert_old,
cat_var = "vertical",
method = c("knn", "rf", "lda"),
features = c("sales"),
args = list(k = 10, ntree = 30)
)
)
expect_true(!identical(verts3$new$wei_freq_c2c, verts3$new$wei_rf_c2c))
expect_true(!identical(verts3$new$wei_freq_c2c, verts3$new$wei_knn_c2c))
expect_true(!identical(verts3$new$wei_freq_c2c, verts3$new$wei_lda_c2c))
expect_equal(sum(verts3$new$wei_freq_c2c), nrow(vert_new))
expect_equal(sum(verts3$new$wei_knn_c2c), nrow(vert_new))
expect_equal(sum(verts3$new$wei_rf_c2c), nrow(vert_new))
expect_equal(sum(verts3$new$wei_lda_c2c), nrow(vert_new))
expect_true(
(all(verts3$new$wei_knn_c2c <= 1 & verts3$new$wei_knn_c2c >= 0))
)
expect_true(
(all(verts3$new$wei_rf_c2c <= 1 & verts3$new$wei_rf_c2c >= 0))
)
expect_true(
(all(verts3$new$wei_lda_c2c <= 1 & verts3$new$wei_lda_c2c >= 0))
)
expect_true(all(unique(verts3[["new"]][["g_new_c2c"]]) %in% trans_v[[1]]))
}
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.