Nothing
if (!identical(Sys.getenv("NOT_CRAN"), "true")) {
return()
}
library("segregation")
context("test_compression")
subset <- schools00[1:50, ]
data.table::setDT(subset)
n_schools <- length(unique(subset$school))
all_neighbors <- unique(subset$school)
all_neighbors <- expand.grid(a = all_neighbors, b = all_neighbors)
res_all <- compress(subset, "race", "school", weight = "n", neighbors = all_neighbors)
test_that("result is the same with no neighbors given", {
res2 <- compress(subset, "race", "school", neighbors = "all", weight = "n")
expect_equal(res_all$iterations, res2$iterations)
})
test_that("compress works", {
# 9 merges
expect_equal(nrow(res_all$iterations), 16)
# M values is declining continously
expect_equal(all(res_all$iterations$M[2:16] < res_all$M$iterations[1:15]), TRUE)
# number of units are correct
expect_equal(res_all$iteration$N_units[[1]], n_schools - 1)
})
test_that("print", {
expect_output(print(res_all), "17 units")
expect_output(print(res_all), "Threshold 99%")
})
test_that("get_crosswalk works", {
expect_error(
get_crosswalk(schools00),
"either n_units or percent has to be given"
)
expect_error(
get_crosswalk(res_all, n_units = -1),
"n_units is out of bounds"
)
expect_error(
get_crosswalk(res_all, n_units = 20),
"n_units is out of bounds"
)
expect_error(
get_crosswalk(res_all, n_units = 3, percent = 0.2),
"only n_units or percent has to be given"
)
expect_equal(nrow(get_crosswalk(res_all, n_units = 1)), n_schools)
expect_equal(nrow(get_crosswalk(res_all, n_units = 5)), n_schools)
expect_equal(nrow(get_crosswalk(res_all, n_units = 15)), n_schools)
expect_equal(nrow(get_crosswalk(res_all, percent = 0.1)), n_schools)
expect_equal(nrow(get_crosswalk(res_all, percent = 0.6)), n_schools)
expect_equal(nrow(get_crosswalk(res_all, percent = 0.9)), n_schools)
expect_equal(as.character(get_crosswalk(res_all, n_units = 1)$new), rep("M1", 17))
})
test_that("parts", {
# get_crosswalk
res_no_parts <- get_crosswalk(res_all, percent = 0.6)
res_parts <- get_crosswalk(res_all, percent = 0.6, parts = TRUE)
expect_equal(names(res_no_parts), c("school", "new"))
expect_equal(names(res_parts), c("school", "new", "parts"))
expect_equal(res_no_parts, res_parts[, -"parts"])
res_no_parts <- get_crosswalk(res_all, percent = 0.99)
res_parts <- get_crosswalk(res_all, percent = 0.99, parts = TRUE)
expect_equal(res_no_parts, res_parts[, -"parts"])
expect_true(all(!is.na(res_parts[grepl("^M", new)][["parts"]])))
# merge_units
merged_no_parts <- merge_units(res_all, percent = .8)
merged_parts <- merge_units(res_all, percent = .8, parts = TRUE)
expect_equal(res_no_parts, res_parts[, -"parts"])
expect_equal(names(merged_no_parts), c("school", "race", "n"))
expect_equal(names(merged_parts), c("school", "race", "n", "parts"))
})
test_that("compress edge case", {
res_edge <- compress(subset, "race", "school", neighbors = "all", weight = "n", max_iter = 1)
expect_equal(nrow(get_crosswalk(res_edge, n_units = 16)), n_schools)
})
test_that("merge_units", {
merged <- merge_units(res_all, percent = 0.8)
new_units_cw <- sort(unique(get_crosswalk(res_all, percent = 0.8)$new))
new_units_merged <- sort(unique(merged$school))
expect_equal(new_units_cw, new_units_merged)
})
test_that("percent works", {
M_full <- mutual_total(subset, "race", "school", weight = "n")[stat == "M"][["est"]]
for (pct in seq(0.1, 0.9, by = 0.05)) {
merged_pct <- merge_units(res_all, percent = pct)
M_pct <- mutual_total(merged_pct, "race", "school", weight = "n")[stat == "M"][["est"]]
pct_M <- M_pct / M_full
expect_true(pct_M > pct)
expect_equal(res_all$iterations[N_units == merged_pct[, uniqueN(school)]][["pct_M"]], pct_M)
}
})
test_that("merge_units edge case", {
res_edge <- compress(subset, "race", "school", neighbors = "all", weight = "n", max_iter = 1)
merged <- merge_units(res_edge, n_units = 16)
# replicate manual merge
units <- c(res_edge$iterations$old_unit, res_edge$iterations$new_unit)
merged_manually <- subset[school %in% units, .(n = sum(n)), by = .(race)]
merged_algo <- merged[school == "M1" & n != 0][, -"school"]
expect_equal(merged_manually, merged_algo)
})
test_that("scree plot", {
if (requireNamespace("ggplot2", quietly = TRUE)) {
plot <- scree_plot(res_all)
expect_equal(nrow(plot$data), n_schools)
plot <- scree_plot(res_all, tail = 3)
expect_equal(nrow(plot$data), 3)
}
})
test_that("data set names", {
subset <- schools00[1:50, ]
data.table::setDT(subset)
names(subset) <- c("state", "district", "unit", "group", "n")
res <- compress(subset, "group", "unit", neighbors = "all", weight = "n")
expect_equal(nrow(res$iterations), 16)
})
test_that("local neighbors", {
subset <- schools00[1:500, ]
data.table::setDT(subset)
res_local <- compress(subset, "race", "school", neighbors = "local", n_neighbors = 100, weight = "n")
res_local_small <- compress(subset, "race", "school", neighbors = "local", n_neighbors = 5, weight = "n")
res_all <- compress(subset, "race", "school", neighbors = "all", weight = "n")
expect_equal(res_local$iterations$old_unit, res_all$iterations$old_unit)
expect_true(
res_local_small$iterations[pct_M > 0.99][.N][["N_units"]] >
res_local$iterations[pct_M > 0.99][.N][["N_units"]]
)
})
test_that("dendrogram", {
dend <- as.dendrogram(res_all)
expect_equal(attr(dend, "height"), res_all$iterations$M[[1]])
expect_equal(attr(dend, "members"), length(unique(res_all$data$school)))
res_limited <- compress(subset, "race", "school", weight = "n", max_iter = 5)
expect_error(as.dendrogram(res_limited))
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.