mutual: Entropy-Based Segregation Indices

if (!identical(Sys.getenv("NOT_CRAN"), "true")) {
    return()
}

library("segregation")
context("test_ipf")

test_that("different precisions", {
    # reduce to overlap sample
    schools00_r <- schools00[schools00$school %in% schools05$school, ]
    schools05_r <- schools05[schools05$school %in% schools00$school, ]

    for (precision in c(.1, .01, .001)) {
        adj <- ipf(schools00_r, schools05_r, "race", "school", weight = "n", precision = precision)

        # check that the new "race" marginals are similar to the target marginals
        new <- aggregate(adj$n, list(adj$race), sum)
        old <- aggregate(schools05_r$n, list(schools05_r$race), sum)
        old <- old[match(new$Group.1, old$Group.1), ]
        new <- new$x / sum(new$x)
        old <- old$x / sum(old$x)
        expect_true(all(abs(new - old) < precision))

        # check that the new "school" marginals are similar to the target marginals
        new <- aggregate(adj$n, list(adj$school), sum)
        old <- aggregate(schools05_r$n, list(schools05_r$school), sum)
        old <- old[match(new$Group.1, old$Group.1), ]
        new <- new$x / sum(new$x)
        old <- old$x / sum(old$x)
        expect_true(all(abs(new - old) < precision))
    }
})

test_that("warn if iterations are too low", {
    expect_error(
        suppressWarnings(
            ipf(schools00, schools05, "race", "school",
                weight = "n",
                precision = .00001, max_iterations = 1
            )
        )
    )
})

test_that("gives sames results as mutual_difference", {
    diff <- mutual_difference(schools00, schools05,
        group = "race", unit = "school",
        weight = "n", method = "km", precision = 0.000001
    )
    # what changed from 2000 to 2005?
    # first reduce to overlap sample
    schools00_r <- schools00[schools00$school %in% schools05$school, ]
    schools05_r <- schools05[schools05$school %in% schools00$school, ]
    M_00 <- mutual_total(schools00_r, "race", "school", weight = "n")[stat == "M", est]
    M_05 <- mutual_total(schools05_r, "race", "school", weight = "n")[stat == "M", est]
    # adjust the 2000 margins to the 2005 margins
    # if only the margins changed, then this would explain all the difference
    adj_00 <- ipf(schools00_r, schools05_r, "race", "school", weight = "n", precision = 0.00001)
    M_margins <- mutual_total(adj_00, "race", "school", weight = "n")[stat == "M", est]
    structural_change <- M_05 - M_margins
    # test
    expect_equal(diff[stat == "M1", est] + diff[stat == "removals", est], M_00)
    expect_equal(diff[stat == "M2", est] - diff[stat == "additions", est], M_05)
    expect_equal(structural_change, diff[stat == "structural", est], tolerance = .001)
    expect_equal(
        M_05 - M_00 - structural_change,
        diff[stat %in% c("unit_marginal", "group_marginal", "interaction"), sum(est)],
        tolerance = .001
    )
})

test_that("example from Karmel & Maclachlan 1988", {
    source <- data.frame(
        occ = rep(c(1, 2, 3), 2),
        gender = c(rep("male", 3), rep("female", 3)),
        n = c(100, 50, 100, 50, 50, 50)
    )
    target <- data.frame(
        occ = rep(c(1, 2, 3), 2),
        gender = c(rep("male", 3), rep("female", 3)),
        n = c(125, 100, 100, 100, 100, 75)
    )
    adj <- ipf(source, target, "occ", "gender", "n", precision = 0.0000000001)
    # K-M report on the scale of the target distribution
    adj$n <- round(adj$n / sum(adj$n) * sum(adj$n_target), 1)
    expect_equal(adj[adj$gender == "male" & adj$occ == 1, "n"][[1]], 134.7)
    expect_equal(adj[adj$gender == "male" & adj$occ == 2, "n"][[1]], 85.5)
    expect_equal(adj[adj$gender == "male" & adj$occ == 3, "n"][[1]], 104.8)
    expect_equal(adj[adj$gender == "female" & adj$occ == 1, "n"][[1]], 90.3)
    expect_equal(adj[adj$gender == "female" & adj$occ == 2, "n"][[1]], 114.5)
    expect_equal(adj[adj$gender == "female" & adj$occ == 3, "n"][[1]], 70.2)

    expect_equal(
        sum(adj[adj$gender == "male", "n"]),
        sum(adj[adj$gender == "male", "n_target"])
    )
    expect_equal(
        sum(adj[adj$gender == "female", "n"]),
        sum(adj[adj$gender == "female", "n_target"])
    )
    expect_equal(sum(adj[adj$occ == 1, "n"]), sum(adj[adj$occ == 1, "n_target"]))
    expect_equal(sum(adj[adj$occ == 2, "n"]), sum(adj[adj$occ == 2, "n_target"]))
    expect_equal(sum(adj[adj$occ == 3, "n"]), sum(adj[adj$occ == 3, "n_target"]))
})

test_that("warning about units and groups being dropped", {
    expect_warning(ipfd <- ipf(schools00, schools05, "race", "school", weight = "n"))
    expect_equal(sum(ipfd$n), sum(ipfd$n_source))
})

test_that("returns same number of observations as before", {
    # schools are dropped here
    suppressWarnings(ipfd <- ipf(schools00, schools05, "race", "school", weight = "n"))
    expect_equal(sum(ipfd$n), sum(ipfd$n_source))

    # reduce to overlap sample, because then by definition the counts are identical
    schools00_r <- schools00[schools00$school %in% schools05$school, ]
    schools05_r <- schools05[schools05$school %in% schools00$school, ]

    ipfd <- ipf(schools00_r, schools05_r, "race", "school", weight = "n")
    expect_equal(sum(ipfd$n), sum(ipfd$n_source))
    expect_equal(sum(schools00_r$n), sum(ipfd$n))
    expect_equal(sum(schools00_r$n), sum(ipfd$n_source))
    expect_equal(sum(schools05_r$n), sum(ipfd$n_target))
})