tests/testthat/test-optimize_controls.R

# General optimization tests ----
set.seed(64, kind = "Mersenne-Twister")
z <- c(rep(0, 14), rep(1, 6))
data <- data.frame(color = c(rep("Red", 4), rep("White", 7), rep("Blue", 3), rep("White", 2), rep("Red", 4)),
                   number = rnorm(20),
                   category = c(rep(c("1", "2"), 5), "1", rep("2", 3), "1", rep("2", 4), "1"))
data$number[c(1, 5, 11)] <- NA
constraints <- suppressWarnings(generate_constraints(list(color + number ~ 2 * category), z, data = data,
                                                     autogen_missing = 4))
results <- optimize_controls(z = z, X = constraints$X, st = data$category, ratio = 1.5,
                             importances = constraints$importances,
                             integer = FALSE, solver = "Rglpk", seed = 1, runs = 5,
                             time_limit = Inf)

test_that("optimization gives correct results", {
  expect_equal(results$lpdetails$objective, 6.124562, tolerance = .0001)
  expect_equal(results$objective, 6.264861, tolerance = .0001)
})

test_that("optimization chooses correct number of units", {
  expect_equal(sum(results$pr[results$pr < 1 & results$pr > 0]),
               sum(results$selected[results$pr < 1 & results$pr > 0]))
  expect_equal(sum(results$selected[z == 0]), sum(round(table(z, data$category) * 1.5) [2,]))
})

test_that("importances incorporated properly", {
  expect_equal(results$objective, sum(results$importances * results$eps))
  expect_equal(results$objective_wo_importances, sum(results$eps))
  expect_equal(results$lpdetails$objective, sum(results$importances * results$lpdetails$eps))
  expect_equal(results$lpdetails$objective_wo_importances, sum(results$lpdetails$eps))

})

# SD vs epsilon tests ----

data_for_sds <- cbind(data[, 1, drop = FALSE], is.na(data$number))
names(data_for_sds)[2] <- "number_missing"
sds <- check_balance(z, data_for_sds, data$category, results$selected, message = FALSE)

test_that("epsilons across strata equal standardized diff in means across when no missingness", {
  expect_equal(sds$sd_across[, "abs_stand_diff_after"],
               as.numeric(rowSums(results$eps[paste0(row.names(sds$sd_across), "_1"),])))
})

test_that("sum of epsilons within strata equal weighted avg of within strata standardized diff in means
          when ratios lead to integer q_s and no missingness", {
            covs <- row.names(sds$sd_across)
            stripped_row_names <- sapply(strsplit(row.names(results$eps), "_"),
                                         function(k) {paste0(k[-length(k)], collapse = "_")})
            sum_eps_within <- sapply(covs, function(cov) {
              sum(results$eps[stripped_row_names == cov &
                                grepl("_category", row.names(results$eps))])})
            expect_equal(sds$sd_strata_avg[, "abs_stand_diff_after"],
                         as.numeric(sum_eps_within))

          })



# EMD Tests ----

z <- c(rep(0, 15), rep(1, 5))
data <- data.frame(color = c(rep("Red", 5), rep("White", 2), rep("Blue", 5), rep("White", 4), rep("Red", 4)),
                   number = 1:20,
                   category = c("1", "1", "1", rep(c( "2", "3"), 6), "1", rep("2", 2), "3", "1"))
data$number[c(1, 5, 11, 16)] <- NA
constraints <- suppressWarnings(generate_constraints(list(color + number ~ 2 * category), z, data = data,
                                                     autogen_missing = 4))
strata_dist <- matrix(c(0, 2, 1, 2, 0, 1, 1, 1, 0), byrow = TRUE, ncol = 3, dimnames = list(c("1", "2", "3"), c("1", "2", "3")))
q_s <- generate_qs(z, st = data$category, ratio = 2.5, max_ratio = NULL, max_extra_s = NULL, strata_dist = strata_dist)
results_emd <- suppressWarnings(optimize_controls(z = z, X = constraints$X, st = data$category, q_s = q_s,
                                                  importances = constraints$importances,
                                                  integer = FALSE, solver = "Rglpk", seed = 1, runs = 5,
                                                  time_limit = Inf))

test_that("EMD chooses correct number of units", {
  expect_equal(sum(results_emd$selected),
               sum(round(2.5 * table(z, data$category)[2, ])) + sum(table(z, data$category)[2, ]))
})

test_that("EMD gives right objective", {
  expect_equal(results_emd$objective, 27.89788, tolerance = 0.0001)
})

test_that("Choosing from closest strata", {
  expect_equal(as.numeric(table(data$category[results_emd$selected & !z]))[3], 4)
})

test_that("Specified max ratio and max extra working", {
  q_s2 <- generate_qs(z, st = data$category, ratio = 2.5, max_ratio = 0, max_extra_s = 1, strata_dist = strata_dist)
  results_emd2 <- suppressWarnings(optimize_controls(z = z, X = constraints$X, st = data$category, q_s = q_s2,
                                                     importances = constraints$importances,
                                                     integer = FALSE, solver = "Rglpk", seed = 1, runs = 5,
                                                     time_limit = Inf))
  expect_equal(as.numeric(table(data$category[results_emd2$selected & !z]))[3], 3)
  q_s3 <- generate_qs(z, st = data$category, ratio = 2.5, max_ratio = Inf, max_extra_s = 1, strata_dist = strata_dist)
  results_emd3 <- suppressWarnings(optimize_controls(z = z, X = constraints$X, st = data$category,  q_s = q_s3,
                                                     importances = constraints$importances,
                                                     integer = FALSE, solver = "Rglpk", seed = 1, runs = 5,
                                                     time_limit = Inf))
  expect_equal(as.numeric(table(data$category[results_emd3$selected & !z]))[3], 4)
  q_s4 <- generate_qs(z, st = data$category, ratio = 2.5,  max_ratio = 3, max_extra_s = 0, strata_dist = strata_dist)
  results_emd4 <- suppressWarnings(optimize_controls(z = z, X = constraints$X, st = data$category, q_s = q_s4,
                                                     importances = constraints$importances,
                                                     integer = FALSE, solver = "Rglpk", seed = 1, runs = 5,
                                                     time_limit = Inf))
  expect_equal(as.numeric(table(data$category[results_emd4$selected & !z]))[3], 3)
})



# Multiple control group tests ----

z <- c(rep(0, 10), rep(1, 10), rep(2, 8))
data <- data.frame(color = c(rep("Red", 5), rep("White", 2), rep("Blue", 3),
                             rep("White", 2), rep("Red", 8), rep("White", 3), rep("Blue", 5)),
                   number = 1:28,
                   category = c(rep(c("1", "2"), 5), "1", rep("2", 1), rep(c("1", "2"), 8)))
data$number[c(1, 5, 11)] <- NA
constraints <- suppressWarnings(generate_constraints(list(color + number ~ 2 * category),
                                                     z, data = data, treated = 2,
                                                     autogen_missing = 4, denom_variance = "pooled"))
results <- optimize_controls(z = z, X = constraints$X, st = data$category, ratio = .5,
                             treated = 2, importances = constraints$importances,
                             integer = FALSE, solver = "Rglpk", seed = 1, runs = 5,
                             time_limit = Inf)

test_that("optimization gives correct results", {
  expect_equal(results$lpdetails$objective, 60.63904, tolerance = .0001)
  expect_equal(results$lpdetails$objective, sum(results$lpdetails$eps * constraints$importances),
               tolerance = .0001)
  expect_equal(results$lpdetails$objective_wo_importances, 40.06041, tolerance = .0001)
  expect_equal(results$lpdetails$objective_wo_importances, sum(results$lpdetails$eps), tolerance = .0001)
  expect_equal(results$objective, 61.7425, tolerance = .0001)
  expect_equal(results$objective, sum(results$eps * constraints$importances),
               tolerance = .0001)
  expect_equal(results$objective_wo_importances, 40.97886, tolerance = .0001)
  expect_equal(results$objective_wo_importances, sum(results$eps), tolerance = .0001)
})

test_that("optimization chooses correct number of units", {
  expect_equal(sum(results$pr[results$pr < 1 & results$pr > 0]),
               sum(results$selected[results$pr < 1 & results$pr > 0]))
  expect_equal(sum(results$selected[z == 0]), sum(round(table(z, data$category)[3,] * 0.5) ))
  expect_equal(sum(results$selected[z == 1]), sum(round(table(z, data$category)[3,] * 0.5) ))
  expect_equal(sum(results$selected[z == 2]), sum(round(table(z, data$category)[3,] * 1) ))
})

data_for_sds <- cbind(data[, 1, drop = FALSE], is.na(data$number))
names(data_for_sds)[2] <- "number_missing"
# Between group 0 and 1
sds1 <- check_balance(z = z, control = 0, treated = 1, X = data_for_sds,
                      st = data$category, selected = results$selected,
                      denom_variance = "pooled", message = FALSE)
# Between group 0 and 2
sds2 <- check_balance(z = z, control = 0, treated = 2, X = data_for_sds,
                      st = data$category, selected = results$selected,
                      denom_variance = "pooled", message = FALSE)
# Between group 1 and 2
sds3 <- check_balance(z = z, control = 1, treated = 2, X = data_for_sds,
                      st = data$category, selected = results$selected,
                      denom_variance = "pooled", message = FALSE)

test_that("epsilons across strata equal standardized diff in means across when no missingness", {
  expect_equal(sds1$sd_across[, "abs_stand_diff_after"],
               as.numeric(rowSums(results$eps[paste0(row.names(sds1$sd_across), "_1_0:1"),])))
  expect_equal(sds2$sd_across[, "abs_stand_diff_after"],
               as.numeric(rowSums(results$eps[paste0(row.names(sds2$sd_across), "_1_0:2"),])))
  expect_equal(sds3$sd_across[, "abs_stand_diff_after"],
               as.numeric(rowSums(results$eps[paste0(row.names(sds3$sd_across), "_1_1:2"),])))
})

test_that("sum of epsilons within strata equal weighted avg of within strata standardized diff in means
          when no missingness and ratios lead to integer q_s", {
            covs <- row.names(sds1$sd_across)
            stripped_row_names <- sapply(strsplit(row.names(results$eps), "_"),
                                         function(k) {paste0(k[1:(length(k)-2)], collapse = "_")})
            sum_eps_within <- sapply(covs, function(cov) {
              sum(results$eps[stripped_row_names == cov &
                                grepl("_category", row.names(results$eps)) &
                                grepl("_0:1", row.names(results$eps))])})
            expect_equal(sds1$sd_strata_avg[, "abs_stand_diff_after"],
                         as.numeric(sum_eps_within))

            sum_eps_within <- sapply(covs, function(cov) {
              sum(results$eps[stripped_row_names == cov &
                                grepl("_category", row.names(results$eps)) &
                                grepl("_0:2", row.names(results$eps))])})
            expect_equal(sds2$sd_strata_avg[, "abs_stand_diff_after"],
                         as.numeric(sum_eps_within))

            sum_eps_within <- sapply(covs, function(cov) {
              sum(results$eps[stripped_row_names == cov &
                                grepl("_category", row.names(results$eps)) &
                                grepl("_1:2", row.names(results$eps))])})
            expect_equal(sds3$sd_strata_avg[, "abs_stand_diff_after"],
                         as.numeric(sum_eps_within))
          })

results <- optimize_controls(z = z, X = constraints$X, st = data$category, ratio = c(.5, .75, 1),
                             treated = 2, importances = constraints$importances,
                             integer = FALSE, solver = "Rglpk", seed = 1, runs = 5,
                             time_limit = Inf)
test_that("multiple ratios still choose correct number of units", {
  expect_equal(sum(results$pr[results$pr < 1 & results$pr > 0]),
               sum(results$selected[results$pr < 1 & results$pr > 0]))
  expect_equal(sum(results$selected[z == 0]), sum(round(table(z, data$category)[3,] * 0.5) ))
  expect_equal(sum(results$selected[z == 1]), sum(round(table(z, data$category)[3,] * 0.75) ))
  expect_equal(sum(results$selected[z == 2]), sum(round(table(z, data$category)[3,] * 1) ))
})



# Two comparisons tests ----

set.seed(64, kind = "Mersenne-Twister")
z <- c(rep(0, 20), rep(1, 12), rep(2, 8))
data <- data.frame(color = c(rep(c(rep("Red", 4), rep("White", 1), rep("Blue", 2)), 3), "Blue",
                             rep("White", 2), rep("Red", 8), rep("White", 3), rep("Blue", 5)),
                   number = c(sample(1:20, 20, replace = TRUE), sample(10:30, 12, replace = TRUE),
                              sample(10:20, 8, replace = TRUE)),
                   category = c(rep(c("1", "2"), 20)))
data$number[c(1, 5, 11, 22, 30, 39)] <- NA
constraints <- suppressWarnings(generate_constraints(list(color + number ~ 2 * category),
                                                     z, data = data, treated = 2,
                                                     autogen_missing = 4, denom_variance = "pooled"))
results_two <- optimize_controls(z = z, X = constraints$X, st = data$category, ratio = 1,
                             q_star_s = matrix(c(rep(2, 4), rep(0, 2)), nrow = 3,
                                               byrow = TRUE, dimnames = list(NULL, c("1", "2"))),
                             treated = 2, treated_star = 1, weight_star = 2,
                             importances = constraints$importances,
                             integer = FALSE, solver = "Rglpk", seed = 1, runs = 5,
                             time_limit = Inf, correct_sizes = FALSE, low_memory = FALSE)


test_that("optimization gives correct results", {
  expect_equal(results_two$lpdetails$objective, 44.17337, tolerance = .0001)
  expect_equal(results_two$lpdetails$objective,
               sum(results_two$lpdetails$eps * constraints$importances) +
                 sum(results_two$lpdetails$eps_star * results_two$weight_star * constraints$importances),
               tolerance = .0001)
  expect_equal(results_two$lpdetails$objective_wo_importances,  17.92818, tolerance = .0001)
  expect_equal(results_two$lpdetails$objective_wo_importances,
               sum(results_two$lpdetails$eps) + sum(results_two$lpdetails$eps_star), tolerance = .0001)
  expect_equal(results_two$objective, 44.17337, tolerance = .0001)
  expect_equal(results_two$objective, sum(results_two$eps * constraints$importances) +
                 sum(results_two$eps_star * results_two$weight_star * constraints$importances),
               tolerance = .0001)
  expect_equal(results_two$objective_wo_importances, 17.92818, tolerance = .0001)
  expect_equal(results_two$objective_wo_importances, sum(results_two$eps) + sum(results_two$eps_star), tolerance = .0001)
})

# Since sample sizes only correct in expectation, this varies from seed to seed
test_that("number of units chosen is approximately what we wanted for each group", {
  expect_equal(as.numeric(table(z[results_two$selected], data$category[results_two$selected])), rep(4, 6))
  expect_equal(as.numeric(table(z[results_two$selected_star], data$category[results_two$selected_star])), rep(2, 4))
})

test_that("units chosen for either main or supplemental group", {
  expect_equal(sum(results_two$selected & results_two$selected_star), 0)
})



# Three comparisons tests ----

set.seed(64, kind = "Mersenne-Twister")

results_three <- optimize_controls(z = z, X = constraints$X, st = data$category, ratio = 1,
                             q_star_s = list(matrix(c(0, rep(1, 3), rep(0, 2)), nrow = 3,
                                               byrow = TRUE, dimnames = list(NULL, c("1", "2"))),
                                             matrix(c(1, 0, 1, 0, rep(0, 2)), nrow = 3,
                                                    byrow = TRUE, dimnames = list(NULL, c("1", "2")))),
                             treated = 2, treated_star = c(1, 1), weight_star = c(2, 1),
                             importances = constraints$importances,
                             integer = FALSE, solver = "Rglpk", seed = 1, runs = 5,
                             time_limit = Inf, correct_sizes = FALSE, low_memory = FALSE)

test_that("optimization gives correct results", {
  expect_equal(results_three$lpdetails$objective, 49.5996, tolerance = .0001)
  lp_sum_w_imp <- sum(results_three$lpdetails$eps * constraints$importances)
  lp_sum_wo_imp <- sum(results_three$lpdetails$eps)
  sum_w_imp <- sum(results_three$eps * constraints$importances)
  sum_wo_imp <- sum(results_three$eps)

  for (supp_comp in 1:2) {
    lp_sum_w_imp <- lp_sum_w_imp + sum(results_three$lpdetails$eps_star[[supp_comp]] *
                                   results_three$weight_star[supp_comp] * constraints$importances)
    lp_sum_wo_imp <- lp_sum_wo_imp + sum(results_three$lpdetails$eps_star[[supp_comp]])
    sum_w_imp <- sum_w_imp + sum(results_three$eps_star[[supp_comp]] *
                                   results_three$weight_star[supp_comp] * constraints$importances)
    sum_wo_imp <- sum_wo_imp + sum(results_three$eps_star[[supp_comp]])
  }
  expect_equal(results_three$lpdetails$objective,
               lp_sum_w_imp,
               tolerance = .0001)
  expect_equal(results_three$lpdetails$objective_wo_importances,  20.88223, tolerance = .0001)
  expect_equal(results_three$lpdetails$objective_wo_importances,
               lp_sum_wo_imp, tolerance = .0001)
  expect_equal(results_three$objective, 50.44668, tolerance = .0001)
  expect_equal(results_three$objective, sum_w_imp,
               tolerance = .0001)
  expect_equal(results_three$objective_wo_importances, 21.08154, tolerance = .0001)
  expect_equal(results_three$objective_wo_importances, sum_wo_imp, tolerance = .0001)
})

# Since sample sizes only correct in expectation, this varies from seed to seed
test_that("number of units chosen is approximately what we wanted for each group", {
  expect_equal(as.numeric(table(z[results_three$selected], data$category[results_three$selected])), rep(4, 6))
  expect_equal(as.numeric(table(z[results_three$selected_star[[1]]], data$category[results_three$selected_star[[1]]])), c(0, 1, 1, 1))
  expect_equal(as.numeric(table(z[results_three$selected_star[[2]]], data$category[results_three$selected_star[[2]]])), rep(1, 2))
})

test_that("units chosen for either main or supplemental group", {
  expect_equal(sum(results_three$selected + results_three$selected_star[[1]] + results_three$selected_star[[2]] > 1), 0)
})



# Tests for low memory ----

set.seed(64, kind = "Mersenne-Twister")
results_three_low_mem <- optimize_controls(z = z, X = constraints$X, st = data$category, ratio = 1,
                             q_star_s = list(matrix(c(0, rep(1, 3), rep(0, 2)), nrow = 3,
                                                    byrow = TRUE, dimnames = list(NULL, c("1", "2"))),
                                             matrix(c(1,0,1,0, rep(0, 2)), nrow = 3,
                                                    byrow = TRUE, dimnames = list(NULL, c("1", "2")))),
                             treated = 2, treated_star = c(1, 1), weight_star = c(2, 1),
                             importances = constraints$importances,
                             integer = FALSE, solver = "Rglpk", seed = 1, runs = 5,
                             time_limit = Inf, correct_sizes = FALSE, low_memory = TRUE)


test_that("optimization gives correct results", {
  expect_equal(results_three_low_mem$lpdetails$objective,
               results_three$lpdetails$objective, tolerance = .0001)
  expect_equal(results_three_low_mem$lpdetails$objective_wo_importances,
               results_three$lpdetails$objective_wo_importances, tolerance = .0001)
 expect_equal(results_three_low_mem$objective,
              results_three$objective, tolerance = .0001)
  expect_equal(results_three_low_mem$objective_wo_importances,
               results_three$objective_wo_importances, tolerance = .0001)
})

test_that("eps not reported", {
  expect_true(is.null(results_three_low_mem$eps))
  expect_true(is.null(results_three_low_mem$eps_star))
  expect_true(is.null(results_three_low_mem$lpdetails$eps))
  expect_true(is.null(results_three_low_mem$lpdetails$eps_star))
})

Try the natstrat package in your browser

Any scripts or data that you put into this service are public.

natstrat documentation built on Oct. 15, 2021, 5:12 p.m.