Nothing
# General optimization tests ----
set.seed(64, kind = "Mersenne-Twister")
z <- c(rep(0, 14), rep(1, 6))
data <- data.frame(color = c(rep("Red", 4), rep("White", 7), rep("Blue", 3), rep("White", 2), rep("Red", 4)),
number = rnorm(20),
category = c(rep(c("1", "2"), 5), "1", rep("2", 3), "1", rep("2", 4), "1"))
data$number[c(1, 5, 11)] <- NA
constraints <- suppressWarnings(generate_constraints(list(color + number ~ 2 * category), z, data = data,
autogen_missing = 4))
results <- optimize_controls(z = z, X = constraints$X, st = data$category, ratio = 1.5,
importances = constraints$importances,
integer = FALSE, solver = "Rglpk", seed = 1, runs = 5,
time_limit = Inf)
test_that("optimization gives correct results", {
expect_equal(results$lpdetails$objective, 6.124562, tolerance = .0001)
expect_equal(results$objective, 6.264861, tolerance = .0001)
})
test_that("optimization chooses correct number of units", {
expect_equal(sum(results$pr[results$pr < 1 & results$pr > 0]),
sum(results$selected[results$pr < 1 & results$pr > 0]))
expect_equal(sum(results$selected[z == 0]), sum(round(table(z, data$category) * 1.5) [2,]))
})
test_that("importances incorporated properly", {
expect_equal(results$objective, sum(results$importances * results$eps))
expect_equal(results$objective_wo_importances, sum(results$eps))
expect_equal(results$lpdetails$objective, sum(results$importances * results$lpdetails$eps))
expect_equal(results$lpdetails$objective_wo_importances, sum(results$lpdetails$eps))
})
# SD vs epsilon tests ----
data_for_sds <- cbind(data[, 1, drop = FALSE], is.na(data$number))
names(data_for_sds)[2] <- "number_missing"
sds <- check_balance(z, data_for_sds, data$category, results$selected, message = FALSE)
test_that("epsilons across strata equal standardized diff in means across when no missingness", {
expect_equal(sds$sd_across[, "abs_stand_diff_after"],
as.numeric(rowSums(results$eps[paste0(row.names(sds$sd_across), "_1"),])))
})
test_that("sum of epsilons within strata equal weighted avg of within strata standardized diff in means
when ratios lead to integer q_s and no missingness", {
covs <- row.names(sds$sd_across)
stripped_row_names <- sapply(strsplit(row.names(results$eps), "_"),
function(k) {paste0(k[-length(k)], collapse = "_")})
sum_eps_within <- sapply(covs, function(cov) {
sum(results$eps[stripped_row_names == cov &
grepl("_category", row.names(results$eps))])})
expect_equal(sds$sd_strata_avg[, "abs_stand_diff_after"],
as.numeric(sum_eps_within))
})
# EMD Tests ----
z <- c(rep(0, 15), rep(1, 5))
data <- data.frame(color = c(rep("Red", 5), rep("White", 2), rep("Blue", 5), rep("White", 4), rep("Red", 4)),
number = 1:20,
category = c("1", "1", "1", rep(c( "2", "3"), 6), "1", rep("2", 2), "3", "1"))
data$number[c(1, 5, 11, 16)] <- NA
constraints <- suppressWarnings(generate_constraints(list(color + number ~ 2 * category), z, data = data,
autogen_missing = 4))
strata_dist <- matrix(c(0, 2, 1, 2, 0, 1, 1, 1, 0), byrow = TRUE, ncol = 3, dimnames = list(c("1", "2", "3"), c("1", "2", "3")))
q_s <- generate_qs(z, st = data$category, ratio = 2.5, max_ratio = NULL, max_extra_s = NULL, strata_dist = strata_dist)
results_emd <- suppressWarnings(optimize_controls(z = z, X = constraints$X, st = data$category, q_s = q_s,
importances = constraints$importances,
integer = FALSE, solver = "Rglpk", seed = 1, runs = 5,
time_limit = Inf))
test_that("EMD chooses correct number of units", {
expect_equal(sum(results_emd$selected),
sum(round(2.5 * table(z, data$category)[2, ])) + sum(table(z, data$category)[2, ]))
})
test_that("EMD gives right objective", {
expect_equal(results_emd$objective, 27.89788, tolerance = 0.0001)
})
test_that("Choosing from closest strata", {
expect_equal(as.numeric(table(data$category[results_emd$selected & !z]))[3], 4)
})
test_that("Specified max ratio and max extra working", {
q_s2 <- generate_qs(z, st = data$category, ratio = 2.5, max_ratio = 0, max_extra_s = 1, strata_dist = strata_dist)
results_emd2 <- suppressWarnings(optimize_controls(z = z, X = constraints$X, st = data$category, q_s = q_s2,
importances = constraints$importances,
integer = FALSE, solver = "Rglpk", seed = 1, runs = 5,
time_limit = Inf))
expect_equal(as.numeric(table(data$category[results_emd2$selected & !z]))[3], 3)
q_s3 <- generate_qs(z, st = data$category, ratio = 2.5, max_ratio = Inf, max_extra_s = 1, strata_dist = strata_dist)
results_emd3 <- suppressWarnings(optimize_controls(z = z, X = constraints$X, st = data$category, q_s = q_s3,
importances = constraints$importances,
integer = FALSE, solver = "Rglpk", seed = 1, runs = 5,
time_limit = Inf))
expect_equal(as.numeric(table(data$category[results_emd3$selected & !z]))[3], 4)
q_s4 <- generate_qs(z, st = data$category, ratio = 2.5, max_ratio = 3, max_extra_s = 0, strata_dist = strata_dist)
results_emd4 <- suppressWarnings(optimize_controls(z = z, X = constraints$X, st = data$category, q_s = q_s4,
importances = constraints$importances,
integer = FALSE, solver = "Rglpk", seed = 1, runs = 5,
time_limit = Inf))
expect_equal(as.numeric(table(data$category[results_emd4$selected & !z]))[3], 3)
})
# Multiple control group tests ----
z <- c(rep(0, 10), rep(1, 10), rep(2, 8))
data <- data.frame(color = c(rep("Red", 5), rep("White", 2), rep("Blue", 3),
rep("White", 2), rep("Red", 8), rep("White", 3), rep("Blue", 5)),
number = 1:28,
category = c(rep(c("1", "2"), 5), "1", rep("2", 1), rep(c("1", "2"), 8)))
data$number[c(1, 5, 11)] <- NA
constraints <- suppressWarnings(generate_constraints(list(color + number ~ 2 * category),
z, data = data, treated = 2,
autogen_missing = 4, denom_variance = "pooled"))
results <- optimize_controls(z = z, X = constraints$X, st = data$category, ratio = .5,
treated = 2, importances = constraints$importances,
integer = FALSE, solver = "Rglpk", seed = 1, runs = 5,
time_limit = Inf)
test_that("optimization gives correct results", {
expect_equal(results$lpdetails$objective, 60.63904, tolerance = .0001)
expect_equal(results$lpdetails$objective, sum(results$lpdetails$eps * constraints$importances),
tolerance = .0001)
expect_equal(results$lpdetails$objective_wo_importances, 40.06041, tolerance = .0001)
expect_equal(results$lpdetails$objective_wo_importances, sum(results$lpdetails$eps), tolerance = .0001)
expect_equal(results$objective, 61.7425, tolerance = .0001)
expect_equal(results$objective, sum(results$eps * constraints$importances),
tolerance = .0001)
expect_equal(results$objective_wo_importances, 40.97886, tolerance = .0001)
expect_equal(results$objective_wo_importances, sum(results$eps), tolerance = .0001)
})
test_that("optimization chooses correct number of units", {
expect_equal(sum(results$pr[results$pr < 1 & results$pr > 0]),
sum(results$selected[results$pr < 1 & results$pr > 0]))
expect_equal(sum(results$selected[z == 0]), sum(round(table(z, data$category)[3,] * 0.5) ))
expect_equal(sum(results$selected[z == 1]), sum(round(table(z, data$category)[3,] * 0.5) ))
expect_equal(sum(results$selected[z == 2]), sum(round(table(z, data$category)[3,] * 1) ))
})
data_for_sds <- cbind(data[, 1, drop = FALSE], is.na(data$number))
names(data_for_sds)[2] <- "number_missing"
# Between group 0 and 1
sds1 <- check_balance(z = z, control = 0, treated = 1, X = data_for_sds,
st = data$category, selected = results$selected,
denom_variance = "pooled", message = FALSE)
# Between group 0 and 2
sds2 <- check_balance(z = z, control = 0, treated = 2, X = data_for_sds,
st = data$category, selected = results$selected,
denom_variance = "pooled", message = FALSE)
# Between group 1 and 2
sds3 <- check_balance(z = z, control = 1, treated = 2, X = data_for_sds,
st = data$category, selected = results$selected,
denom_variance = "pooled", message = FALSE)
test_that("epsilons across strata equal standardized diff in means across when no missingness", {
expect_equal(sds1$sd_across[, "abs_stand_diff_after"],
as.numeric(rowSums(results$eps[paste0(row.names(sds1$sd_across), "_1_0:1"),])))
expect_equal(sds2$sd_across[, "abs_stand_diff_after"],
as.numeric(rowSums(results$eps[paste0(row.names(sds2$sd_across), "_1_0:2"),])))
expect_equal(sds3$sd_across[, "abs_stand_diff_after"],
as.numeric(rowSums(results$eps[paste0(row.names(sds3$sd_across), "_1_1:2"),])))
})
test_that("sum of epsilons within strata equal weighted avg of within strata standardized diff in means
when no missingness and ratios lead to integer q_s", {
covs <- row.names(sds1$sd_across)
stripped_row_names <- sapply(strsplit(row.names(results$eps), "_"),
function(k) {paste0(k[1:(length(k)-2)], collapse = "_")})
sum_eps_within <- sapply(covs, function(cov) {
sum(results$eps[stripped_row_names == cov &
grepl("_category", row.names(results$eps)) &
grepl("_0:1", row.names(results$eps))])})
expect_equal(sds1$sd_strata_avg[, "abs_stand_diff_after"],
as.numeric(sum_eps_within))
sum_eps_within <- sapply(covs, function(cov) {
sum(results$eps[stripped_row_names == cov &
grepl("_category", row.names(results$eps)) &
grepl("_0:2", row.names(results$eps))])})
expect_equal(sds2$sd_strata_avg[, "abs_stand_diff_after"],
as.numeric(sum_eps_within))
sum_eps_within <- sapply(covs, function(cov) {
sum(results$eps[stripped_row_names == cov &
grepl("_category", row.names(results$eps)) &
grepl("_1:2", row.names(results$eps))])})
expect_equal(sds3$sd_strata_avg[, "abs_stand_diff_after"],
as.numeric(sum_eps_within))
})
results <- optimize_controls(z = z, X = constraints$X, st = data$category, ratio = c(.5, .75, 1),
treated = 2, importances = constraints$importances,
integer = FALSE, solver = "Rglpk", seed = 1, runs = 5,
time_limit = Inf)
test_that("multiple ratios still choose correct number of units", {
expect_equal(sum(results$pr[results$pr < 1 & results$pr > 0]),
sum(results$selected[results$pr < 1 & results$pr > 0]))
expect_equal(sum(results$selected[z == 0]), sum(round(table(z, data$category)[3,] * 0.5) ))
expect_equal(sum(results$selected[z == 1]), sum(round(table(z, data$category)[3,] * 0.75) ))
expect_equal(sum(results$selected[z == 2]), sum(round(table(z, data$category)[3,] * 1) ))
})
# Two comparisons tests ----
set.seed(64, kind = "Mersenne-Twister")
z <- c(rep(0, 20), rep(1, 12), rep(2, 8))
data <- data.frame(color = c(rep(c(rep("Red", 4), rep("White", 1), rep("Blue", 2)), 3), "Blue",
rep("White", 2), rep("Red", 8), rep("White", 3), rep("Blue", 5)),
number = c(sample(1:20, 20, replace = TRUE), sample(10:30, 12, replace = TRUE),
sample(10:20, 8, replace = TRUE)),
category = c(rep(c("1", "2"), 20)))
data$number[c(1, 5, 11, 22, 30, 39)] <- NA
constraints <- suppressWarnings(generate_constraints(list(color + number ~ 2 * category),
z, data = data, treated = 2,
autogen_missing = 4, denom_variance = "pooled"))
results_two <- optimize_controls(z = z, X = constraints$X, st = data$category, ratio = 1,
q_star_s = matrix(c(rep(2, 4), rep(0, 2)), nrow = 3,
byrow = TRUE, dimnames = list(NULL, c("1", "2"))),
treated = 2, treated_star = 1, weight_star = 2,
importances = constraints$importances,
integer = FALSE, solver = "Rglpk", seed = 1, runs = 5,
time_limit = Inf, correct_sizes = FALSE, low_memory = FALSE)
test_that("optimization gives correct results", {
expect_equal(results_two$lpdetails$objective, 44.17337, tolerance = .0001)
expect_equal(results_two$lpdetails$objective,
sum(results_two$lpdetails$eps * constraints$importances) +
sum(results_two$lpdetails$eps_star * results_two$weight_star * constraints$importances),
tolerance = .0001)
expect_equal(results_two$lpdetails$objective_wo_importances, 17.92818, tolerance = .0001)
expect_equal(results_two$lpdetails$objective_wo_importances,
sum(results_two$lpdetails$eps) + sum(results_two$lpdetails$eps_star), tolerance = .0001)
expect_equal(results_two$objective, 44.17337, tolerance = .0001)
expect_equal(results_two$objective, sum(results_two$eps * constraints$importances) +
sum(results_two$eps_star * results_two$weight_star * constraints$importances),
tolerance = .0001)
expect_equal(results_two$objective_wo_importances, 17.92818, tolerance = .0001)
expect_equal(results_two$objective_wo_importances, sum(results_two$eps) + sum(results_two$eps_star), tolerance = .0001)
})
# Since sample sizes only correct in expectation, this varies from seed to seed
test_that("number of units chosen is approximately what we wanted for each group", {
expect_equal(as.numeric(table(z[results_two$selected], data$category[results_two$selected])), rep(4, 6))
expect_equal(as.numeric(table(z[results_two$selected_star], data$category[results_two$selected_star])), rep(2, 4))
})
test_that("units chosen for either main or supplemental group", {
expect_equal(sum(results_two$selected & results_two$selected_star), 0)
})
# Three comparisons tests ----
set.seed(64, kind = "Mersenne-Twister")
results_three <- optimize_controls(z = z, X = constraints$X, st = data$category, ratio = 1,
q_star_s = list(matrix(c(0, rep(1, 3), rep(0, 2)), nrow = 3,
byrow = TRUE, dimnames = list(NULL, c("1", "2"))),
matrix(c(1, 0, 1, 0, rep(0, 2)), nrow = 3,
byrow = TRUE, dimnames = list(NULL, c("1", "2")))),
treated = 2, treated_star = c(1, 1), weight_star = c(2, 1),
importances = constraints$importances,
integer = FALSE, solver = "Rglpk", seed = 1, runs = 5,
time_limit = Inf, correct_sizes = FALSE, low_memory = FALSE)
test_that("optimization gives correct results", {
expect_equal(results_three$lpdetails$objective, 49.5996, tolerance = .0001)
lp_sum_w_imp <- sum(results_three$lpdetails$eps * constraints$importances)
lp_sum_wo_imp <- sum(results_three$lpdetails$eps)
sum_w_imp <- sum(results_three$eps * constraints$importances)
sum_wo_imp <- sum(results_three$eps)
for (supp_comp in 1:2) {
lp_sum_w_imp <- lp_sum_w_imp + sum(results_three$lpdetails$eps_star[[supp_comp]] *
results_three$weight_star[supp_comp] * constraints$importances)
lp_sum_wo_imp <- lp_sum_wo_imp + sum(results_three$lpdetails$eps_star[[supp_comp]])
sum_w_imp <- sum_w_imp + sum(results_three$eps_star[[supp_comp]] *
results_three$weight_star[supp_comp] * constraints$importances)
sum_wo_imp <- sum_wo_imp + sum(results_three$eps_star[[supp_comp]])
}
expect_equal(results_three$lpdetails$objective,
lp_sum_w_imp,
tolerance = .0001)
expect_equal(results_three$lpdetails$objective_wo_importances, 20.88223, tolerance = .0001)
expect_equal(results_three$lpdetails$objective_wo_importances,
lp_sum_wo_imp, tolerance = .0001)
expect_equal(results_three$objective, 50.44668, tolerance = .0001)
expect_equal(results_three$objective, sum_w_imp,
tolerance = .0001)
expect_equal(results_three$objective_wo_importances, 21.08154, tolerance = .0001)
expect_equal(results_three$objective_wo_importances, sum_wo_imp, tolerance = .0001)
})
# Since sample sizes only correct in expectation, this varies from seed to seed
test_that("number of units chosen is approximately what we wanted for each group", {
expect_equal(as.numeric(table(z[results_three$selected], data$category[results_three$selected])), rep(4, 6))
expect_equal(as.numeric(table(z[results_three$selected_star[[1]]], data$category[results_three$selected_star[[1]]])), c(0, 1, 1, 1))
expect_equal(as.numeric(table(z[results_three$selected_star[[2]]], data$category[results_three$selected_star[[2]]])), rep(1, 2))
})
test_that("units chosen for either main or supplemental group", {
expect_equal(sum(results_three$selected + results_three$selected_star[[1]] + results_three$selected_star[[2]] > 1), 0)
})
# Tests for low memory ----
set.seed(64, kind = "Mersenne-Twister")
results_three_low_mem <- optimize_controls(z = z, X = constraints$X, st = data$category, ratio = 1,
q_star_s = list(matrix(c(0, rep(1, 3), rep(0, 2)), nrow = 3,
byrow = TRUE, dimnames = list(NULL, c("1", "2"))),
matrix(c(1,0,1,0, rep(0, 2)), nrow = 3,
byrow = TRUE, dimnames = list(NULL, c("1", "2")))),
treated = 2, treated_star = c(1, 1), weight_star = c(2, 1),
importances = constraints$importances,
integer = FALSE, solver = "Rglpk", seed = 1, runs = 5,
time_limit = Inf, correct_sizes = FALSE, low_memory = TRUE)
test_that("optimization gives correct results", {
expect_equal(results_three_low_mem$lpdetails$objective,
results_three$lpdetails$objective, tolerance = .0001)
expect_equal(results_three_low_mem$lpdetails$objective_wo_importances,
results_three$lpdetails$objective_wo_importances, tolerance = .0001)
expect_equal(results_three_low_mem$objective,
results_three$objective, tolerance = .0001)
expect_equal(results_three_low_mem$objective_wo_importances,
results_three$objective_wo_importances, tolerance = .0001)
})
test_that("eps not reported", {
expect_true(is.null(results_three_low_mem$eps))
expect_true(is.null(results_three_low_mem$eps_star))
expect_true(is.null(results_three_low_mem$lpdetails$eps))
expect_true(is.null(results_three_low_mem$lpdetails$eps_star))
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.