test-optimize_controls.R
In natstrat: Obtain Unweighted Natural Strata that Balance Many Covariates

# General optimization tests ----
set.seed(64, kind = "Mersenne-Twister")
z <- c(rep(0, 14), rep(1, 6))
data <- data.frame(color = c(rep("Red", 4), rep("White", 7), rep("Blue", 3), rep("White", 2), rep("Red", 4)),
                   number = rnorm(20),
                   category = c(rep(c("1", "2"), 5), "1", rep("2", 3), "1", rep("2", 4), "1"))
data$number[c(1, 5, 11)] <- NA
constraints <- suppressWarnings(generate_constraints(list(color + number ~ 2 * category), z, data = data,
                                                     autogen_missing = 4))
results <- optimize_controls(z = z, X = constraints$X, st = data$category, ratio = 1.5,
                             importances = constraints$importances,
                             integer = FALSE, solver = "Rglpk", seed = 1, runs = 5,
                             time_limit = Inf)

test_that("optimization gives correct results", {
  expect_equal(results$lpdetails$objective, 6.124562, tolerance = .0001)
  expect_equal(results$objective, 6.264861, tolerance = .0001)
})

test_that("optimization chooses correct number of units", {
  expect_equal(sum(results$pr[results$pr < 1 & results$pr > 0]),
               sum(results$selected[results$pr < 1 & results$pr > 0]))
  expect_equal(sum(results$selected[z == 0]), sum(round(table(z, data$category) * 1.5) [2,]))
})

test_that("importances incorporated properly", {
  expect_equal(results$objective, sum(results$importances * results$eps))
  expect_equal(results$objective_wo_importances, sum(results$eps))
  expect_equal(results$lpdetails$objective, sum(results$importances * results$lpdetails$eps))
  expect_equal(results$lpdetails$objective_wo_importances, sum(results$lpdetails$eps))

})

# SD vs epsilon tests ----

data_for_sds <- cbind(data[, 1, drop = FALSE], is.na(data$number))
names(data_for_sds)[2] <- "number_missing"
sds <- check_balance(z, data_for_sds, data$category, results$selected, message = FALSE)

test_that("epsilons across strata equal standardized diff in means across when no missingness", {
  expect_equal(sds$sd_across[, "abs_stand_diff_after"],
               as.numeric(rowSums(results$eps[paste0(row.names(sds$sd_across), "_1"),])))
})

test_that("sum of epsilons within strata equal weighted avg of within strata standardized diff in means
          when ratios lead to integer q_s and no missingness", {
            covs <- row.names(sds$sd_across)
            stripped_row_names <- sapply(strsplit(row.names(results$eps), "_"),
                                         function(k) {paste0(k[-length(k)], collapse = "_")})
            sum_eps_within <- sapply(covs, function(cov) {
              sum(results$eps[stripped_row_names == cov &
                                grepl("_category", row.names(results$eps))])})
            expect_equal(sds$sd_strata_avg[, "abs_stand_diff_after"],
                         as.numeric(sum_eps_within))

          })



# EMD Tests ----

z <- c(rep(0, 15), rep(1, 5))
data <- data.frame(color = c(rep("Red", 5), rep("White", 2), rep("Blue", 5), rep("White", 4), rep("Red", 4)),
                   number = 1:20,
                   category = c("1", "1", "1", rep(c( "2", "3"), 6), "1", rep("2", 2), "3", "1"))
data$number[c(1, 5, 11, 16)] <- NA
constraints <- suppressWarnings(generate_constraints(list(color + number ~ 2 * category), z, data = data,
                                                     autogen_missing = 4))
strata_dist <- matrix(c(0, 2, 1, 2, 0, 1, 1, 1, 0), byrow = TRUE, ncol = 3, dimnames = list(c("1", "2", "3"), c("1", "2", "3")))
q_s <- generate_qs(z, st = data$category, ratio = 2.5, max_ratio = NULL, max_extra_s = NULL, strata_dist = strata_dist)
results_emd <- suppressWarnings(optimize_controls(z = z, X = constraints$X, st = data$category, q_s = q_s,
                                                  importances = constraints$importances,
                                                  integer = FALSE, solver = "Rglpk", seed = 1, runs = 5,
                                                  time_limit = Inf))

test_that("EMD chooses correct number of units", {
  expect_equal(sum(results_emd$selected),
               sum(round(2.5 * table(z, data$category)[2, ])) + sum(table(z, data$category)[2, ]))
})

test_that("EMD gives right objective", {
  expect_equal(results_emd$objective, 27.89788, tolerance = 0.0001)
})

test_that("Choosing from closest strata", {
  expect_equal(as.numeric(table(data$category[results_emd$selected & !z]))[3], 4)
})

test_that("Specified max ratio and max extra working", {
  q_s2 <- generate_qs(z, st = data$category, ratio = 2.5, max_ratio = 0, max_extra_s = 1, strata_dist = strata_dist)
  results_emd2 <- suppressWarnings(optimize_controls(z = z, X = constraints$X, st = data$category, q_s = q_s2,
                                                     importances = constraints$importances,
                                                     integer = FALSE, solver = "Rglpk", seed = 1, runs = 5,
                                                     time_limit = Inf))
  expect_equal(as.numeric(table(data$category[results_emd2$selected & !z]))[3], 3)
  q_s3 <- generate_qs(z, st = data$category, ratio = 2.5, max_ratio = Inf, max_extra_s = 1, strata_dist = strata_dist)
  results_emd3 <- suppressWarnings(optimize_controls(z = z, X = constraints$X, st = data$category,  q_s = q_s3,
                                                     importances = constraints$importances,
                                                     integer = FALSE, solver = "Rglpk", seed = 1, runs = 5,
                                                     time_limit = Inf))
  expect_equal(as.numeric(table(data$category[results_emd3$selected & !z]))[3], 4)
  q_s4 <- generate_qs(z, st = data$category, ratio = 2.5,  max_ratio = 3, max_extra_s = 0, strata_dist = strata_dist)
  results_emd4 <- suppressWarnings(optimize_controls(z = z, X = constraints$X, st = data$category, q_s = q_s4,
                                                     importances = constraints$importances,
                                                     integer = FALSE, solver = "Rglpk", seed = 1, runs = 5,
                                                     time_limit = Inf))
  expect_equal(as.numeric(table(data$category[results_emd4$selected & !z]))[3], 3)
})



# Multiple control group tests ----

z <- c(rep(0, 10), rep(1, 10), rep(2, 8))
data <- data.frame(color = c(rep("Red", 5), rep("White", 2), rep("Blue", 3),
                             rep("White", 2), rep("Red", 8), rep("White", 3), rep("Blue", 5)),
                   number = 1:28,
                   category = c(rep(c("1", "2"), 5), "1", rep("2", 1), rep(c("1", "2"), 8)))
data$number[c(1, 5, 11)] <- NA
constraints <- suppressWarnings(generate_constraints(list(color + number ~ 2 * category),
                                                     z, data = data, treated = 2,
                                                     autogen_missing = 4, denom_variance = "pooled"))
results <- optimize_controls(z = z, X = constraints$X, st = data$category, ratio = .5,
                             treated = 2, importances = constraints$importances,
                             integer = FALSE, solver = "Rglpk", seed = 1, runs = 5,
                             time_limit = Inf)

test_that("optimization gives correct results", {
  expect_equal(results$lpdetails$objective, 60.63904, tolerance = .0001)
  expect_equal(results$lpdetails$objective, sum(results$lpdetails$eps * constraints$importances),
               tolerance = .0001)
  expect_equal(results$lpdetails$objective_wo_importances, 40.06041, tolerance = .0001)
  expect_equal(results$lpdetails$objective_wo_importances, sum(results$lpdetails$eps), tolerance = .0001)
  expect_equal(results$objective, 61.7425, tolerance = .0001)
  expect_equal(results$objective, sum(results$eps * constraints$importances),
               tolerance = .0001)
  expect_equal(results$objective_wo_importances, 40.97886, tolerance = .0001)
  expect_equal(results$objective_wo_importances, sum(results$eps), tolerance = .0001)
})

test_that("optimization chooses correct number of units", {
  expect_equal(sum(results$pr[results$pr < 1 & results$pr > 0]),
               sum(results$selected[results$pr < 1 & results$pr > 0]))
  expect_equal(sum(results$selected[z == 0]), sum(round(table(z, data$category)[3,] * 0.5) ))
  expect_equal(sum(results$selected[z == 1]), sum(round(table(z, data$category)[3,] * 0.5) ))
  expect_equal(sum(results$selected[z == 2]), sum(round(table(z, data$category)[3,] * 1) ))
})

data_for_sds <- cbind(data[, 1, drop = FALSE], is.na(data$number))
names(data_for_sds)[2] <- "number_missing"
# Between group 0 and 1
sds1 <- check_balance(z = z, control = 0, treated = 1, X = data_for_sds,
                      st = data$category, selected = results$selected,
                      denom_variance = "pooled", message = FALSE)
# Between group 0 and 2
sds2 <- check_balance(z = z, control = 0, treated = 2, X = data_for_sds,
                      st = data$category, selected = results$selected,
                      denom_variance = "pooled", message = FALSE)
# Between group 1 and 2
sds3 <- check_balance(z = z, control = 1, treated = 2, X = data_for_sds,
                      st = data$category, selected = results$selected,
                      denom_variance = "pooled", message = FALSE)

test_that("epsilons across strata equal standardized diff in means across when no missingness", {
  expect_equal(sds1$sd_across[, "abs_stand_diff_after"],
               as.numeric(rowSums(results$eps[paste0(row.names(sds1$sd_across), "_1_0:1"),])))
  expect_equal(sds2$sd_across[, "abs_stand_diff_after"],
               as.numeric(rowSums(results$eps[paste0(row.names(sds2$sd_across), "_1_0:2"),])))
  expect_equal(sds3$sd_across[, "abs_stand_diff_after"],
               as.numeric(rowSums(results$eps[paste0(row.names(sds3$sd_across), "_1_1:2"),])))
})

test_that("sum of epsilons within strata equal weighted avg of within strata standardized diff in means
          when no missingness and ratios lead to integer q_s", {
            covs <- row.names(sds1$sd_across)
            stripped_row_names <- sapply(strsplit(row.names(results$eps), "_"),
                                         function(k) {paste0(k[1:(length(k)-2)], collapse = "_")})
            sum_eps_within <- sapply(covs, function(cov) {
              sum(results$eps[stripped_row_names == cov &
                                grepl("_category", row.names(results$eps)) &
                                grepl("_0:1", row.names(results$eps))])})
            expect_equal(sds1$sd_strata_avg[, "abs_stand_diff_after"],
                         as.numeric(sum_eps_within))

            sum_eps_within <- sapply(covs, function(cov) {
              sum(results$eps[stripped_row_names == cov &
                                grepl("_category", row.names(results$eps)) &
                                grepl("_0:2", row.names(results$eps))])})
            expect_equal(sds2$sd_strata_avg[, "abs_stand_diff_after"],
                         as.numeric(sum_eps_within))

            sum_eps_within <- sapply(covs, function(cov) {
              sum(results$eps[stripped_row_names == cov &
                                grepl("_category", row.names(results$eps)) &
                                grepl("_1:2", row.names(results$eps))])})
            expect_equal(sds3$sd_strata_avg[, "abs_stand_diff_after"],
                         as.numeric(sum_eps_within))
          })

results <- optimize_controls(z = z, X = constraints$X, st = data$category, ratio = c(.5, .75, 1),
                             treated = 2, importances = constraints$importances,
                             integer = FALSE, solver = "Rglpk", seed = 1, runs = 5,
                             time_limit = Inf)
test_that("multiple ratios still choose correct number of units", {
  expect_equal(sum(results$pr[results$pr < 1 & results$pr > 0]),
               sum(results$selected[results$pr < 1 & results$pr > 0]))
  expect_equal(sum(results$selected[z == 0]), sum(round(table(z, data$category)[3,] * 0.5) ))
  expect_equal(sum(results$selected[z == 1]), sum(round(table(z, data$category)[3,] * 0.75) ))
  expect_equal(sum(results$selected[z == 2]), sum(round(table(z, data$category)[3,] * 1) ))
})



# Two comparisons tests ----

set.seed(64, kind = "Mersenne-Twister")
z <- c(rep(0, 20), rep(1, 12), rep(2, 8))
data <- data.frame(color = c(rep(c(rep("Red", 4), rep("White", 1), rep("Blue", 2)), 3), "Blue",
                             rep("White", 2), rep("Red", 8), rep("White", 3), rep("Blue", 5)),
                   number = c(sample(1:20, 20, replace = TRUE), sample(10:30, 12, replace = TRUE),
                              sample(10:20, 8, replace = TRUE)),
                   category = c(rep(c("1", "2"), 20)))
data$number[c(1, 5, 11, 22, 30, 39)] <- NA
constraints <- suppressWarnings(generate_constraints(list(color + number ~ 2 * category),
                                                     z, data = data, treated = 2,
                                                     autogen_missing = 4, denom_variance = "pooled"))
results_two <- optimize_controls(z = z, X = constraints$X, st = data$category, ratio = 1,
                             q_star_s = matrix(c(rep(2, 4), rep(0, 2)), nrow = 3,
                                               byrow = TRUE, dimnames = list(NULL, c("1", "2"))),
                             treated = 2, treated_star = 1, weight_star = 2,
                             importances = constraints$importances,
                             integer = FALSE, solver = "Rglpk", seed = 1, runs = 5,
                             time_limit = Inf, correct_sizes = FALSE, low_memory = FALSE)


test_that("optimization gives correct results", {
  expect_equal(results_two$lpdetails$objective, 44.17337, tolerance = .0001)
  expect_equal(results_two$lpdetails$objective,
               sum(results_two$lpdetails$eps * constraints$importances) +
                 sum(results_two$lpdetails$eps_star * results_two$weight_star * constraints$importances),
               tolerance = .0001)
  expect_equal(results_two$lpdetails$objective_wo_importances,  17.92818, tolerance = .0001)
  expect_equal(results_two$lpdetails$objective_wo_importances,
               sum(results_two$lpdetails$eps) + sum(results_two$lpdetails$eps_star), tolerance = .0001)
  expect_equal(results_two$objective, 44.17337, tolerance = .0001)
  expect_equal(results_two$objective, sum(results_two$eps * constraints$importances) +
                 sum(results_two$eps_star * results_two$weight_star * constraints$importances),
               tolerance = .0001)
  expect_equal(results_two$objective_wo_importances, 17.92818, tolerance = .0001)
  expect_equal(results_two$objective_wo_importances, sum(results_two$eps) + sum(results_two$eps_star), tolerance = .0001)
})

# Since sample sizes only correct in expectation, this varies from seed to seed
test_that("number of units chosen is approximately what we wanted for each group", {
  expect_equal(as.numeric(table(z[results_two$selected], data$category[results_two$selected])), rep(4, 6))
  expect_equal(as.numeric(table(z[results_two$selected_star], data$category[results_two$selected_star])), rep(2, 4))
})

test_that("units chosen for either main or supplemental group", {
  expect_equal(sum(results_two$selected & results_two$selected_star), 0)
})



# Three comparisons tests ----

set.seed(64, kind = "Mersenne-Twister")

results_three <- optimize_controls(z = z, X = constraints$X, st = data$category, ratio = 1,
                             q_star_s = list(matrix(c(0, rep(1, 3), rep(0, 2)), nrow = 3,
                                               byrow = TRUE, dimnames = list(NULL, c("1", "2"))),
                                             matrix(c(1, 0, 1, 0, rep(0, 2)), nrow = 3,
                                                    byrow = TRUE, dimnames = list(NULL, c("1", "2")))),
                             treated = 2, treated_star = c(1, 1), weight_star = c(2, 1),
                             importances = constraints$importances,
                             integer = FALSE, solver = "Rglpk", seed = 1, runs = 5,
                             time_limit = Inf, correct_sizes = FALSE, low_memory = FALSE)

test_that("optimization gives correct results", {
  expect_equal(results_three$lpdetails$objective, 49.5996, tolerance = .0001)
  lp_sum_w_imp <- sum(results_three$lpdetails$eps * constraints$importances)
  lp_sum_wo_imp <- sum(results_three$lpdetails$eps)
  sum_w_imp <- sum(results_three$eps * constraints$importances)
  sum_wo_imp <- sum(results_three$eps)

  for (supp_comp in 1:2) {
    lp_sum_w_imp <- lp_sum_w_imp + sum(results_three$lpdetails$eps_star[[supp_comp]] *
                                   results_three$weight_star[supp_comp] * constraints$importances)
    lp_sum_wo_imp <- lp_sum_wo_imp + sum(results_three$lpdetails$eps_star[[supp_comp]])
    sum_w_imp <- sum_w_imp + sum(results_three$eps_star[[supp_comp]] *
                                   results_three$weight_star[supp_comp] * constraints$importances)
    sum_wo_imp <- sum_wo_imp + sum(results_three$eps_star[[supp_comp]])
  }
  expect_equal(results_three$lpdetails$objective,
               lp_sum_w_imp,
               tolerance = .0001)
  expect_equal(results_three$lpdetails$objective_wo_importances,  20.88223, tolerance = .0001)
  expect_equal(results_three$lpdetails$objective_wo_importances,
               lp_sum_wo_imp, tolerance = .0001)
  expect_equal(results_three$objective, 50.44668, tolerance = .0001)
  expect_equal(results_three$objective, sum_w_imp,
               tolerance = .0001)
  expect_equal(results_three$objective_wo_importances, 21.08154, tolerance = .0001)
  expect_equal(results_three$objective_wo_importances, sum_wo_imp, tolerance = .0001)
})

# Since sample sizes only correct in expectation, this varies from seed to seed
test_that("number of units chosen is approximately what we wanted for each group", {
  expect_equal(as.numeric(table(z[results_three$selected], data$category[results_three$selected])), rep(4, 6))
  expect_equal(as.numeric(table(z[results_three$selected_star[[1]]], data$category[results_three$selected_star[[1]]])), c(0, 1, 1, 1))
  expect_equal(as.numeric(table(z[results_three$selected_star[[2]]], data$category[results_three$selected_star[[2]]])), rep(1, 2))
})

test_that("units chosen for either main or supplemental group", {
  expect_equal(sum(results_three$selected + results_three$selected_star[[1]] + results_three$selected_star[[2]] > 1), 0)
})



# Tests for low memory ----

set.seed(64, kind = "Mersenne-Twister")
results_three_low_mem <- optimize_controls(z = z, X = constraints$X, st = data$category, ratio = 1,
                             q_star_s = list(matrix(c(0, rep(1, 3), rep(0, 2)), nrow = 3,
                                                    byrow = TRUE, dimnames = list(NULL, c("1", "2"))),
                                             matrix(c(1,0,1,0, rep(0, 2)), nrow = 3,
                                                    byrow = TRUE, dimnames = list(NULL, c("1", "2")))),
                             treated = 2, treated_star = c(1, 1), weight_star = c(2, 1),
                             importances = constraints$importances,
                             integer = FALSE, solver = "Rglpk", seed = 1, runs = 5,
                             time_limit = Inf, correct_sizes = FALSE, low_memory = TRUE)


test_that("optimization gives correct results", {
  expect_equal(results_three_low_mem$lpdetails$objective,
               results_three$lpdetails$objective, tolerance = .0001)
  expect_equal(results_three_low_mem$lpdetails$objective_wo_importances,
               results_three$lpdetails$objective_wo_importances, tolerance = .0001)
 expect_equal(results_three_low_mem$objective,
              results_three$objective, tolerance = .0001)
  expect_equal(results_three_low_mem$objective_wo_importances,
               results_three$objective_wo_importances, tolerance = .0001)
})

test_that("eps not reported", {
  expect_true(is.null(results_three_low_mem$eps))
  expect_true(is.null(results_three_low_mem$eps_star))
  expect_true(is.null(results_three_low_mem$lpdetails$eps))
  expect_true(is.null(results_three_low_mem$lpdetails$eps_star))
})
Any scripts or data that you put into this service are public.
natstrat documentation built on Oct. 15, 2021, 5:12 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
natstrat
Obtain Unweighted Natural Strata that Balance Many Covariates

tests/testthat/test-optimize_controls.R
In natstrat: Obtain Unweighted Natural Strata that Balance Many Covariates

Try the natstrat package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

natstrat Obtain Unweighted Natural Strata that Balance Many Covariates

tests/testthat/test-optimize_controls.R In natstrat: Obtain Unweighted Natural Strata that Balance Many Covariates

Try the natstrat package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

natstrat
Obtain Unweighted Natural Strata that Balance Many Covariates

tests/testthat/test-optimize_controls.R
In natstrat: Obtain Unweighted Natural Strata that Balance Many Covariates