test_rubin.R
In baggr: Bayesian Aggregate Treatment Effects

context("baggr() calls with Rubin model")
library(baggr)
library(testthat)


# prepare inputs ----------------------------------------------------------
set.seed(1990)

# pooled, with equal SE's!
df_pooled <- data.frame("tau" = c(1, -1, .5, -.5, .7, -.7, 1.3, -1.3),
                        "se" = rep(1, 8),
                        "state" = datasets::state.name[1:8])

# tests ----------------------------------------------------------
test_that("Error messages for wrong inputs are in place", {
  expect_error(baggr("Text"), "it's not a data.frame")
  expect_error(baggr(5), "it's not a data.frame")
  expect_error(baggr(df_pooled[1,]), "specify hyper-SD prior manually")

  # model or pooling type doesn't exist
  expect_error(baggr(df_pooled, "made_up_model"), "Unrecognised model")
  expect_error(baggr(df_pooled, pooling = "nune"), "one of")

  # NA or NULL inputs
  df_na <- df_pooled; df_na$tau[1] <- NA
  expect_error(baggr(df_na),"NA values")
  df_na <- df_pooled; df_na$se[2] <- NA
  expect_error(baggr(df_na),"NA values")
  df_na <- df_pooled; df_na$se <- NULL
  expect_error(baggr(df_na),"no column")
  df_na <- df_pooled; df_na$tau <- NULL
  expect_error(baggr(df_na),"no column")
  df_na <- df_pooled; df_na$tau <- as.character(df_na$tau)
  expect_error(baggr(df_na),"are not numeric")


  # test_that("Converting inputs works correctly") more explicitly
  expect_identical(names(convert_inputs(df_pooled, "rubin")),
                   c("K", "theta_hat_k", "se_theta_k", "K_test",
                     "test_theta_hat_k", "test_se_theta_k", "Nc", "X", "X_test"))

  expect_warning(baggr(df_pooled, group = "state1000", iter = 50, refresh = 0),
                 "No labels will be added.")

})


# There will always be a divergent transition / ESS warning produced by Stan
# at iter = 200.
bg5_n <- expect_warning(baggr(df_pooled, "rubin", pooling = "none", group = "state",
                              iter = 200, chains = 2, refresh = 0,
                              show_messages = F))
bg5_p <- expect_warning(baggr(df_pooled, "rubin", pooling = "partial", group = "state",
                              iter = 200, chains = 2, refresh = 0,
                              show_messages = F))
bg5_f <- expect_warning(baggr(df_pooled, "rubin", pooling = "full", group = "state",
                              iter = 200, chains = 2, refresh = 0,
                              show_messages = F))
bg5_ppd <- expect_warning(baggr(df_pooled, "rubin", ppd = TRUE,
                                iter = 200, chains = 2, refresh = 0,
                                show_messages = F))

test_that("Different pooling methods work for Rubin model", {
  expect_is(bg5_n, "baggr")
  expect_is(bg5_p, "baggr")
  expect_is(bg5_f, "baggr")
})

test_that("Extra args to Stan passed via ... work well", {
  expect_equal(nrow(as.matrix(bg5_p$fit)), 200) #right dimension means right iter
  expect_error(baggr(df_pooled, rubbish = 41))
})

test_that("Various attr of baggr object are correct", {
  expect_equal(bg5_n$pooling, "none")
  expect_equal(bg5_p$pooling, "partial")
  expect_equal(bg5_f$pooling, "full")
  expect_equal(bg5_p$n_parameters, 1)
  expect_equal(bg5_p$n_groups, 8)
  expect_equal(bg5_p$effects, "mean")
  expect_equal(bg5_p$model, "rubin")
  expect_is(bg5_p$fit, "stanfit")
})

test_that("Data are available in baggr object", {
  expect_is(bg5_n$data, "data.frame")
  expect_is(bg5_p$data, "data.frame")
  expect_is(bg5_f$data, "data.frame")
})

test_that("Pooling metrics", {
  # all pooling metric are the same as SE's are the same
  expect_equal(length(unique(bg5_p$pooling_metric[1,,1])), 1) #expect_length()
  expect_equal(length(unique(bg5_p$pooling_metric[2,,1])), 1)
  expect_equal(length(unique(bg5_p$pooling_metric[3,,1])), 1)
  # all pooling stats are 0 if no pooling
  expect_equal(unique(as.numeric(bg5_n$pooling_metric)), 0)
  # full pooling means 1's everywhere
  expect_equal(unique(as.numeric(bg5_f$pooling_metric)), 1)

  pp <- pooling(bg5_p)
  expect_is(pp, "array")
  expect_gt(min(pp), 0)
  expect_lt(max(pp), 1)
  expect_identical(bg5_p$pooling_metric, pooling(bg5_p))

  # since all SEs are the same, pooling should be the same for all sites
  capture_output(print(pp))
  # expect_equal(pp[2,,1], .75, tolerance = .1) #YUGE tolerance as we only do 200 iter
  expect_equal(length(unique(pp[2,,1])), 1)
  expect_equal(as.numeric(pp[2,1,1]), .75, tolerance = .1)

  # Calculation of I^2 and H^2
  i2 <- pooling(bg5_p, metric = "isq")
  expect_is(i2, "array")
  expect_gte(min(i2), 0)
  expect_lte(max(i2), 1)
  h2 <- pooling(bg5_p, metric = "hsq")
  expect_is(h2, "array")
  expect_gt(min(h2), 1)

  # Calculation of weights makes sense
  wt <- weights(bg5_p)
  expect_is(wt, "array")
  expect_equal(dim(wt), c(3,8,1))
  expect_equal(sum(wt[2,,1]), 1)
  expect_lte(sum(wt[1,,1]), sum(wt[2,,1]))
  expect_gte(sum(wt[3,,1]), sum(wt[2,,1]))
  expect_gte(sum(wt[1,,1]), 0)
  wt2 <- pooling(bg5_p, metric = "weights")
  expect_identical(wt, wt2)

})




test_that("Calculation of effects works", {
  expect_is(group_effects(bg5_p), "array")
  expect_is(treatment_effect(bg5_p), "list")

  expect_identical(dim(group_effects(bg5_n)), as.integer(c(200, 8 , 1)))
  expect_identical(dim(group_effects(bg5_p)), as.integer(c(200, 8 , 1)))
  expect_identical(dim(group_effects(bg5_f)), as.integer(c(200, 8 , 1)))
  expect_identical(names(treatment_effect(bg5_p)), c("tau", "sigma_tau"))
})


test_that("Plotting works", {
  expect_is(plot(bg5_ppd), "gg")
  expect_is(plot(bg5_n), "gg")
  expect_is(plot(bg5_p, order = TRUE), "gg")
  expect_is(plot(bg5_p, style = "forest"), "gg")
  expect_is(plot(bg5_f, order = FALSE), "gg")
  # but we can crash it easily if
  expect_error(plot(bg5_n, style = "rubbish"), "one of")
})


test_that("printing works", {
  capture_output(print(bg5_n))
  capture_output(print(bg5_p))
  capture_output(print(bg5_p, group = FALSE))
  expect_error(print(bg5_p, group = "abc"), "logical")
  capture_output(print(bg5_f))
  capture_output(print(bg5_ppd))
})

test_that("Forest plots for Rubin model", {
  expect_is(forest_plot(bg5_n), "gforge_forestplot")
  expect_is(forest_plot(bg5_p), "gforge_forestplot")
  expect_is(forest_plot(bg5_p, show = "posterior"), "gforge_forestplot")
  expect_is(forest_plot(bg5_p, show = "both"), "gforge_forestplot")
  expect_is(forest_plot(bg5_f), "gforge_forestplot")
  expect_is(forest_plot(bg5_f, graph.pos = 1), "gforge_forestplot")
  expect_error(forest_plot(cars), "baggr objects")
  expect_error(forest_plot(bg5_p, show = "abc"), "one of")
})
test_that("Test data can be used in the Rubin model", {
  # Wrong data type:
  expect_error(baggr(data = df_pooled, test_data = cars), "is of type")

  # NA values or wrong cols:
  df2 <- df_pooled; df2$tau[1] <- NA
  expect_error(baggr(data = df_pooled, test_data = df2), "NA")
  df_na <- df_pooled[7:8,]; df_na$tau <- NULL
  expect_error(baggr(df_pooled[1:6,], test_data = df_na), "is of type")

  # This should work:
  bg_lpd <- expect_warning(baggr(df_pooled[1:6,], test_data = df_pooled[7:8,],
                                 iter = 500, refresh = 0))
  expect_is(bg_lpd, "baggr")
  # make sure that we have 6 sites, not 8:
  expect_equal(dim(group_effects(bg_lpd)), c(1000, 6, 1))
  # make sure it's not 0
  expect_equal(mean(rstan::extract(bg_lpd$fit, "logpd[1]")[[1]]), -3.6, tolerance = 1)

})


# covariates ------
sa <- schools
sa$a <- rnorm(8)
sa$b <- rnorm(8)
sa$f <- as.factor(c(rep("Yes", 4), rep("No", 4)))
sa$f2 <- as.factor(c(rep("Yes", 3), rep("No", 2), rep("Maybe", 3)))
sa$bad <- c(rnorm(7), NA)

sb <- sa
sb$b <- NULL
bg_cov <- expect_warning(
  baggr(sa, covariates = c("a", "b"), iter = 200, refresh = 0))
bg_cov_factor <- expect_warning(
  baggr(sa, covariates = c("f"), iter = 200, refresh = 0))
bg_cov_factor2 <- expect_warning(
  baggr(sa, covariates = c("f2"), iter = 200, refresh = 0))
expect_identical(attr(bg_cov_factor$inputs, "covariate_coding"), c("fYes"))
expect_identical(attr(bg_cov_factor2$inputs, "covariate_coding"), c("f2No", "f2Yes"))
bg_cov_test <- expect_warning(
  baggr(sa, covariates = c("a"), test_data = sb, iter = 200, refresh = 0))
bg_cov_prior1 <- expect_warning(
  baggr(sa, covariates = c("a", "b"),
        iter = 200, refresh = 0, prior_beta = normal(0, 3)))
bg_cov_prior2 <- expect_warning(
  baggr(sa, covariates = c("a", "b"),
        iter = 200, refresh = 0, prior = list("beta" = uniform(-5, 5))))

test_that("Model with covariates works fine", {
  expect_is(bg_cov, "baggr")
  expect_equal(bg_cov$formatted_prior$prior_beta_fam, 1)
  expect_equal(bg_cov$formatted_prior$prior_beta_val[1], 0)
  expect_gt(bg_cov$formatted_prior$prior_beta_val[2], 0)
  expect_equal(bg_cov$formatted_prior$prior_beta_val[3], 0)
  expect_error(baggr(sa, covariates = c("made_up_covariates")))
  expect_error(baggr(sa, covariates = c("a", "b", "made_up_covariates")))
  expect_error(baggr(sa, covariates = c("bad")), "NA")
  expect_length(bg5_p$covariates, 0)
  expect_length(bg_cov$covariates, 2)
  expect_null(bg_cov$mean_lpd)

  # Fixed effects extraction
  expect_is(fixed_effects(bg_cov), "matrix")
  expect_is(fixed_effects(bg_cov, transform = exp), "matrix")
  expect_equal(dim(fixed_effects(bg_cov, summary = TRUE)), c(2,5,1))
  expect_equal(dim(fixed_effects(bg_cov, summary = FALSE))[2], 2)

  # Bubble plots
  p1 <- bubble(bg_cov, "a")
  p2 <- bubble(bg_cov_factor, "f")
  p1
  p2
  expect_is(p1, "gg")
  expect_is(p2, "gg")

  # covariates and test_data
  expect_error(baggr(sa, covariates = c("a", "b"), test_data = sb), "Cannot bind")
  expect_error(baggr(sb, model = "rubin", test_data=sa[1:2,], covariates = c("b")), "are not columns")
  expect_is(bg_cov_test$mean_lpd, "numeric")
  expect_length(bg_cov_test$covariates, 1)

  # Setting priors for covariates manually works
  expect_is(bg_cov_prior1, "baggr")
  expect_is(bg_cov_prior2, "baggr")
  expect_equal(bg_cov_prior1$formatted_prior$prior_beta_fam, 1)
  expect_equal(bg_cov_prior1$formatted_prior$prior_beta_val, c(0,3,0))
  expect_equal(bg_cov_prior2$formatted_prior$prior_beta_fam, 0)
  expect_equal(bg_cov_prior2$formatted_prior$prior_beta_val, c(-5,5,0))
})

# test helpers -----

test_that("Extracting treatment/study effects works", {
  expect_error(treatment_effect(df_pooled))
  expect_is(treatment_effect(bg5_p), "list")
  expect_identical(names(treatment_effect(bg5_p)), c("tau", "sigma_tau"))
  expect_is(treatment_effect(bg5_p)$tau, "numeric") #this might change to accommodate more dim's
  expect_message(treatment_effect(bg5_n), "no treatment effect estimated when")
  expect_length(treatment_effect(bg5_p, summary = TRUE)$tau, 5)

  # Drawing values of tau:
  expect_error(effect_draw(cars))
  expect_is(effect_draw(bg5_p), "numeric")
  expect_length(effect_draw(bg5_p), 200)
  expect_length(effect_draw(bg5_p,7), 7)
  eds1 <- effect_draw(bg5_p, summary = T)
  eds2 <- effect_draw(bg5_p, summary = T, interval = .5)
  expect_length(eds1, 5)
  expect_gt(eds2[1], eds1[1]) #narrower interval
  expect_warning(effect_draw(bg5_p, 1e05), "more effect draws than there are available samples")

  # Plotting tau:
  expect_is(effect_plot(bg5_p), "gg")
  expect_is(effect_plot(bg5_p, bg5_f), "gg")
  expect_is(effect_plot("Model A" = bg5_p, "Model B" = bg5_f), "gg")
  # Crashes when passing nonsense
  expect_error(effect_plot(cars), "baggr class")
  expect_error(effect_plot(cars, cars, bg5_f), "baggr class")
})


test_that("baggr_compare basic cases work with Rubin", {
  # If I pass nothing
  expect_error(baggr_compare(), "Must provide baggr models")
  # pooling
  expect_error(baggr_compare(schools, pooling = "full"))
  # if I pass rubbish
  expect_error(baggr_compare(cars))
  # if I pass list of rubbish
  expect_error(baggr_compare("Fit 1" = cars, "Fit 2" = cars))
  # try to make nonexistant comparison:
  expect_error(baggr_compare(bg5_p, bg5_n, bg5_f, compare = "sreffects"), "one of")
  # Run models from baggr_compare:
  bgcomp <- expect_warning(baggr_compare(schools,
                                         iter = 200, refresh = 0))
  expect_is(bgcomp, "baggr_compare")
  # Compare prior vs posterior:
  bgcomp <- expect_warning(baggr_compare(schools, iter = 200,
                                         what = "prior", refresh = 0))
  expect_is(bgcomp, "baggr_compare")
  # Compare existing models:
  expect_error(baggr_compare("Name" = bg5_p, "Name" = bg5_n), "unique model names")

  bgcomp2 <- baggr_compare(bg5_p, bg5_n, bg5_f)
  expect_is(bgcomp2, "baggr_compare")
  p1 <- plot(bgcomp2)
  p2 <- plot(bgcomp2, add_values = TRUE)
  expect_is(p1, "gg")
  expect_is(p2, "gg")
})

test_that("loocv", {
  skip_on_cran()

  # Rubbish model
  expect_error(loocv(schools, model = "rubbish"), "Inference failed")
  # Can't do pooling none
  expect_error(loocv(schools, pooling = "none"))

  loo_model <- expect_warning(loocv(schools, return_models = TRUE, iter = 200, refresh = 0))
  expect_is(loo_model, "baggr_cv")
  capture_output(print(loo_model))
})





# 8 schools correctness test -----


test_that("The default 8 schools result is close to the result in BDA", {
  skip_on_cran()
  bg_s <- baggr(schools, refresh = 0, iter = 2000, control = list(adapt_delta = .99))
  expect_equal(mean(treatment_effect(bg_s)$tau), 8, tolerance = .25)
  expect_equal(mean(treatment_effect(bg_s)$sigma_tau), 7, tolerance = .25)
})


# Bangert-Drowns correctness check -----

# dt_bd <- metafor::dat.bangertdrowns2004 %>%
# dplyr::mutate(study = paste(author, year), tau = yi, se = sqrt(vi))
# write.table(dt_bd, file = "inst/tests/bangertdrowns2004.csv")
dt_bd <- read.table(system.file("tests", "bangertdrowns2004.csv", package = "baggr"))
bg_bd <- baggr(dt_bd, group = "study", model = "rubin", iter = 4000,
               prior_hypersd = uniform(0, 10), refresh = 0)
# RE results from metafor:
# metafor::rma(yi, vi, data=dt2)
test_that("Bangert-Drowns meta-analysis result is close to metafor output", {
  expect_equal(mean(treatment_effect(bg_bd)$tau), 0.22, tolerance = .01)
  expect_equal(as.numeric(quantile(treatment_effect(bg_bd)$tau, .025)),
               0.13, tolerance = .02)
  expect_equal(as.numeric(quantile(treatment_effect(bg_bd)$tau, .975)),
               0.31, tolerance = .02)
})


comp_rbpl <- expect_warning(baggr_compare(
  schools, model = "rubin", iter = 200, what = "pooling"
))

comp_rbpr <- expect_warning(baggr_compare(
  schools, model = "rubin", iter = 200, what = "prior"
))

test_that("baggr comparison method works for Rubin model", {

  expect_is(comp_rbpr, "baggr_compare")
  expect_is(comp_rbpl, "baggr_compare")

  expect_is(testthat::capture_output(print(comp_rbpl)), "character")
  expect_is(testthat::capture_output(print(comp_rbpr)), "character")

  expect_gt(length(comp_rbpl), 0)
  expect_gt(length(comp_rbpr), 0)

  expect_is(plot(comp_rbpl), "gg")

  expect_is(plot(comp_rbpr), "gg")

  expect_is(plot(comp_rbpl, grid_models = TRUE), "gtable")

  expect_is(plot(comp_rbpr, grid_models = TRUE), "gtable")
})

test_that("Plot quantiles", {
  expect_error(plot_quantiles(bg5_p))
})

test_that("You can run Rubin model with mutau type inputs", {
  df_mutau <- data.frame("tau" = c(1, -1, .5, -.5, .7, -.7, 1.3, -1.3),
                         "se.tau" = rep(1, 8),
                         "mu" = rnorm(8),
                         "se.mu" = rep(1, 8),
                         "state" = datasets::state.name[1:8])
  bg <- expect_warning(baggr(df_mutau, model = "rubin", iter = 20, refresh = 0))
  expect_is(bg, "baggr")
  expect_equal(bg$model, "rubin")
  bg <- expect_warning(baggr(df_mutau[1:6,], test_data = df_mutau[7:8,],
                             model = "rubin", iter = 20, refresh = 0))
  expect_is(bg, "baggr")
  expect_gt(bg$mean_lpd, 0)
})