tests/testthat/test-estimators.R

context("Estimators")

my_population <- declare_population(N = 500, noise = rnorm(N))
my_potential_outcomes <- declare_potential_outcomes(Y_Z_0 = noise, Y_Z_1 = noise + rnorm(N, mean = 2, sd = 2))
my_assignment <- declare_assignment(m = 25)
reveal_outcomes <- declare_reveal()

expect_estimates <- function(estimates, label = NULL) {
  expect_equal(
    names(estimates),
    c("estimator_label", "term", "estimate", "std.error", "statistic", "p.value", "conf.low", "conf.high", "df", "outcome")
  )
  if (is.character(label)) {
    expect_equal(estimates$estimator_label, label)
  }
}

test_that("difference in means", {
  my_estimator <- declare_estimator(Y ~ Z)
  my_population() %>% my_potential_outcomes() %>% my_assignment() %>% reveal_outcomes() %>% my_estimator() %>% expect_estimates()
})

test_that("lm with robust ses", {
  my_estimator <- declare_estimator(Y ~ Z, model = lm_robust)
  my_population() %>% my_potential_outcomes() %>% my_assignment() %>% reveal_outcomes() %>% my_estimator() %>% expect_estimates()
})


test_that("lm with HC3 robust ses", {
  my_estimator <- declare_estimator(Y ~ Z, model = lm_robust, se_type = "HC3")
  my_population() %>% my_potential_outcomes() %>% my_assignment() %>% reveal_outcomes() %>% my_estimator() %>% expect_estimates()
})

test_that("custom estimator function", {
  my_mean <- function(data) {
    data.frame(estimate = with(data, 2), foo = mean(data$Y))
  }
  my_estimator_custom <- declare_estimator(handler = tidy_estimator(my_mean))
  cust <- my_population() %>% my_potential_outcomes() %>% my_assignment() %>% reveal_outcomes() %>% my_estimator_custom()
  expect_equal(cust$estimate, 2)
})

test_that("check blocked d-i-m estimator", {
  my_population <- declare_population(N = 500, noise = rnorm(N), blocks = sample(rep(c("A", "B"), each = 250), N, replace = F))
  my_potential_outcomes <- declare_potential_outcomes(Y_Z_0 = noise, Y_Z_1 = noise + rnorm(N, mean = 2, sd = 2) + 5 * (blocks == "A"))
  my_assignment <- declare_assignment(blocks = blocks)

  ## lm with HC3 robust ses
  my_estimator_blocked <- declare_estimator(Y ~ Z, model = difference_in_means, blocks = `blocks`)
  df <- my_population() %>% my_potential_outcomes() %>% my_assignment() %>% declare_reveal()()
  my_estimator_notblocked <- declare_estimator(Y ~ Z)

  df %>% my_estimator_notblocked() %>% expect_estimates()
  df %>% my_estimator_blocked() %>% expect_estimates() ## it is different!
})



test_that("regression from estimatr works as an estimator", {
  my_population <- declare_population(N = 500, noise = rnorm(N))
  my_potential_outcomes <- declare_potential_outcomes(Y_Z_0 = noise, Y_Z_1 = noise + rnorm(N, mean = 2, sd = 2))
  my_assignment <- declare_assignment(m = 100)
  pate <- declare_estimand(mean(Y_Z_1 - Y_Z_0), label = "pate")
  pate_estimator <- declare_estimator(Y ~ Z + noise,
    model = lm_robust,
    term = "noise",
    estimand = pate, label = "pate_hat"
  )
  reveal_outcomes <- declare_reveal()

  my_design <- my_population +
    my_potential_outcomes +
    pate +
    my_assignment +
    reveal_outcomes +
    pate_estimator

  estimate <- draw_estimates(my_design)
  expect_equal(estimate$estimator_label, "pate_hat")
  expect_equal(estimate$term, "noise")
  expect_equal(estimate$estimand_label, "pate")
})

population <- declare_population(N = 200, noise = rnorm(N))

potential_outcomes <- declare_potential_outcomes(formula = Y ~ noise + Z * .5)

pop <- potential_outcomes(population())

sampling <- declare_sampling(n = 100)

assignment <- declare_assignment(m = 50)

sate <- declare_estimand(SATE = mean(Y_Z_1 - Y_Z_0))

reveal_outcomes <- declare_reveal()

test_that("multiple estimator declarations work", {
  estimator_1 <-
    declare_estimator(
      formula = Y ~ Z,
      model = estimatr::lm_robust,
      estimand = sate,
      label = "estimator_1"
    )

  estimator_2 <-
    declare_estimator(
      formula = Y ~ Z,
      model = estimatr::lm_robust,
      estimand = sate,
      label = "estimator_2"
    )

  design <- population +
    potential_outcomes +
    sampling +
    sate +
    assignment +
    reveal_outcomes +
    estimator_1 +
    estimator_2

  e <- draw_estimates(design)

  expect_equal(e$estimator_label, c("estimator_1", "estimator_2"))
})

test_that("multiple estimator declarations work", {
  estimator_3 <-
    declare_estimator(
      formula = Y ~ Z,
      model = estimatr::lm_robust,
      estimand = sate,
      label = "estimator_3"
    )

  estimator_4 <-
    declare_estimator(
      formula = Y ~ Z,
      model = estimatr::lm_robust,
      estimand = sate,
      label = "estimator_4"
    )

  design <- population +
    potential_outcomes +
    sampling +
    sate +
    assignment +
    reveal_outcomes +
    estimator_3 +
    estimator_4

  e <- draw_estimates(design)

  expect_equal(e$estimator_label, c("estimator_3", "estimator_4"))
})

test_that("multiple estimator declarations work", {
  estimator_5 <-
    declare_estimator(
      formula = Y ~ Z,
      model = estimatr::lm_robust,
      estimand = sate
    )

  estimator_6 <-
    declare_estimator(
      formula = Y ~ Z,
      model = estimatr::lm_robust,
      estimand = sate
    )

  # This could eventually be fixed so that the estimator names are inherited
  expect_error({
    design <- population +
      potential_outcomes +
      sampling +
      sate +
      assignment +
      reveal_outcomes +
      estimator_5 +
      estimator_6
  })
})

context("Labeling estimator output with estimands")

mand_arg_label <- declare_estimand(ATE = mean(Y_Z_1 - Y_Z_0))
mand_explicit_label <- declare_estimand(mean(Y_Z_1 - Y_Z_0), label = "ATE")

expect_label <- function(df, expected_label, estimand_label) {
  expect_equal(df$estimator_label, expected_label)
  expect_equal(df$estimand_label, estimand_label)
}

df <- data.frame(
  Y = c(0, 0, 0, 0, 1, 1, 1, 1),
  Z = c(0, 0, 0, 0, 1, 1, 1, 1)
)

test_that("labels for estimates and estimands work estimand splat labeld estimator default", {
  mator_no_label <- declare_estimator(Y ~ Z, estimand = mand_arg_label)
  df %>% mator_no_label() %>% expect_label("estimator", "ATE")
})

test_that("labels for estimates and estimands work, label explicit, estimand splat labeled", {
  mator_label <- declare_estimator(Y ~ Z, estimand = mand_arg_label, label = "an_estimator")
  df %>% mator_label() %>% expect_label("an_estimator", "ATE")
})

test_that("labels for estimates and estimands work estimand splat labeld label =NULL", {
  mator_label_null <- declare_estimator(Y ~ Z, estimand = mand_arg_label, label = NULL)
  expect_error(df %>% mator_label_null())
})

test_that("labels for estimates and estimands work - label default", {
  mator_no_label <- declare_estimator(Y ~ Z, estimand = mand_explicit_label)
  df %>% mator_no_label() %>% expect_label("estimator", "ATE")
})

test_that("labels for estimates and estimands work - label explicit", {
  mator_label <- declare_estimator(Y ~ Z, estimand = mand_explicit_label, label = "an_estimator")
  # mator_label_noquote <- declare_estimator(Y ~ Z, estimand = mand_explicit_label, label = an_estimator)
  df %>% mator_label() %>% expect_label("an_estimator", "ATE")
})

test_that("labels for estimates and estimands work- label=NULL", {
  mator_label_null <- declare_estimator(Y ~ Z, estimand = mand_explicit_label, label = NULL)
  expect_error(df %>% mator_label_null())
})


test_that("labels for estimates and estimands work estimand label, estimator default", {
  mator_no_label <- declare_estimator(Y ~ Z, estimand = mand_explicit_label)
  df %>% mator_no_label() %>% expect_label("estimator", "ATE")
})


test_that("coefficient_names = TRUE returns all term", {
  tst <-
    data.frame(
      x = runif(100),
      y = runif(100),
      wt = runif(100),
      clust = sample(1:10, 100, replace = TRUE)
    )

  est4 <- declare_estimator(
    y ~ x + as.factor(clust),
    clusters = clust,
    weights = wt,
    model = lm_robust,
    term = TRUE
  )

  result <- est4(tst)

  expect_gt(nrow(result), 0)
})


test_that("default estimator handler validation fn", {
  expect_error(declare_estimator(model = "Test"))
  expect_error(declare_estimator(model = I))
})

test_that("tidy_estimator, handler does not take data", {
  expect_error(tidy_estimator(I), "function with a data argument")
})

test_that("model_handler runs directly", {
  lm_out <- structure(list(term = "group2", estimate = 1.58, std.error = 0.849091017238762, 
                           statistic = 1.86081346748685, p.value = 0.0791867142159382, 
                           conf.low = -0.203874032287599, conf.high = 3.3638740322876), row.names = c(NA, 
                                                                                                      -1L), class = c("tbl_df", "tbl", "data.frame"))

  result <- model_handler(sleep, extra ~ group, model = lm, term = "group2")
  expect_equivalent(as.data.frame(result), as.data.frame(lm_out))
})


test_that("estimators have different columns", {
  skip_if_not_installed("Matching")
  population <- declare_population(
    N = 1000,
    X1 = rnorm(N),
    X2 = rnorm(N),
    X3 = rnorm(N)
  )

  potential_outcomes <-
    declare_potential_outcomes(formula = Y ~ X1 + X2 + X3 + Z)

  assignment <- declare_assignment(
    handler = function(data) {
      prob <- with(data, pnorm(X1 + X2 + X3))
      data$Z <- rbinom(nrow(data), 1, prob)
      return(data)
    }
  )

  att <- declare_estimand(att = mean(Y_Z_1[Z == 1] - Y_Z_0[Z == 1]))

  estimator_d_i_m <- declare_estimator(Y ~ Z, estimand = "estimand", label = "dim")

  matching_helper <- function(data) {
    match_out <- with(data, Matching::Match(
      Y = Y,
      Tr = Z,
      X = cbind(X1, X2, X3)
    ))
    return(data.frame(estimate = match_out$est))
  }

  estimator_m <- declare_estimator(
    handler = tidy_estimator(matching_helper),
    estimand = att,
    label = "matching"
  )

  matching <- population +
    potential_outcomes +
    assignment +
    declare_reveal(Y, Z) +
    estimator_d_i_m +
    estimator_m

  result <- draw_estimates(matching)

  expect_length(result, 11)
  expect_equal(nrow(result), 2)
})


test_that("when a term is missing from a model there is an informative error", {
  data <- fabricate(
    N = 100,
    Y = rbinom(N, 1, .5),
    Z = rbinom(N, 1, .5)
  )
  ols <- declare_estimator(Y ~ Z, model = lm_robust, term = "X")

  expect_error(ols(data), "Not all of the terms declared in your estimator are present in the model output, including X.")
})
DeclareDesign/ddoldversion documentation built on Oct. 30, 2019, 5:17 p.m.