context("Estimator - horvitz_thompson")
test_that("Horvitz-Thompson matches d-i-m under certain conditions", {
n <- 4
dat <- data.frame(
y0 = rnorm(n),
z = rep(0:1, each = n / 2),
ps = rep(0.5, n)
)
dat$y1 <- dat$y0 + 0.43
dat$y <- ifelse(dat$z, dat$y1, dat$y0)
expect_equal(
coef(horvitz_thompson(
y ~ z,
condition_prs = ps,
data = dat
)),
coef(difference_in_means(
y ~ z,
data = dat
))
)
})
test_that("Horvitz-Thompson works in simple case", {
n <- 40
dat <- data.frame(
y = rnorm(n)
)
simp_decl <- randomizr::declare_ra(N = n, prob = 0.4, simple = T)
dat$z <- randomizr::conduct_ra(simp_decl)
ht_simp <- horvitz_thompson(
y ~ z,
data = dat,
ra_declaration = simp_decl,
return_condition_pr_mat = TRUE
)
# Also with no SEs
ht_simp_no <- horvitz_thompson(
y ~ z,
data = dat,
ra_declaration = simp_decl,
return_condition_pr_mat = TRUE,
se_type = "none"
)
expect_equal(
ht_simp$coefficients,
ht_simp_no$coefficients
)
expect_equivalent(
as.numeric(tidy(ht_simp_no)[c("std.error", "p.value", "conf.low", "conf.high")]),
rep(NA_real_, 4)
)
# Works with constant effects assumption
ht_const <- horvitz_thompson(
y ~ z,
data = dat,
ra_declaration = simp_decl,
se_type = "constant"
)
# picks out right declaration
ht_rev <- horvitz_thompson(
y ~ z,
data = dat,
condition1 = 1,
condition2 = 0,
ra_declaration = simp_decl,
return_condition_pr_mat = TRUE
)
# Fails properly if condition in treatment but not in declaration
dat$z[1] <- 2
expect_error(
horvitz_thompson(
y ~ z,
data = dat,
condition1 = 0,
condition2 = 2,
ra_declaration = simp_decl
)
)
expect_equal(
tidy(ht_simp)[, c("estimate", "std.error")],
tidy(ht_rev)[, c("estimate", "std.error")] * c(-1, 1)
)
# Simple designs needn't use condition matrix as joint prs are product of marginals
expect_equal(
ht_simp$condition_pr_mat,
NULL
)
# complete randomization works as well
comp_decl <- randomizr::declare_ra(N = n, prob = 0.4, simple = FALSE)
dat$z_comp <- randomizr::conduct_ra(comp_decl)
dat$pr_comp <- 0.4
# We can learn it! or you can tell us
expect_equal(
ht_comp <- horvitz_thompson(y ~ z_comp, data = dat, simple = FALSE),
horvitz_thompson(y ~ z_comp, data = dat, ra_declaration = comp_decl)
)
expect_equal(
ht_comp,
horvitz_thompson(y ~ z_comp, data = dat, simple = FALSE, condition_prs = pr_comp)
)
# Also with no SEs
ht_comp_no <- horvitz_thompson(y ~ z_comp, data = dat, simple = FALSE, se_type = "none")
expect_equal(
ht_comp$coefficients,
ht_comp_no$coefficients
)
expect_equivalent(
as.numeric(tidy(ht_comp_no)[c("std.error", "p.value", "conf.low", "conf.high")]),
rep(NA_real_, 4)
)
# error if you pass wrong prs
dat$pr_wrong <- dat$pr_comp
dat$pr_wrong[1] <- 0.5
expect_error(
horvitz_thompson(y ~ z_comp, data = dat, simple = FALSE, condition_prs = pr_wrong),
"Treatment probabilities must be fixed for complete randomized designs"
)
# Works without data frame!
ht_with <- with(
dat,
horvitz_thompson(y ~ z_comp, simple = FALSE, condition_prs = pr_comp)
)
pr_comp <- dat$pr_comp
y <- dat$y
z_comp <- dat$z_comp
ht_glob <- horvitz_thompson(y ~ z_comp, simple = FALSE, condition_prs = pr_comp)
ht_rec <- horvitz_thompson(y ~ z_comp, simple = FALSE, condition_prs = 0.4)
expect_equal(
ht_with,
ht_glob
)
expect_equal(
ht_with,
ht_rec
)
# with declaration
ht_nod <- horvitz_thompson(y ~ z_comp, ra_declaration = comp_decl)
ht_d <- horvitz_thompson(y ~ z_comp, data = dat, ra_declaration = comp_decl)
expect_equal(
tidy(ht_nod),
tidy(ht_d)
)
})
test_that("Horvitz-Thompson works with clustered data", {
n <- 8
dat <- data.frame(
y = rnorm(n),
cl = rep(1:4, each = 2)
)
## Complete random sample, clustered
clust_crs_decl <- randomizr::declare_ra(N = nrow(dat), clusters = dat$cl, prob = 0.5)
dat$z <- randomizr::conduct_ra(clust_crs_decl)
# Regular SE using Young's inequality
ht_crs_decl <- horvitz_thompson(y ~ z, data = dat, ra_declaration = clust_crs_decl)
expect_true(
!is.na(ht_crs_decl$coefficients)
)
expect_equivalent(
ht_crs_decl$df,
NA
)
# Also with no SEs
ht_crs_decl_no <- horvitz_thompson(y ~ z, data = dat, ra_declaration = clust_crs_decl, se_type = "none")
expect_equal(
ht_crs_decl$coefficients,
ht_crs_decl_no$coefficients
)
expect_equivalent(
as.numeric(tidy(ht_crs_decl_no)[c("std.error", "p.value", "conf.low", "conf.high")]),
rep(NA_real_, 4)
)
expect_message(
horvitz_thompson(y ~ z, data = dat, clusters = cl, condition_prs = rep(0.5, nrow(dat))),
"Assuming simple cluster randomization"
)
expect_message(
horvitz_thompson(y ~ z, data = dat, clusters = cl, condition_prs = rep(0.5, nrow(dat)), simple = FALSE),
NA
)
# Can infer probabilities as well
expect_equal(
ht_crs_decl,
horvitz_thompson(y ~ z, data = dat, clusters = cl, simple = FALSE)
)
# And constant effects error for non-simple designs
expect_error(
horvitz_thompson(y ~ z, data = dat, ra_declaration = clust_crs_decl, se_type = "constant"),
"`se_type` = 'constant' only supported for simple random"
)
## Simple random sample, clustered
clust_srs_decl <- randomizr::declare_ra(
N = nrow(dat),
clusters = dat$cl,
prob = 0.4,
simple = TRUE
)
# With declaration
# Regular SE using Young's inequality
ht_srs_decl <- horvitz_thompson(y ~ z, data = dat, ra_declaration = clust_srs_decl)
# Also with no SEs
ht_srs_decl_no <- horvitz_thompson(y ~ z, data = dat, ra_declaration = clust_srs_decl, se_type = "none")
expect_equal(
ht_srs_decl$coefficients,
ht_srs_decl_no$coefficients
)
expect_equivalent(
as.numeric(tidy(ht_srs_decl_no)[c("std.error", "p.value", "conf.low", "conf.high")]),
rep(NA_real_, 4)
)
# Not the same because second doesn't know it's clustered!
# Just passing mat
clust_srs_mat <- declaration_to_condition_pr_mat(clust_srs_decl)
ht_srs_nodecl <- horvitz_thompson(y ~ z, data = dat, condition_pr_mat = clust_srs_mat)
# Also with no SEs
ht_srs_nodecl_no <- horvitz_thompson(y ~ z, data = dat, condition_pr_mat = clust_srs_mat, se_type = "none")
expect_equal(
ht_srs_nodecl$coefficients,
ht_srs_nodecl_no$coefficients
)
# works if I also pass cluster
expect_identical(
ht_srs_decl,
ht_srs_cl <- horvitz_thompson(y ~ z, data = dat, clusters = cl, condition_pr_mat = clust_srs_mat)
)
# Also with no SEs
ht_srs_cl_no <- horvitz_thompson(y ~ z, data = dat, clusters = cl, condition_pr_mat = clust_srs_mat, se_type = "none")
expect_equal(
ht_srs_cl$coefficients,
ht_srs_cl_no$coefficients
)
# Can infer from number of treated clusters per block the treatment pr
clbl_dat <- data.frame(
cl_new = cl_new <- c(1, 2, 3, 4, 5, 5, 6, 6, 7, 7, 8, 8),
bl = rep(1:3, each = 4),
y = rnorm(12)
)
# pr = 0.25 in first, 0.5 in second
blcl_ra <- randomizr::declare_ra(blocks = clbl_dat$bl, clusters = clbl_dat$cl_new, block_m = c(1, 2, 1))
clbl_dat$z_clbl <- randomizr::conduct_ra(blcl_ra)
expect_equivalent(
horvitz_thompson(y ~ z_clbl, data = clbl_dat, ra_declaration = blcl_ra),
horvitz_thompson(y ~ z_clbl, data = clbl_dat, blocks = bl, clusters = cl_new)
)
# should work with just a column if SRS!
dat$ps <- 0.4
expect_identical(
ht_srs_decl,
ht_srs_prs <- horvitz_thompson(y ~ z, data = dat, clusters = cl, condition_prs = ps)
)
# Also with no SEs
ht_srs_prs_no <- horvitz_thompson(y ~ z, data = dat, clusters = cl, condition_prs = ps, se_type = "none")
expect_equal(
ht_srs_prs$coefficients,
ht_srs_prs_no$coefficients
)
# And constant effects
# Only work for simple for now
expect_error(
horvitz_thompson(y ~ z, data = dat, ra_declaration = clust_srs_decl, se_type = "constant"),
"`se_type` = 'constant' only supported for simple random designs at the moment"
)
# Fails with condition_pr varying within cluster
dat$p_wrong <- dat$ps
dat$p_wrong[1] <- 0.545
expect_error(
horvitz_thompson(y ~ z, data = dat, clusters = cl, condition_prs = p_wrong),
"`condition_prs` must be constant within `cluster`"
)
# Or pr outside of 0 1
dat$p_wrong[1] <- 1.5
expect_error(
horvitz_thompson(y ~ z, data = dat, clusters = cl, condition_prs = p_wrong),
"`condition_prs` must be a vector of positive values no greater than 1"
)
# or treatment varying within a cluster
dat$z_wrong <- dat$z
dat$z_wrong[1:2] <- c(0, 1)
table(dat$z_wrong, dat$cl)
expect_error(
horvitz_thompson(y ~ z_wrong, data = dat, clusters = cl, condition_prs = ps),
"Treatment condition must be constant within `clusters`"
)
})
test_that("Horvitz-Thompson works with missingness", {
n <- 40
dat <- data.frame(
y = rnorm(n),
bl = rep(1:10, each = 4),
ps = 0.35
)
decl <- randomizr::declare_ra(n, prob = 0.35)
dat$z <- randomizr::conduct_ra(decl)
missing_dat <- dat
missing_dat$y[1] <- NA
expect_error(
ht_miss <- horvitz_thompson(y ~ z, data = missing_dat, ra_declaration = decl),
NA
)
expect_error(
ht_miss_pr <- horvitz_thompson(y ~ z, data = missing_dat, condition_prs = 0.35, simple = FALSE),
NA
)
expect_equal(ht_miss, ht_miss_pr)
# Test that we didn't edit the declaration in the users env
# Should work a second time
expect_error(
horvitz_thompson(y ~ z, data = missing_dat, ra_declaration = decl),
NA
)
missing_dat$ps[2] <- NA
dat$drop_these <- c(1, 1, rep(0, times = n - 2))
expect_warning(
ht_miss <- horvitz_thompson(y ~ z, data = missing_dat, condition_prs = ps),
"missingness in the condition_pr"
)
expect_equal(
horvitz_thompson(y ~ z, data = dat, condition_prs = ps, subset = drop_these == 0),
ht_miss
)
})
# test blocks in the data
test_that("Estimating Horvitz-Thompson can be done two ways with blocks", {
n <- 40
dat <- data.frame(
y = rnorm(n),
bl = rep(1:10, each = 4)
)
bl_ra <- randomizr::declare_ra(blocks = dat$bl)
dat$z <- randomizr::conduct_ra(bl_ra)
bl_pr_mat <- declaration_to_condition_pr_mat(bl_ra)
# This creates estimates within blocks and then joins them together using the common
# formula
ht_declare_bl <- horvitz_thompson(y ~ z, data = dat, ra_declaration = bl_ra)
# This estimates the treatment effect at once using only condition_pr_mat
ht_condmat_bl <- horvitz_thompson(y ~ z, data = dat, condition_pr_mat = bl_pr_mat)
expect_equivalent(
tidy(ht_declare_bl),
tidy(ht_condmat_bl)
)
# Also with no SEs
ht_declare_bl_no <- horvitz_thompson(y ~ z, data = dat, ra_declaration = bl_ra, se_type = "none")
ht_condmat_bl_no <- horvitz_thompson(y ~ z, data = dat, condition_pr_mat = bl_pr_mat, se_type = "none")
expect_equal(
ht_declare_bl$coefficients,
ht_declare_bl_no$coefficients
)
expect_equal(
ht_condmat_bl$coefficients,
ht_condmat_bl_no$coefficients
)
dat$mps <- rep(1:20, each = 2)
mp_ra <- randomizr::declare_ra(blocks = dat$mps)
dat$z <- randomizr::conduct_ra(mp_ra)
mp_pr_mat <- declaration_to_condition_pr_mat(mp_ra)
ht_declare_mp <- horvitz_thompson(y ~ z, data = dat, ra_declaration = mp_ra)
# This estimates the treatment effect at once using only condition_pr_mat
ht_condmat_mp <- horvitz_thompson(y ~ z, data = dat, condition_pr_mat = mp_pr_mat)
expect_equivalent(
tidy(ht_declare_mp),
tidy(ht_condmat_mp)
)
# block messages when passing with simple = TRUE flag, not otherwise
dat$p <- tapply(dat$z, dat$bl, mean)[dat$bl]
expect_message(
ht_declare_mp <- horvitz_thompson(y ~ z, data = dat, blocks = bl, condition_prs = p, simple = TRUE),
"Assuming complete random assignment of clusters within blocks."
)
expect_message(
ht_declare_mp <- horvitz_thompson(y ~ z, data = dat, blocks = bl, condition_prs = p, simple = FALSE),
NA
)
})
# errors when arguments are passed that shouldn't be together
test_that("Horvitz-Thompson properly checks arguments and data", {
n <- 8
dat <- data.frame(
y = rnorm(n),
ps = 0.4,
z = sample(rep(0:1, each = n / 2)),
x = runif(n),
cl = rep(1:4, each = 2),
bl = rep(1:2, each = 4)
)
decl <- randomizr::declare_ra(N = n, prob = 0.4, simple = FALSE)
# default is mean(ps)
expect_identical(
horvitz_thompson(y ~ z, data = dat),
horvitz_thompson(y ~ z, data = dat, condition_prs = rep(0.5, times = nrow(dat)))
)
expect_error(
horvitz_thompson(y ~ z, data = dat, condition_prs = ps, ra_declaration = decl),
"Cannot use `ra_declaration` with any of"
)
expect_error(
horvitz_thompson(y ~ z, data = dat, condition_pr_mat = declaration_to_condition_pr_mat(decl), ra_declaration = decl),
"Cannot use `ra_declaration` with any of"
)
expect_error(
horvitz_thompson(y ~ z + x, data = dat, ra_declaration = decl),
"must have only one variable on the right-hand side"
)
expect_error(
horvitz_thompson(y ~ z, data = dat, ra_declaration = randomizr::declare_ra(N = n + 1, prob = 0.4)),
"variable lengths differ"
)
ht_o <- horvitz_thompson(y ~ z, data = dat, ci = FALSE)
expect_equivalent(
as.matrix(tidy(horvitz_thompson(y ~ z, data = dat, ci = FALSE))[, c("p.value", "conf.low", "conf.high")]),
matrix(NA, nrow = 1, ncol = 3)
)
# condition pr mat is the wrong size
expect_error(
horvitz_thompson(
y ~ z,
data = dat,
condition_pr_mat = matrix(rnorm(4), 2, 2)
),
"cleaning the data"
)
})
test_that("Works without variation in treatment", {
set.seed(1)
dat <- data.frame(
y = rnorm(20),
bl = 1:5,
ps = 0.4
)
# Simple case
dat$z_const <- 1
ht_const_1 <- horvitz_thompson(
y ~ z_const,
data = dat
)
ht_const_cond1 <- horvitz_thompson(
y ~ z_const,
data = dat,
condition2 = 1
)
expect_equivalent(
ht_const_1,
ht_const_cond1
)
expect_equivalent(coef(ht_const_1), mean(dat$y))
expect_equivalent(ht_const_1$std.error, 1 / (nrow(dat)) * sqrt(sum(dat$y ^ 2)))
expect_equivalent(
ht_const_1$df,
NA
)
ht_const <- horvitz_thompson(
y ~ z_const,
data = dat,
condition_prs = ps
)
expect_equivalent(coef(ht_const), mean(dat$y / dat$ps))
expect_equivalent(ht_const$std.error, 1 / (nrow(dat)) * sqrt(sum((dat$y / dat$ps) ^ 2)))
## Blocks and all are treated
ht_block <- horvitz_thompson(
y ~ z_const,
data = dat,
blocks = bl,
condition_prs = ps,
return_condition_pr_mat = TRUE
)
# with blocks SE is different because not simple any more
expect_equivalent(coef(ht_block), mean(dat$y / dat$ps))
# expect_equal(ht_block$std.error, 1/(nrow(dat)) * sqrt(sum((dat$y / dat$ps)^2)))
## Blocks and some are treated!
dat$z_diff <- as.numeric(dat$bl <= 2)
ht_block <- horvitz_thompson(
y ~ z_diff,
data = dat,
blocks = bl,
condition_prs = rep(0.4, nrow(dat))
)
ht_block
# With only one treatment, but value is 0, still put it as treatment!!
# But note we leave a hint in the coefficient name
dat$z <- 0
ht_zero <- horvitz_thompson(
y ~ z,
data = dat,
blocks = bl,
condition_prs = rep(0.5, nrow(dat))
)
expect_identical(ht_zero$term, "z0")
# Drop name if they specify the only treatment as condition1
ht_rev <- horvitz_thompson(
y ~ z,
data = dat,
blocks = bl,
condition1 = 0,
condition_prs = rep(0.5, nrow(dat))
)
expect_identical(ht_rev$term, "z")
# This is only true because condition prs are 0.5
expect_identical(
tidy(ht_zero)[c("estimate", "std.error")],
tidy(ht_rev)[c("estimate", "std.error")] * c(-1, 1)
)
# Some weird specifications that hit unusual parts of the variance
cpm <- diag(0.5, nrow = 4, ncol = 4)
y <- rnorm(2)
t <- c(0, 1)
expect_error(
horvitz_thompson(y ~ t, condition_pr_mat = cpm),
NA
)
t <- c(1, 1)
expect_error(
horvitz_thompson(y ~ t, condition_pr_mat = cpm),
NA
)
})
test_that("multi-valued treatments not allowed in ra_declaration", {
dat <- data.frame(
y = rnorm(20),
ps = 0.4
)
decl_multi <- randomizr::declare_ra(N = 20, prob_each = c(0.4, 0.4, 0.2))
dat$z <- randomizr::conduct_ra(decl_multi)
expect_error(
horvitz_thompson(y ~ z, data = dat, ra_declaration = decl_multi),
"Cannot use horvitz_thompson\\(\\) with a `ra_declaration` with"
)
# will work but you have to get the PRs right!
ht_condition <- horvitz_thompson(
y ~ z,
data = dat,
condition_prs = ps,
condition1 = "T1",
condition2 = "T2"
)
subdat <- dat[dat$z != "T3", ]
ht_subdat <- horvitz_thompson(
y ~ z,
data = subdat,
condition_prs = ps
)
ht_subset <- horvitz_thompson(
y ~ z,
data = dat,
subset = z != "T3",
condition_prs = ps
)
expect_equal(
ht_condition,
ht_subdat
)
expect_equal(
ht_condition,
ht_subset
)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.