context("Estimator - lm_robust, clustered")
test_that("lm cluster se", {
N <- 100
dat <- data.frame(
Y = rnorm(N),
Z = rbinom(N, 1, .5),
X = rnorm(N),
J = sample(1:10, N, replace = T),
W = runif(N)
)
## Test functionality
lm_robust(Y ~ Z, clusters = J, data = dat)
lm_robust(Y ~ Z + X, clusters = J, data = dat)
lm_robust(
Y ~ Z + X,
clusters = J,
data = dat
)
lm_robust(
Y ~ Z + X,
clusters = J,
se_type = "stata",
data = dat,
ci = T
)
expect_equivalent(
as.matrix(
tidy(
lm_robust(
Y ~ X + Z,
clusters = J,
ci = FALSE,
data = dat
)
)[, c("p.value", "conf.low", "conf.high")]
),
matrix(NA, nrow = 3, ncol = 3)
)
## Test equality
lm_interact <-
lm_robust(
Y ~ Z * X,
clusters = J,
data = dat
)
lm_interact_stata <-
lm_robust(
Y ~ Z * X,
clusters = J,
se_type = "stata",
data = dat
)
lm_interact_simple <- lm(Y ~ Z * X, data = dat)
bm_interact <-
BMlmSE(
lm_interact_simple,
clustervar = as.factor(dat$J),
IK = FALSE
)
bm_interact
bm_interact_interval <-
coef(lm_interact_simple)["Z:X"] +
qt(0.975, df = bm_interact$dof["Z:X"]) * bm_interact$se["Z:X"] * c(-1, 1)
bm_interact_stata_interval <-
coef(lm_interact_simple)["Z:X"] +
qt(0.975, df = length(unique(dat$J)) - 1) * bm_interact$se.Stata["Z:X"] * c(-1, 1)
expect_equivalent(
as.numeric(tidy(lm_interact)[4, c("std.error", "conf.low", "conf.high")]),
c(bm_interact$se["Z:X"], bm_interact_interval)
)
expect_equivalent(
as.numeric(tidy(lm_interact_stata)[4, c("std.error", "conf.low", "conf.high")]),
c(bm_interact$se.Stata["Z:X"], bm_interact_stata_interval)
)
lm_full <-
lm_robust(
Y ~ Z + X,
clusters = J,
data = dat
)
lm_full_simple <- lm(Y ~ Z + X, data = dat)
bm_full <-
BMlmSE(
lm_full_simple,
clustervar = as.factor(dat$J),
IK = FALSE
)
bm_full_moe <- qt(0.975, df = bm_full$dof) * bm_full$se
bm_full_lower <- coef(lm_full_simple) - bm_full_moe
bm_full_upper <- coef(lm_full_simple) + bm_full_moe
expect_equivalent(
as.matrix(tidy(lm_full)[, c("std.error", "conf.low", "conf.high")]),
cbind(bm_full$se, bm_full_lower, bm_full_upper)
)
## Works with rank deficient case
dat$X2 <- dat$X
for (se_type in cr_se_types) {
lmr_rd <- lm_robust(Y ~ X + Z + X2, data = dat, clusters = J, se_type = se_type)
lmr_full <- lm_robust(Y ~ X + Z, data = dat, clusters = J, se_type = se_type)
expect_identical(
tidy(lmr_rd)[1:3, ],
tidy(lmr_full)
)
}
## Test error handling
expect_error(
lm_robust(
Y ~ Z,
clusters = J,
se_type = "HC2",
data = dat
),
"CR2"
)
expect_error(
lm_robust(
Y ~ Z,
se_type = "CR2",
data = dat
),
"CR2"
)
# To easily do with and without weights
test_lm_cluster_variance <- function(w) {
# Test other estimators
lm_cr0 <- lm_robust(Y ~ Z + X, data = dat, weights = w, clusters = J, se_type = "CR0")
lm_stata <- lm_robust(Y ~ Z + X, data = dat, weights = w, clusters = J, se_type = "stata")
lm_cr2 <- lm_robust(Y ~ Z + X, data = dat, weights = w, clusters = J, se_type = "CR2")
# Stata is the same as CR0 but with finite sample
expect_equivalent(
lm_cr0$std.error ^ 2,
lm_stata$std.error ^ 2 * (N - length(coef(lm_stata))) * (length(unique(dat$J)) - 1) / ((N - 1) * length(unique(dat$J)))
)
expect_false(all(lm_cr0$std.error == lm_stata$std.error))
expect_false(all(lm_cr0$std.error == lm_cr2$std.error))
expect_false(all(lm_stata$std.error == lm_cr2$std.error))
expect_false(all(lm_stata$df == lm_cr2$df))
expect_equivalent(
lm_cr0$df,
lm_stata$df
)
}
# No weights first
test_lm_cluster_variance(NULL)
test_lm_cluster_variance(dat$W)
})
test_that("Clustered SEs match clubSandwich", {
skip_if_not_installed("clubSandwich")
skip_on_cran()
lm_o <- lm(mpg ~ hp, data = mtcars)
lm_ow <- lm(mpg ~ hp, data = mtcars, weights = wt)
for (se_type in cr_se_types) {
lm_r <- lm_robust(mpg ~ hp, data = mtcars, clusters = cyl, se_type = se_type)
lm_rw <- lm_robust(mpg ~ hp, data = mtcars, weights = wt, clusters = cyl, se_type = se_type)
expect_equivalent(
vcov(lm_r),
as.matrix(clubSandwich::vcovCR(
lm_o,
cluster = mtcars$cyl,
type = ifelse(se_type == "stata", "CR1S", se_type)
))
)
expect_equivalent(
vcov(lm_rw),
as.matrix(clubSandwich::vcovCR(
lm_ow,
cluster = mtcars$cyl,
type = ifelse(se_type == "stata", "CR1S", se_type)
))
)
}
})
test_that("multiple outcomes", {
skip_if_not_installed("clubSandwich")
skip_on_cran()
for (se_type in cr_se_types) {
lmo <- lm(cbind(mpg, hp) ~ wt, data = mtcars)
lmow <- lm(cbind(mpg, hp) ~ wt, weights = qsec, data = mtcars)
lmro <- lm_robust(cbind(mpg, hp) ~ wt, data = mtcars, clusters = cyl, se_type = se_type)
lmrow <- lm_robust(cbind(mpg, hp) ~ wt, weights = qsec, data = mtcars, clusters = cyl, se_type = se_type)
if (se_type == "stata") {
# Have to manually do correction for CR1stata
# because clubSandwich uses n*ny and r*ny in place of n and r
# in stata correction
J <- length(unique(mtcars$cyl))
n <- nrow(mtcars)
r <- 2
cs_vcov <- as.matrix(clubSandwich::vcovCR(lmo, cluster = mtcars$cyl, type = "CR0")) *
((J * (n - 1)) / ((J - 1) * (n - r)))
cs_vcov_w <- as.matrix(clubSandwich::vcovCR(lmow, cluster = mtcars$cyl, type = "CR0")) *
((J * (n - 1)) / ((J - 1) * (n - r)))
} else {
cs_vcov <- as.matrix(clubSandwich::vcovCR(lmo,
cluster = mtcars$cyl,
type = se_type))
cs_vcov_w <- as.matrix(clubSandwich::vcovCR(lmow,
cluster = mtcars$cyl,
type = se_type))
}
expect_equivalent(
vcov(lmro),
cs_vcov
)
expect_equivalent(
vcov(lmrow),
cs_vcov_w
)
}
# Test same as individual models
lmro <- lm_robust(cbind(mpg, hp) ~ wt, data = mtcars, clusters = cyl)
lmmpg <- lm_robust(mpg ~ wt, data = mtcars, clusters = cyl)
lmhp <- lm_robust(hp ~ wt, data = mtcars, clusters = cyl)
expect_equivalent(
tidy(lmro)$df[1:2],
lmmpg$df
)
expect_equivalent(
tidy(lmro)$df[3:4],
lmhp$df
)
expect_equivalent(lmro$r.squared[1], lmmpg$r.squared)
expect_equivalent(lmro$r.squared[2], lmhp$r.squared)
})
test_that("lm cluster se with missingness", {
dat <- data.frame(
Y = rnorm(100),
Z = rbinom(100, 1, .5),
X = rnorm(100),
J = sample(1:10, 100, replace = T),
W = runif(100)
)
dat$X[23] <- NA
dat$J[63] <- NA
expect_warning(
estimatr_cluster_out <- lm_robust(
Y ~ Z + X,
clusters = J,
data = dat
),
"missingness in the cluster"
)
estimatr_cluster_sub <- lm_robust(
Y ~ Z + X,
clusters = J,
data = dat[-c(23, 63), ]
)
estimatr_cluster_out[["call"]] <- NULL
estimatr_cluster_sub[["call"]] <- NULL
expect_equal(
estimatr_cluster_out,
estimatr_cluster_sub
)
})
test_that("lm works with quoted or unquoted vars and withor without factor clusters", {
dat <- data.frame(
Y = rnorm(100),
Z = rbinom(100, 1, .5),
X = rnorm(100),
J = sample(1:10, 100, replace = T),
W = runif(100)
)
lmr <- lm_robust(Y~Z, data = dat, weights = W)
lmrq <- lm_robust(Y~Z, data = dat, weights = W)
expect_equal(
rmcall(lmr),
rmcall(lmrq)
)
# works with char
dat$J <- as.character(dat$J)
lmrc <- lm_robust(Y~Z, data = dat, clusters = J)
lmrcq <- lm_robust(Y~Z, data = dat, clusters = J)
expect_equal(
rmcall(lmrc),
rmcall(lmrcq)
)
# works with num
dat$J_num <- as.numeric(dat$J)
lmrc_qnum <- lm_robust(Y~Z, data = dat, clusters = J_num)
expect_equal(
rmcall(lmrc),
rmcall(lmrc_qnum)
)
# works with factor
dat$J_fac <- as.factor(dat$J)
expect_equivalent(
rmcall(lm_robust(Y~Z, data = dat, clusters = J_fac)),
rmcall(lm_robust(Y~Z, data = dat, clusters = J))
)
# works with being cast in the call
lm_robust(Y~Z, data = dat, clusters = as.factor(J))
})
test_that("Clustered SEs work with clusters of size 1", {
dat <- data.frame(
Y = rnorm(100),
X = rnorm(100),
J = 1:100
)
lm_cr2 <- lm_robust(Y ~ X, data = dat, clusters = J)
lm_stata <- lm_robust(Y ~ X, data = dat, clusters = J, se_type = "stata")
lmo <- lm(Y ~ X, data = dat)
bmo <-
BMlmSE(
lmo,
clustervar = as.factor(dat$J),
IK = FALSE
)
expect_equivalent(
as.matrix(tidy(lm_cr2)[, c("estimate", "std.error", "df")]),
cbind(coef(lmo), bmo$se, bmo$dof)
)
expect_equivalent(
as.matrix(tidy(lm_stata)[, c("estimate", "std.error")]),
cbind(coef(lmo), bmo$se.Stata)
)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.