context("lm objects")
set.seed(20190513)
m <- 8
cluster <- factor(rep(LETTERS[1:m], 3 + rpois(m, 5)))
n <- length(cluster)
const <- rep("whuzzup.", n)
X <- matrix(rnorm(3 * n), n, 3)
nu <- rnorm(m)[cluster]
e <- rnorm(n)
w <- rgamma(n, shape = 3, scale = 3)
y <- X %*% c(.4, .3, -.3) + nu + e
dat <- data.frame(y, X, cluster, const, w, row = 1:n)
lm_fit <- lm(y ~ X1 + X2 + X3, data = dat)
WLS_fit <- lm(y ~ X1 + X2 + X3, data = dat, weights = w)
CR_types <- paste0("CR",0:4)
# obj <- WLS_fit
# y <- dat$y
# type <- "CR2"
# vcov <- vcovCR(obj, cluster = cluster, type = type)
# target = NULL
# inverse_var = FALSE
test_that("bread works", {
expect_true(check_bread(lm_fit, cluster = dat$cluster, y = dat$y))
lm_vcov <- bread(lm_fit) * summary(lm_fit)$sigma^2 / v_scale(lm_fit)
expect_equal(vcov(lm_fit), lm_vcov)
expect_true(check_bread(WLS_fit, cluster = dat$cluster, y = dat$y))
wls_vcov <- bread(WLS_fit) * summary(WLS_fit)$sigma^2 / v_scale(WLS_fit)
expect_equal(vcov(WLS_fit), wls_vcov)
})
test_that("vcovCR options don't matter for CR0", {
expect_error(vcovCR(lm_fit, type = "CR0"))
CR0 <- vcovCR(lm_fit, cluster = dat$cluster, type = "CR0")
expect_output(print(CR0))
attr(CR0, "target") <- NULL
attr(CR0, "inverse_var") <- NULL
CR0_A <- vcovCR(lm_fit, cluster = dat$cluster, type = "CR0", target = 1 / dat$w)
attr(CR0_A, "target") <- NULL
attr(CR0_A, "inverse_var") <- NULL
expect_identical(CR0_A, CR0)
CR0_B <- vcovCR(lm_fit, cluster = dat$cluster, type = "CR0", target = 1 / dat$w, inverse_var = FALSE)
attr(CR0_B, "target") <- NULL
attr(CR0_B, "inverse_var") <- NULL
expect_identical(CR0_A, CR0)
CR0_C <- vcovCR(lm_fit, cluster = dat$cluster, type = "CR0", target = 1 / dat$w, inverse_var = TRUE)
attr(CR0_C, "target") <- NULL
attr(CR0_C, "inverse_var") <- NULL
expect_identical(CR0_C, CR0)
wCR0 <- vcovCR(WLS_fit, cluster = dat$cluster, type = "CR0")
attr(wCR0, "target") <- NULL
attr(wCR0, "inverse_var") <- NULL
wCR0_A <- vcovCR(WLS_fit, cluster = dat$cluster, type = "CR0", target = 1 / dat$w)
attr(wCR0_A, "target") <- NULL
attr(wCR0_A, "inverse_var") <- NULL
expect_identical(wCR0_A, wCR0)
wCR0_B <- vcovCR(WLS_fit, cluster = dat$cluster, type = "CR0", target = 1 / dat$w, inverse_var = FALSE)
attr(wCR0_B, "target") <- NULL
attr(wCR0_B, "inverse_var") <- NULL
expect_identical(wCR0_B, wCR0)
wCR0_C <- vcovCR(WLS_fit, cluster = dat$cluster, type = "CR0", target = 1 / dat$w, inverse_var = TRUE)
attr(wCR0_C, "target") <- NULL
attr(wCR0_C, "inverse_var") <- NULL
expect_identical(wCR0_C, wCR0)
})
test_that("vcovCR options work for CR2", {
CR2_iv <- vcovCR(lm_fit, cluster = dat$cluster, type = "CR2")
expect_equal(vcovCR(lm_fit, cluster = dat$cluster, type = "CR2", inverse_var = TRUE), CR2_iv)
expect_equal(vcovCR(lm_fit, cluster = dat$cluster, type = "CR2", target = rep(1, n), inverse_var = TRUE), CR2_iv)
attr(CR2_iv, "inverse_var") <- FALSE
CR2_not <- vcovCR(lm_fit, cluster = dat$cluster, type = "CR2", inverse_var = FALSE)
expect_equal(CR2_not, CR2_iv)
expect_equal(vcovCR(lm_fit, cluster = dat$cluster, type = "CR2", target = rep(1, n)), CR2_not)
expect_equal(vcovCR(lm_fit, cluster = dat$cluster, type = "CR2", target = rep(1, n), inverse_var = FALSE), CR2_not)
expect_false(identical(vcovCR(lm_fit, cluster = dat$cluster, type = "CR2", target = 1 / dat$w), CR2_not))
wCR2_id <- vcovCR(WLS_fit, cluster = dat$cluster, type = "CR2")
expect_equal(vcovCR(WLS_fit, cluster = dat$cluster, type = "CR2", inverse_var = FALSE), wCR2_id)
expect_equal(vcovCR(WLS_fit, cluster = dat$cluster, type = "CR2", target = rep(1, n)), wCR2_id)
expect_equal(vcovCR(WLS_fit, cluster = dat$cluster, type = "CR2", target = rep(1, n), inverse_var = FALSE), wCR2_id)
wCR2_iv <- vcovCR(WLS_fit, cluster = dat$cluster, type = "CR2", inverse_var = TRUE)
wCR2_target <- vcovCR(WLS_fit, cluster = dat$cluster, type = "CR2", target = 1 / dat$w, inverse_var = TRUE)
expect_false(identical(wCR2_target, wCR2_id))
expect_equal(matrix(wCR2_target, dim(wCR2_target)), matrix(wCR2_iv, dim(wCR2_iv)))
expect_equal(vcovCR(WLS_fit, cluster = dat$cluster, type = "CR2", target = 1 / dat$w, inverse_var = TRUE), wCR2_target)
})
test_that("vcovCR options work for CR4", {
CR4_iv <- vcovCR(lm_fit, cluster = dat$cluster, type = "CR4")
expect_equal(vcovCR(lm_fit, cluster = dat$cluster, type = "CR4", inverse_var = TRUE), CR4_iv)
expect_equal(vcovCR(lm_fit, cluster = dat$cluster, type = "CR4", target = rep(1, n), inverse_var = TRUE), CR4_iv)
attr(CR4_iv, "inverse_var") <- FALSE
CR4_not <- vcovCR(lm_fit, cluster = dat$cluster, type = "CR4", inverse_var = FALSE)
expect_equal(CR4_not, CR4_iv)
expect_equal(vcovCR(lm_fit, cluster = dat$cluster, type = "CR4", target = rep(1, n)), CR4_not)
expect_equal(vcovCR(lm_fit, cluster = dat$cluster, type = "CR4", target = rep(1, n), inverse_var = FALSE), CR4_not)
expect_false(identical(vcovCR(lm_fit, cluster = dat$cluster, type = "CR4", target = 1 / dat$w), CR4_not))
wCR4_id <- vcovCR(WLS_fit, cluster = dat$cluster, type = "CR4")
expect_equal(vcovCR(WLS_fit, cluster = dat$cluster, type = "CR4", inverse_var = FALSE), wCR4_id)
expect_equal(vcovCR(WLS_fit, cluster = dat$cluster, type = "CR4", target = rep(1, n)), wCR4_id)
expect_equal(vcovCR(WLS_fit, cluster = dat$cluster, type = "CR4", target = rep(1, n), inverse_var = FALSE), wCR4_id)
wCR4_iv <- vcovCR(WLS_fit, cluster = dat$cluster, type = "CR4", inverse_var = TRUE)
wCR4_target <- vcovCR(WLS_fit, cluster = dat$cluster, type = "CR4", target = 1 / dat$w, inverse_var = TRUE)
expect_false(identical(wCR4_target, wCR4_id))
expect_equal(matrix(wCR4_target, dim(wCR4_target)), matrix(wCR4_iv, dim(wCR4_iv)))
expect_equal(vcovCR(WLS_fit, cluster = dat$cluster, type = "CR4", target = 1 / dat$w, inverse_var = TRUE), wCR4_target)
})
test_that("CR2 and CR4 are target-unbiased", {
expect_true(check_CR(lm_fit, vcov = "CR2", cluster = dat$cluster))
expect_true(check_CR(WLS_fit, vcov = "CR2", cluster = dat$cluster))
expect_true(check_CR(lm_fit, vcov = "CR4", cluster = dat$cluster))
expect_true(check_CR(WLS_fit, vcov = "CR4", cluster = dat$cluster))
})
test_that("vcovCR is equivalent to vcovHC when clusters are all of size 1", {
library(sandwich, quietly=TRUE)
CR0 <- vcovCR(lm_fit, cluster = dat$row, type = "CR0")
expect_equal(vcovHC(lm_fit, type = "HC0"), as.matrix(CR0))
CR1 <- vcovCR(lm_fit, cluster = dat$row, type = "CR1S")
expect_equal(vcovHC(lm_fit, type = "HC1"), as.matrix(CR1))
CR2 <- vcovCR(lm_fit, cluster = dat$row, type = "CR2")
expect_equal(vcovHC(lm_fit, type = "HC2"), as.matrix(CR2))
CR3 <- vcovCR(lm_fit, cluster = dat$row, type = "CR3")
expect_equal(vcovHC(lm_fit, type = "HC3"), as.matrix(CR3))
})
test_that("CR2 is equivalent to Welch t-test for DiD design", {
m0 <- 4
m1 <- 9
m <- m0 + m1
cluster <- factor(rep(LETTERS[1:m], each = 2))
n <- length(cluster)
time <- rep(c(1,2), m)
trt_clusters <- c(rep(0,m0), rep(1,m1))
trt <- (time - 1) * rep(trt_clusters, each = 2)
nu <- rnorm(m)[cluster]
e <- rnorm(n)
y <- 0.4 * trt + nu + e
dat <- data.frame(y, time, trt, cluster)
lm_DID <- lm(y ~ cluster + factor(time) + trt, data = dat)
t_Satt <- coef_test(lm_DID, vcov = "CR2", cluster = dat$cluster)["trt",]
y_diff <- apply(matrix(y, nrow = 2), 2, diff)
t_Welch <- t.test(y_diff ~ trt_clusters)
expect_equal(with(t_Welch, estimate[[2]] - estimate[[1]]), t_Satt$beta)
expect_equal(as.numeric(-t_Welch$statistic), with(t_Satt, beta / SE))
expect_is(all.equal(as.numeric(t_Welch$parameter), t_Satt$df), "character")
df <- m^2 * (m0 - 1) * (m1 - 1) / (m0^2 * (m0 - 1) + m1^2 * (m1 - 1))
expect_equal(t_Satt$df, df)
})
test_that("Order doesn't matter.",{
check_sort_order(WLS_fit, dat, "cluster")
})
test_that("clubSandwich works with dropped observations", {
dat_miss <- dat
miss_indicator <- sample.int(n, size = round(n / 10))
dat_miss$X1[miss_indicator] <- NA
dat_miss$cluster[miss_indicator] <- NA
lm_dropped <- lm(y ~ X1 + X2 + X3, data = dat_miss)
dat_complete <- subset(dat_miss, !is.na(X1))
lm_complete <- lm(y ~ X1 + X2 + X3, data = dat_complete)
CR_drop <- lapply(CR_types, function(x) vcovCR(lm_dropped, cluster = dat_miss$cluster, type = x))
CR_complete <- lapply(CR_types, function(x) vcovCR(lm_complete, cluster = dat_complete$cluster, type = x))
expect_equal(CR_drop, CR_complete)
test_drop <- lapply(CR_types, function(x) coef_test(lm_dropped, vcov = x, cluster = dat_miss$cluster, test = "All", p_values = FALSE))
test_complete <- lapply(CR_types, function(x) coef_test(lm_complete, vcov = x, cluster = dat_complete$cluster, test = "All", p_values = FALSE))
expect_equal(test_drop, test_complete)
})
test_that("clubSandwich requires no missing values on the clustering variable", {
dat_miss <- dat
miss_indicator <- sample.int(n, size = round(n / 10))
dat_miss$cluster[miss_indicator] <- NA
lm_dropped <- lm(y ~ X1 + X2 + X3, data = dat_miss)
expect_error(vcovCR(lm_dropped, cluster = dat_miss$cluster, type = "CR0"),
"Clustering variable cannot have missing values.")
expect_error(coef_test(lm_dropped, vcov = "CR0", cluster = dat_miss$cluster, test = "All"),
"Clustering variable cannot have missing values.")
})
test_that("clubSandwich works with aliased predictors", {
data(npk, package = "datasets")
npk_alias <- lm(yield ~ block + N*P*K, npk)
npk_drop <- lm(yield ~ block + N + P + K + N:P + N:K + P:K, npk)
CR_alias <- lapply(CR_types[-4], function(x) vcovCR(npk_alias, cluster = npk$block, type = x))
CR_drop <- lapply(CR_types[-4], function(x) vcovCR(npk_drop, cluster = npk$block, type = x))
expect_equal(CR_alias, CR_drop)
test_drop <- lapply(CR_types[-4], function(x) coef_test(npk_alias, vcov = x, cluster = npk$block, test = c("z","naive-t","Satterthwaite"), coefs = 7:12, p_values = FALSE))
test_complete <- lapply(CR_types[-4], function(x) coef_test(npk_drop, vcov = x, cluster = npk$block, test = c("z","naive-t","Satterthwaite"), coefs = 7:12, p_values = FALSE))
expect_equal(test_drop, test_complete)
})
test_that("weight scale doesn't matter", {
lm_fit_w <- lm(y ~ X1 + X2 + X3, data = dat, weights = rep(4, nrow(dat)))
unweighted_fit <- lapply(CR_types, function(x) vcovCR(lm_fit, cluster = cluster, type = x))
weighted_fit <- lapply(CR_types, function(x) vcovCR(lm_fit_w, cluster = cluster, type = x))
expect_equal(lapply(unweighted_fit, as.matrix),
lapply(weighted_fit, as.matrix),
tol = 5 * 10^-7)
target <- 1 + rpois(nrow(dat), lambda = 8)
unweighted_fit <- lapply(CR_types, function(x) vcovCR(lm_fit, cluster = cluster, type = x, target = target))
weighted_fit <- lapply(CR_types, function(x) vcovCR(lm_fit_w, cluster = cluster, type = x, target = target * 15))
expect_equal(lapply(unweighted_fit, as.matrix),
lapply(weighted_fit, as.matrix),
tol = 5 * 10^-7)
})
test_that("clubSandwich works with weights of zero.", {
data("LifeCycleSavings", package = "datasets")
n_life <- nrow(LifeCycleSavings)
LifeCycleSavings$cl <- substr(rownames(LifeCycleSavings), 1, 1)
table(LifeCycleSavings$cl)
LifeCycleSavings$wt <- rpois(n_life, lambda = 0.8)
table(LifeCycleSavings$wt)
lm_full <- lm(sr ~ pop15 + pop75 + dpi + ddpi, data = LifeCycleSavings, weights = wt)
LCS_sub <- subset(LifeCycleSavings, wt > 0)
lm_sub <- lm(sr ~ pop15 + pop75 + dpi + ddpi, data = LCS_sub, weights = wt)
CR_full <- lapply(CR_types, function(x) vcovCR(lm_full, cluster = LifeCycleSavings$cl, type = x))
CR_sub <- lapply(CR_types, function(x) vcovCR(lm_sub, cluster = LCS_sub$cl, type = x))
expect_equal(CR_full, CR_sub, check.attributes = FALSE)
test_full <- lapply(CR_types, function(x) coef_test(lm_full, vcov = x, cluster = LifeCycleSavings$cl, test = c("z","naive-t","Satterthwaite"), p_values = TRUE))
test_sub <- lapply(CR_types, function(x) coef_test(lm_sub, vcov = x, cluster = LCS_sub$cl, test = c("z","naive-t","Satterthwaite"), p_values = TRUE))
expect_equal(test_full, test_sub, check.attributes = FALSE)
dat_miss <- LifeCycleSavings
miss_indicator <- sample.int(n_life, size = round(n_life / 5))
dat_miss$pop15[miss_indicator] <- NA
dat_miss$cl[miss_indicator] <- NA
with(dat_miss, table(wt, is.na(pop15)))
lm_dropped <- lm(sr ~ pop15 + pop75 + dpi + ddpi, data = dat_miss, weights = wt)
dat_complete <- subset(dat_miss, !is.na(pop15))
lm_complete <- lm(sr ~ pop15 + pop75 + dpi + ddpi, data = dat_complete, weights = wt)
CR_drop <- lapply(CR_types, function(x) vcovCR(lm_dropped, cluster = dat_miss$cl, type = x))
CR_complete <- lapply(CR_types, function(x) vcovCR(lm_complete, cluster = dat_complete$cl, type = x))
expect_equal(CR_drop, CR_complete)
test_drop <- lapply(CR_types, function(x) coef_test(lm_dropped, vcov = x, cluster = dat_miss$cl, test = "All", p_values = FALSE))
test_complete <- lapply(CR_types, function(x) coef_test(lm_complete, vcov = x, cluster = dat_complete$cl, test = "All", p_values = FALSE))
expect_equal(test_drop, test_complete)
})
test_that("vcovCR errors when there is only one cluster.", {
single_cluster_error_msg <- "Cluster-robust variance estimation will not work when the data only includes a single cluster."
expect_error(
vcovCR(lm_fit, cluster = dat$const, type = "CR0"),
single_cluster_error_msg
)
expect_error(
conf_int(WLS_fit, vcov = "CR1", cluster = dat$const), single_cluster_error_msg
)
expect_error(
coef_test(lm_fit, vcov = "CR2", cluster = dat$const), single_cluster_error_msg
)
expect_error(
Wald_test(WLS_fit, constraints = constrain_zero(2:4),
vcov = "CR3", cluster = dat$const),
single_cluster_error_msg
)
})
test_that("vcovCR works with intercept-only model and user-specified weights.", {
lm_int <- lm(y ~ 1, data = dat)
HC_un <- coef_test(lm_int, vcov="CR2", cluster=dat$row, test = "All")
# Unweighted, HC-robust
N <- nobs(lm_int)
yi <- dat$y
wi <- rep(1, N)
W <- sum(wi)
wi <- wi / W
vi <- rep(1, N)
V <- sum(vi)
ei <- residuals_CS(lm_int)
M <- sum(wi^2 * vi)
ai <- 1 / sqrt(1 - 2 * wi + M / vi)
V_hand <- sum(wi^2 * ai^2 * ei^2)
pi_theta_pj <- diag(vi) - tcrossprod(rep(1,N), wi * vi) - tcrossprod(wi * vi, rep(1, N)) + M
df <- M^2 / sum(tcrossprod(ai^2 * wi^2) * (pi_theta_pj^2))
expect_true(check_bread(lm_int, cluster = dat$row, y = dat$y))
expect_true(check_CR(lm_int, vcov = "CR2", cluster = dat$row))
expect_equal(sqrt(V_hand), HC_un$SE)
expect_equal(df, HC_un$df_Satt)
expect_equal(Inf, HC_un$df_z)
expect_equal(N - 1, HC_un$df_t)
# Unweighted, cluster-robust
CR_un <- coef_test(lm_int, vcov="CR2", cluster=dat$cluster, test = "All")
J <- nlevels(dat$cluster)
w_j <- tapply(wi, dat$cluster, sum)
e_j <- tapply(wi * ei, dat$cluster, sum) / w_j
v_j <- tapply(wi^2 * vi, dat$cluster, sum) / w_j^2
a_j <- 1 / sqrt(1 - 2 * w_j + M / v_j)
V_hand <- sum(w_j^2 * a_j^2 * e_j^2)
pi_theta_pj <- diag(v_j) - tcrossprod(rep(1,J), w_j * v_j) - tcrossprod(w_j * v_j, rep(1, J)) + M
df <- M^2 / sum(tcrossprod(a_j^2 * w_j^2) * (pi_theta_pj^2))
expect_true(check_bread(lm_int, cluster = dat$cluster, y = dat$y))
expect_true(check_CR(lm_int, vcov = "CR2", cluster = dat$cluster))
expect_equal(sqrt(V_hand), CR_un$SE)
expect_equal(df, CR_un$df_Satt)
expect_equal(Inf, CR_un$df_z)
expect_equal(J - 1, CR_un$df_t)
# Weighted, HC-robust
WLS_int <- lm(y ~ 1, data = dat, weights = w)
HC_wt <- coef_test(WLS_int, vcov="CR2", cluster=dat$row, test = "All")
N <- nobs(WLS_int)
yi <- dat$y
wi <- WLS_int$weights
W <- sum(wi)
wi <- wi / W
vi <- rep(1, N)
V <- sum(vi)
ei <- residuals_CS(WLS_int)
M <- sum(wi^2 * vi)
ai <- 1 / sqrt(1 - 2 * wi + M / vi)
V_hand <- sum(wi^2 * ai^2 * ei^2)
pi_theta_pj <- diag(vi) - tcrossprod(rep(1,N), wi * vi) - tcrossprod(wi * vi, rep(1, N)) + M
df <- M^2 / sum(tcrossprod(ai^2 * wi^2) * (pi_theta_pj^2))
expect_true(check_bread(WLS_int, cluster = dat$row, y = dat$y))
expect_true(check_CR(WLS_int, vcov = "CR2", cluster = dat$row))
expect_equal(sqrt(V_hand), HC_wt$SE)
expect_equal(df, HC_wt$df_Satt)
expect_equal(Inf, HC_wt$df_z)
expect_equal(N - 1, HC_wt$df_t)
# Weighted, cluster-robust
CR_wt <- coef_test(WLS_int, vcov="CR2", cluster=dat$cluster, test = "All")
J <- nlevels(dat$cluster)
w_j <- tapply(wi, dat$cluster, sum)
e_j <- tapply(wi * ei, dat$cluster, sum) / w_j
v_j <- tapply(wi^2 * vi, dat$cluster, sum) / w_j^2
a_j <- 1 / sqrt(1 - 2 * w_j + M / v_j)
V_hand <- sum(w_j^2 * a_j^2 * e_j^2)
pi_theta_pj <- diag(v_j) - tcrossprod(rep(1,J), w_j * v_j) - tcrossprod(w_j * v_j, rep(1, J)) + M
df <- M^2 / sum(tcrossprod(a_j^2 * w_j^2) * (pi_theta_pj^2))
expect_true(check_bread(WLS_int, cluster = dat$cluster, y = dat$y))
expect_true(check_CR(WLS_int, vcov = "CR2", cluster = dat$cluster))
expect_equal(sqrt(V_hand), CR_wt$SE, tolerance = 10^-3)
expect_equal(df, CR_wt$df_Satt, tolerance = 10^-3)
expect_equal(Inf, CR_wt$df_z)
expect_equal(J - 1, CR_wt$df_t)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.