Nothing
#' `preprocess2` is a S3 method that fetches data from several model
#' objectects for use with `boottest()`.
#'
#' @param object An objectect of type lm, fixest, felm or ivreg
#' @param ... other arguments
#' @noRd
#'
#' @importFrom Matrix Diagonal
#' @return An object of class `preprocess2`.
preprocess2 <- function(object, ...) {
UseMethod("preprocess2")
}
preprocess2.fixest <-
function(object,
clustid,
R,
param,
fe,
engine,
bootcluster,
bootstrap_type) {
#' preprocess data for objects of type fixest
#'
#' @param object an object of type fixest
#' @param clustid a character string containing the name(s) of the
#' cluster variables
#' @param R Hypothesis Vector giving linear combinations of coefficients.
#' @param fe character vector. name of the fixed effect to be projected
#' out in the bootstrap
#' @param param character vector. names of the parameter(s) to test
#' @param engine The bootstrap algorithm to run. Either "R" or
#' "WildBootTests.jl"
#' @param bootcluster a character string containing the name(s) of the
#' bootcluster variables. Alternatively, "min" or "max"
#' @param bootstrap_type Which bootstrap type should be run?
#' '11', '13', '31' or 33'. For more infos, see the paper
#' by MacKinnon, Nielsen & Webb (2022)
#'
#' @noRd
#'
#' @method preprocess2 fixest
call <- object$call
call_env <- object$call_env
fml <- formula(object)
fml <- Formula::as.Formula(fml)
fml_full <- formula(fml, collapse = TRUE)
N <- nobs(object)
# lm and felm don't drop NAs due to multicollinearity, while fixest does
k <- length(na.omit(coef(object)))
method <- object$family
# fixest specific checks
if (object$method != "feols") {
rlang::abort(
"boottest() only supports OLS estimation via fixest::feols() - it
does not support non-linear models computed via e.g. fixest::fepois()
or fixest::feglm.",
use_cli_format = TRUE
)
}
# if (!is.null(object$is_sunab)) {
# if(object$is_sunab == TRUE){
# rlang::abort(
# "boottest() does not support the Sun-Abrams
# estimator via `sunab()`."
# )
# }
# }
is_iv <- ifelse(!is.null(object$fml_all$iv), TRUE, FALSE)
has_fe <- ifelse(!is.null(object$fml_all$fixef), TRUE, FALSE)
if (!is_iv) {
X <-
model.matrix(object,
type = "rhs",
na.rm = TRUE,
collin.rm = TRUE
)
} else {
X_endog <-
model.matrix(object,
type = "iv.endo",
na.rm = TRUE,
collin.rm = TRUE
)
X_exog <-
model.matrix(object,
type = "iv.exo",
na.rm = TRUE,
collin.rm = TRUE
)
instruments <-
model.matrix(object,
type = "iv.inst",
na.rm = TRUE,
collin.rm = TRUE
)
}
Y <- model.matrix(object, type = "lhs")
weights <- weights(object)
if (is.null(weights)) {
has_weights <- FALSE
weights <- rep(1, N)
} else {
has_weights <- TRUE
}
if (has_weights) {
if (!is.null(fe)) {
rlang::abort(
"boottest() unfortunately currently does not support WLS and fixed
effects. Please set fe = NULL to run a bootstrap with WLS.",
use_cli_format = TRUE
)
}
}
fixed_effect <- NULL
k2 <- 0
W <- n_fe <- NULL
if (has_fe) {
# if(!is.null(fe)){
get_fe <- transform_fe(
object = object,
X = X,
Y = Y,
fe = fe,
N = N,
has_weights = has_weights,
engine = engine,
bootstrap_type = bootstrap_type,
clustid_char = clustid
)
X <- get_fe$X
Y <- get_fe$Y
fixed_effect <- get_fe$fixed_effect
W <- get_fe$W
n_fe <- get_fe$n_fe
k2 <- get_fe$k2
# }
}
# get cluster variable
if (!is.null(clustid)) {
clustid_list <- get_cluster(
object = object,
clustid_char = clustid,
N = N,
bootcluster = bootcluster,
call_env = call_env
)
vcov_sign <- clustid_list$vcov_sign
clustid <- clustid_list$clustid
clustid_dims <- ncol(clustid)
N_G <- clustid_list$N_G
cluster_names <- clustid_list$cluster_names
cluster_bootcluster <- clustid_list$cluster_bootcluster
bootcluster <- clustid_list$bootcluster
all_c <- clustid_list$all_c
} else {
vcov_sign <-
clustid_dims <-
clustid <- bootcluster <- N_G <- cluster_names <- NULL
cluster_bootcluster <- bootcluster <- all_c <- NULL
}
# iv prep
instruments <- X_exog <- X_endog <- NULL
# if(is_iv){
# R0 <- rep(0, n_exog + n_endog)
# R0[
# match(param, c(names(object$exogenous), names(object$endogenous)))] <- R
# names(R0) <- c(names(object$exogenous), names(object$endogenous))
# } else {
if (!is.matrix(R)) {
R0 <- rep(0, length(colnames(X)))
R0[match(param, colnames(X))] <- R
names(R0) <- colnames(X)
} else {
q <- nrow(R)
p <- ncol(R)
R0 <- matrix(0, q, ncol(X))
R0[, 1:p] <- R
}
# }
res <- list(
Y = Y,
X = X,
weights = weights,
fixed_effect = fixed_effect,
W = W,
n_fe = n_fe,
N = N,
k = k,
k2 = k2,
clustid = clustid,
vcov_sign = vcov_sign,
clustid_dims = clustid_dims,
N_G = N_G,
cluster_bootcluster = cluster_bootcluster,
bootcluster = bootcluster,
N_G_bootcluster = length(unique(bootcluster[[1]])),
R0 = R0,
# model_frame = model_frame,
X_exog = X_exog,
X_endog = X_endog,
instruments = instruments,
has_fe = has_fe,
all_c = all_c
)
if (is_iv) {
class(res) <- c("preprocess", "iv")
} else {
class(res) <- c("preprocess", "ols")
}
res
}
preprocess2.felm <-
function(object,
clustid,
R,
param,
fe,
engine,
bootcluster,
bootstrap_type) {
#' preprocess data for objects of type felm
#'
#' @param object an object of type felm
#' @param clustid a character string containing the name(s) of the
#' cluster variables
#' @param R Hypothesis Vector giving linear combinations of coefficients.
#' @param fe character vector. name of the fixed effect to be projected
#' out in the bootstrap
#' @param param character vector. names of the parameter(s) to test
#' @param engine The bootstrap algorithm to run. Either "R" or
#' "WildBootTests.jl"
#' @param bootcluster a character string containing the name(s) of the
#' bootcluster variables. Alternatively, "min" or "max"
#' @param bootstrap_type Which bootstrap type should be run?
#' '11', '13', '31' or 33'. For more infos,
#' see the paper by MacKinnon, Nielsen & Webb (2022)
#'
#' @noRd
#'
#' @method preprocess2 felm
call <- object$call
call_env <- environment(formula(object))
fml <- formula(object)
fml <- Formula::as.Formula(fml)
N <- nobs(object)
# lm and felm don't drop NAs due to multicollinearity, while fixest does
k <- length(na.omit(coef(object)))
p <- object$p
is_iv <- FALSE
if (
suppressWarnings(
formula(
Formula::as.Formula(
eval(
object$formula
)
),
lhs = 0,
rhs = 3
)
) != "~0") {
rlang::abort(
"IV regression is currently not supported by boottest() for
objects of type 'felm'. You can either use 'fixest::feols()'
or 'ivreg::ivreg' for IV-regression.",
use_cli_format = TRUE
)
is_iv <- TRUE
}
X <- model_matrix(object, type = "rhs", collin.rm = TRUE)
Y <- model.response(model.frame(object))
has_fe <- ifelse(length(names(object$fe)) > 0, TRUE, FALSE)
weights <- weights(object)
if (is.null(weights)) {
has_weights <- FALSE
weights <- rep(1, N)
} else {
has_weights <- TRUE
}
if (has_weights) {
if (!is.null(fe)) {
rlang::abort(
"boottest() unfortunately currently does not support WLS and
fixed effects. Please set fe = NULL to run a bootstrap with WLS.",
use_cli_format = TRUE
)
}
}
if (has_fe) {
get_fe <- transform_fe(
object = object,
X = X,
Y = Y,
fe = fe,
N = N,
has_weights = has_weights,
engine = engine,
bootstrap_type = bootstrap_type,
clustid_char = clustid
)
X <- get_fe$X
Y <- get_fe$Y
fixed_effect <- get_fe$fixed_effect
W <- get_fe$W
n_fe <- get_fe$n_fe
k2 <- get_fe$k2
} else {
fixed_effect <- NULL
k2 <- 0
W <- n_fe <- NULL
W <- n_fe <- NULL
}
# get cluster variable
if (!is.null(clustid)) {
clustid_list <- get_cluster(
object = object,
clustid_char = clustid,
N = N,
bootcluster = bootcluster,
call_env = call_env
)
vcov_sign <- clustid_list$vcov_sign
clustid <- clustid_list$clustid
clustid_dims <- ncol(clustid)
N_G <- clustid_list$N_G
cluster_names <- clustid_list$cluster_names
cluster_bootcluster <- clustid_list$cluster_bootcluster
bootcluster <- clustid_list$bootcluster
all_c <- clustid_list$all_c
} else {
vcov_sign <-
clustid_dims <-
clustid <- bootcluster <- N_G <- cluster_names <- NULL
cluster_bootcluster <- bootcluster <- all_c <- NULL
}
# iv prep
instruments <- X_exog <- X_endog <- NULL
# if(is_iv){
# R0 <- rep(0, n_exog + n_endog)
# R0[
# match(param, c(names(object$exogenous), names(object$endogenous)))] <- R
# names(R0) <- c(names(object$exogenous), names(object$endogenous))
# } else {
if (!is.matrix(R)) {
R0 <- rep(0, length(colnames(X)))
R0[match(param, colnames(X))] <- R
names(R0) <- colnames(X)
} else {
# need matrix input for mboottest
q <- nrow(R)
p <- ncol(R)
R0 <- matrix(0, q, ncol(X))
R0[, 1:p] <- R
}
# }
res <- list(
Y = Y,
X = X,
weights = weights,
fixed_effect = fixed_effect,
W = W,
n_fe = n_fe,
N = N,
k = k,
k2 = k2,
clustid = clustid,
vcov_sign = vcov_sign,
clustid_dims = clustid_dims,
N_G = N_G,
cluster_bootcluster = cluster_bootcluster,
bootcluster = bootcluster,
N_G_bootcluster = length(unique(bootcluster[[1]])),
R0 = R0,
X_exog = NULL,
X_endog = NULL,
instruments = NULL,
has_fe = has_fe,
all_c = all_c
)
if (is_iv) {
class(res) <- c("preprocess", "iv")
} else {
class(res) <- c("preprocess", "ols")
}
res
}
preprocess2.lm <-
function(object,
clustid,
R,
param,
engine,
bootcluster,
bootstrap_type = NULL) {
#' preprocess data for objects of type lm
#'
#' @param object an object of type lm
#' @param clustid a character string containing the name(s) of the
#' cluster variables
#' @param R Hypothesis Vector giving linear combinations of coefficients.
#' @param param character vector. names of the parameter(s) to test
#' @param engine The bootstrap algorithm to run. Either "R" or
#' "WildBootTests.jl"
#' @param bootcluster a character string containing the name(s) of
#' the bootcluster variables. Alternatively, "min" or "max"
#'
#' @noRd
#'
#' @method preprocess2 lm
call <- object$call
call_env <- environment(formula(object))
fml <- formula(object)
fml <- Formula::as.Formula(fml)
N <- nobs(object)
# lm and felm don't drop NAs due to multicollinearity, while fixest does
k <- length(na.omit(coef(object)))
p <- object$p
is_iv <- FALSE
X <- model_matrix(object, collin.rm = TRUE)
Y <- model.response(model.frame(object))
has_fe <- FALSE
weights <- weights(object)
if (is.null(weights)) {
has_weights <- FALSE
weights <- rep(1, N)
} else {
has_weights <- TRUE
}
# get cluster variable
if (!is.null(clustid)) {
clustid_list <- get_cluster(
object = object,
clustid_char = clustid,
N = N,
bootcluster = bootcluster,
call_env = call_env
)
vcov_sign <- clustid_list$vcov_sign
clustid <- clustid_list$clustid
clustid_dims <- ncol(clustid)
N_G <- clustid_list$N_G
cluster_names <- clustid_list$cluster_names
cluster_bootcluster <- clustid_list$cluster_bootcluster
bootcluster <- clustid_list$bootcluster
all_c <- clustid_list$all_c
} else {
vcov_sign <-
clustid_dims <-
clustid <- bootcluster <- N_G <- cluster_names <- NULL
cluster_bootcluster <- bootcluster <- all_c <- NULL
}
instruments <- X_exog <- X_endog <- NULL
if (!is.matrix(R)) {
R0 <- rep(0, length(colnames(X)))
R0[match(param, colnames(X))] <- R
names(R0) <- colnames(X)
} else {
q <- nrow(R)
p <- ncol(R)
R0 <- matrix(0, q, ncol(X))
R0[, 1:p] <- R
}
res <- list(
Y = Y,
X = X,
weights = weights,
fixed_effect = NULL,
W = NULL,
n_fe = NULL,
N = N,
k = k,
k2 = 0,
clustid = clustid,
vcov_sign = vcov_sign,
clustid_dims = clustid_dims,
N_G = N_G,
cluster_bootcluster = cluster_bootcluster,
bootcluster = bootcluster,
N_G_bootcluster = length(unique(bootcluster[[1]])),
R0 = R0,
X_exog = NULL,
X_endog = NULL,
instruments = NULL,
has_fe = has_fe,
all_c = all_c
)
class(res) <- c("preprocess", "ols")
res
}
preprocess2.ivreg <-
function(object,
clustid,
R,
param,
engine,
bootcluster,
bootstrap_type = NULL) {
#' preprocess data for objects of type ivreg
#'
#' @param object an object of type ivreg
#' @param clustid a character string containing the name(s) of the
#' cluster variables
#' @param R Hypothesis Vector giving linear combinations of coefficients.
#' @param param character vector. names of the parameter(s) to test
#' @param engine The bootstrap algorithm to run. Either "R" or
#' "WildBootTests.jl"
#' @param bootcluster a character string containing the name(s) of
#' the bootcluster variables. Alternatively, "min" or "max"
#'
#' @noRd
#'
#' @method preprocess2 ivreg
call <- object$call
call_env <- environment(formula(object))
fml <- formula(object)
fml <- Formula::as.Formula(fml)
is_iv <- TRUE
N <- nobs(object)
# lm and felm don't drop NAs due to multicollinearity, while fixest does
k <- length(na.omit(coef(object)))
p <- object$p
is_iv <- FALSE
has_fe <- FALSE
X_endog <-
model.matrix(
object,
component = "regressors", na.rm = TRUE
)[, object$endogenous, drop = FALSE]
X_exog <-
model.matrix(
object,
component = "instruments", na.rm = TRUE
)[, object$exogenous, drop = FALSE]
instruments <-
model.matrix(
object,
component = "instruments", na.rm = TRUE
)[, object$instruments, drop = FALSE]
Y <- model.response(model.frame(object))
n_exog <- length(object$exogenous)
n_endog <- length(object$endogenous)
n_instruments <- length(object$instruments)
weights <- weights(object)
if (is.null(weights)) {
has_weights <- FALSE
weights <- rep(1, N)
} else {
has_weights <- TRUE
}
# get cluster variable
if (!is.null(clustid)) {
clustid_list <- get_cluster(
object = object,
clustid_char = clustid,
N = N,
bootcluster = bootcluster,
call_env = call_env
)
vcov_sign <- clustid_list$vcov_sign
clustid <- clustid_list$clustid
clustid_dims <- ncol(clustid)
N_G <- clustid_list$N_G
cluster_names <- clustid_list$cluster_names
cluster_bootcluster <- clustid_list$cluster_bootcluster
bootcluster <- clustid_list$bootcluster
all_c <- clustid_list$all_c
} else {
vcov_sign <-
clustid_dims <-
clustid <- bootcluster <- N_G <- cluster_names <- NULL
cluster_bootcluster <- bootcluster <- all_c <- NULL
}
# iv prep
R0 <- rep(0, n_exog + n_endog)
R0[
match(
param,
c(
names(object$exogenous), names(object$endogenous)
)
)
] <- R
names(R0) <- c(
names(object$exogenous),
names(object$endogenous)
)
res <- list(
Y = Y,
X = NULL,
weights = weights,
fixed_effect = NULL,
W = NULL,
n_fe = NULL,
N = N,
k = k,
k2 = 0,
clustid = clustid,
vcov_sign = vcov_sign,
clustid_dims = clustid_dims,
N_G = N_G,
cluster_bootcluster = cluster_bootcluster,
bootcluster = bootcluster,
N_G_bootcluster = length(unique(bootcluster[[1]])),
R0 = R0,
# model_frame = model_frame,
X_exog = X_exog,
X_endog = X_endog,
instruments = instruments,
has_fe = has_fe,
all_c = all_c
)
class(res) <- c("preprocess", "iv")
res
}
demean_fe <- function(X, Y, fe, has_weights, N) {
#' function deamens design matrix X and depvar Y if fe != NULL
#'
#' @param X the design matrix X
#' @param Y the dependent variable Y as a numeric vector
#' @param fe the name of the fixed effect to be projected out
#' @param has_weights logical - have regression weights been used in the
#' original model?
#' @param N the number of observations
#'
#' @return A list that includes - among other things - the demeaned
#' design matrix X and depvar Y
#'
#' @noRd
g <- collapse::GRP(fe, call = FALSE)
X <- collapse::fwithin(X, g)
Y <- collapse::fwithin(Y, g)
fixed_effect_W <- as.factor(fe[, 1])
if (!has_weights) {
levels(fixed_effect_W) <-
(1 / table(fe)) # because duplicate levels are forbidden
} else {
rlang::abort(
"Currently, boottest() does not jointly support regression weights /
WLS and fixed effects. If you want to use
boottest() for inference based on WLS, please set fe = NULL.",
use_cli_format = TRUE
)
# levels(fixed_effect_W) <- 1 / table(fixed_effect)
}
W <- Matrix::Diagonal(N, as.numeric(as.character(fixed_effect_W)))
n_fe <- length(unique(fe[, 1]))
res <- list(
X = X,
Y = Y,
fixed_effect_W,
W = W,
n_fe = n_fe
)
res
}
transform_fe <-
function(object, X, Y, fe, has_weights, N, engine,
bootstrap_type, clustid_char) {
#' preprocess the model fixed effects
#'
#' If is.null(fe) == TRUE, all
#' fixed effects specified in the fixest or felm model are simply added
#' as dummy variables to the design matrix X. If !is.null(fe), the fixed
#' effect specified via fe is projected out in the bootstrap - all other
#' fixed effects are added as dummy variables
#' @param object the regression object
#' @param X the design matrix of the regression object
#' @param fe character vector, name of the fe to be projected out, or NULL
#' @param has_weights logical - have regression weights been used in the
#' original model?
#' @param N the number of observations
#' @param engine bootstrap algorithm to run. Either "R" or
#' "WildBootTests.jl"
#'
#' @return a list containing X - the design matrix plus additionally
#' attached fixed effects, and
#' fixed_effect - a data.frame containing the fixed effect to
#' be projected out
#'
#' @noRd
all_fe <- model_matrix(object, type = "fixef", collin.rm = TRUE)
# make sure all fixed effects variables are characters
n_fe <- ncol(all_fe)
all_fe_names <- names(all_fe)
k2 <- Reduce("+", lapply(all_fe, function(x) {
length(unique(x))
}))
fe_df <- W <- n_fe <- NULL
# if a fe is to be projected out in the bootstrap
if (!is.null(fe)) {
# add all fe except for fe to data frame
fe_df <- all_fe[, fe, drop = FALSE]
add_fe <- all_fe[, all_fe_names != fe, drop = FALSE]
add_fe_names <- names(add_fe)
# nothing to add if only one fixed effect in model
if (length(add_fe_names) != 0) {
fml_fe <- reformulate(add_fe_names, response = NULL)
add_fe_dummies <-
model.matrix(
fml_fe, model.frame(
fml_fe,
data = as.data.frame(
add_fe
)
)
)
# drop the intercept
add_fe_dummies <-
add_fe_dummies[, -which(colnames(add_fe_dummies) == "(Intercept)")]
X <-
as.matrix(collapse::add_vars(as.data.frame(X), add_fe_dummies))
}
# project out fe
if (engine == "R") {
if(bootstrap_type != "fnw11"){
if(fe != clustid_char){
rlang::abort("Only cluster fixed effects are supported for bootstrap_types
'11', '13', '31', '33'.",
use_cli_format = TRUE
)
}
}
# WildBootTests.jl does demeaning internally
prep_fe <- demean_fe(X, Y, fe_df, has_weights, N)
X <- prep_fe$X
Y <- prep_fe$Y
W <- prep_fe$W
n_fe <- prep_fe$n_fe
}
} else {
add_fe <- all_fe
add_fe_names <- names(add_fe)
fml_fe <- reformulate(add_fe_names, response = NULL)
add_fe_dummies <-
model.matrix(fml_fe, model.frame(fml_fe, data = as.data.frame(add_fe)))
X <-
as.matrix(collapse::add_vars(as.data.frame(X), add_fe_dummies))
}
res <- list(
X = X,
Y = Y,
W = W,
n_fe = n_fe,
k2 = k2,
fixed_effect = fe_df
)
res
}
get_cluster <-
function(object,
clustid_char,
bootcluster,
N,
call_env) {
#' function creates a data.frame with cluster variables
#'
#' @param object An object of type lm, fixest, felm or ivreg
#' @param clustid_char the name of the cluster variable(s) as
#' a character vector
#' @param bootcluster the name of the bootcluster variable(s)
#' as a character vector, or "min" or "max" for multiway clustering
#' @param N the number of observations used in the bootstrap
#' @param call_env the environment in which the 'object' was evaluated
#'
#' @noRd
#'
#' @return a list, containing, among other things, a data.frame of the
#' cluster variables,
#' a data.frame of the bootcluster variable(s), and a helper
#' matrix, all_c, used in `engine_julia()`
# ----------------------------------------------------------------------- #
# Note: a large part of the following code was taken and adapted from the
# sandwich R package, which is distributed under GPL-2 | GPL-3
# Zeileis A, Köll S, Graham N (2020). "Various Versatile Variances:
# An object-Oriented Implementation of Clustered Covariances in R."
# _Journal of Statistical Software_, *95*(1), 1-36.
# doi: 10.18637/jss.v095.i01 (URL: https://doi.org/10.18637/jss.v095.i01).
# changes by Alexander Fischer:
# no essential changes, but slight reorganization of pieces of code
dreamerr::check_arg(clustid_char, "character scalar|charakter vector")
dreamerr::check_arg(bootcluster, "character scalar | character vector")
clustid_fml <- reformulate(clustid_char)
# Step 1: create cluster df
manipulate_object <- function(object){
if(inherits(object, "fixest")){
if(!is.null(object$fixef_vars)){
update(object, . ~ + 1 | . + 1)
} else {
update(object, . ~ + 1 )
}
} else {
object
}
}
cluster_tmp <-
if ("Formula" %in% loadedNamespaces()) {
## FIXME to suppress potential warnings due to | in Formula
suppressWarnings(
expand.model.frame(
model =
manipulate_object(object),
extras = clustid_fml,
na.expand = FALSE,
envir = call_env
)
)
} else {
expand.model.frame(
model =
manipulate_object(object),
extras = clustid_fml,
na.expand = FALSE,
envir = call_env
)
}
# if(inherits(cluster_tmp, "try-error")){
# if(inherits(object, "fixest") || inherits(object, "felm")){
# if(grepl("non-numeric argument to binary operator$",
# attr(cluster_tmp, "condition")$message)){
# rlang::abort("In your model, you have specified multiple fixed effects,
# none of which are of type factor. While `fixest::feols()` and
# `lfe::felm()` handle this case without any troubles,
# `boottest()` currently cannot handle this case - please
# change the type of (at least one) fixed effect(s) to factor.
# If this does not solve the error, please report the issue
# at https://github.com/s3alfisc/fwildclusterboot.")
# }
# if(grepl("operations are possible only for numeric, logical
# or complex types$",
# attr(cluster_tmp, "condition")$message)){
# rlang::abort("Either a fixed effect or a cluster variable in your fixest()
# or felm() model is currently specified as a character.
# 'boottest()' relies on 'expand.model.frame',
# which can not handle these variable types in models.
# Please change these character variables to factors. ")
# }
# }
# }
cluster_df <-
model.frame(clustid_fml, cluster_tmp, na.action = na.pass)
# without cluster intersection
N_G <-
vapply(cluster_df, function(x) {
length(unique(x))
}, numeric(1))
# Step 1: decode bootcluster variable
# create a bootcluster vector
if (length(bootcluster) == 1) {
if (bootcluster == "max") {
# use both vars
bootcluster_char <- clustid_char
} else if (bootcluster == "min") {
# only minimum var
bootcluster_char <- clustid_char[which.min(N_G)]
} else {
bootcluster_char <- bootcluster
}
} else {
bootcluster_char <- bootcluster
}
# add bootcluster variable to formula of clusters
cluster_bootcluster_fml <-
update(
clustid_fml, paste(
"~ . +", paste(
bootcluster_char,
collapse = " + "
)
)
)
cluster_bootcluster_tmp <-
if ("Formula" %in% loadedNamespaces()) {
## FIXME to suppress potential warnings due to | in Formula
suppressWarnings(
expand.model.frame(
model =
manipulate_object(object),
extras = cluster_bootcluster_fml,
na.expand = FALSE,
envir = call_env
)
)
} else {
expand.model.frame(
model =
manipulate_object(object),
extras = cluster_bootcluster_fml,
na.expand = FALSE,
envir = call_env
)
}
# data.frame as needed for WildBootTests.jl
cluster_bootcluster_df <- model.frame(
cluster_bootcluster_fml,
cluster_bootcluster_tmp,
na.action = na.pass
)
# if(inherits(cluster_tmp, "try-error")){
# if(inherits(object, "fixest") || inherits(object, "felm")){
# if(
# grepl(
# "non-numeric argument to binary operator$",
# attr(
# cluster_tmp, "condition"
# )$message
# )
# ){
# rlang::abort(
# "In your model, you have specified multiple fixed effects,
# none of which are of type factor. While `fixest::feols()` and
# `lfe::felm()` handle this case without any troubles, `boottest()`
# currently cannot handle this case - please change the type of
# (at least one) fixed effect(s) to factor. If this does not solve
# the error, please report the issue at
# https://github.com/s3alfisc/fwildclusterboot."
# )
# }
# if(
# grepl(
# "operations are possible only for numeric, logical or complex types$",
# attr(
# cluster_tmp,
# "condition")$message
# )
# ){
# rlang::abort(
# "Either a fixed effect or a cluster variable in your fixest() or
# felm() model is currently specified as a character. 'boottest()'
# relies on 'expand.model.frame', which can not handle these variable
# types in models.
# Please change these character variables to factors."
# )
# }
# }
# }
# data.frames with clusters, bootcluster
cluster <- cluster_bootcluster_df[, clustid_char, drop = FALSE]
bootcluster <-
cluster_bootcluster_df[, bootcluster_char, drop = FALSE]
if (!any(bootcluster_char %in% clustid_char)) {
is_subcluster <- TRUE
if (!(any(names(bootcluster) %in% c(clustid_char, names(coef(
object
)))))) {
rlang::abort(
"A bootcluster variable is neither contained in the cluster
variables nor in the model coefficients.",
use_cli_format = TRUE
)
}
} else {
is_subcluster <- FALSE
}
## handle omitted or excluded observations (works for lfe, lm)
if ((N != NROW(cluster)) &&
!is.null(object$na.action) &&
(class(object$na.action) %in% c("exclude", "omit"))) {
cluster <- cluster[-object$na.action, , drop = FALSE]
}
if ((N != NROW(bootcluster)) &&
!is.null(object$na.action) &&
(class(object$na.action) %in% c("exclude", "omit"))) {
bootcluster <- bootcluster[-object$na.action, , drop = FALSE]
}
if (N != nrow(cluster) && inherits(object, "fixest")) {
cluster <- cluster[unlist(object$obs_selection), , drop = FALSE]
bootcluster <-
bootcluster[unlist(object$obs_selection), , drop = FALSE]
}
if (NROW(cluster) != N) {
rlang::abort(
"The number of observations in 'cluster' and 'nobs()' do not match",
use_cli_format = TRUE
)
}
if (NROW(bootcluster) != N) {
rlang::abort(
"The number of observations in 'bootcluster' and 'nobs()' do not match",
use_cli_format = TRUE
)
}
if (any(is.na(cluster))) {
rlang::abort(
"`boottest()` cannot handle NAs in `clustid` variables that are not
part of the estimated model object.",
use_cli_format = TRUE
)
}
if (any(is.na(bootcluster))) {
rlang::abort(
"`boottest()` cannot handle NAs in `bootcluster` variables that are
not part of the estimated model object.",
use_cli_format = TRUE
)
}
clustid_dims <- length(clustid_char)
i <- !vapply(cluster, is.numeric, logical(1))
cluster[i] <- lapply(cluster[i], as.character)
# taken from multiwayvcov::cluster.boot
acc <- list()
for (i in 1:clustid_dims) {
acc <-
append(acc, utils::combn(1:clustid_dims, i, simplify = FALSE))
}
vcov_sign <- vapply(acc, function(i) {
(-1)^(length(i) + 1)
}, numeric(1))
acc <- acc[-1:-clustid_dims]
if (clustid_dims > 1) {
for (i in acc) {
cluster <- cbind(cluster, Reduce(paste, cluster[, i]))
names(cluster)[length(names(cluster))] <-
Reduce(paste, names(cluster[, i]))
}
}
N_G <- vapply(cluster, function(x) {
length(unique(x))
}, numeric(1))
# now do all the other bootcluster things
c1 <-
bootcluster_char[which(!(bootcluster_char %in% clustid_char))]
# both bootstrapping and error cluster: all variables in clustid_char that
# are also in bootcluster
c2 <- clustid_char[which(clustid_char %in% bootcluster_char)]
# only error cluster: variables in clustid_char not in c1, c2
c3 <- clustid_char[which(!(clustid_char %in% c(c1, c2)))]
all_c <- c(c1, c2, c3)
if (length(bootcluster_char) > 1) {
bootcluster <- as.data.frame(Reduce(paste, bootcluster))
names(bootcluster) <- Reduce(paste, bootcluster_char)
}
res <- list(
vcov_sign = vcov_sign,
clustid_dims = clustid_dims,
clustid = cluster,
N_G = N_G,
cluster_names = names(cluster),
all_c = all_c,
bootcluster = bootcluster,
cluster_bootcluster = cluster_bootcluster_df
)
res
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.