R/preprocess2.R
In fwildclusterboot: Fast Wild Cluster Bootstrap Inference for Linear Models

#' `preprocess2` is a S3 method that fetches data from several model
#' objectects for use with `boottest()`.
#'
#' @param object An objectect of type lm, fixest, felm or ivreg
#' @param ... other arguments
#' @noRd
#'
#' @importFrom Matrix Diagonal
#' @return An object of class `preprocess2`.

preprocess2 <- function(object, ...) {
  UseMethod("preprocess2")
}



preprocess2.fixest <-
  function(object,
           clustid,
           R,
           param,
           fe,
           engine,
           bootcluster, 
           bootstrap_type) {
    #' preprocess data for objects of type fixest
    #'
    #' @param object an object of type fixest
    #' @param clustid a character string containing the name(s) of the
    #'  cluster variables
    #' @param R Hypothesis Vector giving linear combinations of coefficients.
    #' @param fe character vector. name of the fixed effect to be projected
    #'  out in the bootstrap
    #' @param param character vector. names of the parameter(s) to test
    #' @param engine The bootstrap algorithm to run. Either "R" or
    #'  "WildBootTests.jl"
    #' @param bootcluster a character string containing the name(s) of the
    #'  bootcluster variables. Alternatively, "min" or "max"
    #' @param bootstrap_type Which bootstrap type should be run? 
    #' '11', '13', '31' or 33'. For more infos, see the paper
    #' by MacKinnon, Nielsen & Webb (2022)
    #'
    #' @noRd
    #'
    #' @method preprocess2 fixest

    call <- object$call
    call_env <- object$call_env
    fml <- formula(object)
    fml <- Formula::as.Formula(fml)

    fml_full <- formula(fml, collapse = TRUE)

    N <- nobs(object)
    # lm and felm don't drop NAs due to multicollinearity, while fixest does
    k <- length(na.omit(coef(object)))

    method <- object$family
    # fixest specific checks
    if (object$method != "feols") {
      rlang::abort(
        "boottest() only supports OLS estimation via fixest::feols() - it
        does not support non-linear models computed via e.g. fixest::fepois()
        or fixest::feglm.", 
        use_cli_format = TRUE
      )
    }
    
    # if (!is.null(object$is_sunab)) {
    #   if(object$is_sunab == TRUE){
    #     rlang::abort(
    #       "boottest() does not support the Sun-Abrams
    #       estimator via `sunab()`."
    #     )
    #   }
    # }

    is_iv <- ifelse(!is.null(object$fml_all$iv), TRUE, FALSE)
    has_fe <- ifelse(!is.null(object$fml_all$fixef), TRUE, FALSE)

    if (!is_iv) {
      X <-
        model.matrix(object,
          type = "rhs",
          na.rm = TRUE,
          collin.rm = TRUE
        )
    } else {
      X_endog <-
        model.matrix(object,
          type = "iv.endo",
          na.rm = TRUE,
          collin.rm = TRUE
        )
      X_exog <-
        model.matrix(object,
          type = "iv.exo",
          na.rm = TRUE,
          collin.rm = TRUE
        )
      instruments <-
        model.matrix(object,
          type = "iv.inst",
          na.rm = TRUE,
          collin.rm = TRUE
        )
    }

    Y <- model.matrix(object, type = "lhs")

    weights <- weights(object)
    if (is.null(weights)) {
      has_weights <- FALSE
      weights <- rep(1, N)
    } else {
      has_weights <- TRUE
    }

    if (has_weights) {
      if (!is.null(fe)) {
        rlang::abort(
          "boottest() unfortunately currently does not support WLS and fixed
          effects. Please set fe = NULL to run a bootstrap with WLS.", 
          use_cli_format = TRUE
        )
      }
    }

    fixed_effect <- NULL
    k2 <- 0
    W <- n_fe <- NULL


    if (has_fe) {
      # if(!is.null(fe)){
      get_fe <- transform_fe(
        object = object,
        X = X,
        Y = Y,
        fe = fe,
        N = N,
        has_weights = has_weights,
        engine = engine, 
        bootstrap_type = bootstrap_type, 
        clustid_char = clustid
      )


      X <- get_fe$X
      Y <- get_fe$Y
      fixed_effect <- get_fe$fixed_effect
      W <- get_fe$W
      n_fe <- get_fe$n_fe
      k2 <- get_fe$k2
      # }
    }


    # get cluster variable
    if (!is.null(clustid)) {
      clustid_list <- get_cluster(
        object = object,
        clustid_char = clustid,
        N = N,
        bootcluster = bootcluster,
        call_env = call_env
      )

      vcov_sign <- clustid_list$vcov_sign
      clustid <- clustid_list$clustid
      clustid_dims <- ncol(clustid)
      N_G <- clustid_list$N_G
      cluster_names <- clustid_list$cluster_names

      cluster_bootcluster <- clustid_list$cluster_bootcluster
      bootcluster <- clustid_list$bootcluster
      all_c <- clustid_list$all_c
    } else {
      vcov_sign <-
        clustid_dims <-
        clustid <- bootcluster <- N_G <- cluster_names <- NULL
      cluster_bootcluster <- bootcluster <- all_c <- NULL
    }


    # iv prep
    instruments <- X_exog <- X_endog <- NULL
    # if(is_iv){
    #  R0 <- rep(0, n_exog + n_endog)
    #  R0[
    # match(param, c(names(object$exogenous), names(object$endogenous)))] <- R
    #  names(R0) <- c(names(object$exogenous), names(object$endogenous))
    # } else {
    if (!is.matrix(R)) {
      R0 <- rep(0, length(colnames(X)))
      R0[match(param, colnames(X))] <- R
      names(R0) <- colnames(X)
    } else {
      q <- nrow(R)
      p <- ncol(R)
      R0 <- matrix(0, q, ncol(X))
      R0[, 1:p] <- R
    }
    # }



    res <- list(
      Y = Y,
      X = X,
      weights = weights,
      fixed_effect = fixed_effect,
      W = W,
      n_fe = n_fe,
      N = N,
      k = k,
      k2 = k2,
      clustid = clustid,
      vcov_sign = vcov_sign,
      clustid_dims = clustid_dims,
      N_G = N_G,
      cluster_bootcluster = cluster_bootcluster,
      bootcluster = bootcluster,
      N_G_bootcluster = length(unique(bootcluster[[1]])),
      R0 = R0,
      # model_frame = model_frame,
      X_exog = X_exog,
      X_endog = X_endog,
      instruments = instruments,
      has_fe = has_fe,
      all_c = all_c
    )

    if (is_iv) {
      class(res) <- c("preprocess", "iv")
    } else {
      class(res) <- c("preprocess", "ols")
    }

    res
  }


preprocess2.felm <-
  function(object,
           clustid,
           R,
           param,
           fe,
           engine,
           bootcluster, 
           bootstrap_type) {
    #' preprocess data for objects of type felm
    #'
    #' @param object an object of type felm
    #' @param clustid a character string containing the name(s) of the
    #' cluster variables
    #' @param R Hypothesis Vector giving linear combinations of coefficients.
    #' @param fe character vector. name of the fixed effect to be projected
    #'  out in the bootstrap
    #' @param param character vector. names of the parameter(s) to test
    #' @param engine The bootstrap algorithm to run. Either "R" or
    #'  "WildBootTests.jl"
    #' @param bootcluster a character string containing the name(s) of the
    #'  bootcluster variables. Alternatively, "min" or "max"
    #' @param bootstrap_type Which bootstrap type should be run? 
    #' '11', '13', '31' or  33'. For more infos, 
    #' see the paper by MacKinnon, Nielsen & Webb (2022)
    #'
    #' @noRd
    #'
    #' @method preprocess2 felm

    call <- object$call
    call_env <- environment(formula(object))
    fml <- formula(object)
    fml <- Formula::as.Formula(fml)

    N <- nobs(object)
    # lm and felm don't drop NAs due to multicollinearity, while fixest does
    k <- length(na.omit(coef(object)))
    p <- object$p

    is_iv <- FALSE
    if (
      suppressWarnings(
        formula(
          Formula::as.Formula(
            eval(
              object$formula
            )
          ),
          lhs = 0,
          rhs = 3
        )
      ) != "~0") {
      rlang::abort(
        "IV regression is currently not supported by boottest() for
        objects of type 'felm'. You can either use 'fixest::feols()'
        or 'ivreg::ivreg' for IV-regression.", 
        use_cli_format = TRUE
      )
      is_iv <- TRUE
    }

    X <- model_matrix(object, type = "rhs", collin.rm = TRUE)
    Y <- model.response(model.frame(object))
    has_fe <- ifelse(length(names(object$fe)) > 0, TRUE, FALSE)

    weights <- weights(object)
    if (is.null(weights)) {
      has_weights <- FALSE
      weights <- rep(1, N)
    } else {
      has_weights <- TRUE
    }

    if (has_weights) {
      if (!is.null(fe)) {
        rlang::abort(
          "boottest() unfortunately currently does not support WLS and
          fixed effects. Please set fe = NULL to run a bootstrap with WLS.",
          use_cli_format = TRUE
        )
      }
    }

    if (has_fe) {
      get_fe <- transform_fe(
        object = object,
        X = X,
        Y = Y,
        fe = fe,
        N = N,
        has_weights = has_weights,
        engine = engine, 
        bootstrap_type = bootstrap_type, 
        clustid_char = clustid
      )
      X <- get_fe$X
      Y <- get_fe$Y
      fixed_effect <- get_fe$fixed_effect
      W <- get_fe$W
      n_fe <- get_fe$n_fe
      k2 <- get_fe$k2
    } else {
      fixed_effect <- NULL
      k2 <- 0
      W <- n_fe <- NULL
      W <- n_fe <- NULL
    }

    # get cluster variable
    if (!is.null(clustid)) {
      clustid_list <- get_cluster(
        object = object,
        clustid_char = clustid,
        N = N,
        bootcluster = bootcluster,
        call_env = call_env
      )

      vcov_sign <- clustid_list$vcov_sign
      clustid <- clustid_list$clustid
      clustid_dims <- ncol(clustid)
      N_G <- clustid_list$N_G
      cluster_names <- clustid_list$cluster_names

      cluster_bootcluster <- clustid_list$cluster_bootcluster
      bootcluster <- clustid_list$bootcluster
      all_c <- clustid_list$all_c
    } else {
      vcov_sign <-
        clustid_dims <-
        clustid <- bootcluster <- N_G <- cluster_names <- NULL
      cluster_bootcluster <- bootcluster <- all_c <- NULL
    }

    # iv prep
    instruments <- X_exog <- X_endog <- NULL
    # if(is_iv){
    #  R0 <- rep(0, n_exog + n_endog)
    #  R0[
    # match(param, c(names(object$exogenous), names(object$endogenous)))] <- R
    #  names(R0) <- c(names(object$exogenous), names(object$endogenous))
    # } else {
    if (!is.matrix(R)) {
      R0 <- rep(0, length(colnames(X)))
      R0[match(param, colnames(X))] <- R
      names(R0) <- colnames(X)
    } else {
      # need matrix input for mboottest
      q <- nrow(R)
      p <- ncol(R)
      R0 <- matrix(0, q, ncol(X))
      R0[, 1:p] <- R
    }
    # }
    
    res <- list(
      Y = Y,
      X = X,
      weights = weights,
      fixed_effect = fixed_effect,
      W = W,
      n_fe = n_fe,
      N = N,
      k = k,
      k2 = k2,
      clustid = clustid,
      vcov_sign = vcov_sign,
      clustid_dims = clustid_dims,
      N_G = N_G,
      cluster_bootcluster = cluster_bootcluster,
      bootcluster = bootcluster,
      N_G_bootcluster = length(unique(bootcluster[[1]])),
      R0 = R0,
      X_exog = NULL,
      X_endog = NULL,
      instruments = NULL,
      has_fe = has_fe,
      all_c = all_c
    )
    
    if (is_iv) {
      class(res) <- c("preprocess", "iv")
    } else {
      class(res) <- c("preprocess", "ols")
    }
    
    res
  }


preprocess2.lm <-
  function(object,
           clustid,
           R,
           param,
           engine,
           bootcluster, 
           bootstrap_type = NULL) {
    #' preprocess data for objects of type lm
    #'
    #' @param object an object of type lm
    #' @param clustid a character string containing the name(s) of the
    #'  cluster variables
    #' @param R Hypothesis Vector giving linear combinations of coefficients.
    #' @param param character vector. names of the parameter(s) to test
    #' @param engine The bootstrap algorithm to run. Either "R" or
    #' "WildBootTests.jl"
    #' @param bootcluster a character string containing the name(s) of
    #' the bootcluster variables. Alternatively, "min" or "max"
    #'
    #' @noRd
    #'
    #' @method preprocess2 lm
    
    call <- object$call
    call_env <- environment(formula(object))
    fml <- formula(object)
    fml <- Formula::as.Formula(fml)
    
    N <- nobs(object)
    # lm and felm don't drop NAs due to multicollinearity, while fixest does
    k <- length(na.omit(coef(object)))
    p <- object$p
    
    is_iv <- FALSE
    
    X <- model_matrix(object, collin.rm = TRUE)
    Y <- model.response(model.frame(object))
    has_fe <- FALSE
    
    weights <- weights(object)
    if (is.null(weights)) {
      has_weights <- FALSE
      weights <- rep(1, N)
    } else {
      has_weights <- TRUE
    }
    
    # get cluster variable
    if (!is.null(clustid)) {
      clustid_list <- get_cluster(
        object = object,
        clustid_char = clustid,
        N = N,
        bootcluster = bootcluster,
        call_env = call_env
      )
      
      vcov_sign <- clustid_list$vcov_sign
      clustid <- clustid_list$clustid
      clustid_dims <- ncol(clustid)
      N_G <- clustid_list$N_G
      cluster_names <- clustid_list$cluster_names
      
      cluster_bootcluster <- clustid_list$cluster_bootcluster
      bootcluster <- clustid_list$bootcluster
      all_c <- clustid_list$all_c
    } else {
      vcov_sign <-
        clustid_dims <-
        clustid <- bootcluster <- N_G <- cluster_names <- NULL
      cluster_bootcluster <- bootcluster <- all_c <- NULL
    }
    
    
    instruments <- X_exog <- X_endog <- NULL
    if (!is.matrix(R)) {
      R0 <- rep(0, length(colnames(X)))
      R0[match(param, colnames(X))] <- R
      names(R0) <- colnames(X)
    } else {
      q <- nrow(R)
      p <- ncol(R)
      R0 <- matrix(0, q, ncol(X))
      R0[, 1:p] <- R
    }
    
    
    
    res <- list(
      Y = Y,
      X = X,
      weights = weights,
      fixed_effect = NULL,
      W = NULL,
      n_fe = NULL,
      N = N,
      k = k,
      k2 = 0,
      clustid = clustid,
      vcov_sign = vcov_sign,
      clustid_dims = clustid_dims,
      N_G = N_G,
      cluster_bootcluster = cluster_bootcluster,
      bootcluster = bootcluster,
      N_G_bootcluster = length(unique(bootcluster[[1]])),
      R0 = R0,
      X_exog = NULL,
      X_endog = NULL,
      instruments = NULL,
      has_fe = has_fe,
      all_c = all_c
    )
    
    class(res) <- c("preprocess", "ols")
    
    res
  }



preprocess2.ivreg <-
  function(object,
           clustid,
           R,
           param,
           engine,
           bootcluster, 
           bootstrap_type = NULL) {
    #' preprocess data for objects of type ivreg
    #'
    #' @param object an object of type ivreg
    #' @param clustid a character string containing the name(s) of the
    #'  cluster variables
    #' @param R Hypothesis Vector giving linear combinations of coefficients.
    #' @param param character vector. names of the parameter(s) to test
    #' @param engine The bootstrap algorithm to run. Either "R" or
    #'  "WildBootTests.jl"
    #' @param bootcluster a character string containing the name(s) of
    #' the bootcluster variables. Alternatively, "min" or "max"
    #'
    #' @noRd
    #'
    #' @method preprocess2 ivreg
    
    call <- object$call
    call_env <- environment(formula(object))
    fml <- formula(object)
    fml <- Formula::as.Formula(fml)
    is_iv <- TRUE
    
    N <- nobs(object)
    # lm and felm don't drop NAs due to multicollinearity, while fixest does
    k <- length(na.omit(coef(object)))
    p <- object$p
    
    is_iv <- FALSE
    has_fe <- FALSE
    
    X_endog <-
      model.matrix(
        object,
        component = "regressors", na.rm = TRUE
      )[, object$endogenous, drop = FALSE]
    X_exog <-
      model.matrix(
        object,
        component = "instruments", na.rm = TRUE
      )[, object$exogenous, drop = FALSE]
    instruments <-
      model.matrix(
        object,
        component = "instruments", na.rm = TRUE
      )[, object$instruments, drop = FALSE]
    Y <- model.response(model.frame(object))
    
    n_exog <- length(object$exogenous)
    n_endog <- length(object$endogenous)
    n_instruments <- length(object$instruments)
    
    weights <- weights(object)
    if (is.null(weights)) {
      has_weights <- FALSE
      weights <- rep(1, N)
    } else {
      has_weights <- TRUE
    }
    
    # get cluster variable
    if (!is.null(clustid)) {
      clustid_list <- get_cluster(
        object = object,
        clustid_char = clustid,
        N = N,
        bootcluster = bootcluster,
        call_env = call_env
      )
      
      vcov_sign <- clustid_list$vcov_sign
      clustid <- clustid_list$clustid
      clustid_dims <- ncol(clustid)
      N_G <- clustid_list$N_G
      cluster_names <- clustid_list$cluster_names
      
      cluster_bootcluster <- clustid_list$cluster_bootcluster
      bootcluster <- clustid_list$bootcluster
      all_c <- clustid_list$all_c
    } else {
      vcov_sign <-
        clustid_dims <-
        clustid <- bootcluster <- N_G <- cluster_names <- NULL
      cluster_bootcluster <- bootcluster <- all_c <- NULL
    }
    
    
    # iv prep
    R0 <- rep(0, n_exog + n_endog)
    R0[
      match(
        param, 
        c(
          names(object$exogenous), names(object$endogenous)
        )
      )
    ] <- R
    names(R0) <- c(
      names(object$exogenous), 
      names(object$endogenous)
    )
    
    res <- list(
      Y = Y,
      X = NULL,
      weights = weights,
      fixed_effect = NULL,
      W = NULL,
      n_fe = NULL,
      N = N,
      k = k,
      k2 = 0,
      clustid = clustid,
      vcov_sign = vcov_sign,
      clustid_dims = clustid_dims,
      N_G = N_G,
      cluster_bootcluster = cluster_bootcluster,
      bootcluster = bootcluster,
      N_G_bootcluster = length(unique(bootcluster[[1]])),
      R0 = R0,
      # model_frame = model_frame,
      X_exog = X_exog,
      X_endog = X_endog,
      instruments = instruments,
      has_fe = has_fe,
      all_c = all_c
    )
    
    
    class(res) <- c("preprocess", "iv")
    
    res
  }


demean_fe <- function(X, Y, fe, has_weights, N) {
  #' function deamens design matrix X and depvar Y if fe != NULL
  #'
  #' @param X the design matrix X
  #' @param Y the dependent variable Y as a numeric vector
  #' @param fe the name of the fixed effect to be projected out
  #' @param has_weights logical - have regression weights been used in the
  #' original model?
  #' @param N the number of observations
  #'
  #' @return A list that includes - among other things - the demeaned
  #' design matrix X and depvar Y
  #'
  #' @noRd
  
  g <- collapse::GRP(fe, call = FALSE)
  X <- collapse::fwithin(X, g)
  Y <- collapse::fwithin(Y, g)
  
  fixed_effect_W <- as.factor(fe[, 1])
  
  if (!has_weights) {
    levels(fixed_effect_W) <-
      (1 / table(fe)) # because duplicate levels are forbidden
  } else {
    rlang::abort(
      "Currently, boottest() does not jointly support regression weights /
      WLS and fixed effects. If you want to use
      boottest() for inference based on WLS, please set fe = NULL.", 
      use_cli_format = TRUE
    )
    # levels(fixed_effect_W) <- 1 / table(fixed_effect)
  }
  
  W <- Matrix::Diagonal(N, as.numeric(as.character(fixed_effect_W)))
  n_fe <- length(unique(fe[, 1]))
  
  res <- list(
    X = X,
    Y = Y,
    fixed_effect_W,
    W = W,
    n_fe = n_fe
  )
  
  res
}


transform_fe <-
  function(object, X, Y, fe, has_weights, N, engine, 
           bootstrap_type, clustid_char) {
    #' preprocess the model fixed effects
    #'
    #'  If is.null(fe) == TRUE, all
    #' fixed effects specified in the fixest or felm model are simply added
    #' as dummy variables to the design matrix X. If !is.null(fe), the fixed
    #' effect specified via fe is projected out in the bootstrap - all other
    #' fixed effects are added as dummy variables
    #' @param object the regression object
    #' @param X the design matrix of the regression object
    #' @param fe character vector, name of the fe to be projected out, or NULL
    #' @param has_weights logical - have regression weights been used in the
    #'  original model?
    #' @param N the number of observations
    #' @param engine bootstrap algorithm to run. Either "R" or
    #' "WildBootTests.jl"
    #'
    #' @return a list containing X - the design matrix plus additionally
    #'  attached fixed effects, and
    #'         fixed_effect - a data.frame containing the fixed effect to
    #'         be projected out
    #'
    #' @noRd
    
    all_fe <- model_matrix(object, type = "fixef", collin.rm = TRUE)
    # make sure all fixed effects variables are characters
    
    n_fe <- ncol(all_fe)
    all_fe_names <- names(all_fe)
    k2 <- Reduce("+", lapply(all_fe, function(x) {
      length(unique(x))
    }))
    
    fe_df <- W <- n_fe <- NULL
    
    # if a fe is to be projected out in the bootstrap
    if (!is.null(fe)) {
      # add all fe except for fe to data frame
      fe_df <- all_fe[, fe, drop = FALSE]
      add_fe <- all_fe[, all_fe_names != fe, drop = FALSE]
      add_fe_names <- names(add_fe)
      
      # nothing to add if only one fixed effect in model
      if (length(add_fe_names) != 0) {
        fml_fe <- reformulate(add_fe_names, response = NULL)
        
        add_fe_dummies <-
          model.matrix(
            fml_fe, model.frame(
              fml_fe,
              data = as.data.frame(
                add_fe
              )
            )
          )
        # drop the intercept
        add_fe_dummies <-
          add_fe_dummies[, -which(colnames(add_fe_dummies) == "(Intercept)")]
        X <-
          as.matrix(collapse::add_vars(as.data.frame(X), add_fe_dummies))
      }
      
      # project out fe
      if (engine == "R") {
        if(bootstrap_type != "fnw11"){
          if(fe != clustid_char){
            rlang::abort("Only cluster fixed effects are supported for bootstrap_types 
                 '11', '13', '31', '33'.", 
                use_cli_format = TRUE
                )
          }
        }
        
        # WildBootTests.jl does demeaning internally
        prep_fe <- demean_fe(X, Y, fe_df, has_weights, N)
        X <- prep_fe$X
        Y <- prep_fe$Y
        W <- prep_fe$W
        n_fe <- prep_fe$n_fe
      }
    } else {
      add_fe <- all_fe
      add_fe_names <- names(add_fe)
      fml_fe <- reformulate(add_fe_names, response = NULL)
      add_fe_dummies <-
        model.matrix(fml_fe, model.frame(fml_fe, data = as.data.frame(add_fe)))
      X <-
        as.matrix(collapse::add_vars(as.data.frame(X), add_fe_dummies))
    }
    
    res <- list(
      X = X,
      Y = Y,
      W = W,
      n_fe = n_fe,
      k2 = k2,
      fixed_effect = fe_df
    )
    
    res
  }


get_cluster <-
  function(object,
           clustid_char,
           bootcluster,
           N,
           call_env) {
    #' function creates a data.frame with cluster variables
    #'
    #' @param object An object of type lm, fixest, felm or ivreg
    #' @param clustid_char the name of the cluster variable(s) as
    #' a character vector
    #' @param bootcluster the name of the bootcluster variable(s)
    #'  as a character vector, or "min" or "max" for multiway clustering
    #' @param N the number of observations used in the bootstrap
    #' @param call_env the environment in which the 'object' was evaluated
    #'
    #' @noRd
    #'
    #' @return a list, containing, among other things, a data.frame of the
    #'  cluster variables,
    #'         a data.frame of the bootcluster variable(s), and a helper
    #'         matrix, all_c, used in `engine_julia()`
    
    # ----------------------------------------------------------------------- #
    # Note: a large part of the following code was taken and adapted from the
    # sandwich R package, which is distributed under GPL-2 | GPL-3
    # Zeileis A, Köll S, Graham N (2020). "Various Versatile Variances:
    # An object-Oriented Implementation of Clustered Covariances in R."
    # _Journal of Statistical Software_,  *95*(1), 1-36.
    # doi: 10.18637/jss.v095.i01 (URL: https://doi.org/10.18637/jss.v095.i01).
    
    # changes by Alexander Fischer:
    # no essential changes, but slight reorganization of pieces of code
    
    dreamerr::check_arg(clustid_char, "character scalar|charakter vector")
    dreamerr::check_arg(bootcluster, "character scalar | character vector")
    
    clustid_fml <- reformulate(clustid_char)
    # Step 1: create cluster df
    
    
    manipulate_object <- function(object){
      if(inherits(object, "fixest")){
        if(!is.null(object$fixef_vars)){
          update(object, . ~ + 1 | . + 1)
        } else {
          update(object, . ~ + 1 )
        }
      } else {
        object
      }
    }
    
    cluster_tmp <-
      if ("Formula" %in% loadedNamespaces()) {
        ## FIXME to suppress potential warnings due to | in Formula
        suppressWarnings(
          expand.model.frame(
            model = 
              manipulate_object(object),
            extras = clustid_fml,
            na.expand = FALSE,
            envir = call_env
          )
        )
      } else {
        expand.model.frame(
          model = 
            manipulate_object(object),
          extras = clustid_fml,
          na.expand = FALSE,
          envir = call_env
        )
      }
    
    # if(inherits(cluster_tmp, "try-error")){
    #   if(inherits(object, "fixest") || inherits(object, "felm")){
    #     if(grepl("non-numeric argument to binary operator$", 
    #              attr(cluster_tmp, "condition")$message)){
    #       rlang::abort("In your model, you have specified multiple fixed effects,
    #            none of which are of type factor. While `fixest::feols()` and
    #            `lfe::felm()` handle this case without any troubles,  
    #            `boottest()` currently cannot handle this case - please 
    #            change the type of (at least one) fixed effect(s) to factor.
    #            If this does not solve the error, please report the issue
    #            at https://github.com/s3alfisc/fwildclusterboot.")
    #     }
    #     if(grepl("operations are possible only for numeric, logical
    #              or complex types$", 
    #              attr(cluster_tmp, "condition")$message)){
    #       rlang::abort("Either a fixed effect or a cluster variable in your fixest()
    #            or felm() model is currently specified as a character. 
    #            'boottest()' relies on 'expand.model.frame',
    #            which can not handle these variable types in models.
    #            Please change these character variables to factors. ")
    #     }
    #   }
    # }
    
    cluster_df <-
      model.frame(clustid_fml, cluster_tmp, na.action = na.pass)
    # without cluster intersection
    N_G <-
      vapply(cluster_df, function(x) {
        length(unique(x))
      }, numeric(1))
    
    # Step 1: decode bootcluster variable
    
    # create a bootcluster vector
    if (length(bootcluster) == 1) {
      if (bootcluster == "max") {
        # use both vars
        bootcluster_char <- clustid_char
      } else if (bootcluster == "min") {
        # only minimum var
        bootcluster_char <- clustid_char[which.min(N_G)]
      } else {
        bootcluster_char <- bootcluster
      }
    } else {
      bootcluster_char <- bootcluster
    }
    
    # add bootcluster variable to formula of clusters
    cluster_bootcluster_fml <-
      update(
        clustid_fml, paste(
          "~ . +", paste(
            bootcluster_char,
            collapse = " + "
          )
        )
      )
    
    
    cluster_bootcluster_tmp <-
      if ("Formula" %in% loadedNamespaces()) {
        ## FIXME to suppress potential warnings due to | in Formula
        suppressWarnings(
          expand.model.frame(
            model = 
              manipulate_object(object),
            extras = cluster_bootcluster_fml,
            na.expand = FALSE,
            envir = call_env
          )
        )
      } else {
        expand.model.frame(
          model = 
            manipulate_object(object),
          extras = cluster_bootcluster_fml,
          na.expand = FALSE,
          envir = call_env
        )
      }
    
    # data.frame as needed for WildBootTests.jl
    cluster_bootcluster_df <- model.frame(
      cluster_bootcluster_fml,
      cluster_bootcluster_tmp,
      na.action = na.pass
    )
    
    # if(inherits(cluster_tmp, "try-error")){
    #   if(inherits(object, "fixest") || inherits(object, "felm")){
    #     if(
    #       grepl(
    #         "non-numeric argument to binary operator$",
    #         attr(
    #           cluster_tmp, "condition"
    #         )$message
    #       )
    #     ){
    #       rlang::abort(
    #         "In your model, you have specified multiple fixed effects, 
    #       none of which are of type factor. While `fixest::feols()` and 
    #       `lfe::felm()` handle this case without any troubles,  `boottest()`
    #       currently cannot handle this case - please change the type of
    #       (at least one) fixed effect(s) to factor. If this does not solve
    #       the error, please report the issue at 
    #       https://github.com/s3alfisc/fwildclusterboot."
    #       )
    #     }
    #     if(
    #       grepl(
    #   "operations are possible only for numeric, logical or complex types$",
    #         attr(
    #           cluster_tmp,
    #           "condition")$message
    #       )
    #     ){
    #       rlang::abort(
    #         "Either a fixed effect or a cluster variable in your fixest() or
    #       felm() model is currently specified as a character. 'boottest()' 
    #       relies on 'expand.model.frame', which can not handle these variable
    #       types in models. 
    #       Please change these character variables to factors."
    #       )
    #     }
    #   }
    # }
    
    # data.frames with clusters, bootcluster
    cluster <- cluster_bootcluster_df[, clustid_char, drop = FALSE]
    bootcluster <-
      cluster_bootcluster_df[, bootcluster_char, drop = FALSE]
    
    if (!any(bootcluster_char %in% clustid_char)) {
      is_subcluster <- TRUE
      if (!(any(names(bootcluster) %in% c(clustid_char, names(coef(
        object
      )))))) {
        rlang::abort(
          "A bootcluster variable is neither contained in the cluster
          variables nor in the model coefficients.", 
          use_cli_format = TRUE
        )
      }
    } else {
      is_subcluster <- FALSE
    }
    
    ## handle omitted or excluded observations (works for lfe, lm)
    if ((N != NROW(cluster)) &&
        !is.null(object$na.action) &&
        (class(object$na.action) %in% c("exclude", "omit"))) {
      cluster <- cluster[-object$na.action, , drop = FALSE]
    }
    
    if ((N != NROW(bootcluster)) &&
        !is.null(object$na.action) &&
        (class(object$na.action) %in% c("exclude", "omit"))) {
      bootcluster <- bootcluster[-object$na.action, , drop = FALSE]
    }
    
    if (N != nrow(cluster) && inherits(object, "fixest")) {
      cluster <- cluster[unlist(object$obs_selection), , drop = FALSE]
      bootcluster <-
        bootcluster[unlist(object$obs_selection), , drop = FALSE]
    }
    
    if (NROW(cluster) != N) {
      rlang::abort(
        "The number of observations in 'cluster' and 'nobs()' do not match", 
        use_cli_format = TRUE
      )
    }
    if (NROW(bootcluster) != N) {
      rlang::abort(
        "The number of observations in 'bootcluster' and 'nobs()' do not match", 
        use_cli_format = TRUE
      )
    }
    
    if (any(is.na(cluster))) {
      rlang::abort(
        "`boottest()` cannot handle NAs in `clustid` variables that are not
        part of the estimated model object.", 
        use_cli_format = TRUE
      )
    }
    if (any(is.na(bootcluster))) {
      rlang::abort(
        "`boottest()` cannot handle NAs in `bootcluster` variables that are
        not part of the estimated model object.", 
        use_cli_format = TRUE
      )
    }
    
    
    clustid_dims <- length(clustid_char)
    
    i <- !vapply(cluster, is.numeric, logical(1))
    cluster[i] <- lapply(cluster[i], as.character)
    
    # taken from multiwayvcov::cluster.boot
    acc <- list()
    for (i in 1:clustid_dims) {
      acc <-
        append(acc, utils::combn(1:clustid_dims, i, simplify = FALSE))
    }
    
    vcov_sign <- vapply(acc, function(i) {
      (-1)^(length(i) + 1)
    }, numeric(1))
    acc <- acc[-1:-clustid_dims]
    
    if (clustid_dims > 1) {
      for (i in acc) {
        cluster <- cbind(cluster, Reduce(paste, cluster[, i]))
        names(cluster)[length(names(cluster))] <-
          Reduce(paste, names(cluster[, i]))
      }
    }
    
    N_G <- vapply(cluster, function(x) {
      length(unique(x))
    }, numeric(1))
    
    # now do all the other bootcluster things
    c1 <-
      bootcluster_char[which(!(bootcluster_char %in% clustid_char))]
    # both bootstrapping and error cluster: all variables in clustid_char that
    # are also in bootcluster
    c2 <- clustid_char[which(clustid_char %in% bootcluster_char)]
    # only error cluster: variables in clustid_char not in c1, c2
    c3 <- clustid_char[which(!(clustid_char %in% c(c1, c2)))]
    all_c <- c(c1, c2, c3)
    
    if (length(bootcluster_char) > 1) {
      bootcluster <- as.data.frame(Reduce(paste, bootcluster))
      names(bootcluster) <- Reduce(paste, bootcluster_char)
    }
    
    
    res <- list(
      vcov_sign = vcov_sign,
      clustid_dims = clustid_dims,
      clustid = cluster,
      N_G = N_G,
      cluster_names = names(cluster),
      all_c = all_c,
      bootcluster = bootcluster,
      cluster_bootcluster = cluster_bootcluster_df
    )
    
    res
  }
Any scripts or data that you put into this service are public.
fwildclusterboot documentation built on March 7, 2023, 5:33 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
fwildclusterboot
Fast Wild Cluster Bootstrap Inference for Linear Models

R/preprocess2.R
In fwildclusterboot: Fast Wild Cluster Bootstrap Inference for Linear Models

Defines functions get_cluster transform_fe demean_fe preprocess2.ivreg preprocess2.lm preprocess2.felm preprocess2.fixest preprocess2

Try the fwildclusterboot package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

fwildclusterboot Fast Wild Cluster Bootstrap Inference for Linear Models

R/preprocess2.R In fwildclusterboot: Fast Wild Cluster Bootstrap Inference for Linear Models

Defines functions get_cluster transform_fe demean_fe preprocess2.ivreg preprocess2.lm preprocess2.felm preprocess2.fixest preprocess2

Try the fwildclusterboot package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

fwildclusterboot
Fast Wild Cluster Bootstrap Inference for Linear Models

R/preprocess2.R
In fwildclusterboot: Fast Wild Cluster Bootstrap Inference for Linear Models