R/did-std.R

Defines functions double_did_compute ddid_boot did_compute_weights ddid_fit did_std

Documented in ddid_boot ddid_fit did_compute_weights did_std double_did_compute

#' Implement the Double DID under the standard design
#' @param formula Formula.
#' @param data Panel data.
#' @param id_unit A variable name for unit index. Required for panel data.
#' @param id_time A variable name for time index.
#' @param is_panel A boolean argument. Should be \code{FALSE} for the repeated cross-sectional data.
#' @param option A list of option parameters.
#' @return Double DID estimates.
#' @keywords internal
did_std <- function(
  formula, data, id_unit, id_time, is_panel = TRUE, option
) {

  ## input check
  if (isFALSE(is_panel)) id_unit <- NULL
  if (isTRUE(is_panel) && is.null(id_unit)) {
    stop("A vaiable name should be provided to id_unit.")
  }

  ## prepare formulas
  fm_prep <- did_formula(formula, is_panel)

  ##
  ## handle cluster variable
  ##
  var_cluster <- option$id_cluster
  if (is.null(var_cluster) && isTRUE(is_panel)) {
    var_cluster <- "id_unit"
    var_cluster_pre <- id_unit
  }

  ##
  ## transform data
  ##
  if (isTRUE(is_panel)) {
    dat_did <- did_panel_data(
      fm_prep$var_outcome, fm_prep$var_treat, fm_prep$var_covars,
      var_cluster_pre, id_unit, id_time, data
    )
  } else {
    dat_did <- did_rcs_data(
      fm_prep$var_outcome, fm_prep$var_treat, fm_prep$var_post,
      fm_prep$var_covars, var_cluster, id_time, data
    )
  }

  ## -------------------------------- ##
  ## estimate ATT via DID and sDID    ##
  ## -------------------------------- ##

  ## point estimate for DID and sDID
  fit_did <- lapply(option$lead, function(ll) ddid_fit(fm_prep$fm_did, dat_did, lead = ll))

  ## compute weights via bootstrap --> compute W
  boot_out  <- did_compute_weights(fm_prep, dat_did, var_cluster, is_panel, option)

  ## compute double did estimate and variance
  estimates <- double_did_compute(fit_did, boot_out, option$lead, option$se_boot)

  ## compute double did estimate
  return(estimates)
}

#' Fit DID and sDID
#'
#' @param data An output of \code{did_panel_data()}.
#' @param formula A formula of the form \code{y ~ Gi + It + Gi * It + x1 + x2}.
#' @param lead A value of the lead parameter.
#' @importFrom purrr map_dbl
#' @importFrom dplyr filter
#' @importFrom rlang .data
#' @importFrom stats lm
#' @return A numeric vector that contains the DID estimate and the sequential DID estimate.
#' @keywords internal
ddid_fit <- function(formula, data, lead = 0) {
  time_use <- c(-1, lead)
  est <- map_dbl(formula, function(fm) {
    fit <- lm(fm, data = filter(data, .data$id_time_std %in% time_use))
    return(fit$coef['Gi:It'])
  })

  return(est)
}


#' Compute Weighting Matrix via Bootstrap
#' @param formula Formula.
#' @param dat_did Panel data.
#' @param var_cluster A variable used for clustering standard errors.
#' @param is_panel A boolean argument to indicate if data is in the panel format or the repeated cross-section format.
#' @param option A list of options.
#' @return A list of double DID weights.
#' @importFrom future.apply future_lapply
#' @importFrom future plan multicore sequential
#' @importFrom purrr map
#' @importFrom dplyr pull
#' @importFrom rlang !! sym
#' @importFrom stats cov
#' @keywords internal
did_compute_weights <- function(
  formula, dat_did, var_cluster, is_panel, option
) {

  ## setup cluster ID for bootstrap
  if (is.null(var_cluster)) {
    id_cluster_vec <- 1:nrow(dat_did)
  } else {
    id_cluster_vec <- unique(pull(dat_did, !!sym(var_cluster)))
  }

  ## --------------------------------------------
  ## Compute weights
  ## --------------------------------------------
  ## setup worker
  setup_parallel(option$parallel)

  ## use future_lapply to implement the bootstrap parallel
  est_boot <- future_lapply(1:option$n_boot, function(i) {
    tryCatch({
      ddid_boot(formula, dat_did, id_cluster_vec, var_cluster, is_panel, option$lead)
    }, error = function(e) {
      NULL
    })
  }, future.seed = TRUE)
  est_boot <- est_boot[lengths(est_boot) != 0]

  ## convert VCVO to weights
  weights_save <- vector("list", length = length(option$lead))
  for (ll in 1:length(option$lead)) {
    tmp    <- do.call(rbind, map(est_boot, ~.x[[ll]]))
    VCOV   <- cov(tmp)
    W      <- solve(VCOV)

    ## compute weights based on W
    w_did  <- (W[1,1] + W[1,2]) / sum(W)
    w_sdid <- (W[2,2] + W[1,2]) / sum(W)
    weights_save[[ll]] <- list(W = W, vcov = VCOV, weights = c(w_did, w_sdid))
  }

  return(list(weights = weights_save, estimates = est_boot))
}

#' Bootstrap
#' @inheritParams did_compute_weights
#' @param lead A vector of lead parameters.
#' @return A list of DID and sDID estimates.
#' @keywords internal
ddid_boot <- function(
  fm_prep, dat_did, id_cluster_vec, var_cluster, is_panel, lead
) {
  ## sample index
  id_boot <- sample(id_cluster_vec,
    size = length(id_cluster_vec), replace = TRUE
  )

  ## create dataset
  dat_tmp <- list()
  for (j in 1:length(id_boot)) {
    if (is.null(var_cluster)) {
      id_tmp <- id_boot[j]
    } else {
      id_tmp <- which(dat_did[,var_cluster] == id_boot[j])
    }
    dat_tmp[[j]] <- dat_did[id_tmp, ]
    dat_tmp[[j]]$id_unit <- j
  }

  ## create did_data object
  if (isTRUE(is_panel)) {
    dat_boot <- did_panel_data(
      var_outcome = "outcome", var_treat = 'treatment', fm_prep$var_covars,
      var_cluster, id_unit = "id_unit", id_time = 'id_time',
      data = do.call(rbind, dat_tmp)
    )
  } else {
    dat_boot <- did_rcs_data(
      var_outcome = "outcome", var_treat = "Gi", var_post = "It",
      fm_prep$var_covars, var_cluster,
      id_time = "id_time", do.call(rbind, dat_tmp)
    )
  }

  ## fit DID and sDID
  est <- lapply(lead, function(ll) ddid_fit(fm_prep$fm_did, dat_boot, ll))
  return(est)
}


#' Compute Point and Variance Estimates
#' @param fit A lift of fitted objects.
#' @param boot An output from bootstrap.
#' @param lead A vector of lead parameters.
#' @param se_boot
#'    A boolean argument to indicate if standard errors are computed based on the bootstrap-based
#'    (i.e., empirical variance), or based on the asymptotic approximation.
#' @return A list of estimates and weights.
#' @keywords internal
#' @importFrom dplyr as_tibble bind_rows
#' @importFrom stats var quantile
double_did_compute <- function(fit, boot, lead, se_boot) {

  weights  <- boot$weights
  boot_est <- boot$estimates
  estimates <- W_save <- vector("list", length = length(lead))
  for (ll in 1:length(lead)) {
    ## compute the
    ddid <- as.vector(weights[[ll]]$weights %*% fit[[ll]])

    ## variance estimate for Double DID
    if (isTRUE(se_boot)) {
      ## bootstrap-based variance
      ddid_boot <- purrr::map_dbl(
        boot_est, ~ as.vector(weights[[ll]]$weights %*% .x[[ll]]))
      ddid_var  <- var(ddid_boot, na.rm = TRUE)
      ci_ddid <- quantile(ddid_boot, prob = c(0.025, 0.975))

      ## compute the confidence intervals 
      boot_did_l <- do.call(rbind, map(boot_est, ~.x[[ll]]))
      var_tmp <- apply(boot_did_l, 2, var)
      var_did <- var_tmp[1]
      var_sdid <- var_tmp[2]
      ci_did <- apply(boot_did_l, 2, function(x) quantile(x, prob = c(0.025, 0.975)))

    } else {
      ## asymptotic variance formula
      ddid_var <- (1 / sum(weights[[ll]]$W))

      ## variance estimate for DID and sDID
      var_did  <- weights[[ll]]$vcov[1,1]
      var_sdid <- weights[[ll]]$vcov[2,2]

      ci_ddid <- c(
        ddid - qnorm(0.975) * sqrt(ddid_var),
        ddid + qnorm(0.975) * sqrt(ddid_var)
      )
      ci_did  <- rbind(
        fit[[ll]] - qnorm(0.975) * sqrt(c(var_did, var_sdid)),
        fit[[ll]] + qnorm(0.975) * sqrt(c(var_did, var_sdid))
      )
    }

    ## summarize estimates
    estimates[[ll]] <- data.frame(
      estimator   = c("Double-DID", "DID", "sDID"),
      lead        = lead[ll],
      estimate    = c(ddid, fit[[ll]]),
      std.error   = c(sqrt(ddid_var), sqrt(var_did), sqrt(var_sdid)),
      ci.low      = c(ci_ddid[1], ci_did[1, ]),
      ci.high     = c(ci_ddid[2], ci_did[2, ]),
      ddid_weight = c(NA, weights[[ll]]$weights)
    )

    W_save[[ll]] <- weights[[ll]]$W
  }

  estimates <- as_tibble(bind_rows(estimates))
  return(list(estimates = estimates, weights = W_save))
}
naoki-egami/DIDdesign documentation built on June 4, 2024, 2:59 p.m.