#' @title did_attgt
#'
#' @description Takes a "local" data.frame and computes
#' an estimate of a group time average treatment effect
#' and a corresponding influence function using a difference in differences
#' approach.
#'
#' The code relies on \code{gt_data} having certain variables defined.
#' In particular, there should be an \code{id} column (individual identifier),
#' \code{D} (treated group identifier), \code{period} (time period), \code{name}
#' (equal to "pre" for pre-treatment periods and equal to "post" for post
#' treatment periods), \code{Y} (outcome).
#'
#' In our case, we call \code{pte::two_by_two_subset} which sets up the
#' data to have this format before the call to \code{did_attgt}.
#'
#' @param gt_data data that is "local" to a particular group-time average
#' treatment effect
#' @param xformla one-sided formula for covariates used in the propensity score
#' and outcome regression models
#' @param ... extra function arguments; not used here
#'
#' @return attgt_if
#'
#' @export
did_attgt <- function(gt_data, xformla, ...) {
#-----------------------------------------------------------------------------
# handle covariates
#-----------------------------------------------------------------------------
# for outcome regression, get pre-treatment values
Xpre <- model.frame(xformla, data=subset(gt_data,name=="pre"))
# convert two period panel into one period
gt_data_outcomes <- tidyr::pivot_wider(gt_data[,c("D","id","period","name","Y")], id_cols=c(id, D),
names_from=c(name),
values_from=c(Y))
# merge outcome and covariate data
gt_dataX <- cbind.data.frame(gt_data_outcomes, Xpre)
# treatment dummy variable
D <- gt_dataX$D
# pre- and post-treatment outcomes
Y_post <- gt_dataX$post
Y_pre <- gt_dataX$pre
# call DRDID functions to make the computations;
# just like in `did` package
gt_dataX <- droplevels(gt_dataX)
attgt <- DRDID::drdid_panel(y1=Y_post,
y0=Y_pre,
D=D,
covariates=model.matrix(xformla,
data=gt_dataX),
inffunc=TRUE)
# return attgt
attgt_if(attgt=attgt$ATT, inf_func=attgt$att.inf.func)
}
#' @title pte_attgt
#'
#' @description `pte_attgt` takes a "local" data.frame and computes
#' an estimate of a group time average treatment effect
#' and a corresponding influence function. This function generalizes
#' a number of existing methods.
#'
#' The code relies on \code{this.data} having certain variables defined.
#' In particular, there should be an \code{id} column (individual identifier),
#' \code{G} (group identifier), \code{period} (time period), \code{name}
#' (equal to "pre" for pre-treatment periods and equal to "post" for post
#' treatment periods), \code{Y} (outcome).
#'
#' In our case, we call \code{pte::two_by_two_subset} which sets up the
#' data to have this format before the call to `pte_attgt`
#'
#' @param gt_data data that is "local" to a particular group-time average
#' treatment effect
#' @param xformla one-sided formula for covariates used in the propensity score
#' and outcome regression models
#' @param d_outcome Whether or not to take the first difference of the outcome.
#' The default is FALSE. To use difference-in-differences, set this to be TRUE.
#' @param d_covs_formula A formula for time varying covariates to enter the
#' first estimation step models. The default is not to include any, and, hence,
#' to only include pre-treatment covariats.
#' @param lagged_outcome_cov Whether to include the lagged outcome as a covariate.
#' Default is FALSE.
#' @param est_method Which type of estimation method to use. Default is "dr" for
#' doubly robust. The other option is "reg" for regression adjustment.
#' @param ... extra function arguments; not used here
#'
#' @return attgt_if
#'
#' @export
pte_attgt <- function(gt_data, xformla, d_outcome=FALSE, d_covs_formula=~-1, lagged_outcome_cov=FALSE, est_method="dr", ...) {
this.g <- subset(gt_data, name=="post" & D==1)$G
this.tp <- unique(subset(gt_data, name=="post")$period)
#-----------------------------------------------------------------------------
# handle covariates
#-----------------------------------------------------------------------------
# pre-treatment covariates
Xpre <- model.frame(xformla, data=subset(gt_data, name=="pre"))
.w <- subset(gt_data, name=="pre")$.w
# change in covariates
dX <- model.frame(d_covs_formula, data=subset(gt_data, name=="post")) - model.frame(d_covs_formula, data=subset(gt_data,name=="pre"))
if (ncol(dX) > 0) colnames(dX) <- paste0("d", colnames(dX))
# lagged outcome
#if (lagged_outcome_cov) lagY_formula <- ~ -1 + Y else lagY_formula <- ~ -1
#lagY <- model.frame(lagY_formula, data=subset(gt_data, name=="pre"))
# convert two period panel into one period
gt_data_outcomes <- tidyr::pivot_wider(gt_data[,c("D","id","period","name","Y")], id_cols=c(id, D),
names_from=c(name),
values_from=c(Y))
# merge outcome and covariate data
gt_dataX <- cbind.data.frame(gt_data_outcomes, Xpre, dX, .w)
# treatment dummy variable
D <- gt_dataX$D
# post treatment outcome
Y <- gt_dataX$post
if (d_outcome) Y <- gt_dataX$post - gt_dataX$pre
# estimate attgt
# DRDID::drdid_panel is for panel data, but we can hack it
# to work in levels by just setting outcomes in "first period"
# to be equal to 0 for all units
gt_dataX <- droplevels(gt_dataX)
use_formula <- BMisc::toformula("", c(BMisc::rhs.vars(xformla), colnames(dX)))
if (lagged_outcome_cov) use_formula <- BMisc::addCovToFormla("pre", use_formula)
covmat <- model.matrix(use_formula, data=gt_dataX)
covmat2 <- covmat[D==0,]
# www <- gt_dataX[D==0,]$.w
n_unt <- sum(1-D)
precheck_reg <- qr(t(covmat2)%*%covmat2/n_unt)
keep_covs <- precheck_reg$pivot[1:precheck_reg$rank]
covmat <- covmat[,keep_covs]
# if group is too small, switch to reg adjustment
if (est_method=="dr") {
pscore_est <- glm(D ~ covmat, family=binomial(link="logit"), weights=(.w/sum(.w)))
pscore <-predict(pscore_est, type="response")
if (max(pscore) > 0.99) {
est_method <- "reg"
warning(paste0("Switching to regression adjustment because of small group size for group ",
this.g, " in time period ", this.tp))
}
}
if (est_method == "dr") {
attgt <- DRDID::drdid_panel(y1=Y,
y0=rep(0,length(Y)),
D=D,
covariates=covmat,
i.weights=(.w/sum(.w)),
inffunc=TRUE)
} else if (est_method == "reg") {
attgt <- DRDID::reg_did_panel(y1=Y,
y0=rep(0,length(Y)),
D=D,
covariates=covmat,
i.weights=(.w/sum(.w)),
inffunc=TRUE)
} else if (est_method == "grf") {
# sampling weights not supported here
# code requires custom version of grf package
tau.forest <- causal_forest(X=covmat, Y=Y, W=D)
# predict(tau.forest)$predictions[D==1]
grf_res <- average_treatment_effect(tau.forest, method="AIPW", target.sample = "treated")
this_n <- nrow(covmat)
this_if <- as.matrix(grf_res$inf_func * this_n)
#V <- t(this_if) %*% this_if / this_n
#sqrt(V) / sqrt(this_n)
#V <- t(grf_res$inf_func/sqrt(this_n)) %*% grf_res$inf_func/sqrt(this_n)
attgt <- list(ATT=grf_res$estimate, att.inf.func=this_if)
} else if (est_method=="lasso") {
# code adapted from: https://thomaswiemann.com/ddml/articles/did.html
learners = list(what=ddml::mdl_glmnet)
learners_DX = learners
att_fit <- ddml::ddml_att(y=Y,
D=D,
X=covmat,
learners=learners,
learners_DX=learners_DX,
sample_folds=10,
silent=TRUE)
inf.func <- att_fit$psi_b + att_fit$att * att_fit$psi_a
attgt <- list(ATT=att_fit$att, att.inf.func=inf.func)
} else {
stop(paste0("est_method: ", est_method, " is not supported"))
}
# return attgt
pte::attgt_if(attgt=attgt$ATT, inf_func=attgt$att.inf.func)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.