Nothing
# getCohortATTsFinalOLS
#' @title Calculate Cohort-Specific ATTs and Standard Errors for ETWFE
#' @description Computes the Average Treatment Effect on the Treated (ATT) for
#' each cohort, along with their standard errors and confidence intervals if
#' requested and feasible.
#' @param X_final Numeric matrix; the final design matrix, potentially
#' transformed by `Omega_sqrt_inv` and the fusion transformation.
#' @param treat_inds Integer vector; indices in the original (untransformed)
#' coefficient vector that correspond to the base treatment effects.
#' @param num_treats Integer; total number of base treatment effect parameters.
#' @param first_inds Integer vector; indices of the first treatment effect for
#' each cohort within the block of `num_treats` treatment effect parameters.
#' @param c_names Character vector; names of the `R` treated cohorts.
#' @param tes Numeric vector; estimated treatment effects in the original
#' parameterization for all `num_treats` possible cohort-time combinations.
#' @param sig_eps_sq Numeric scalar; variance of the idiosyncratic error term.
#' @param R Integer; total number of treated cohorts.
#' @param N Integer; total number of units.
#' @param T Integer; total number of time periods.
#' @param p Integer; total number of parameters in the model.
#' @param alpha Numeric scalar; significance level for confidence intervals
#' (e.g., 0.05 for 95% CIs).
#' @return A list containing:
#' \item{cohort_te_df}{Dataframe with cohort names, estimated ATTs, SEs, and
#' confidence interval bounds.}
#' \item{cohort_tes}{Named numeric vector of estimated ATTs for each cohort.}
#' \item{cohort_te_ses}{Named numeric vector of standard errors for cohort ATTs.}
#' \item{psi_mat}{Matrix used in SE calculation for overall ATT.}
#' \item{gram_inv}{(Potentially NA) Inverse of the Gram matrix for selected
#' features, used in SE calculation.}
#' \item{d_inv_treat_sel}{(If `fused=TRUE`) Relevant block of the inverse
#' fusion matrix for selected treatment effects.}
#' @details The function first computes the Gram matrix inverse (`gram_inv`).
#' Then, for each cohort `r`, it calculates the average
#' of the relevant `tes`. It uses `getPsiRFused` or
#' `getPsiRUnfused` to get a `psi_r` vector, which is then used with
#' `gram_inv` to find the standard error for that cohort's ATT.
#' @keywords internal
#' @noRd
getCohortATTsFinalOLS <- function(
X_final,
treat_inds,
num_treats,
first_inds,
c_names,
tes,
sig_eps_sq,
R,
N,
T,
p,
alpha = 0.05
) {
stopifnot(length(tes) <= num_treats)
stopifnot(all(!is.na(tes)))
stopifnot(nrow(X_final) == N * T)
stopifnot(ncol(X_final) == p)
X_to_pass <- X_final
# Start by getting Gram matrix needed for standard errors
res <- getGramInv(
N = N,
T = T,
X_final = X_to_pass,
treat_inds = treat_inds,
num_treats = num_treats,
calc_ses = TRUE
)
gram_inv <- res$gram_inv
calc_ses <- res$calc_ses
# First, each cohort
cohort_tes <- rep(as.numeric(NA), R)
cohort_te_ses <- rep(as.numeric(NA), R)
psi_mat <- matrix(0, num_treats, R)
for (r in 1:R) {
# Get indices corresponding to rth treatment
first_ind_r <- first_inds[r]
if (r < R) {
last_ind_r <- first_inds[r + 1] - 1
} else {
last_ind_r <- num_treats
}
stopifnot(last_ind_r >= first_ind_r)
stopifnot(all(first_ind_r:last_ind_r %in% 1:num_treats))
cohort_tes[r] <- mean(tes[first_ind_r:last_ind_r])
psi_r <- getPsiRUnfused(
first_ind_r,
last_ind_r,
sel_treat_inds_shifted = 1:num_treats,
gram_inv = gram_inv
)
stopifnot(length(psi_r) == num_treats)
psi_mat[, r] <- psi_r
# Get standard errors
if (calc_ses) {
cohort_te_ses[r] <- sqrt(
sig_eps_sq *
as.numeric(t(psi_r) %*% gram_inv %*% psi_r) /
(N * T)
)
}
}
stopifnot(length(c_names) == R)
stopifnot(length(cohort_tes) == R)
if (all(!is.na(gram_inv))) {
stopifnot(length(cohort_te_ses) == R)
cohort_te_df <- data.frame(
c_names,
cohort_tes,
cohort_te_ses,
cohort_tes - stats::qnorm(1 - alpha / 2) * cohort_te_ses,
cohort_tes + stats::qnorm(1 - alpha / 2) * cohort_te_ses
)
names(cohort_te_ses) <- c_names
names(cohort_tes) <- c_names
} else {
cohort_te_df <- data.frame(
c_names,
cohort_tes,
rep(NA, R),
rep(NA, R),
rep(NA, R)
)
}
colnames(cohort_te_df) <- c(
"Cohort",
"Estimated TE",
"SE",
"ConfIntLow",
"ConfIntHigh"
)
stopifnot(length(tes) == nrow(psi_mat))
ret <- list(
cohort_te_df = cohort_te_df,
cohort_tes = cohort_tes,
cohort_te_ses = cohort_te_ses,
psi_mat = psi_mat,
gram_inv = gram_inv,
calc_ses = calc_ses
)
return(ret)
}
# getTeResultsOLS
#' @title Calculate Overall ATT and its Standard Error for ETWFE
#' @description Computes the overall Average Treatment Effect on the Treated
#' (ATT) by taking a weighted average of cohort-specific ATTs. It also
#' calculates the standard error for this overall ATT, potentially considering
#' variability from estimated cohort probabilities.
#' @param sig_eps_sq Numeric scalar; variance of the idiosyncratic error term.
#' @param N Integer; total number of units.
#' @param T Integer; total number of time periods.
#' @param R Integer; total number of treated cohorts.
#' @param num_treats Integer; total number of base treatment effect parameters.
#' @param cohort_tes Numeric vector; estimated ATTs for each of the `R` cohorts.
#' (Simple average of all of the estimated treatment effects for each cohort
#' across time.)
#' @param cohort_probs Numeric vector; weights for each cohort, typically
#' estimated probabilities of belonging to cohort `r` conditional on being
#' treated. Length `R`. Sums to 1.
#' @param psi_mat Numeric matrix; matrix where column `r` is `psi_r` (from
#' `getCohortATTsFinal`). Dimensions: `length(sel_treat_inds_shifted)` x `R`.
#' @param gram_inv Numeric matrix; inverse of the Gram matrix for selected
#' treatment effect features.
#' @param tes Numeric vector; all `num_treats` estimated treatment effects
#' (original parameterization).
#' @param cohort_probs_overall Numeric vector; estimated marginal probabilities
#' of belonging to each treated cohort P(W=r). Length `R`.
#' @param first_inds Integer vector; indices of the first treatment effect for
#' each cohort.
#' @param calc_ses Logical; if `TRUE`, calculate standard errors.
#' @param indep_probs Logical; if `TRUE`, assumes `cohort_probs` (and
#' `cohort_probs_overall`) were estimated from an independent sample, leading
#' to a different SE formula (sum of variances) compared to when they are
#' estimated from the same sample (conservative SE including a covariance term).
#' @return A list containing:
#' \item{att_hat}{Numeric scalar; the estimated overall ATT.}
#' \item{att_te_se}{Numeric scalar; the standard error for `att_hat`. NA if
#' `calc_ses` is `FALSE`.}
#' \item{att_te_se_no_prob}{Numeric scalar; standard error for `att_hat`
#' ignoring variability from estimating cohort probabilities (i.e., only
#' `att_var_1`). NA if `calc_ses` is `FALSE`.}
#' @details The overall ATT (`att_hat`) is `cohort_tes %*% cohort_probs`.
#' If `calc_ses` is `TRUE`:
#' - `att_var_1` (variance from `theta_hat` estimation) is computed using
#' `psi_att = psi_mat %*% cohort_probs` and `gram_inv`.
#' - `att_var_2` (variance from cohort probability estimation) is computed by
#' calling `getSecondVarTermDataApp`.
#' - `att_te_se` is `sqrt(att_var_1 + att_var_2)` if `indep_probs` is `TRUE`,
#' otherwise it's a conservative SE: `sqrt(att_var_1 + att_var_2 + 2*sqrt(att_var_1 * att_var_2))`.
#' - `att_te_se_no_prob` is `sqrt(att_var_1)`.
#' @keywords internal
#' @noRd
getTeResultsOLS <- function(
# model,
sig_eps_sq,
N,
T,
R,
num_treats,
cohort_tes,
cohort_probs,
psi_mat,
gram_inv,
tes,
cohort_probs_overall,
first_inds,
calc_ses,
indep_probs = FALSE
) {
att_hat <- as.numeric(cohort_tes %*% cohort_probs)
if (calc_ses) {
stopifnot(nrow(psi_mat) == length(tes))
stopifnot(nrow(psi_mat) <= num_treats)
stopifnot(ncol(psi_mat) == R)
# Get ATT standard error
# first variance term: convergence of theta
psi_att <- psi_mat %*% cohort_probs
att_var_1 <- sig_eps_sq *
as.numeric(t(psi_att) %*% gram_inv %*% psi_att) /
(N * T)
stopifnot(length(tes) <= num_treats)
stopifnot(length(tes) == nrow(psi_mat))
# Second variance term: convergence of cohort membership probabilities
att_var_2 <- getSecondVarTermOLS(
psi_mat = psi_mat,
tes = tes,
cohort_probs_overall = cohort_probs_overall,
first_inds = first_inds,
num_treats = num_treats,
N = N,
T = T,
R = R
)
if (indep_probs) {
att_te_se <- sqrt(att_var_1 + att_var_2)
} else {
att_te_se <- sqrt(
att_var_1 +
att_var_2 +
2 *
sqrt(
att_var_1 * att_var_2
)
)
}
att_te_se_no_prob <- sqrt(att_var_1)
} else {
att_te_se <- NA
att_te_se_no_prob <- NA
}
return(list(
att_hat = att_hat,
att_te_se = att_te_se,
att_te_se_no_prob = att_te_se_no_prob
))
}
# getSecondVarTermOLS
#' @title Calculate Second Variance Term for ATT Standard Error for ETWFE
#' @description Computes the second component of the variance for the Average
#' Treatment Effect on the Treated (ATT). This component accounts for the
#' variability due to the estimation of cohort membership probabilities.
#' @param psi_mat Numeric matrix; a matrix where each column `r` is the `psi_r`
#' vector used in calculating the ATT for cohort `r`. Dimensions:
#' `length(sel_treat_inds_shifted)` x `R`.
#' @param tes Numeric vector; the estimated treatment effects for all
#' `num_treats` possible cohort-time combinations.
#' @param cohort_probs_overall Numeric vector; estimated marginal probabilities
#' of belonging to each treated cohort (P(W=r)). Length `R`.
#' @param first_inds Integer vector; indices of the first treatment effect for
#' each cohort within the `num_treats` block.
#' @param num_treats Integer; total number of base treatment effect parameters.
#' @param N Integer; total number of units.
#' @param T Integer; total number of time periods.
#' @param R Integer; total number of treated cohorts.
#' @return A numeric scalar representing the second variance component for the
#' ATT.
#' @details This function calculates `Sigma_pi_hat`, the covariance matrix of
#' the cohort assignment indicators, and a Jacobian matrix. These are then
#' combined with `theta_hat_treat_sel` to compute the variance term as
#' `T * t(theta_hat_treat_sel) %*% t(jacobian_mat) %*% Sigma_pi_hat %*% jacobian_mat %*%
#' theta_hat_treat_sel / (N * T)`. The construction of the Jacobian involves averaging parts of
#' `d_inv_treat_sel` corresponding to different cohorts.
#' @keywords internal
#' @noRd
getSecondVarTermOLS <- function(
psi_mat,
tes,
cohort_probs_overall,
first_inds,
num_treats,
N,
T,
R
) {
stopifnot(length(tes) == nrow(psi_mat))
# Get Sigma_pi_hat, the (sample-estimated) covariance matrix for the
# sample proportions (derived from the multinomial distribution)
Sigma_pi_hat <- -outer(
cohort_probs_overall[1:(R)],
cohort_probs_overall[1:(R)]
)
diag(Sigma_pi_hat) <- cohort_probs_overall[1:R] *
(1 - cohort_probs_overall[1:R])
stopifnot(nrow(Sigma_pi_hat) == R)
stopifnot(ncol(Sigma_pi_hat) == R)
# Gather a list of the indices corresponding to the treatment coefficients
# for each cohort. (Will be used to construct the Jacobian matrix.)
sel_inds <- list()
for (r in 1:R) {
first_ind_r <- first_inds[r]
if (r < R) {
last_ind_r <- first_inds[r + 1] - 1
} else {
last_ind_r <- num_treats
}
stopifnot(last_ind_r >= first_ind_r)
sel_inds[[r]] <- first_ind_r:last_ind_r
if (r > 1) {
stopifnot(min(sel_inds[[r]]) > max(sel_inds[[r - 1]]))
stopifnot(length(sel_inds[[r]]) <= length(sel_inds[[r - 1]]))
}
}
stopifnot(all.equal(unlist(sel_inds), 1:num_treats))
# Construct Jacobian matrix corresponding to the mapping from the
# individual cohort probabilities to the proportions calculated for the ATT.
# See Proof of Theorem 6.1 for form.
jacobian_mat <- matrix(
as.numeric(NA),
nrow = R,
ncol = R
)
stopifnot(length(cohort_probs_overall) == R)
stopifnot(sum(cohort_probs_overall) < 1 - 10e-6)
for (r in 1:R) {
# All terms in rth column have the same value except the diagonal term
col_r_val <- -cohort_probs_overall[r] / sum(cohort_probs_overall)^2
jacobian_mat[, r] <- rep(col_r_val, R)
# Diagonal term
cons_r <- (sum(cohort_probs_overall) -
cohort_probs_overall[r]) /
sum(cohort_probs_overall)^2
jacobian_mat[r, r] <- cons_r
}
stopifnot(all(!is.na(jacobian_mat)))
stopifnot(nrow(psi_mat) <= num_treats)
stopifnot(ncol(psi_mat) == R)
stopifnot(length(tes) == nrow(psi_mat))
# Finally, calculate variance term for ATT from Sigma_pi_hat, Jacobian
# matrix, psi_mat, and beta_hat. See proof of Theorem D.2 for details.
att_var_2 <- T *
as.numeric(
t(tes) %*%
psi_mat %*%
t(jacobian_mat) %*%
Sigma_pi_hat %*%
jacobian_mat %*%
t(psi_mat) %*%
tes
) /
(N * T)
return(att_var_2)
}
# getPsiRUnfused
#' @title Calculate Psi Vector for Cohort ATT (Unfused Case)
#' @description Computes the `psi_r` vector for a specific cohort `r` when no
#' fusion penalization is applied to the treatment effects (or when calculating
#' SEs as if it were an OLS on selected variables). This vector is used in
#' standard error calculations for the cohort's Average Treatment Effect on
#' the Treated (ATT). In particular, this vector contains the constants
#' `1 / (T - r + 1)` that are multiplied by the cohort treatment effects
#' at each time, then summed, to get the average treatment effect for cohort
#' `r`. The vector `psi_r` places these constants in the correct positions so
#' that the inner product works as intended.
#' @param first_ind_r Integer; the index of the first treatment effect for
#' cohort `r` within the `num_treats` block of treatment effects.
#' @param last_ind_r Integer; the index of the last treatment effect for
#' cohort `r` within the `num_treats` block.
#' @param sel_treat_inds_shifted Integer vector; indices of all selected
#' treatment effects within the `num_treats` block, shifted to start from 1.
#' @param gram_inv Numeric matrix; the inverse of the Gram matrix for the
#' selected treatment effect features.
#' @return A numeric vector `psi_r` of length equal to
#' `length(sel_treat_inds_shifted)`. It contains weights (typically 1/k for
#' k selected effects in cohort r, 0 otherwise) to average the selected
#' treatment effect coefficients for cohort `r`.
#' @details The function identifies which of the `sel_treat_inds_shifted` fall
#' within the range `[first_ind_r, last_ind_r]`. For these identified
#' indices in `psi_r`, it assigns a value of 1. `psi_r` is then normalized by
#' dividing by its sum, effectively creating an averaging vector for the
#' selected treatment effects belonging to cohort `r`. If no treatment effects
#' for cohort `r` were selected, `psi_r` will be a zero vector.
#' @keywords internal
#' @noRd
getPsiRUnfused <- function(
first_ind_r,
last_ind_r,
sel_treat_inds_shifted,
gram_inv
) {
which_inds_ir <- sel_treat_inds_shifted %in% (first_ind_r:last_ind_r)
psi_r <- rep(0, length(sel_treat_inds_shifted))
if (sum(which_inds_ir) > 0) {
inds_r <- which(which_inds_ir)
stopifnot(is.integer(inds_r) | is.numeric(inds_r))
stopifnot(identical(inds_r, as.integer(round(inds_r))))
stopifnot(length(inds_r) >= 1)
stopifnot(length(inds_r) == length(unique(inds_r)))
stopifnot(length(inds_r) <= length(sel_treat_inds_shifted))
stopifnot(all(inds_r %in% 1:length(sel_treat_inds_shifted)))
stopifnot(max(inds_r) <= nrow(gram_inv))
stopifnot(max(inds_r) <= ncol(gram_inv))
stopifnot(min(inds_r) >= 0)
psi_r[inds_r] <- 1
stopifnot(sum(psi_r) > 0)
psi_r <- psi_r / sum(psi_r)
}
return(psi_r)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.