# getTeResults2
#' @title Calculate Overall ATT and its Standard Error (Version 2)
#' @description Computes the overall Average Treatment Effect on the Treated
#' (ATT) by taking a weighted average of cohort-specific ATTs. It also
#' calculates the standard error for this overall ATT, potentially considering
#' variability from estimated cohort probabilities.
#'
#' The overall Average Treatment Effect on the Treated is
#' \deqn{\widehat{\text{ATT}}
#' \;=\;
#' \sum_{r=1}^{R}\widehat{\tau}_{\text{ATT},r}\,
#' \widehat{\pi}_{r\mid\tau},}
#' a weighted mean of cohort-specific ATTs with weights
#' \(\widehat{\pi}_{r\mid\tau}=N_r/N_\tau\).
#' The variance has **two additive pieces**
#'
#' * *Term 1* - randomness from \(\widehat\theta\):
#' \(\sigma^{2}(NT)^{-1}\psi_{\text{att}}^{\!\top}\widehat G^{-1}\psi_{\text{att}}\)
#' with \(\psi_{\text{att}}=\Psi\,\widehat\pi\) and
#' \(\Psi=(\psi_1,\dots,\psi_R)\).
#' * *Term 2* - randomness from \(\widehat\pi\) itself.
#' It equals \(N^{-1}\theta_{\text{sel}}^{\!\top}J^{\!\top}
#' \widehat\Sigma_\pi J\theta_{\text{sel}}\) when cohort counts and outcomes
#' come from independent samples; otherwise it enters a conservative bound
#' that also includes a covariance cross term (see Details).
#' @param sig_eps_sq Numeric scalar; variance of the idiosyncratic error term.
#' @param N Integer; total number of units.
#' @param T Integer; total number of time periods.
#' @param R Integer; total number of treated cohorts.
#' @param num_treats Integer; total number of base treatment effect parameters.
#' @param cohort_tes Numeric vector; estimated ATTs for each of the `R` cohorts.
#' (Simple average of all of the estimated treatment effects for each cohort
#' across time.)
#' @param cohort_probs Numeric vector; weights for each cohort, typically
#' estimated probabilities of belonging to cohort `r` conditional on being
#' treated. Length `R`. Sums to 1.
#' @param psi_mat Numeric matrix; matrix where column `r` is `psi_r` (from
#' `getCohortATTsFinal`). Dimensions: `length(sel_treat_inds_shifted)` x `R`.
#' @param gram_inv Numeric matrix; inverse of the Gram matrix for selected
#' treatment effect features.
#' @param sel_treat_inds_shifted Integer vector; indices of selected treatment
#' effects within the `num_treats` block (shifted to start from 1).
#' @param tes Numeric vector; all `num_treats` estimated treatment effects
#' (original parameterization).
#' @param d_inv_treat_sel Numeric matrix; block of the inverse fusion matrix for
#' selected treatment effects.
#' @param cohort_probs_overall Numeric vector; estimated marginal probabilities
#' of belonging to each treated cohort P(W=r). Length `R`.
#' @param first_inds Integer vector; indices of the first treatment effect for
#' each cohort.
#' @param theta_hat_treat_sel Numeric vector; estimated coefficients in
#' transformed (fused) space for selected treatment effects.
#' @param calc_ses Logical; if `TRUE`, calculate standard errors.
#' @param indep_probs Logical; if `TRUE`, assumes `cohort_probs` (and
#' `cohort_probs_overall`) were estimated from an independent sample, leading
#' to a different SE formula (sum of variances) compared to when they are
#' estimated from the same sample (conservative SE including a covariance term).
#' @return A list containing:
#' \item{att_hat}{Numeric scalar; the estimated overall ATT.}
#' \item{att_te_se}{Numeric scalar; the standard error for `att_hat`. NA if
#' `calc_ses` is `FALSE`.}
#' \item{att_te_se_no_prob}{Numeric scalar; standard error for `att_hat`
#' ignoring variability from estimating cohort probabilities (i.e., only
#' `att_var_1`). NA if `calc_ses` is `FALSE`.}
#' @details The overall ATT (`att_hat`) is `cohort_tes %*% cohort_probs`.
#' If `calc_ses` is `TRUE`:
#' - `att_var_1` (variance from `theta_hat` estimation) is computed using
#' `psi_att = psi_mat %*% cohort_probs` and `gram_inv`.
#' - `att_var_2` (variance from cohort probability estimation) is computed by
#' calling `getSecondVarTermDataApp`.
#' - `att_te_se` is `sqrt(att_var_1 + att_var_2)` if `indep_probs` is `TRUE`,
#' otherwise it's a conservative SE: `sqrt(att_var_1 + att_var_2 + 2*sqrt(att_var_1 * att_var_2))`.
#' - `att_te_se_no_prob` is `sqrt(att_var_1)`.
#'
#' `indep_probs = TRUE` implements the independent-sample
#' variance (`att_var_1 + att_var_2`);
#' `indep_probs = FALSE` returns the conservative bound
#' `att_var_1 + att_var_2 + 2*sqrt(att_var_1*att_var_2)`.
#'
#' All matrices required for Term 2 are produced upstream:
#' * `psi_mat` from [getCohortATTsFinal()]
#' * `d_inv_treat_sel` from the same routine
#' * the Jacobian \(J\) is built in
#' [getSecondVarTermDataApp()] using `d_inv_treat_sel`
#' @seealso [getSecondVarTermDataApp()]
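#' @examples
#' # Toy sketch: with `calc_ses = FALSE` only the point estimate is computed,
#' # so the SE-related inputs can be left as NA placeholders (toy values).
#' getTeResults2(
#'     sig_eps_sq = NA, N = 10, T = 5, R = 2, num_treats = 7,
#'     cohort_tes = c(0.5, 1.0), cohort_probs = c(0.4, 0.6),
#'     psi_mat = NA, gram_inv = NA, sel_treat_inds_shifted = NA,
#'     tes = NA, d_inv_treat_sel = NA, cohort_probs_overall = NA,
#'     first_inds = NA, theta_hat_treat_sel = NA, calc_ses = FALSE
#' )
#' # $att_hat is 0.4 * 0.5 + 0.6 * 1.0 = 0.8; both SEs are NA.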
#' @keywords internal
#' @noRd
getTeResults2 <- function(
# model,
sig_eps_sq,
N,
T,
R,
num_treats,
cohort_tes,
cohort_probs,
psi_mat,
gram_inv,
sel_treat_inds_shifted,
tes,
d_inv_treat_sel,
cohort_probs_overall,
first_inds,
theta_hat_treat_sel,
calc_ses,
indep_probs = FALSE
) {
stopifnot(all(!is.na(cohort_probs)))
# point estimate ATT
att_hat <- as.numeric(cohort_tes %*% cohort_probs)
if (calc_ses) {
stopifnot(all(!is.na(psi_mat)))
stopifnot(!is.na(sig_eps_sq))
stopifnot(all(!is.na(gram_inv)))
# Get ATT standard error
# first variance term: convergence of theta
psi_att <- psi_mat %*% cohort_probs
att_var_1 <- sig_eps_sq *
as.numeric(t(psi_att) %*% gram_inv %*% psi_att) /
(N * T)
stopifnot(!is.na(att_var_1))
# Second variance term: convergence of cohort membership probabilities
att_var_2 <- getSecondVarTermDataApp(
# cohort_probs = cohort_probs,
psi_mat = psi_mat,
sel_treat_inds_shifted = sel_treat_inds_shifted,
tes = tes,
d_inv_treat_sel = d_inv_treat_sel,
cohort_probs_overall = cohort_probs_overall,
first_inds = first_inds,
theta_hat_treat_sel = theta_hat_treat_sel,
num_treats = num_treats,
N = N,
T = T,
R = R,
fused = TRUE
)
stopifnot(!is.na(att_var_2))
# Combine the two variance terms
if (indep_probs) {
att_te_se <- sqrt(att_var_1 + att_var_2)
} else {
att_te_se <- sqrt(
att_var_1 +
att_var_2 +
2 *
sqrt(
att_var_1 * att_var_2
)
)
}
att_te_se_no_prob <- sqrt(att_var_1)
} else {
att_te_se <- NA
att_te_se_no_prob <- NA
}
return(list(
att_hat = att_hat,
att_te_se = att_te_se,
att_te_se_no_prob = att_te_se_no_prob
))
}
# checkFetwfeInputs
#' @title Check Inputs for the main `fetwfe` function
#' @description Validates the inputs provided to the main `fetwfe` function,
#' ensuring they meet type, dimension, and content requirements. Stops
#' execution with an error message if any check fails.
#' @param pdata Dataframe; the panel data set.
#' @param time_var Character; name of the time variable column.
#' @param unit_var Character; name of the unit variable column.
#' @param treatment Character; name of the treatment indicator column.
#' @param response Character; name of the response variable column.
#' @param covs Character vector; names of covariate columns. Default `c()`.
#' @param indep_counts Integer vector or NA; counts for independent cohort data.
#' Default `NA`.
#' @param sig_eps_sq Numeric or NA; variance of idiosyncratic noise. Default `NA`.
#' @param sig_eps_c_sq Numeric or NA; variance of unit-level random effects.
#' Default `NA`.
#' @param lambda.max Numeric or NA; maximum lambda for `gBridge`. Default `NA`.
#' @param lambda.min Numeric or NA; minimum lambda for `gBridge`. Default `NA`.
#' @param nlambda Integer; number of lambdas for `gBridge`. Default `100`.
#' @param q Numeric; Lq penalty exponent for `gBridge`. Default `0.5`.
#' @param verbose Logical; if TRUE, print progress. Default `FALSE`.
#' @param alpha Numeric; significance level for confidence intervals. Default `0.05`.
#' @param add_ridge Logical; if TRUE, add small ridge penalty. Default `FALSE`.
#' @return A list containing:
#' \item{pdata}{The panel data set, as returned by `checkEtwfeInputs()`.}
#' \item{indep_count_data_available}{Logical; `TRUE` if valid `indep_counts`
#' were provided, `FALSE` otherwise.}
#' @details This function performs a series of `stopifnot` checks on each
#' parameter. For example:
#' - `pdata` must be a dataframe with at least 4 rows.
#' - `time_var`, `unit_var`, `treatment`, `response` must be single characters,
#' present in `pdata`, and the corresponding columns must have the correct
#' type (e.g., integer for time, character for unit, 0/1 integer for treatment).
#' - `covs` if provided, must be characters, present in `pdata`, and columns
#' must be numeric, integer, or factor.
#' - `indep_counts` if provided, must be positive integers.
#' - `sig_eps_sq`, `sig_eps_c_sq` if provided, must be non-negative numerics.
#' - `lambda.max`, `lambda.min` if provided, must be valid numerics
#' (`lambda.max > lambda.min >= 0`).
#' - `q` must be in `(0, 2]`.
#' - `alpha` must be in `(0, 1)`.
#' Issues a warning if `alpha > 0.5`.
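#' @examples
#' # Sketch of the gBridge-specific checks layered on top of checkEtwfeInputs()
#' # (hypothetical values that satisfy them):
#' lambda.max <- 1; lambda.min <- 0.01; q <- 0.5
#' stopifnot(lambda.max > 0, lambda.min >= 0, lambda.max > lambda.min)
#' stopifnot(q > 0, q <= 2)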
#' @keywords internal
#' @noRd
checkFetwfeInputs <- function(
pdata,
time_var,
unit_var,
treatment,
response,
covs = c(),
indep_counts = NA,
sig_eps_sq = NA,
sig_eps_c_sq = NA,
lambda.max = NA,
lambda.min = NA,
nlambda = 100,
q = 0.5,
verbose = FALSE,
alpha = 0.05,
add_ridge = FALSE
) {
res <- checkEtwfeInputs(
pdata = pdata,
time_var = time_var,
unit_var = unit_var,
treatment = treatment,
response = response,
covs = covs,
indep_counts = indep_counts,
sig_eps_sq = sig_eps_sq,
sig_eps_c_sq = sig_eps_c_sq,
verbose = verbose,
alpha = alpha,
add_ridge = add_ridge
)
indep_count_data_available <- res$indep_count_data_available
pdata <- res$pdata
rm(res)
if (any(!is.na(lambda.max))) {
stopifnot(is.numeric(lambda.max) | is.integer(lambda.max))
stopifnot(length(lambda.max) == 1)
stopifnot(lambda.max > 0)
}
if (any(!is.na(lambda.min))) {
stopifnot(is.numeric(lambda.min) | is.integer(lambda.min))
stopifnot(length(lambda.min) == 1)
stopifnot(lambda.min >= 0)
if (any(!is.na(lambda.max))) {
stopifnot(lambda.max > lambda.min)
}
}
stopifnot(is.numeric(q) | is.integer(q))
stopifnot(length(q) == 1)
stopifnot(q > 0)
stopifnot(q <= 2)
return(list(
pdata = pdata,
indep_count_data_available = indep_count_data_available
))
}
# getPsiRFused
#' @title Calculate Psi Vector and D-inverse Block for Cohort ATT (Fused Case)
#' @description Computes the `psi_r` vector and the relevant block of the
#' inverse fusion transformation matrix (`d_inv_treat_sel`) for a specific
#' cohort `r`. These are used in standard error calculations for the cohort's
#' Average Treatment Effect on the Treated (ATT) when fusion penalization
#' has been applied.
#'
#' For a treated cohort \(r\) let
#' \(M_r=T-r+1\) be the number of post-treatment periods.
#' The CATT is the *simple average*
#' \(M_r^{-1}\sum_{t=r}^{T}\tau_{\text{ATT}}(r,t)\).
#' In the fused basis each \(\tau_{\text{ATT}}(r,t)\) is a row of
#' \(D^{(2)}(\mathcal R)^{-1}\widehat\theta\),
#' so the averaging weights are the **row-means** of the corresponding block
#' of the inverse fusion matrix.
#' This helper:
#'
#' * extracts those rows (`first_ind_r:last_ind_r`) from
#' `d_inv_treat`,
#' * averages them column-wise to form \(\psi_r\),
#' * returns the block itself (`d_inv_treat_sel`) because later routines need
#' it for probability-variance propagation.
#' @param first_ind_r Integer; the index of the first treatment effect parameter
#' for cohort `r` within the original `num_treats` block (1-based).
#' @param last_ind_r Integer; the index of the last treatment effect parameter
#' for cohort `r` within the original `num_treats` block (1-based).
#' @param sel_treat_inds_shifted Integer vector; indices (1-based) of the
#' treatment effects that were selected by the model, relative to the start
#' of the `num_treats` block. E.g., if original indices 5, 7 were selected
#' from a block starting at index 1, this would be c(5, 7).
#' @param d_inv_treat Numeric matrix; the full inverse two-way fusion
#' transformation matrix for all `num_treats` treatment effects. Dimensions:
#' `num_treats` x `num_treats`.
#' @return A list containing:
#' \item{psi_r}{Numeric vector. It's the column means of the sub-matrix of
#' `d_inv_treat` corresponding to rows `first_ind_r:last_ind_r` and columns
#' specified by `sel_treat_inds_shifted`. If `first_ind_r == last_ind_r`,
#' it's just that specific row of `d_inv_treat` (subsetted by selected columns).}
#' \item{d_inv_treat_sel}{Numeric matrix. The sub-matrix of `d_inv_treat`
#' with rows `first_ind_r:last_ind_r` and columns corresponding to
#' `sel_treat_inds_shifted`.}
#' @details `psi_r` effectively averages the rows of `d_inv_treat` (that correspond
#' to cohort `r`'s treatment effects) for the columns that were actually
#' selected by the model. `d_inv_treat_sel` is this specific block of the
#' `d_inv_treat` matrix.
#'
#' * If only one transformed treatment coefficient was selected, the returned
#' block is coerced to a one-column matrix; if cohort `r` has a single
#' treatment effect, it is coerced to a one-row matrix. Either way the result
#' can be `rbind()`-ed with the other cohorts' blocks without special cases.
#' @seealso [getCohortATTsFinal()]
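#' @examples
#' # Toy sketch: 4 base treatment effects; cohort r occupies rows 2:3 of the
#' # inverse fusion matrix, and transformed coefficients 1 and 3 were selected.
#' d_inv <- matrix(1:16, nrow = 4, ncol = 4)
#' res <- getPsiRFused(
#'     first_ind_r = 2, last_ind_r = 3,
#'     sel_treat_inds_shifted = c(1, 3),
#'     d_inv_treat = d_inv
#' )
#' res$psi_r           # column means of d_inv[2:3, c(1, 3)]
#' res$d_inv_treat_sel # the same 2 x 2 block, returned as a matrix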
#' @keywords internal
#' @noRd
getPsiRFused <- function(
first_ind_r,
last_ind_r,
sel_treat_inds_shifted,
d_inv_treat
) {
stopifnot(length(sel_treat_inds_shifted) >= 1)
stopifnot(last_ind_r >= first_ind_r)
# Get psi vector: the part of D inverse that we need to look at is the
# block corresponding to the treatment effect estimates, which is the
# num_treats x num_treats matrix yielded by
# genInvTwoWayFusionTransformMat(num_treats, first_inds).
# Correct rows of matrix
## psi_r := column-wise mean of those rows (weights for average treatment
## effect for cohort r)
##
## * If |S| > 1, result is a vector length |S|
## * If |S| == 1, treat the scalar mean as length-1 vector
if (last_ind_r > first_ind_r) {
if (length(sel_treat_inds_shifted) > 1) {
psi_r <- colMeans(d_inv_treat[
first_ind_r:last_ind_r,
sel_treat_inds_shifted
])
} else {
psi_r <- mean(d_inv_treat[
first_ind_r:last_ind_r,
sel_treat_inds_shifted
])
}
if (length(sel_treat_inds_shifted) == 1) {
# Need to coerce this object to be a matrix with one column so it
# works smoothly with rbind() later
d_inv_treat_sel <- matrix(
d_inv_treat[first_ind_r:last_ind_r, sel_treat_inds_shifted],
ncol = 1
)
} else {
d_inv_treat_sel <- d_inv_treat[
first_ind_r:last_ind_r,
sel_treat_inds_shifted
]
}
} else {
psi_r <- d_inv_treat[first_ind_r:last_ind_r, sel_treat_inds_shifted]
# Since first_ind_r and last_ind_r are the same, need to coerce this
# object to be a matrix with one row so that it works smoothly with
# rbind() later
## Block of D^{-1} used later for probability-variance term
d_inv_treat_sel <- matrix(
d_inv_treat[first_ind_r:last_ind_r, sel_treat_inds_shifted],
nrow = 1
)
}
stopifnot(is.matrix(d_inv_treat_sel))
return(list(psi_r = psi_r, d_inv_treat_sel = d_inv_treat_sel))
}
# getBetaBIC
#' @title Select Optimal Coefficients using BIC from gBridge Fit
#' @description From a `gBridge` fit object (which contains solutions for a
#' path of lambda penalties), this function selects the optimal set of
#' coefficients based on the Bayesian Information Criterion (BIC). It also
#' returns the chosen lambda index and the size of the selected model.
#' Coefficients are returned on their original scale.
#' @param fit A `gBridge` fit object, typically the output from `grpreg::gBridge()`.
#' @param N Integer; the total number of unique units.
#' @param T Integer; the total number of time periods.
#' @param p Integer; the total number of predictor variables (excluding intercept)
#' in the model matrix `X_mod`.
#' @param X_mod Numeric matrix; the design matrix (potentially transformed for
#' FETWFE, but **not** yet GLS-transformed or centered/scaled by `my_scale`)
#' corresponding to `y`. It is used here to calculate the SSE on the original
#' scale of `y`.
#' @param y Numeric vector; the original response variable (before GLS transform and centering)
#' used to fit the model. Length `N*T`.
#' @param scale_center Numeric vector; the centering values used to scale `X_mod`
#' before fitting `gBridge`. Length `p`.
#' @param scale_scale Numeric vector; the scaling values used to scale `X_mod`
#' before fitting `gBridge`. Length `p`.
#' @return A list containing:
#' \item{theta_hat}{Numeric vector of length `p+1`. The selected coefficients
#' (including intercept at `theta_hat[1]`) on their original data scale.}
#' \item{lambda_star_ind}{Integer; the index of the lambda value in `fit$lambda`
#' that resulted in the best BIC.}
#' \item{lambda_star_model_size}{Integer; the number of non-zero coefficients
#' (excluding intercept) in the selected model.}
#' @details The function iterates through each lambda in `fit$lambda`. For each:
#' 1. It extracts the intercept (`eta_s`) and slopes (`beta_s`) on the scaled data.
#' 2. It converts these coefficients back to the original data scale using
#' `scale_center` and `scale_scale`.
#' 3. It calculates the Sum of Squared Errors (SSE) using `sse_bridge()` with
#' the original-scale coefficients, original `y`, and `X_mod`.
#' 4. It computes the BIC value: `N*T*log(SSE/(N*T)) + s*log(N*T)`, where `s`
#' is the number of non-zero coefficients (including intercept).
#' The set of coefficients corresponding to the minimum BIC is chosen. If multiple
#' lambdas yield the same minimum BIC, the one resulting in the smallest model
#' size (fewest non-zero coefficients) is selected.
#' The final returned `theta_hat` also has its slopes and intercept adjusted back to the original scale.
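#' @examples
#' # Sketch of the per-lambda back-scaling step with toy numbers (a real call
#' # requires a fitted `gBridge` object, not shown here):
#' scale_center <- c(1, 2); scale_scale <- c(2, 4)
#' eta_s <- 3; beta_s <- c(0.5, -0.2)                # coefficients on scaled data
#' beta_orig <- beta_s / scale_scale                  # original-scale slopes
#' eta_orig <- eta_s - sum(scale_center * beta_orig)  # original-scale intercept
#' # The BIC for a model with s nonzero coefficients and in-sample error
#' # mse_hat is then N * T * log(mse_hat) + s * log(N * T).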
#' @keywords internal
#' @noRd
getBetaBIC <- function(fit, N, T, p, X_mod, y, scale_center, scale_scale) {
stopifnot(length(y) == N * T)
n_lambda <- ncol(fit$beta)
BICs <- rep(as.numeric(NA), n_lambda)
model_sizes <- rep(as.integer(NA), n_lambda)
stopifnot(nrow(fit$beta) == p + 1)
for (k in 1:n_lambda) {
## --- extract coefficients on the scaled data -------------
eta_s <- fit$beta[1, k] # intercept (scaled space)
beta_s <- fit$beta[2:(p + 1), k] # slopes (scaled space)
## --- convert to original scale ---------------------------
beta_hat_k <- beta_s / scale_scale
eta_hat_k <- eta_s - sum(scale_center * beta_hat_k)
# Residual sum of squares
mse_hat <- sse_bridge(
eta_hat_k,
beta_hat_k,
y = y,
X_mod = X_mod,
N = N,
T = T
)
# Number of fitted coefficients
s <- sum(fit$beta[, k] != 0)
model_sizes[k] <- s
stopifnot(is.na(BICs[k]))
BICs[k] <- N * T * log(mse_hat) + s * log(N * T)
}
lambda_star_ind <- which(BICs == min(BICs))
if (length(lambda_star_ind) == 1) {
lambda_star_final_ind <- lambda_star_ind
theta_hat <- fit$beta[, lambda_star_final_ind]
} else {
# Choose smallest model size among models with equal BIC
model_sizes_star <- model_sizes[lambda_star_ind]
min_model_size_ind <- which(model_sizes_star == min(model_sizes_star))
lambda_star_final_ind <- lambda_star_ind[min_model_size_ind][1]
stopifnot(length(lambda_star_final_ind) == 1)
theta_hat <- fit$beta[, lambda_star_final_ind]
}
stopifnot(length(lambda_star_final_ind) == 1)
stopifnot(length(theta_hat) == p + 1)
stopifnot(all(!is.na(theta_hat)))
#
# Rescale coefficients back to the original scale.
# The coefficient vector theta_hat is of length (p+1), with the first entry
# as the intercept.
# For predictors: original coefficient = beta_scaled / scale_j.
# The intercept is adjusted as: intercept_original =
# intercept_scaled - sum(center_j * (beta_scaled/scale_j)).
#
adjusted_theta_hat <- theta_hat
if (length(scale_scale) != p) {
stop("Length of scale_scale does not match number of predictors (p).")
}
for (j in 2:(p + 1)) {
adjusted_theta_hat[j] <- theta_hat[j] / scale_scale[j - 1]
}
adjusted_theta_hat[1] <- theta_hat[1] -
sum(scale_center * (theta_hat[2:(p + 1)] / scale_scale))
return(list(
theta_hat = adjusted_theta_hat,
lambda_star_ind = lambda_star_final_ind,
lambda_star_model_size = model_sizes[lambda_star_final_ind]
))
}
#' Core Estimation Logic for Fused Extended Two-Way Fixed Effects
#'
#' @description
#' This function implements the core estimation steps of the FETWFE methodology.
#' It takes a pre-processed design matrix and response, applies transformations
#' for fusion penalties, handles variance components, performs bridge regression,
#' selects the optimal penalty via BIC, and calculates treatment effects and
#' their standard errors.
#'
#' @param X_ints The design matrix with all fixed effects, covariates, treatment
#' dummies, and their interactions, as produced by `prepXints`.
#' @param y The centered response vector, as produced by `prepXints`.
#' @param in_sample_counts An integer vector named with cohort identifiers
#' (including "Never_treated"), indicating the number of units in each cohort
#' within the data used for estimation.
#' @param N The number of unique units.
#' @param T The number of unique time periods.
#' @param d The number of covariates.
#' @param p The total number of columns in `X_ints` (total parameters).
#' @param num_treats The total number of unique treatment effect parameters.
#' @param first_inds A numeric vector indicating the starting column index for
#' each cohort's first treatment effect within the treatment effect block.
#' @param indep_counts (Optional) An integer vector of counts for how many units
#' appear in the untreated cohort plus each of the other `R` cohorts, derived
#' from an independent dataset. Used for asymptotically exact standard errors for
#' the ATT. Default is `NA`.
#' @param sig_eps_sq (Optional) Numeric; the known variance of the observation-level
#' IID noise. If `NA`, it will be estimated. Default is `NA`.
#' @param sig_eps_c_sq (Optional) Numeric; the known variance of the unit-level IID
#' noise (random effects). If `NA`, it will be estimated. Default is `NA`.
#' @param lambda.max (Optional) Numeric; the maximum `lambda` penalty parameter for
#' the bridge regression grid search. If `NA`, `grpreg` selects it. Default is `NA`.
#' @param lambda.min (Optional) Numeric; the minimum `lambda` penalty parameter.
#' If `NA`, `grpreg` selects it. Default is `NA`.
#' @param nlambda (Optional) Integer; the number of `lambda` values in the grid.
#' Default is 100.
#' @param q (Optional) Numeric; the power of the Lq penalty for fusion regularization
#' (0 < q <= 2); `q = 1` corresponds to the lasso penalty and `q = 2` to ridge.
#' Default is 0.5.
#' @param verbose Logical; if `TRUE`, prints progress messages. Default is `FALSE`.
#' @param alpha Numeric; significance level for confidence intervals (e.g., 0.05 for
#' 95% CIs). Default is 0.05.
#' @param add_ridge (Optional) Logical; if `TRUE`, adds a small L2 penalty to
#' the untransformed coefficients to stabilize estimation. Default is `FALSE`.
#'
#' @details
#' The function executes the following main steps:
#' \enumerate{
#' \item **Input Checks:** Validates the provided parameters.
#' \item **Coordinate Transformation:** Calls `transformXintImproved` to transform
#' `X_ints` into `X_mod`. This transformation allows a standard bridge
#' regression penalty on `X_mod` to achieve the desired fusion penalties
#' on the original coefficients.
#' \item **Variance Component Handling:**
#' \itemize{
#' \item If `sig_eps_sq` or `sig_eps_c_sq` are `NA`, `estOmegaSqrtInv` is
#' called to estimate them from the data using a fixed-effects ridge
#' regression.
#' \item Constructs the covariance matrix `Omega` and its inverse square
#' root `Omega_sqrt_inv`.
#' \item Pre-multiplies `y` and `X_mod` by `sqrt(sig_eps_sq) * Omega_sqrt_inv`
#' (via Kronecker product) to obtain `y_final` and `X_final`, effectively
#' performing a GLS transformation.
#' }
#' \item **Optional Ridge Penalty:** If `add_ridge` is `TRUE`, `X_final_scaled`
#' (scaled version of `X_final`) and `y_final` are augmented to add an L2
#' penalty on the *original* (untransformed) coefficient scale. This involves
#' using `genFullInvFusionTransformMat` to get the inverse of the overall
#' fusion transformation matrix.
#' \item **Cohort Probabilities:** Calculates cohort membership probabilities
#' conditional on being treated, using `in_sample_counts` and `indep_counts`
#' if available.
#' \item **Bridge Regression:** Fits a bridge regression model using
#' `grpreg::gBridge` on `X_final_scaled` and `y_final` with the specified `q`
#' and lambda sequence.
#' \item **Coefficient Selection (BIC):** Calls `getBetaBIC` to select the
#' optimal `lambda` using BIC and retrieve the corresponding estimated
#' coefficients (`theta_hat` in the transformed space).
#' \item **Handle Zero-Feature Case:** If BIC selects a model with zero features,
#' treatment effects are set to zero.
#' \item **Coefficient Untransformation:** Calls `untransformCoefImproved` to
#' transform `theta_hat` back to the original coefficient space, yielding
#' `beta_hat`. If `add_ridge` was true, `beta_hat` is scaled.
#' \item **Treatment Effect Calculation:**
#' \itemize{
#' \item Extracts cohort-specific average treatment effects (CATTs) from
#' `beta_hat`.
#' \item Calls `getCohortATTsFinal` to calculate CATT point estimates,
#' standard errors (if `q < 1`), and confidence intervals. This involves
#' computing the Gram matrix and related quantities.
#' }
#' \item **Overall ATT Calculation:** Calls `getTeResults2` to calculate the
#' overall average treatment effect on the treated (ATT) and its standard
#' error, using both in-sample probabilities and independent probabilities
#' if `indep_counts` were provided.
#' }
#' The standard errors for CATTs are asymptotically exact. For ATT, if
#' `indep_counts` are provided, the SE is asymptotically exact; otherwise, it's
#' asymptotically conservative (if `q < 1`).
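#'
#' As an illustration of the GLS weighting in the variance-component step, a
#' minimal sketch (assuming a per-unit covariance block of the form
#' \eqn{\Omega = \sigma_\epsilon^2 I_T + \sigma_c^2 \mathbf{1}\mathbf{1}^\top};
#' the actual construction lives in upstream helpers not shown here):
#' \preformatted{
#' T <- 3; sig_eps_sq <- 1; sig_eps_c_sq <- 0.5
#' Omega <- sig_eps_sq * diag(T) + sig_eps_c_sq * matrix(1, T, T)
#' eig <- eigen(Omega, symmetric = TRUE)
#' Omega_sqrt_inv <- eig$vectors %*% diag(1 / sqrt(eig$values)) %*% t(eig$vectors)
#' # y_final and X_final are then obtained by applying
#' # sqrt(sig_eps_sq) * kronecker(diag(N), Omega_sqrt_inv) to y and X_mod.
#' }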
#'
#' @return A list containing detailed estimation results:
#' \item{in_sample_att_hat}{Estimated overall ATT using in-sample cohort probabilities.}
#' \item{in_sample_att_se}{Standard error for `in_sample_att_hat`.}
#' \item{in_sample_att_se_no_prob}{SE for `in_sample_att_hat` ignoring variability from estimating cohort probabilities.}
#' \item{indep_att_hat}{Estimated overall ATT using `indep_counts` cohort probabilities (NA if `indep_counts` not provided).}
#' \item{indep_att_se}{Standard error for `indep_att_hat` (NA if not applicable).}
#' \item{catt_hats}{A named vector of estimated CATTs for each cohort.}
#' \item{catt_ses}{A named vector of SEs for `catt_hats` (NA if `q >= 1`).}
#' \item{catt_df}{A data.frame summarizing CATTs, SEs, and confidence intervals.}
#' \item{theta_hat}{The vector of estimated coefficients in the *transformed* (fused) space, including the intercept as the first element.}
#' \item{beta_hat}{The vector of estimated coefficients in the *original* space (after untransforming `theta_hat`, excluding intercept).}
#' \item{treat_inds}{Indices in `beta_hat` corresponding to base treatment effects.}
#' \item{treat_int_inds}{Indices in `beta_hat` corresponding to treatment-covariate interactions.}
#' \item{cohort_probs}{Estimated cohort probabilities conditional on being treated, from `in_sample_counts`.}
#' \item{indep_cohort_probs}{Estimated cohort probabilities from `indep_counts` (NA if not provided).}
#' \item{sig_eps_sq}{The (possibly estimated) variance of observation-level noise.}
#' \item{sig_eps_c_sq}{The (possibly estimated) variance of unit-level random effects.}
#' \item{lambda.max}{The maximum lambda value used in `grpreg`.}
#' \item{lambda.max_model_size}{Model size for `lambda.max`.}
#' \item{lambda.min}{The minimum lambda value used in `grpreg`.}
#' \item{lambda.min_model_size}{Model size for `lambda.min`.}
#' \item{lambda_star}{The lambda value selected by BIC.}
#' \item{lambda_star_model_size}{Model size for `lambda_star`.}
#' \item{X_ints}{The original input design matrix from `prepXints`.}
#' \item{y}{The original input centered response vector from `prepXints`.}
#' \item{X_final}{The design matrix after fusion transformation and GLS weighting.}
#' \item{y_final}{The response vector after GLS weighting.}
#' \item{N, T, R, d, p}{Dimensions used in estimation.}
#' @keywords internal
#' @noRd
fetwfe_core <- function(
X_ints,
y,
in_sample_counts,
N,
T,
d,
p,
num_treats,
first_inds,
indep_counts = NA,
sig_eps_sq = NA,
sig_eps_c_sq = NA,
lambda.max = NA,
lambda.min = NA,
nlambda = 100,
q = 0.5,
verbose = FALSE,
alpha = 0.05,
add_ridge = FALSE
) {
ret <- check_etwfe_core_inputs(
in_sample_counts = in_sample_counts,
N = N,
T = T,
sig_eps_sq = sig_eps_sq,
sig_eps_c_sq = sig_eps_c_sq,
indep_counts = indep_counts,
verbose = verbose,
alpha = alpha,
add_ridge = add_ridge
)
R <- ret$R
c_names <- ret$c_names
indep_count_data_available <- ret$indep_count_data_available
rm(ret)
if (any(!is.na(lambda.max))) {
stopifnot(is.numeric(lambda.max) | is.integer(lambda.max))
stopifnot(length(lambda.max) == 1)
stopifnot(lambda.max >= 0)
}
if (any(!is.na(lambda.min))) {
stopifnot(is.numeric(lambda.min) | is.integer(lambda.min))
stopifnot(length(lambda.min) == 1)
stopifnot(lambda.min >= 0)
if (any(!is.na(lambda.max))) {
stopifnot(lambda.max >= lambda.min)
}
}
stopifnot(is.numeric(q) | is.integer(q))
stopifnot(length(q) == 1)
stopifnot(q > 0)
stopifnot(q <= 2)
#
#
# Step 1: change coordinates of data so that regular bridge regression
# penalty applied to transformed dataframe results in FETWFE penalty applied
# to original data
#
#
if (verbose) {
message("Transforming matrix...")
}
# Transform matrix (change of coordinates so that fitting regular bridge
# regression results in FETWFE fusion penalties)
X_mod <- transformXintImproved(
X_int = X_ints,
N = N,
T = T,
R = R,
d = d,
num_treats = num_treats,
first_inds = first_inds
)
res <- prep_for_etwfe_regresion(
verbose = verbose,
sig_eps_sq = sig_eps_sq,
sig_eps_c_sq = sig_eps_c_sq,
y = y,
X_ints = X_ints,
X_mod = X_mod,
N = N,
T = T,
R = R,
d = d,
p = p,
num_treats = num_treats,
add_ridge = add_ridge,
first_inds = first_inds,
in_sample_counts = in_sample_counts,
indep_count_data_available = indep_count_data_available,
indep_counts = indep_counts
)
X_final_scaled <- res$X_final_scaled
y_final <- res$y_final
scale_center <- res$scale_center
scale_scale <- res$scale_scale
cohort_probs <- res$cohort_probs
cohort_probs_overall <- res$cohort_probs_overall
indep_cohort_probs <- res$indep_cohort_probs
indep_cohort_probs_overall <- res$indep_cohort_probs_overall
X_final <- res$X_final
lambda_ridge <- res$lambda_ridge
sig_eps_sq <- res$sig_eps_sq
sig_eps_c_sq <- res$sig_eps_c_sq
rm(res)
#
#
# Step 4: estimate bridge regression and extract fitted coefficients
#
#
# Estimate bridge regression
if (verbose) {
message("Estimating bridge regression...")
t0 <- Sys.time()
}
if (!is.na(lambda.max) & !is.na(lambda.min)) {
fit <- grpreg::gBridge(
X = X_final_scaled,
y = y_final,
gamma = q,
lambda.max = lambda.max,
lambda.min = lambda.min,
nlambda = nlambda
)
} else if (!is.na(lambda.max)) {
fit <- grpreg::gBridge(
X = X_final_scaled,
y = y_final,
gamma = q,
lambda.max = lambda.max,
nlambda = nlambda
)
} else if (!is.na(lambda.min)) {
fit <- grpreg::gBridge(
X = X_final_scaled,
y = y_final,
gamma = q,
lambda.min = lambda.min,
nlambda = nlambda
)
} else {
fit <- grpreg::gBridge(
X = X_final_scaled,
y = y_final,
gamma = q,
nlambda = nlambda
)
}
if (verbose) {
message("Done! Time for estimation:")
message(Sys.time() - t0)
}
# For diagnostics later, store largest and smallest lambda, as well as
# corresponding smallest and largest model sizes, to return.
lambda.max <- max(fit$lambda)
lambda.max_model_size <- sum(fit$beta[, ncol(fit$beta)] != 0)
lambda.min <- min(fit$lambda)
lambda.min_model_size <- sum(fit$beta[, 1] != 0)
# Select a single set of fitted coefficients by using BIC to choose among
# the penalties that were fitted
res <- getBetaBIC(
fit = fit,
N = N,
T = T,
p = p,
X_mod = X_mod,
y = y,
scale_center = scale_center,
scale_scale = scale_scale
)
theta_hat <- res$theta_hat # This includes intercept
lambda_star_ind <- res$lambda_star_ind
lambda_star_model_size <- res$lambda_star_model_size
lambda_star <- fit$lambda[lambda_star_ind]
# c_names <- names(in_sample_counts)[2:(R + 1)] # Moved definition up
stopifnot(length(c_names) == R)
# Indices corresponding to base treatment effects
treat_inds <- getTreatInds(R = R, T = T, d = d, num_treats = num_treats)
if (d > 0) {
stopifnot(max(treat_inds) + 1 <= p)
stopifnot(
max(treat_inds) == R + T - 1 + d + R * d + (T - 1) * d + num_treats
)
treat_int_inds <- (max(treat_inds) + 1):p
stopifnot(length(treat_int_inds) == num_treats * d)
} else {
stopifnot(max(treat_inds) <= p)
stopifnot(max(treat_inds) == R + T - 1 + num_treats)
treat_int_inds <- c()
}
# Handle edge case where no features are selected (model_size includes intercept)
if (lambda_star_model_size <= 1 && all(theta_hat[2:(p + 1)] == 0)) {
# only intercept might be non-zero
if (verbose) {
message(
"No features selected (or only intercept); all treatment effects estimated to be 0."
)
}
if (q < 1) {
ret_se <- 0
} else {
ret_se <- NA
}
catt_df_to_ret <- data.frame(
Cohort = c_names,
`Estimated TE` = rep(0, R),
SE = rep(ret_se, R),
ConfIntLow = rep(ret_se, R),
ConfIntHigh = rep(ret_se, R),
check.names = FALSE
)
return(list(
in_sample_att_hat = 0,
in_sample_att_se = ret_se,
in_sample_att_se_no_prob = ret_se,
indep_att_hat = 0,
indep_att_se = ret_se,
catt_hats = setNames(rep(0, R), c_names),
catt_ses = setNames(rep(ret_se, R), c_names),
catt_df = catt_df_to_ret,
theta_hat = theta_hat, # Includes intercept
beta_hat = rep(0, p), # Slopes are all zero
treat_inds = treat_inds,
treat_int_inds = treat_int_inds,
cohort_probs = cohort_probs,
indep_cohort_probs = indep_cohort_probs,
sig_eps_sq = sig_eps_sq,
sig_eps_c_sq = sig_eps_c_sq,
lambda.max = lambda.max,
lambda.max_model_size = lambda.max_model_size,
lambda.min = lambda.min,
lambda.min_model_size = lambda.min_model_size,
lambda_star = lambda_star,
lambda_star_model_size = lambda_star_model_size,
X_ints = X_ints,
y = y,
X_final = X_final,
y_final = y_final,
N = N,
T = T,
R = R,
d = d,
p = p,
calc_ses = q < 1
))
}
# estimated coefficients (slopes in transformed space)
theta_hat_slopes <- theta_hat[2:(p + 1)]
# Indices of selected features in transformed feature space (among slopes)
sel_feat_inds <- which(theta_hat_slopes != 0)
sel_treat_inds <- sel_feat_inds[sel_feat_inds %in% treat_inds]
stopifnot(length(sel_treat_inds) == length(unique(sel_treat_inds)))
stopifnot(length(sel_treat_inds) <= length(sel_feat_inds))
stopifnot(length(sel_treat_inds) <= length(treat_inds))
stopifnot(is.integer(sel_treat_inds) | is.numeric(sel_treat_inds))
# 1. Get theta_hat_slopes[treat_inds] -> these are the transformed treatment coefficients
# 2. Find which of these are non-zero: `which(theta_hat_slopes[treat_inds] != 0)` -> these are
# indices *within* the treat_inds block. Let's call this `sel_treat_inds_relative_to_block`.
# `sel_treat_inds` itself contains the global indices of selected treatment features.
# So `theta_hat_slopes[sel_treat_inds]` are the non-zero transformed treatment coefs.
theta_hat_treat_block_transformed <- theta_hat_slopes[treat_inds]
sel_treat_inds_shifted <- which(theta_hat_treat_block_transformed != 0) # these are 1 to num_treats
stopifnot(all(sel_treat_inds_shifted >= 1))
stopifnot(all(sel_treat_inds_shifted <= num_treats))
# Handle edge case where no treatment features selected
if (length(sel_treat_inds_shifted) == 0) {
if (verbose) {
message(
"No treatment features selected; all treatment effects estimated to be 0."
)
}
if (q < 1) {
ret_se <- 0
} else {
ret_se <- NA
}
catt_df_to_ret <- data.frame(
Cohort = c_names,
`Estimated TE` = rep(0, R),
SE = rep(ret_se, R),
ConfIntLow = rep(ret_se, R),
ConfIntHigh = rep(ret_se, R),
check.names = FALSE
)
# Need to untransform theta_hat_slopes to get beta_hat for consistency
beta_hat_early_exit <- untransformCoefImproved(
beta_hat_mod = theta_hat_slopes, # Pass slopes only
first_inds = first_inds,
T = T,
R = R,
p = p,
d = d,
num_treats = num_treats
)
if (add_ridge) {
lambda_ridge <- ifelse(is.na(lambda_ridge), 0, lambda_ridge)
beta_hat_early_exit <- beta_hat_early_exit * (1 + lambda_ridge)
}
return(list(
in_sample_att_hat = 0,
in_sample_att_se = ret_se,
in_sample_att_se_no_prob = ret_se,
indep_att_hat = 0,
indep_att_se = ret_se,
catt_hats = setNames(rep(0, R), c_names),
catt_ses = setNames(rep(ret_se, R), c_names),
catt_df = catt_df_to_ret,
theta_hat = theta_hat, # Full theta_hat with intercept
beta_hat = beta_hat_early_exit, # Untransformed slopes
treat_inds = treat_inds,
treat_int_inds = treat_int_inds,
cohort_probs = cohort_probs,
indep_cohort_probs = indep_cohort_probs,
sig_eps_sq = sig_eps_sq,
sig_eps_c_sq = sig_eps_c_sq,
lambda.max = lambda.max,
lambda.max_model_size = lambda.max_model_size,
lambda.min = lambda.min,
lambda.min_model_size = lambda.min_model_size,
lambda_star = lambda_star,
lambda_star_model_size = lambda_star_model_size,
X_ints = X_ints,
y = y,
X_final = X_final,
y_final = y_final,
N = N,
T = T,
R = R,
d = d,
p = p,
calc_ses = q < 1
))
}
#
#
# Step 5: transform estimated coefficients back to original feature
# space
#
#
beta_hat <- untransformCoefImproved(
beta_hat_mod = theta_hat_slopes, # Pass slopes only
first_inds = first_inds,
T = T,
R = R,
p = p,
d = d,
num_treats = num_treats
)
# If using ridge regularization, multiply the "naive" estimated coefficients
# by 1 + lambda_ridge, similar to suggestion in original elastic net paper.
if (add_ridge) {
lambda_ridge <- ifelse(is.na(lambda_ridge), 0, lambda_ridge)
beta_hat <- beta_hat * (1 + lambda_ridge)
}
# Get actual estimated treatment effects (in original, untransformed space)
tes <- beta_hat[treat_inds]
stopifnot(length(tes) == num_treats)
# Checks based on transformed coefficients (theta_hat_slopes)
stopifnot(all(theta_hat_slopes[treat_inds][sel_treat_inds_shifted] != 0))
stopifnot(all(
theta_hat_slopes[treat_inds][setdiff(
1:num_treats,
sel_treat_inds_shifted
)] ==
0
))
stopifnot(length(first_inds) == R)
stopifnot(max(first_inds) <= num_treats)
stopifnot(length(sel_feat_inds) > 0) # sel_feat_inds are indices in theta_hat_slopes
stopifnot(length(sel_treat_inds_shifted) > 0) # sel_treat_inds_shifted are indices within the treat_inds block of theta_hat_slopes
#
#
# Step 6: calculate cohort-specific treatment effects and standard
# errors
#
#
res <- getCohortATTsFinal(
X_final = X_final, # This is X_mod * GLS_transform_matrix
sel_feat_inds = sel_feat_inds, # Indices of non-zero elements in theta_hat_slopes
treat_inds = treat_inds, # Global indices for treatment effects
num_treats = num_treats,
first_inds = first_inds,
sel_treat_inds_shifted = sel_treat_inds_shifted, # Indices (1 to num_treats) of non-zero transformed treat. coefs.
c_names = c_names,
tes = tes, # Untransformed treatment effect estimates (beta_hat[treat_inds])
sig_eps_sq = sig_eps_sq,
R = R,
N = N,
T = T,
fused = TRUE,
calc_ses = q < 1,
p = p, # Total number of original parameters (columns in X_ints)
alpha = alpha
)
cohort_te_df <- res$cohort_te_df
cohort_tes <- res$cohort_tes
cohort_te_ses <- res$cohort_te_ses
psi_mat <- res$psi_mat
gram_inv <- res$gram_inv
d_inv_treat_sel <- res$d_inv_treat_sel
calc_ses <- res$calc_ses
rm(res)
if (calc_ses) {
stopifnot(nrow(d_inv_treat_sel) == num_treats)
stopifnot(ncol(d_inv_treat_sel) == length(sel_treat_inds_shifted))
}
#
#
# Step 7: calculate overall average treatment effect on treated units
#
#
# Get overall estimated ATT
# theta_hat_treat_sel needs to be the selected non-zero *transformed* treatment coefficients
# sel_treat_inds contains global indices of selected transformed features that are treatment effects
theta_hat_treat_sel_for_att <- theta_hat_slopes[sel_treat_inds]
in_sample_te_results <- getTeResults2(
sig_eps_sq = sig_eps_sq,
N = N,
T = T,
R = R,
num_treats = num_treats,
cohort_tes = cohort_tes, # CATTs (point estimates)
cohort_probs = cohort_probs, # In-sample pi_r | treated
psi_mat = psi_mat,
gram_inv = gram_inv,
sel_treat_inds_shifted = sel_treat_inds_shifted,
tes = tes, # Untransformed treatment effect estimates beta_hat[treat_inds]
d_inv_treat_sel = d_inv_treat_sel,
cohort_probs_overall = cohort_probs_overall, # In-sample pi_r (unconditional on treated)
first_inds = first_inds,
theta_hat_treat_sel = theta_hat_treat_sel_for_att, # Selected non-zero transformed treat coefs
calc_ses = calc_ses,
indep_probs = FALSE
)
in_sample_att_hat <- in_sample_te_results$att_hat
in_sample_att_se <- in_sample_te_results$att_te_se
in_sample_att_se_no_prob <- in_sample_te_results$att_te_se_no_prob
if ((q < 1) & calc_ses) {
stopifnot(!is.na(in_sample_att_se))
}
if (indep_count_data_available) {
indep_te_results <- getTeResults2(
sig_eps_sq = sig_eps_sq,
N = N,
T = T,
R = R,
num_treats = num_treats,
cohort_tes = cohort_tes,
cohort_probs = indep_cohort_probs, # indep pi_r | treated
psi_mat = psi_mat,
gram_inv = gram_inv,
sel_treat_inds_shifted = sel_treat_inds_shifted,
tes = tes,
d_inv_treat_sel = d_inv_treat_sel,
cohort_probs_overall = indep_cohort_probs_overall, # indep pi_r (unconditional)
first_inds = first_inds,
theta_hat_treat_sel = theta_hat_treat_sel_for_att,
calc_ses = calc_ses,
indep_probs = TRUE
)
indep_att_hat <- indep_te_results$att_hat
indep_att_se <- indep_te_results$att_te_se
} else {
indep_att_hat <- NA
indep_att_se <- NA
}
return(list(
in_sample_att_hat = in_sample_att_hat,
in_sample_att_se = in_sample_att_se,
in_sample_att_se_no_prob = in_sample_att_se_no_prob,
indep_att_hat = indep_att_hat,
indep_att_se = indep_att_se,
catt_hats = cohort_tes, # Already named if applicable from getCohortATTsFinal
catt_ses = cohort_te_ses, # Already named if applicable
catt_df = cohort_te_df,
theta_hat = theta_hat, # Full theta_hat (with intercept)
beta_hat = beta_hat, # Untransformed slopes
treat_inds = treat_inds,
treat_int_inds = treat_int_inds,
cohort_probs = cohort_probs,
indep_cohort_probs = indep_cohort_probs,
sig_eps_sq = sig_eps_sq,
sig_eps_c_sq = sig_eps_c_sq,
lambda.max = lambda.max,
lambda.max_model_size = lambda.max_model_size,
lambda.min = lambda.min,
lambda.min_model_size = lambda.min_model_size,
lambda_star = lambda_star,
lambda_star_model_size = lambda_star_model_size,
X_ints = X_ints,
y = y,
X_final = X_final,
y_final = y_final,
N = N,
T = T,
R = R,
d = d,
p = p,
calc_ses = calc_ses
))
}
#-------------------------------------------------------------------------------
# Transformation Matrices and Coefficient Transformations
#-------------------------------------------------------------------------------
#' Transform a Full Design Matrix by \( \boldsymbol D_N^{-1} \)
#'
#' @description
#' Takes the *raw* stacked panel design matrix
#' \eqn{\tilde{\boldsymbol Z}\in\mathbb R^{NT\times p}}
#' and post-multiplies it by the block-diagonal inverse fusion matrix
#' \(\boldsymbol D_N^{-1}\) from Lemma 3:
#' \deqn{
#' \boldsymbol D_N^{-1}
#' = \operatorname{diag}\!\bigl(
#' (D^{(1)}(R))^{-1},\;
#' (D^{(1)}(T-1))^{-1},\;
#' I_{d},\;
#' I_{d}\otimes(D^{(1)}(R))^{-1},\;
#' I_{d}\otimes(D^{(1)}(T-1))^{-1},\;
#' (D^{(2)}(\mathcal R))^{-1},\;
#' I_{d}\otimes(D^{(2)}(\mathcal R))^{-1}
#' \bigr).
#' }
#' The result is a matrix ready for vanilla bridge regression (Lasso,
#' elastic-net, \eqn{\ell_q} etc.), where the penalty on the transformed
#' coefficients reproduces the complex fusion penalty on the original ones.
#'
#' @details
#' The columns of `X_int` **must** appear in the order:
#' \enumerate{
#' \item Cohort fixed-effects (length \eqn{R})
#' \item Time fixed-effects (length \eqn{T-1})
#' \item Main covariates (length \eqn{d})
#' \item Covariate \(\times\) cohort interactions (length \eqn{dR})
#' \item Covariate \(\times\) time interactions (length \eqn{d(T-1)})
#' \item Base treatment effects (length `num_treats`)
#' \item Covariate \(\times\) treatment interactions (length `d*num_treats`)
#' }
#' Each block is transformed exactly by the corresponding diagonal block of
#' \(\boldsymbol D_N^{-1}\) using helper functions
#' `genBackwardsInvFusionTransformMat()` and
#' `genInvTwoWayFusionTransformMat()`.
#'
#' Side-effect `stopifnot()` guards verify both the expected column order and
#' the absence of `NA`s after transformation.
#'
#' @param X_int Numeric matrix \eqn{NT\times p}. Original design matrix.
#' @param N,T,R Integers. Panel dimensions and number of treated cohorts.
#' @param d Integer. Number of time-invariant covariates.
#' @param num_treats Integer. Total number of base treatment-effect dummies
#' \eqn{\mathfrak W}.
#' @param first_inds Optional integer vector of length \eqn{R}.
#' Starting column indices of the first treatment dummy of each cohort inside
#' the treatment block. If `NA` (default) they are computed by
#' `getFirstInds()`.
#'
#' @return
#' A numeric matrix `X_mod` with the **same dimensions** as `X_int` but whose
#' columns are the transformed regressors
#' \(\bigl[\tilde{\boldsymbol Z}\,\boldsymbol D_N^{-1}\bigr]_{NT\times p}\).
#'
#' @seealso
#' * `genBackwardsInvFusionTransformMat()`
#' * `genInvTwoWayFusionTransformMat()`
#'
#' @examples
#' set.seed(1)
#' R <- 2; T <- 5; d <- 1; N <- 10
#' num_treats <- getNumTreats(R, T)
#' p <- R + (T-1) + d + d*R + d*(T-1) + num_treats + d*num_treats
#' X_int <- matrix(rnorm(N*T*p), N*T, p)
#' X_mod <- transformXintImproved(
#' X_int, N=N, T=T, R=R, d=d, num_treats=num_treats
#' )
#' # The two matrices have identical dimensions:
#' dim(X_mod) # NT x p
#' @keywords internal
#' @noRd
transformXintImproved <- function(
X_int,
N,
T,
R,
d,
num_treats,
first_inds = NA
) {
p <- getP(R = R, T = T, d = d, num_treats = num_treats)
stopifnot(p == ncol(X_int))
X_mod <- matrix(as.numeric(NA), nrow = N * T, ncol = p)
stopifnot(nrow(X_int) == N * T)
# Transform cohort fixed effects
X_mod[, 1:R] <- X_int[, 1:R] %*% genBackwardsInvFusionTransformMat(R)
# Transform time fixed effects
X_mod[, (R + 1):(R + T - 1)] <- X_int[, (R + 1):(R + T - 1)] %*%
genBackwardsInvFusionTransformMat(T - 1)
# Copy X (the main covariate block; may be empty when d==0)
if (d > 0) {
stopifnot(all(is.na(X_mod[, (R + T - 1 + 1):(R + T - 1 + d)])))
X_mod[, (R + T - 1 + 1):(R + T - 1 + d)] <- X_int[,
(R + T - 1 + 1):(R + T - 1 + d)
]
stopifnot(all(!is.na(X_mod[, 1:(R + T - 1 + d)])))
stopifnot(all(is.na(X_mod[, (R + T - 1 + d + 1):p])))
}
# For cohort effects interacted with X: we have d*R columns to deal with.
# For each individual feature, this will be handled using
# genTransformedMatFusion.
if (any(is.na(first_inds))) {
first_inds <- getFirstInds(R = R, T = T)
}
if (d > 0) {
for (j in 1:d) {
# Get indices corresponding to interactions between feature j and cohort
# fixed effects--these are the first feature, the (1 + d)th feature, and
# so on R times
feat_1 <- R + T - 1 + d + j
feat_R <- R + T - 1 + d + (R - 1) * d + j
feat_inds_j <- seq(feat_1, feat_R, by = d)
stopifnot(length(feat_inds_j) == R)
stopifnot(all(is.na(X_mod[, feat_inds_j])))
X_mod[, feat_inds_j] <- X_int[, feat_inds_j] %*%
genBackwardsInvFusionTransformMat(R)
}
stopifnot(all(!is.na(X_mod[, 1:(R + T - 1 + d + R * d)])))
stopifnot(all(is.na(X_mod[, (R + T - 1 + d + R * d + 1):p])))
# Similar for time effects interacted with X
for (j in 1:d) {
# Get indices corresponding to interactions between feature j and time
# fixed effects--these are the first feature, the (1 + d)th feature, and
# so on T - 1 times
feat_1 <- R + T - 1 + d + R * d + j
feat_T_minus_1 <- R + T - 1 + d + R * d + (T - 2) * d + j
feat_inds_j <- seq(feat_1, feat_T_minus_1, by = d)
stopifnot(length(feat_inds_j) == T - 1)
stopifnot(all(is.na(X_mod[, feat_inds_j])))
X_mod[, feat_inds_j] <- X_int[, feat_inds_j] %*%
genBackwardsInvFusionTransformMat(T - 1)
}
stopifnot(all(!is.na(X_mod[, 1:(R + T - 1 + d + R * d + (T - 1) * d)])))
stopifnot(all(is.na(X_mod[,
(R + T - 1 + d + R * d + (T - 1) * d + 1):p
])))
}
# Now base treatment effects. For each cohort, will penalize base term, then
# fuse remaining terms toward it. Also, for each cohort, will penalize base
# treatment effect of this cohort to base of previous cohort. New function
# genTransformedMatTwoWayFusion does this.
feat_inds <- (R + T - 1 + d + R * d + (T - 1) * d + 1):(R +
T -
1 +
d +
R * d +
(T - 1) * d +
num_treats)
# Now ready to generate the appropriate transformed matrix
stopifnot(all(is.na(X_mod[, feat_inds])))
X_mod[, feat_inds] <- X_int[, feat_inds] %*%
genInvTwoWayFusionTransformMat(num_treats, first_inds, R)
if (d > 0) {
# Lastly, penalize interactions between each treatment effect and each feature.
# Feature-wise, we can do this with genTransformedMatTwoWayFusion, in the same
# way that we did for previous interactions with X.
for (j in 1:d) {
# Recall that we have arranged the last d*num_treats features in X_int
# as follows: the first d are the first column of treat_mat_long interacted
# with all of the columns of X, and so on. So, the columns that interact
# the jth feature with all of the treatment effects are columns j, j + 1*d,
# j + 2*d, ..., j + (num_treats - 1)*d.
inds_j <- seq(j, j + (num_treats - 1) * d, by = d)
stopifnot(length(inds_j) == num_treats)
inds_j <- inds_j + R + T - 1 + d + R * d + (T - 1) * d + num_treats
# Now ready to generate the appropriate transformed matrix
stopifnot(all(is.na(X_mod[, inds_j])))
X_mod[, inds_j] <- X_int[, inds_j] %*%
genInvTwoWayFusionTransformMat(num_treats, first_inds, R)
stopifnot(all(!is.na(X_mod[, inds_j])))
}
}
stopifnot(all(!is.na(X_mod)))
stopifnot(ncol(X_mod) == p)
stopifnot(nrow(X_mod) == N * T)
return(X_mod)
}
#' Back-transform Bridge-Regression Coefficients
#' \( \widehat{\boldsymbol\beta}
#' = \boldsymbol D_N^{-1}\,\widehat{\boldsymbol\theta}\)
#'
#' @description
#' After fitting a bridge (Lasso, Elastic-Net, \eqn{\ell_q}) regression on the
#' transformed design matrix
#' \eqn{\widetilde{\boldsymbol Z}\,\boldsymbol D_N^{-1}}
#' (see `transformXintImproved()`), the solver returns parameter estimates
#' \eqn{\widehat{\boldsymbol\theta}}.
#' This helper multiplies that vector by the block-diagonal matrix
#' \eqn{\boldsymbol D_N^{-1}} from the paper,
#' thereby recovering the original-scale coefficients
#' \eqn{\widehat{\boldsymbol\beta}} for the FETWFE model.
#'
#' @details
#' The matrix
#' \deqn{
#' \boldsymbol D_N^{-1}
#' = \operatorname{diag}\Bigl(
#' (D^{(1)}(R))^{-1},\;
#' (D^{(1)}(T\!-\!1))^{-1},\;
#' I_d,\;
#' I_d\!\otimes\!(D^{(1)}(R))^{-1},\;
#' I_d\!\otimes\!(D^{(1)}(T\!-\!1))^{-1},\;
#' (D^{(2)}(\mathcal R))^{-1},\;
#' I_d\!\otimes\!(D^{(2)}(\mathcal R))^{-1}
#' \Bigr)
#' }
#' corresponds to seven consecutive blocks in the column ordering used by
#' `transformXintImproved()`:
#' \enumerate{
#' \item Cohort fixed-effect coefficients (length \eqn{R})
#' \item Time fixed-effects (length \eqn{T-1})
#' \item Main covariates (length \eqn{d})
#' \item Covariate x cohort interactions (\eqn{dR})
#'   \item Covariate x time interactions (\eqn{d(T-1)})
#' \item Base treatment effects (\eqn{\mathfrak W =} `num_treats`)
#' \item Covariate x treatment interactions (\eqn{d\mathfrak W})
#' }
#'
#' For each block the function premultiplies the slice of
#' \code{beta_hat_mod} with the *same* inverse-fusion matrix that was used as a
#' post-multiplier when building the transformed design matrix:
#'
#' | block | helper used | mathematical symbol |
#' |-------|-------------|---------------------|
#' | cohort FE | `genBackwardsInvFusionTransformMat(R)` | \((D^{(1)}(R))^{-1}\) |
#' | time FE | `genBackwardsInvFusionTransformMat(T-1)` | \((D^{(1)}(T-1))^{-1}\) |
#' | covariate blocks | identity copy | \(I_d\) |
#' | covariate x cohort | same helper, repeated for each feature | \(I_d\otimes(D^{(1)}(R))^{-1}\) |
#' | covariate x time | idem with \(T-1\) | \(I_d\otimes(D^{(1)}(T-1))^{-1}\) |
#' | base treatment | `genInvTwoWayFusionTransformMat()` | \((D^{(2)}(\mathcal R))^{-1}\) |
#' | covariate x treatment | same two-way helper for each feature | \(I_d\otimes(D^{(2)}(\mathcal R))^{-1}\) |
#'
#' @param beta_hat_mod Numeric vector (length \eqn{p}).
#' Estimated coefficients returned by a penalised fit on the transformed
#' design matrix.
#' @param T Integer. Total time periods.
#' @param R Integer. Number of treated cohorts.
#' @param p Integer. Total number of columns in the **original** design matrix
#' \eqn{p = p_N}.
#' @param d Integer. Number of time-invariant covariates.
#' @param num_treats Integer. Count of base treatment-effect dummies
#' \eqn{\mathfrak W}.
#' @param first_inds Optional integer vector of length \eqn{R}.
#' Starting indices (1-based, inside the treatment-dummy block) of the first
#' effect for each cohort. If `NA` they are reconstructed with
#' **`getFirstInds()`**.
#'
#' @return Numeric vector \code{beta_hat} (length \eqn{p}) equal to
#' \eqn{\boldsymbol D_N^{-1}\,\widehat{\boldsymbol\theta}}.
#'
#' @references
#' Wooldridge, J. M. (2021). Two-way fixed effects, the two-way mundlak
#' regression, and difference-in-differences estimators.
#' \emph{Available at SSRN 3906345}.
#' \doi{10.2139/ssrn.3906345}.
#' Tibshirani & Taylor (2011), "The Solution Path of the Generalized
#' Lasso".
#' Faletto, G (2025). Fused Extended Two-Way Fixed Effects for
#' Difference-in-Differences with Staggered Adoptions.
#' \emph{arXiv preprint arXiv:2312.05985}.
#' \url{https://arxiv.org/abs/2312.05985}.
#'
#' @seealso
#' * `transformXintImproved()` - forward transformation of the design matrix.
#' * `genBackwardsInvFusionTransformMat()`,
#' `genInvTwoWayFusionTransformMat()` - block-wise inverse matrices.
#'
#' @examples
#' ## toy example: one covariate, two treated cohorts, T = 5
#' R <- 2; T <- 5; d <- 1
#' num_treats <- getNumTreats(R, T)
#' p <- R + (T-1) + d + d*R + d*(T-1) + num_treats + d*num_treats
#'
#' ## pretend we ran a penalised regression in the transformed space
#' beta_hat_mod <- rnorm(p)
#'
#' ## back-transform:
#' beta_hat <- untransformCoefImproved(
#' beta_hat_mod, T=T, R=R, p=p, d=d,
#' num_treats=num_treats
#' )
#' length(beta_hat) # == p
#' @keywords internal
#' @noRd
untransformCoefImproved <- function(
beta_hat_mod,
T,
R,
p,
d,
num_treats,
first_inds = NA
) {
stopifnot(length(beta_hat_mod) == p)
beta_hat <- rep(as.numeric(NA), p)
if (any(is.na(first_inds))) {
first_inds <- getFirstInds(R = R, T = T)
}
# First handle the R cohort fixed effects
beta_hat[1:R] <- genBackwardsInvFusionTransformMat(R) %*% beta_hat_mod[1:R]
stopifnot(all(!is.na(beta_hat[1:R])))
stopifnot(all(is.na(beta_hat[(R + 1):p])))
# Next, T - 1 time fixed effects
beta_hat[(R + 1):(R + T - 1)] <- genBackwardsInvFusionTransformMat(
T - 1
) %*%
beta_hat_mod[(R + 1):(R + T - 1)]
stopifnot(all(!is.na(beta_hat[1:(R + T - 1)])))
stopifnot(all(is.na(beta_hat[(R + T):p])))
# Coefficients for X (if any)
if (d > 0) {
beta_hat[(R + T):(R + T - 1 + d)] <- beta_hat_mod[
(R + T):(R + T - 1 + d)
]
stopifnot(all(!is.na(beta_hat[1:(R + T - 1 + d)])))
stopifnot(all(is.na(beta_hat[(R + T + d):p])))
# Next, coefficients for cohort effects interacted with X. For each individual
# feature, this will be handled using untransformVecFusion.
for (j in 1:d) {
# Get indices corresponding to interactions between feature j and cohort
# fixed effects--these are the first feature, the (1 + d)th feature, and
# so on R times
feat_1 <- R + T - 1 + d + j
feat_R <- R + T - 1 + d + (R - 1) * d + j
feat_inds_j <- seq(feat_1, feat_R, by = d)
stopifnot(length(feat_inds_j) == R)
stopifnot(all(is.na(beta_hat[feat_inds_j])))
beta_hat[feat_inds_j] <- genBackwardsInvFusionTransformMat(R) %*%
beta_hat_mod[feat_inds_j]
stopifnot(all(!is.na(beta_hat[feat_inds_j])))
}
stopifnot(all(!is.na(beta_hat[1:(R + T - 1 + d + R * d)])))
stopifnot(all(is.na(beta_hat[(R + T - 1 + d + R * d + 1):p])))
# Similar for time effects interacted with X
for (j in 1:d) {
# Get indices corresponding to interactions between feature j and time
# fixed effects--these are the first feature, the (1 + d)th feature, and
# so on T - 1 times
feat_1 <- R + T - 1 + d + R * d + j
feat_T_minus_1 <- R + T - 1 + d + R * d + (T - 2) * d + j
feat_inds_j <- seq(feat_1, feat_T_minus_1, by = d)
stopifnot(length(feat_inds_j) == T - 1)
stopifnot(all(is.na(beta_hat[feat_inds_j])))
beta_hat[feat_inds_j] <- genBackwardsInvFusionTransformMat(
T - 1
) %*%
beta_hat_mod[feat_inds_j]
stopifnot(all(!is.na(beta_hat[feat_inds_j])))
}
stopifnot(all(
!is.na(beta_hat[1:(R + T - 1 + d + R * d + (T - 1) * d)])
))
stopifnot(all(is.na(beta_hat[
(R + T - 1 + d + R * d + (T - 1) * d + 1):p
])))
}
# Now base treatment effects.
feat_inds <- (R + T - 1 + d + R * d + (T - 1) * d + 1):(R +
T -
1 +
d +
R * d +
(T - 1) * d +
num_treats)
stopifnot(all(is.na(beta_hat[feat_inds])))
beta_hat[feat_inds] <- genInvTwoWayFusionTransformMat(
num_treats,
first_inds,
R
) %*%
beta_hat_mod[feat_inds]
if (d > 0) {
stopifnot(all(
!is.na(beta_hat[
1:(R + T - 1 + d + R * d + (T - 1) * d + num_treats)
])
))
stopifnot(all(is.na(beta_hat[
(R + T - 1 + d + R * d + (T - 1) * d + num_treats + 1):p
])))
# Lastly, interactions between each treatment effect and each feature.
# Feature-wise, we can do this with untransformTwoWayFusionCoefs, in the same
# way that we did for previous interactions with X.
for (j in 1:d) {
# Recall that we have arranged the last d*num_treats features in X_int
# as follows: the first d are the first column of treat_mat_long interacted
# with all of the columns of X, and so on. So, the columns that interact
# the jth feature with all of the treatment effects are columns j, j + 1*d,
# j + 2*d, ..., j + (num_treats - 1)*d.
inds_j <- seq(j, j + (num_treats - 1) * d, by = d)
stopifnot(length(inds_j) == num_treats)
inds_j <- inds_j + R + T - 1 + d + R * d + (T - 1) * d + num_treats
# Now ready to untransform the estimated coefficients
stopifnot(all(is.na(beta_hat[inds_j])))
beta_hat[inds_j] <- genInvTwoWayFusionTransformMat(
num_treats,
first_inds,
R
) %*%
beta_hat_mod[inds_j]
stopifnot(all(!is.na(beta_hat[inds_j])))
}
}
stopifnot(all(!is.na(beta_hat)))
return(beta_hat)
}
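# A minimal sketch (hypothetical toy dimensions, not run at load time) of the
# column layout that untransformCoefImproved() assumes: R cohort fixed effects,
# T - 1 time fixed effects, d covariates, R*d cohort-covariate interactions,
# (T - 1)*d time-covariate interactions, num_treats base treatment effects, and
# num_treats*d treatment-covariate interactions, for p columns in total.
if (FALSE) {
  R <- 3
  T <- 6
  d <- 2
  num_treats <- getNumTreats(R, T)
  p <- R + (T - 1) + d + R * d + (T - 1) * d + num_treats * (1 + d)
  theta <- rnorm(p) # toy coefficients in the transformed (fused) space
  beta <- untransformCoefImproved(
    beta_hat_mod = theta,
    T = T,
    R = R,
    p = p,
    d = d,
    num_treats = num_treats
  )
  stopifnot(length(beta) == p, all(!is.na(beta)))
}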
# genBackwardsFusionTransformMat
#' @title Generate Backward Fusion Transformation Matrix
#' @description Creates a square transformation matrix `D` of size `n_vars` x
#' `n_vars`. When pre-multiplied by a coefficient vector `beta`, `D %*% beta`
#' yields a transformed vector `theta` where `theta_i = beta_i - beta_{i+1}`
#' for `i < n_vars`, and `theta_{n_vars} = beta_{n_vars}`. This is used to
#' penalize coefficients towards the *next* coefficient in sequence, and the
#' last coefficient directly.
#' @param n_vars Integer; the number of variables (coefficients) in the block
#' to be transformed. This will be the dimension of the output matrix.
#' @return A numeric matrix of dimension `n_vars` x `n_vars`.
#' @details The resulting matrix `D` has 1s on the main diagonal. For each row
#' `i` (from 1 to `n_vars - 1`), it has a -1 at column `i+1`. All other
#' elements are 0.
#' @examples
#' genBackwardsFusionTransformMat(3)
#' # Output:
#' # [,1] [,2] [,3]
#' # [1,] 1 -1 0
#' # [2,] 0 1 -1
#' # [3,] 0 0 1
#' @keywords internal
#' @noRd
genBackwardsFusionTransformMat <- function(n_vars) {
# Generates D matrix in relation theta = D beta, where D beta is what
# we want to penalize (for a single set of coefficients where we want to
# penalize last coefficient directly and penalize remaining coefficients
# towards the next coefficient)
D <- matrix(0, n_vars, n_vars)
for (i in 1:n_vars) {
for (j in 1:n_vars) {
if (i == j) {
D[i, j] <- 1
}
if (j == i + 1) {
D[i, j] <- -1
}
}
}
return(D)
}
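# A minimal sketch (not run at load time) of the relation theta = D %*% beta
# that this matrix implements: backward differences within the block, with the
# last coefficient penalized directly.
if (FALSE) {
  beta <- c(3, 2, 2)
  D <- genBackwardsFusionTransformMat(length(beta))
  theta <- as.numeric(D %*% beta)
  # theta_1 = beta_1 - beta_2, theta_2 = beta_2 - beta_3, theta_3 = beta_3
  stopifnot(all.equal(theta, c(1, 0, 2)))
}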
#' Inverse "Backwards-Difference" Transformation Matrix \( \bigl(D^{(1)}(t)\bigr)^{-1} \)
#'
#' @description
#' Generates the \eqn{t\times t} upper-triangular matrix of 1s whose inverse is
#' the first-difference operator
#' \eqn{D^{(1)}(t)}
#' defined in Eq. (14) of the paper:
#' \deqn{
#' D^{(1)}(t) =
#' \begin{bmatrix}
#' 1 & -1 & & & 0\\
#' & 1 & -1 & & \\
#' & & \ddots & \ddots & \\
#' & & & 1 & -1 \\
#' 0 & & & & 1
#' \end{bmatrix}.
#' }
#' Multiplying a block of coefficients by this inverse converts finite
#' differences back to cumulative sums, which is what the bridge-penalty
#' expects.
#'
#' @details
#' The matrix has ones on and strictly above the main diagonal
#' (\eqn{D^{-1}_{ij}=1_{\{i\le j\}}}).
#' It is unit upper-triangular, so its determinant equals 1 and it is always
#' invertible.
#'
#' After constructing `D_inv`, the routine calls
#' `genBackwardsFusionTransformMat(n_vars)` and checks (via `all.equal()` with
#' tolerance `1e-12`) that `D_inv %*% D` equals the identity matrix. Failing
#' the check raises an error, protecting downstream computations from a silent
#' algebraic bug.
#'
#' @param n_vars Integer. Dimension \eqn{t}.
#'
#' @return A \eqn{n\_vars \times n\_vars} numeric matrix equal to
#' \(\bigl(D^{(1)}(n\_vars)\bigr)^{-1}\).
#' @keywords internal
#' @noRd
genBackwardsInvFusionTransformMat <- function(n_vars) {
# Generates inverse of D matrix in relation theta = D beta, where D beta is
# what we want to penalize (for a single set of coefficients where we want
# to penalize last coefficient directly and penalize remaining coefficients
# towards the next coefficient)
D_inv <- matrix(0, n_vars, n_vars)
diag(D_inv) <- 1
D_inv[upper.tri(D_inv)] <- 1
stopifnot(nrow(D_inv) == n_vars)
stopifnot(ncol(D_inv) == n_vars)
## -- self-test: D_inv %*% D == I
D <- genBackwardsFusionTransformMat(n_vars)
tol <- 1e-12
if (!isTRUE(all.equal(D_inv %*% D, diag(n_vars), tolerance = tol))) {
stop(
"genBackwardsInvFusionTransformMat(): self-test failed - ",
"result is not the matrix inverse of D."
)
}
return(D_inv)
}
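# A minimal sketch (not run at load time) showing that multiplying by this
# inverse converts backward differences back into the original coefficients,
# i.e. beta_i = sum_{j >= i} theta_j (a reversed cumulative sum).
if (FALSE) {
  theta <- c(1, 0, 2)
  D_inv <- genBackwardsInvFusionTransformMat(length(theta))
  beta <- as.numeric(D_inv %*% theta)
  stopifnot(all.equal(beta, rev(cumsum(rev(theta)))))
  stopifnot(all.equal(beta, c(3, 2, 2)))
}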
# genInvFusionTransformMat (TODO: unused for now)
#' @title Generate Inverse of Forward Fusion Transformation Matrix
#' @description Creates the inverse of a "forward" fusion transformation matrix.
#' A forward fusion matrix `D_forward` (not explicitly generated here) would
#' transform `beta` to `theta` such that `theta_1 = beta_1` and
#' `theta_i = beta_i - beta_{i-1}` for `i > 1`. This function returns
#' `D_forward_inv`. If `theta = D_forward %*% beta`, then
#' `beta = D_forward_inv %*% theta`. This is used when the first coefficient
#' in a sequence is penalized directly, and subsequent coefficients are
#' penalized towards the *previous* coefficient.
#' @param n_vars Integer; the number of variables (coefficients), determining
#' the dimension of the output matrix.
#' @return A numeric matrix `D_forward_inv` of dimension `n_vars` x `n_vars`.
#' @details The resulting matrix is a lower triangular matrix with all elements
#' on and below the main diagonal equal to 1, and all elements above the main
#' diagonal equal to 0.
#' @examples
#' genInvFusionTransformMat(3)
#' # Output:
#' # [,1] [,2] [,3]
#' # [1,] 1 0 0
#' # [2,] 1 1 0
#' # [3,] 1 1 1
#' @keywords internal
#' @noRd
genInvFusionTransformMat <- function(n_vars) {
# Generates inverse of D matrix in relation theta = D beta, where D beta is
# what we want to penalize (for a single set of coefficients where we want
# to penalize first coefficient directly and penalize remaining coefficients
# towards the previous coefficient)
D_inv <- matrix(0, n_vars, n_vars)
diag(D_inv) <- 1
D_inv[lower.tri(D_inv)] <- 1
return(D_inv)
}
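# A minimal sketch (not run at load time): for the forward parameterization,
# multiplying by this lower-triangular matrix of 1s turns the differences theta
# back into cumulative sums, beta_i = sum_{j <= i} theta_j.
if (FALSE) {
  theta <- c(3, -1, 0)
  beta <- as.numeric(genInvFusionTransformMat(length(theta)) %*% theta)
  stopifnot(all.equal(beta, cumsum(theta)))
  stopifnot(all.equal(beta, c(3, 2, 2)))
}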
# getSecondVarTermDataApp
#' @title Calculate Second Variance Term for ATT Standard Error (Data Application)
#' @description Computes the second component of the variance for the Average
#' Treatment Effect on the Treated (ATT). This component accounts for the
#' variability due to the estimation of cohort membership probabilities.
#' Computes **Term 2** of the overall-ATT variance:
#' \deqn{\frac{1}{N}\,
#' \hat\theta_{\!\text{sel}}^{\!\top}
#' J^{\!\top}\widehat\Sigma_\pi J
#' \hat\theta_{\!\text{sel}},}
#' where
#' \itemize{
#' \item \(\widehat\Sigma_\pi\) is the multinomial covariance of the
#' cohort-count vector
#' (\(\widehat\pi_r(1-\widehat\pi_r)\) on the diagonal,
#' \(-\widehat\pi_r\widehat\pi_s\) off-diagonal).
#' \item \(J\) is the Jacobian of the weighting function
#' \(f_r(\pi)=\pi_r/\sum_{k}\pi_k\) evaluated at
#' \(\widehat\pi\). In matrix form
#' \eqn{J_{rs}=
#' \begin{cases}
#' (S-\widehat\pi_r)/S^2,& r=s,\\[4pt]
#' -\,\widehat\pi_r /S^2,& r\neq s,
#' \end{cases}} with \(S=\sum_k\widehat\pi_k\).
#' \item Each column of `d_inv_treat_sel` is a selected **transformed**
#' treatment coefficient; row-averaging over the rows belonging to
#' cohort \(s\) produces the part of \(J\) that multiplies
#' \(\theta_{\text{sel}}\).
#' }
#' @param psi_mat Numeric matrix; a matrix where each column `r` is the `psi_r`
#' vector used in calculating the ATT for cohort `r`. Dimensions:
#' `length(sel_treat_inds_shifted)` x `R`.
#' @param sel_treat_inds_shifted Integer vector; indices of the selected
#' treatment effects within the `num_treats` block, shifted to start from 1.
#' @param tes Numeric vector; the estimated treatment effects for all
#' `num_treats` possible cohort-time combinations.
#' @param cohort_probs_overall Numeric vector; estimated marginal probabilities
#' of belonging to each treated cohort (P(W=r)). Length `R`.
#' @param first_inds Integer vector; indices of the first treatment effect for
#' each cohort within the `num_treats` block.
#' @param theta_hat_treat_sel Numeric vector; estimated coefficients in the
#' transformed (fused) space, corresponding only to the selected treatment
#' effects.
#' @param num_treats Integer; total number of base treatment effect parameters.
#' @param N Integer; total number of units.
#' @param T Integer; total number of time periods.
#' @param R Integer; total number of treated cohorts.
#' @param fused Logical; if `TRUE`, assumes fusion penalization was used,
#' affecting how standard errors and related matrices are computed.
#' @param d_inv_treat_sel Numeric matrix; the relevant block of the inverse
#' two-way fusion transformation matrix corresponding to selected treatment
#' effects. Dimensions: `num_treats` (or fewer if selection occurs) x
#' `length(sel_treat_inds_shifted)`. Does not need to be provided if fused
#' is FALSE.
#' @return A numeric scalar representing the second variance component for the
#' ATT.
#' @details This function calculates `Sigma_pi_hat`, the covariance matrix of
#' the cohort assignment indicators, and a Jacobian matrix. These are then
#' combined with `theta_hat_treat_sel` to compute the variance term as
#' `T * t(theta_hat_treat_sel) %*% t(jacobian_mat) %*% Sigma_pi_hat %*% jacobian_mat %*%
#' theta_hat_treat_sel / (N * T)`. The construction of the Jacobian involves averaging parts of
#' `d_inv_treat_sel` corresponding to different cohorts.
#'
#' The function is currently written for the fused workflow only
#' (`fused = TRUE` guard). It first reconstructs \(J\) from
#' `d_inv_treat_sel` and the cohort probabilities, then plugs everything into
#' the quadratic form above and finally rescales by \(T/(N T)=1/N\).
#' @seealso [getTeResults2()]
#' @keywords internal
#' @noRd
getSecondVarTermDataApp <- function(
# cohort_probs,
psi_mat,
sel_treat_inds_shifted,
tes,
cohort_probs_overall,
first_inds,
theta_hat_treat_sel,
num_treats,
N,
T,
R,
fused = TRUE,
d_inv_treat_sel = NA
) {
if (fused) {
stopifnot(all(!is.na(d_inv_treat_sel)))
stopifnot(ncol(d_inv_treat_sel) == length(sel_treat_inds_shifted))
}
stopifnot(length(theta_hat_treat_sel) == length(sel_treat_inds_shifted))
# Get Sigma_pi_hat, the (sample-estimated) covariance matrix for the
# sample proportions (derived from the multinomial distribution)
Sigma_pi_hat <- -outer(
cohort_probs_overall[1:(R)],
cohort_probs_overall[1:(R)]
)
diag(Sigma_pi_hat) <- cohort_probs_overall[1:(R)] *
(1 - cohort_probs_overall[1:(R)])
stopifnot(nrow(Sigma_pi_hat) == R)
stopifnot(ncol(Sigma_pi_hat) == R)
# Jacobian
##
## J_{rs} = (S-pi_r)/S^2 if r = s
## -pi_r /S^2 if r != s
##
## where S := sum(cohort_probs_overall)
##
## Each column of d_inv_treat_sel corresponds to a selected theta coordinate;
## averaging the rows belonging to each cohort gives the vector that multiplies theta_sel.
##
jacobian_mat <- matrix(
as.numeric(NA),
nrow = R,
ncol = length(sel_treat_inds_shifted)
)
if (fused) {
# Gather a list of the indices corresponding to the treatment coefficients
# for each cohort
sel_inds <- list()
for (r in 1:R) {
first_ind_r <- first_inds[r]
if (r < R) {
last_ind_r <- first_inds[r + 1] - 1
} else {
last_ind_r <- num_treats
}
stopifnot(last_ind_r >= first_ind_r)
sel_inds[[r]] <- first_ind_r:last_ind_r
if (r > 1) {
stopifnot(min(sel_inds[[r]]) > max(sel_inds[[r - 1]]))
stopifnot(length(sel_inds[[r]]) < length(sel_inds[[r - 1]]))
}
}
stopifnot(all.equal(unlist(sel_inds), 1:num_treats))
for (r in 1:R) {
## diagonal contribution
cons_r <- (sum(cohort_probs_overall) -
cohort_probs_overall[r]) /
sum(cohort_probs_overall)^2
if (length(sel_treat_inds_shifted) > 1) {
jacobian_mat[r, ] <- cons_r *
colMeans(d_inv_treat_sel[sel_inds[[r]], , drop = FALSE])
} else {
jacobian_mat[r, ] <- cons_r *
mean(d_inv_treat_sel[sel_inds[[r]], , drop = FALSE])
}
## off-diagonal: subtract sum_{s!=r} pi_r / S^2 x block-mean of cohort s
for (r_double_prime in setdiff(1:R, r)) {
cons_r_double_prime <- cohort_probs_overall[r] /
sum(cohort_probs_overall)^2
jacobian_mat[r, ] <- jacobian_mat[r, ] -
cons_r_double_prime *
colMeans(d_inv_treat_sel[
sel_inds[[r_double_prime]],
,
drop = FALSE
])
}
}
}
stopifnot(all(!is.na(jacobian_mat)))
## variance term: theta_sel' J' Sigma_pi J theta_sel / N
if (fused) {
att_var_2 <- T *
as.numeric(
t(theta_hat_treat_sel) %*%
t(jacobian_mat) %*%
Sigma_pi_hat %*%
jacobian_mat %*%
theta_hat_treat_sel
) /
(N * T)
}
if (!fused) {
stop("this function is not implemented as of now.")
}
return(att_var_2)
}
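# A minimal sketch (toy probabilities, not run at load time) of the two
# ingredients of Term 2 built inside getSecondVarTermDataApp(): the multinomial
# covariance Sigma_pi of the cohort proportions and the Jacobian of the
# weighting map f_r(pi) = pi_r / sum(pi), checked against a finite difference.
if (FALSE) {
  pi_hat <- c(0.20, 0.15, 0.10) # toy marginal cohort probabilities P(W = r)
  R <- length(pi_hat)
  S <- sum(pi_hat)
  Sigma_pi <- -outer(pi_hat, pi_hat)
  diag(Sigma_pi) <- pi_hat * (1 - pi_hat)
  # Jacobian J[r, s] = d f_r / d pi_s: (S - pi_r) / S^2 on the diagonal,
  # -pi_r / S^2 off the diagonal
  J <- matrix(-pi_hat / S^2, nrow = R, ncol = R)
  diag(J) <- (S - pi_hat) / S^2
  # finite-difference check of one off-diagonal entry
  f <- function(pi) pi / sum(pi)
  eps <- 1e-8
  pi_pert <- pi_hat
  pi_pert[2] <- pi_pert[2] + eps
  stopifnot(all.equal((f(pi_pert)[1] - f(pi_hat)[1]) / eps, J[1, 2], tolerance = 1e-4))
}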
#' Inverse Two-Way-Fusion Transformation Matrix \( \bigl(D^{(2)}(\mathcal R)\bigr)^{-1} \)
#'
#' @description
#' Constructs the **inverse** of the block-lower-triangular matrix
#' \eqn{D^{(2)}(\mathcal R)} that appears in Lemma \eqn{3} of the paper.
#' Each treated cohort \(r\) has a "first" post-treatment coefficient
#' \(\tau_{r,0}\) and a string of subsequent coefficients
#' \(\tau_{r,1},\dots,\tau_{r,T-r}\).
#' The fusion penalty (1) pulls every \(\tau_{r,k}\;(k>0)\) toward its
#' predecessor \(\tau_{r,k-1}\) *within* the same cohort **and**
#' (2) pulls the first effect of cohort \(r\) toward the first effect of cohort
#' \(r-1\).
#' Multiplying the raw design sub-matrix by the output of
#' `genInvTwoWayFusionTransformMat()` therefore changes coordinates from
#' \eqn{\boldsymbol\beta} to
#' \eqn{\boldsymbol\theta = D^{(2)}(\mathcal R)\,\boldsymbol\beta},
#' so that an \eqn{\ell_q} penalty on \eqn{\theta} is exactly the desired
#' two-level fusion penalty on \eqn{\beta}.
#'
#' @details
#' * The returned matrix is **block lower-triangular with 1s**: each cohort's
#' diagonal block is a lower-triangular block of 1s, and the columns indexed
#' by `first_inds` carry extra 1s below their block that implement the
#' cross-cohort link on the first effect of each cohort (see paper, Eq. (18)).
#' * Its inverse contains only \{-1,0,1\} and recreates Eq. (17) of the paper.
#' * Determinant is 1, so the transformation is volume-preserving.
#'
#' @param n_vars Integer. Total number of base treatment-effect coefficients
#' ( \eqn{\mathfrak W} in the paper).
#' @param first_inds Integer vector of length \eqn{R}.
#' `first_inds[r]` is the **1-based** column index of \(\tau_{r,0}\)
#' inside the block of the \eqn{n\_vars} treatment columns.
#' @param R Integer. Number of treated cohorts.
#'
#' @return A numeric matrix of size \eqn{n\_vars \times n\_vars} that is
#' exactly \(\bigl(D^{(2)}(\mathcal R)\bigr)^{-1}\).
#'
#' @references
#' Faletto, G (2025). Fused Extended Two-Way Fixed Effects for
#' Difference-in-Differences with Staggered Adoptions.
#' \emph{arXiv preprint arXiv:2312.05985}.
#' \url{https://arxiv.org/abs/2312.05985}.
#'
#' @examples
#' R <- 3; T <- 6
#' num_treats <- getNumTreats(R, T)
#' first <- getFirstInds(R, T)
#' Dinv <- genInvTwoWayFusionTransformMat(num_treats, first, R)
#' # verify Dinv %*% solve(Dinv) = I
#' all.equal(Dinv %*% solve(Dinv), diag(num_treats))
#' @keywords internal
#' @noRd
genInvTwoWayFusionTransformMat <- function(n_vars, first_inds, R) {
stopifnot(length(n_vars) == 1, n_vars >= 0)
stopifnot(is.numeric(R), length(R) == 1, R >= 0)
if (R > 0) {
stopifnot(is.numeric(first_inds), length(first_inds) == R)
# Add checks for consistency of first_inds if n_vars > 0
if (n_vars > 0) {
stopifnot(first_inds[1] == 1)
if (R > 1) {
stopifnot(all(diff(first_inds) > 0)) # Must be strictly increasing
}
# Check that the last effect of the last cohort aligns with n_vars
# M_R = n_vars - first_inds[R] + 1. This M_R must be > 0 if first_inds[R] <= n_vars.
stopifnot(first_inds[R] <= n_vars)
} else {
# n_vars == 0
stopifnot(R == 0) # If n_vars is 0, R must be 0. first_inds should be empty.
}
} else {
# R == 0
stopifnot(length(first_inds) == 0, n_vars == 0)
}
D_inv <- matrix(0, nrow = n_vars, ncol = n_vars)
if (n_vars == 0) {
# Handles R=0 correctly
return(D_inv)
}
# Part 1: Set up the column structure for \tilde{U} blocks and
# the first column of each diagonal block.
# For each cohort `c` (from 1 to R), the column in D_inv corresponding to its
# first treatment effect (i.e., absolute column index `first_inds[c]`)
# should have 1s from its own row (`first_inds[c]`) down to `n_vars`.
if (R > 0) {
for (cohort_idx in 1:R) {
col_to_fill <- first_inds[cohort_idx]
D_inv[col_to_fill:n_vars, col_to_fill] <- 1
}
}
# Part 2: Form the correct diagonal blocks.
# Each diagonal block is (D^{(1)}(M_c)^{-1})^T, which is a fully
# lower triangular matrix of 1s (all elements on and below diagonal are 1).
# This will correctly overwrite the 1s placed in Part 1 for these diagonal parts.
if (R > 0) {
for (cohort_idx in 1:R) {
block_start_idx <- first_inds[cohort_idx]
if (cohort_idx < R) {
block_end_idx <- first_inds[cohort_idx + 1] - 1
} else {
# This is the last cohort
block_end_idx <- n_vars
}
# Ensure block indices are valid (e.g. cohort has at least one effect)
if (block_start_idx > block_end_idx) {
# This case implies cohort_idx has zero effects.
# This should ideally not happen if first_inds and n_vars are consistent
# with cohorts having at least one effect.
# If it can happen, 'continue' or 'next' might be appropriate.
# For now, assume M_x >= 1 for all cohorts included in R.
next
}
# Fill this diagonal block D_inv[block_start_idx:block_end_idx, block_start_idx:block_end_idx]
for (row_abs in block_start_idx:block_end_idx) {
# Absolute row index in D_inv
# For the current row_abs within its block, set columns from
# the start of the block (block_start_idx) up to the current row_abs to 1.
D_inv[row_abs, block_start_idx:row_abs] <- 1
}
}
}
# The original code had a stop for R < 2.
# This revised logic handles R=0 and R=1 correctly without a special stop.
# If R=1, Part 1 sets D_inv[1:n_vars, 1] <- 1.
# Part 2 (with cohort_idx=1, block_start_idx=1, block_end_idx=n_vars) then correctly
# forms the full (D^{(1)}(n_vars)^{-1})^T matrix.
return(D_inv)
}
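# A minimal sketch (toy dimensions, not run at load time) of the two-way fusion
# structure documented above: the inverse of the returned matrix contains only
# entries in {-1, 0, 1}, and the unit determinant confirms the change of
# coordinates is volume-preserving.
if (FALSE) {
  R <- 3
  T <- 6
  num_treats <- getNumTreats(R, T)
  first <- getFirstInds(R, T)
  D_inv <- genInvTwoWayFusionTransformMat(num_treats, first, R)
  D <- solve(D_inv)
  stopifnot(max(abs(D - round(D))) < 1e-8)
  stopifnot(all(round(D) %in% c(-1, 0, 1)))
  stopifnot(abs(det(D_inv) - 1) < 1e-8)
}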
# getCohortATTsFinal
#' @title Compute Cohort-Specific ATTs, Standard Errors, and Confidence Intervals
#' @description
#' Computes the **Cohort Average Treatment Effect on the Treated**
#' \deqn{\tau_{\text{ATT},r}\;=\;\frac1{T-r+1}\sum_{t=r}^{T}\tau_{\text{ATT}}(r,t)}
#' for every treated cohort \(r\in\{1,\dots,R\}\).\cr
#' In the fused-parameterisation used by the estimator, each
#' \(\tau_{\text{ATT}}(r,t)\) is a *row* of
#' \((D^{(2)}(\mathcal R))^{-1}\widehat\theta\).
#' Averaging those rows within a cohort produces a
#' **weight vector** \(\psi_r\) such that
#' \(\widehat{\tau}_{\text{ATT},r}=\psi_r^{\!\top}\widehat\theta\).
#' The function
#'
#' * constructs every \(\psi_r\) (via [getPsiRFused()])
#' * builds the Gram inverse
#' \(\bigl((NT)^{-1}X_{\hat{\mathcal S}}^{\top}X_{\hat{\mathcal S}}\bigr)^{-1}\)
#' for the *selected* treatment-effect columns
#' * returns point estimates, \(\sqrt{\widehat{\operatorname{Var}}}\) and
#' Wald intervals
#' \(\widehat{\tau}_{\text{ATT},r}\pm z_{1-\alpha/2}\,
#' \widehat{\operatorname{SE}}(\widehat{\tau}_{\text{ATT},r})\).
#'
#' @param X_final Numeric matrix; the final design matrix, potentially
#' transformed by `Omega_sqrt_inv` and the fusion transformation.
#' @param sel_feat_inds Integer vector; indices of all features selected by the
#' penalized regression in the transformed space.
#' @param treat_inds Integer vector; indices in the original (untransformed)
#' coefficient vector that correspond to the base treatment effects.
#' @param num_treats Integer; total number of base treatment effect parameters.
#' @param first_inds Integer vector; indices of the first treatment effect for
#' each cohort within the block of `num_treats` treatment effect parameters.
#' @param sel_treat_inds_shifted Integer vector; indices of selected treatment
#' effects within the `num_treats` block (shifted to start from 1).
#' @param c_names Character vector; names of the `R` treated cohorts.
#' @param tes Numeric vector; estimated treatment effects in the original
#' parameterization for all `num_treats` possible cohort-time combinations.
#' @param sig_eps_sq Numeric scalar; variance of the idiosyncratic error term.
#' @param R Integer; total number of treated cohorts.
#' @param N Integer; total number of units.
#' @param T Integer; total number of time periods.
#' @param fused Logical; if `TRUE`, assumes fusion penalization was used,
#' affecting how standard errors and related matrices are computed.
#' @param calc_ses Logical; if `TRUE`, attempts to calculate standard errors.
#' This is typically `TRUE` if `q < 1`.
#' @param p Integer; total number of parameters in the model.
#' @param alpha Numeric scalar; significance level for confidence intervals
#' (e.g., 0.05 for 95% CIs).
#' @return A list containing:
#' \item{cohort_te_df}{Dataframe with cohort names, estimated ATTs, SEs, and
#' confidence interval bounds.}
#' \item{cohort_tes}{Named numeric vector of estimated ATTs for each cohort.}
#' \item{cohort_te_ses}{Named numeric vector of standard errors for cohort ATTs.}
#' \item{psi_mat}{Matrix used in SE calculation for overall ATT.}
#' \item{gram_inv}{(Potentially NA) Inverse of the Gram matrix for selected
#' features, used in SE calculation.}
#' \item{d_inv_treat_sel}{(If `fused=TRUE`) Relevant block of the inverse
#' fusion matrix for selected treatment effects.}
#' \item{calc_ses}{Logical, indicating if SEs were actually calculated.}
#' @details The function first computes the Gram matrix inverse (`gram_inv`) if
#' `calc_ses` is `TRUE`. Then, for each cohort `r`, it calculates the average
#' of the relevant `tes`. If SEs are calculated, it uses `getPsiRFused` or
#' `getPsiRUnfused` to get a `psi_r` vector, which is then used with
#' `gram_inv` to find the standard error for that cohort's ATT.
#'
#' The finite-sample variance estimator is
#' \deqn{\widehat{\operatorname{Var}}(\widehat{\tau}_{\text{ATT},r})
#' =\frac{\sigma^{2}}{NT}\;
#' \psi_r^{\!\top}\,
#' \widehat G^{-1}\psi_r ,}
#' where \(\widehat G^{-1}\) is the Gram inverse restricted to the selected
#' treatment-effect features.
#' All matrix slices come from the *inverse fusion matrix*
#' \(D^{(2)}(\mathcal R)^{-1}\) and therefore depend only on the known
#' cohort structure.
#'
#' When `calc_ses = TRUE`, the routine also accumulates a
#' block `d_inv_treat_sel` -
#' the sub-matrix of \(D^{(2)}(\mathcal R)^{-1}\) with
#' **all rows** but **only the selected columns**.
#' This block is later required by [getSecondVarTermDataApp()]
#' to propagate sampling noise in the cohort-probability estimates.
#' @seealso [getPsiRFused()], [getTeResults2()]
#' @keywords internal
#' @noRd
getCohortATTsFinal <- function(
X_final,
sel_feat_inds,
treat_inds,
num_treats,
first_inds,
sel_treat_inds_shifted,
c_names,
tes,
sig_eps_sq,
R,
N,
T,
fused,
calc_ses,
p,
alpha = 0.05
) {
stopifnot(max(sel_treat_inds_shifted) <= num_treats)
stopifnot(min(sel_treat_inds_shifted) >= 1)
stopifnot(length(tes) == num_treats)
stopifnot(all(!is.na(tes)))
stopifnot(nrow(X_final) == N * T)
X_to_pass <- X_final
# Start by getting Gram matrix needed for standard errors
if (calc_ses) {
res <- getGramInv(
N = N,
T = T,
X_final = X_to_pass,
sel_feat_inds = sel_feat_inds,
treat_inds = treat_inds,
num_treats = num_treats,
sel_treat_inds_shifted = sel_treat_inds_shifted,
calc_ses = calc_ses
)
gram_inv <- res$gram_inv
calc_ses <- res$calc_ses
} else {
gram_inv <- NA
}
## We need the tau sub-matrix of D^{-1} ONLY if we are in fused workflow
if (fused) {
# Get the parts of D_inv that have to do with treatment effects
d_inv_treat <- genInvTwoWayFusionTransformMat(num_treats, first_inds, R)
## we will progressively rbind() the selected-column rows of this matrix
d_inv_treat_sel <- matrix(
0,
nrow = 0,
ncol = length(sel_treat_inds_shifted)
)
}
# First, each cohort
cohort_tes <- rep(as.numeric(NA), R) # cohort average treatment effect point estimates
cohort_te_ses <- rep(as.numeric(NA), R) # standard errors
psi_mat <- matrix(0, length(sel_treat_inds_shifted), R)
# loop over cohorts
for (r in 1:R) {
# Get indices corresponding to marginal treatment effects for rth cohort
first_ind_r <- first_inds[r]
if (r < R) {
last_ind_r <- first_inds[r + 1] - 1
} else {
last_ind_r <- num_treats
}
stopifnot(last_ind_r >= first_ind_r)
stopifnot(all(first_ind_r:last_ind_r %in% 1:num_treats))
# average treatment effect for cohort r: simple mean of these treatment
# effects
cohort_tes[r] <- mean(tes[first_ind_r:last_ind_r])
if (calc_ses) {
# Calculate standard errors
if (fused) {
# build psi_r and extract block of D^{-1} for these rows/
# selected columns
res_r <- getPsiRFused(
first_ind_r,
last_ind_r,
sel_treat_inds_shifted,
d_inv_treat
)
psi_r <- res_r$psi_r
stopifnot(
nrow(res_r$d_inv_treat_sel) ==
last_ind_r -
first_ind_r +
1
)
stopifnot(
ncol(res_r$d_inv_treat_sel) ==
length(sel_treat_inds_shifted)
)
stopifnot(is.matrix(res_r$d_inv_treat_sel))
d_inv_treat_sel <- rbind(d_inv_treat_sel, res_r$d_inv_treat_sel)
if (nrow(d_inv_treat_sel) != last_ind_r) {
err_mes <- paste(
"nrow(d_inv_treat_sel) == last_ind_r is not TRUE. ",
"nrow(d_inv_treat_sel): ",
nrow(d_inv_treat_sel),
". num_treats: ",
num_treats,
". R: ",
R,
". first_inds: ",
paste(first_inds, collapse = ", "),
". r: ",
r,
". first_ind_r: ",
first_ind_r,
". last_ind_r: ",
last_ind_r,
". nrow(res_r$d_inv_treat_sel):",
nrow(res_r$d_inv_treat_sel)
)
stop(err_mes)
}
rm(res_r)
} else {
psi_r <- getPsiRUnfused(
first_ind_r,
last_ind_r,
sel_treat_inds_shifted,
gram_inv
)
}
stopifnot(length(psi_r) == length(sel_treat_inds_shifted))
psi_mat[, r] <- psi_r
# Get standard errors
## Variance of the treatment effect for cohort r is sigma^2 / (NT) x
## t(psi_r) %*% gram_inv %*% psi_r (see paper)
cohort_te_ses[r] <- sqrt(
sig_eps_sq *
as.numeric(
t(psi_r) %*%
gram_inv %*%
psi_r
) /
(N * T)
)
}
}
if (fused & calc_ses) {
if (nrow(d_inv_treat_sel) != num_treats) {
err_mes <- paste(
"nrow(d_inv_treat_sel) == num_treats is not TRUE. ",
"nrow(d_inv_treat_sel): ",
nrow(d_inv_treat_sel),
". num_treats: ",
num_treats,
". R: ",
R,
". first_inds: ",
paste(first_inds, collapse = ", "),
"."
)
stop(err_mes)
}
}
stopifnot(length(c_names) == R)
stopifnot(length(cohort_tes) == R)
if (calc_ses & all(!is.na(gram_inv))) {
stopifnot(length(cohort_te_ses) == R)
cohort_te_df <- data.frame(
c_names,
cohort_tes,
cohort_te_ses,
cohort_tes - stats::qnorm(1 - alpha / 2) * cohort_te_ses,
cohort_tes + stats::qnorm(1 - alpha / 2) * cohort_te_ses
)
names(cohort_te_ses) <- c_names
names(cohort_tes) <- c_names
} else {
cohort_te_df <- data.frame(
c_names,
cohort_tes,
rep(NA, R),
rep(NA, R),
rep(NA, R)
)
}
colnames(cohort_te_df) <- c(
"Cohort",
"Estimated TE",
"SE",
"ConfIntLow",
"ConfIntHigh"
)
if (fused) {
stopifnot(is.matrix(d_inv_treat_sel))
ret <- list(
cohort_te_df = cohort_te_df,
cohort_tes = cohort_tes,
cohort_te_ses = cohort_te_ses,
psi_mat = psi_mat,
gram_inv = gram_inv,
d_inv_treat_sel = d_inv_treat_sel,
calc_ses = calc_ses
)
} else {
ret <- list(
cohort_te_df = cohort_te_df,
cohort_tes = cohort_tes,
cohort_te_ses = cohort_te_ses,
psi_mat = psi_mat,
gram_inv = gram_inv,
calc_ses = calc_ses
)
}
return(ret)
}
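# A minimal sketch (hypothetical numbers, not run at load time) of the
# per-cohort variance and Wald interval computed above:
# Var(tau_hat_r) = sig_eps_sq * psi_r' gram_inv psi_r / (N * T).
if (FALSE) {
  sig_eps_sq <- 1
  N <- 100
  T <- 6
  alpha <- 0.05
  psi_r <- c(0.5, 0.5) # toy weight vector over two selected effects
  gram_inv <- matrix(c(2, 0.3, 0.3, 1.5), 2, 2) # toy Gram inverse
  tau_hat_r <- 0.8 # toy cohort ATT point estimate
  se_r <- sqrt(sig_eps_sq * as.numeric(t(psi_r) %*% gram_inv %*% psi_r) / (N * T))
  z <- stats::qnorm(1 - alpha / 2)
  c(estimate = tau_hat_r, lower = tau_hat_r - z * se_r, upper = tau_hat_r + z * se_r)
}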