################################################################################
## Ridge-augmented SCM
################################################################################
#' Ridge augmented weights (possibly with covariates)
#'
#' @param wide_data Output of `format_data`
#' @param synth_data Output of `format_synth`
#' @param Z Matrix of covariates, default is NULL
#' @param lambda Ridge hyper-parameter, if NULL use CV
#' @param ridge Include ridge or not
#' @param scm Include SCM or not
#' @param lambda_min_ratio Ratio of the smallest to largest lambda when tuning lambda values
#' @param n_lambda Number of lambdas to consider between the smallest and largest lambda value
#' @param lambda_max Initial (largest) lambda, if NULL sets it to be (1+norm(X_1-X_c))^2
#' @param holdout_length Length of conseuctive holdout period for when tuning lambdas
#' @param min_1se If TRUE, chooses the maximum lambda within 1 standard error of the lambda that minimizes the CV error, if FALSE chooses the optimal lambda; default TRUE
#' @param V V matrix for synth, default NULL
#' @param residualize Whether to residualize auxiliary covariates or balance directly, default TRUE
#' @param ... optional arguments for outcome model
#' @noRd
#' @return \itemize{
#' \item{"weights"}{Ridge ASCM weights}
#' \item{"l2_imbalance"}{Imbalance in pre-period outcomes, measured by the L2 norm}
#' \item{"scaled_l2_imbalance"}{L2 imbalance scaled by L2 imbalance of uniform weights}
#' \item{"mhat"}{Outcome model estimate (zero in this case)}
#' \item{"lambda"}{Value of the ridge hyperparameter}
#' \item{"ridge_mhat"}{The ridge regression predictions (for estimating the bias)}
#' \item{"synw"}{The synth weights(for estimating the bias)}
#' \item{"lambdas"}{List of lambda values evaluated to tune ridge regression}
#' \item{"lambda_errors"}{"The MSE associated with each lambda term in lambdas."}
#' \item{"lambda_errors_se"}{"The SE of the MSE associated with each lambda term in lambdas."}
#' }
fit_ridgeaug_formatted <- function(wide_data, synth_data,
Z=NULL, lambda=NULL, ridge=T, scm=T,
lambda_min_ratio = 1e-8, n_lambda = 20,
lambda_max = NULL,
holdout_length = 1, min_1se = T,
V = NULL,
residualize = FALSE, ...) {
extra_params = list(...)
if (length(extra_params) > 0) {
warning("Unused parameters in using ridge augmented weights: ", paste(names(extra_params), collapse = ", "))
}
X <- wide_data$X
y <- wide_data$y
trt <- wide_data$trt
lambda_errors <- NULL
lambda_errors_se <- NULL
lambdas <- NULL
## center outcomes
X_cent <- apply(X, 2, function(x) x - mean(x[trt==0]))
X_c <- X_cent[trt==0,,drop=FALSE]
X_1 <- matrix(colMeans(X_cent[trt==1,,drop=FALSE]), nrow=1)
y_cent <- apply(y, 2, function(x) x - mean(x[trt==0]))
y_c <- y_cent[trt==0,,drop=FALSE]
t0 <- ncol(X_c)
V <- make_V_matrix(t0, V)
# apply V matrix transformation
X_c <- X_c %*% V
X_1 <- X_1 %*% V
new_synth_data <- synth_data
## if there are auxiliary covariates, use them
if(!is.null(Z)) {
## center covariates
Z_cent <- apply(Z, 2, function(x) x - mean(x[trt==0]))
Z_c <- Z_cent[trt==0,,drop=FALSE]
Z_1 <- matrix(colMeans(Z_cent[trt==1,,drop=FALSE]), nrow=1)
if(residualize) {
## regress out covariates
Xc_hat <- Z_c %*% solve(t(Z_c) %*% Z_c) %*% t(Z_c) %*% X_c
X1_hat <- Z_1 %*% solve(t(Z_c) %*% Z_c) %*% t(Z_c) %*% X_c
# take residuals
res_t <- X_1 - X1_hat
res_c <- X_c - Xc_hat
X_c <- res_c
X_1 <- res_t
X_cent[trt == 0,] <- res_c
X_cent[trt == 1,] <- res_t
new_synth_data$Z1 <- t(res_t)
new_synth_data$X1 <- t(res_t)
new_synth_data$Z0 <- t(res_c)
new_synth_data$X0 <- t(res_c)
} else {
# standardize covariates to be on the same scale as the outcomes
sdz <- apply(Z_c, 2, sd)
sdx <- sd(X_c)
Z_c <- sdx * t(t(Z_c) / sdz)
Z_1 <- sdx * Z_1 / sdz
# concatenate
X_c <- cbind(X_c, Z_c)
X_1 <- cbind(X_1, Z_1)
new_synth_data$Z1 <- t(X_1)
new_synth_data$X1 <- t(X_1)
new_synth_data$Z0 <- t(X_c)
new_synth_data$X0 <- t(X_c)
V <- diag(ncol(X_c))
}
} else {
new_synth_data$Z1 <- t(X_1)
new_synth_data$X1 <- t(X_1)
new_synth_data$Z0 <- t(X_c)
new_synth_data$X0 <- t(X_c)
}
out <- fit_ridgeaug_inner(X_c, X_1, trt, new_synth_data,
lambda, ridge, scm,
lambda_min_ratio, n_lambda,
lambda_max,
holdout_length, min_1se)
weights <- out$weights
synw <- out$synw
lambda <- out$lambda
lambdas <- out$lambdas
lambda_errors <- out$lambda_errors
lambda_errors_se <- out$lambda_errors_se
# add back in covariate weights
if(!is.null(Z)) {
if(residualize) {
no_cov_weights <- weights
ridge_w <- t(t(Z_1) - t(Z_c) %*% weights) %*%
solve(t(Z_c) %*% Z_c) %*% t(Z_c)
weights <- weights + t(ridge_w)
} else {
no_cov_weights <- NULL
}
}
l2_imbalance <- sqrt(sum((synth_data$X0 %*% weights - synth_data$X1)^2))
## primal objective value scaled by least squares difference for mean
uni_w <- matrix(1/ncol(synth_data$X0), nrow=ncol(synth_data$X0), ncol=1)
unif_l2_imbalance <- sqrt(sum((synth_data$X0 %*% uni_w - synth_data$X1)^2))
scaled_l2_imabalance <- l2_imbalance / unif_l2_imbalance
## no outcome model
mhat <- matrix(0, nrow=nrow(y), ncol=ncol(y))
ridge_mhat <- mhat
if(!is.null(Z)) {
if(residualize) {
ridge_mhat <- ridge_mhat + Z_cent %*% solve(t(Z_c) %*% Z_c) %*%
t(Z_c) %*% y_c
## regress out covariates for outcomes
yc_hat <- ridge_mhat[trt == 0,, drop = F]
# take residuals of outcomes
y_c <- y_c - yc_hat
} else {
X_cent <- cbind(X_cent, Z_cent)
}
}
if(ridge) {
ridge_mhat <- ridge_mhat + X_cent %*% solve(t(X_c) %*% X_c +
lambda * diag(ncol(X_c))) %*%
t(X_c) %*% y_c
}
output <- list(weights = weights,
l2_imbalance = l2_imbalance,
scaled_l2_imbalance = scaled_l2_imabalance,
mhat = mhat,
lambda = lambda,
ridge_mhat = ridge_mhat,
synw = synw,
lambdas = lambdas,
lambda_errors = lambda_errors,
lambda_errors_se = lambda_errors_se)
if(!is.null(Z)) {
output$no_cov_weights <- no_cov_weights
z_l2_imbalance <- sqrt(sum((t(Z_c) %*% weights - t(Z_1))^2))
z_unif_l2_imbalance <- sqrt(sum((t(Z_c) %*% uni_w - t(Z_1))^2))
z_scaled_l2_imbalance <- z_l2_imbalance / z_unif_l2_imbalance
output$covariate_l2_imbalance <- z_l2_imbalance
output$scaled_covariate_l2_imbalance <- z_scaled_l2_imbalance
}
return(output)
}
#' Helper function to fit ridge ASCM
#' @param X_c Matrix of control lagged outcomes
#' @param X_1 Vector of treated leagged outcomes
#' @param trt Vector of treatment indicators
#' @param synth_data Output of `format_synth`
#' @param lambda Ridge hyper-parameter, if NULL use CV
#' @param ridge Include ridge or not
#' @param scm Include SCM or not
#' @param lambda_min_ratio Ratio of the smallest to largest lambda when tuning lambda values
#' @param n_lambda Number of lambdas to consider between the smallest and largest lambda value
#' @param lambda_max Initial (largest) lambda, if NULL sets it to be (1+norm(X_1-X_c))^2
#' @param holdout_length Length of conseuctive holdout period for when tuning lambdas
#' @param min_1se If TRUE, chooses the maximum lambda within 1 standard error of the lambda that minimizes the CV error, if FALSE chooses the optimal lambda; default TRUE
#' @noRd
#' @return \itemize{
#' \item{"weights"}{Ridge ASCM weights}
#' \item{"lambda"}{Value of the ridge hyperparameter}
#' \item{"synw"}{The synth weights(for estimating the bias)}
#' \item{"lambdas"}{List of lambda values evaluated to tune ridge regression}
#' \item{"lambda_errors"}{"The MSE associated with each lambda term in lambdas."}
#' \item{"lambda_errors_se"}{"The SE of the MSE associated with each lambda term in lambdas."}
#' }
fit_ridgeaug_inner <- function(X_c, X_1, trt, synth_data,
lambda, ridge, scm,
lambda_min_ratio, n_lambda,
lambda_max,
holdout_length, min_1se) {
lambda_errors <- NULL
lambda_errors_se <- NULL
lambdas <- NULL
## if SCM fit scm
if(scm) {
syn <- fit_synth_formatted(synth_data)$weights
} else {
## else use uniform weights
syn <- rep(1 / sum(trt == 0), sum(trt == 0))
}
if(ridge) {
if(is.null(lambda)) {
cv_out <- cv_lambda(X_c, X_1, synth_data, trt, holdout_length, scm,
lambda_max, lambda_min_ratio, n_lambda, min_1se)
lambda <- cv_out$lambda
lambda_errors <- cv_out$lambda_errors
lambda_errors_se <- cv_out$lambda_errors_se
lambdas <- cv_out$lambdas
}
# get ridge weights
ridge_w <- t(t(X_1) - t(X_c) %*% syn) %*%
solve(t(X_c) %*% X_c + lambda * diag(ncol(X_c))) %*% t(X_c)
} else {
ridge_w <- matrix(0, ncol = sum(trt == 0), nrow=1)
}
## combine weights
weights <- syn + t(ridge_w)
return(list(weights = weights,
synw = syn,
lambda = lambda,
lambdas = lambdas,
lambda_errors = lambda_errors,
lambda_errors_se = lambda_errors_se))
}
#' Choose max lambda as largest eigenvalue of control X
#' @param X_c matrix of control lagged outcomes
#' @noRd
#' @return max lambda
get_lambda_max <- function(X_c) {
svd(X_c)$d[1] ^ 2
}
#' Create list of lambdas
#' @param lambda_min_ratio Ratio of the smallest to largest lambda when tuning lambda values
#' @param n_lambda Number of lambdas to consider between the smallest and largest lambda value
#' @param lambda_max Initial (largest) lambda, if NULL sets it to be (1+norm(X_1-X_c))^2
#' @noRd
#' @return List of lambdas
create_lambda_list <- function(lambda_max, lambda_min_ratio, n_lambda) {
scaler <- (lambda_min_ratio) ^ (1/n_lambda)
lambdas <- lambda_max * (scaler ^ (seq(0:n_lambda) - 1))
return(lambdas)
}
#' Choose either the lambda that minimizes CV MSE or largest lambda within 1 se of min
#' @param lambdas list of lambdas
#' @param lambda_errors The MSE associated with each lambda term in lambdas.
#' @param lambda_errors_se The SE of the MSE associated with each lambda
#' @param min_1se If TRUE, chooses the maximum lambda within 1 standard error of the lambda that minimizes the CV error, if FALSE chooses the optimal lambda; default TRUE
#' @noRd
#' @return optimal lambda
choose_lambda <- function(lambdas, lambda_errors, lambda_errors_se, min_1se) {
# lambda with smallest error
min_idx <- which.min(lambda_errors)
min_error <- lambda_errors[min_idx]
min_se <- lambda_errors_se[min_idx]
lambda_min <- lambdas[min_idx]
# max lambda with error within one se of min
lambda_1se <- max(lambdas[lambda_errors <= min_error + min_se])
return(if(min_1se) lambda_1se else lambda_min)
}
#' Choose best lambda with CV
#' @param X_c Matrix of control lagged outcomes
#' @param X_1 Vector of treated leagged outcomes
#' @param synth_data Output of `format_synth`
#' @param trt Vector of treatment indicators
#' @param holdout_length Length of conseuctive holdout period for when tuning lambdas
#' @param scm Include SCM or not
#' @param lambda_max Initial (largest) lambda, if NULL sets it to be (1+norm(X_1-X_c))^2
#' @param lambda_min_ratio Ratio of the smallest to largest lambda when tuning lambda values
#' @param n_lambda Number of lambdas to consider between the smallest and largest lambda value
#' @param min_1se If TRUE, chooses the maximum lambda within 1 standard error of the lambda
#' @noRd
#' @return \itemize{
#' \item{"lambda"}{Value of the ridge hyperparameter}
#' \item{"lambdas"}{List of lambda values evaluated to tune ridge regression}
#' \item{"lambda_errors"}{"The MSE associated with each lambda term in lambdas."}
#' \item{"lambda_errors_se"}{"The SE of the MSE associated with each lambda term}
#' }
cv_lambda <- function(X_c, X_1, synth_data, trt, holdout_length, scm,
lambda_max, lambda_min_ratio, n_lambda, min_1se) {
if(is.null(lambda_max)) {
lambda_max <- get_lambda_max(X_c)
}
lambdas <- create_lambda_list(lambda_max, lambda_min_ratio, n_lambda)
lambda_out <- get_lambda_errors(lambdas, X_c, X_1,
synth_data, trt,
holdout_length, scm)
lambda_errors <- lambda_out$lambda_errors
lambda_errors_se <- lambda_out$lambda_errors_se
lambda <- choose_lambda(lambdas, lambda_errors, lambda_errors_se, min_1se)
return(list(lambda = lambda, lambda_errors = lambda_errors,
lambda_errors_se = lambda_errors_se, lambdas = lambdas))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.