################################################################################
## Code for inference
################################################################################
######## RANDOMIZATION INFERENCE
est_att <- function(metadata, fitfunc, trt_unit) {
#' Fit weights and get att estimates
#' @param metadata with treatment time
#' @param fitfunc Partially applied fitting function which takes in
#' the number of the treated unit
#' @param trt_unit Number of the treated unit
#'
#' @return Dataframe with ATT estimates
## fit the method
fit <- fitfunc(trt_unit)
## compute the att
suppressMessages(suppressWarnings(
att <- compute_att(fit$outcomes, metadata, trt_unit=trt_unit)
))
return(att %>% select(time, att) %>% mutate(unit = trt_unit))
}
compute_stat <- function(att, pretimes, posttimes, statfunc) {
#' Compute a test statistic statfunc({Y_t | t in times})
#' @param att ATT estimates in a dataframe
#' @param pretimes Vector of times in pre-period
#' @param posttimes Vector of times in post-period
#' @param statfunc Function to compute test stat, takes in pre and post period
#'
#' @return Value of test statistic
## limit to times
post <- (att %>% filter(time %in% posttimes) %>% select(att))$att
pre <- (att %>% filter(time %in% pretimes) %>% select(att))$att
## cat(pre, "\n")
## cat(post, "\n")
## cat("\n")
## compute test stat
tstat <- statfunc(pre, post)
return(tstat)
}
firpo_inf <- function(metadata, fitfunc, units, trt_unit,
pretimes, posttimes, statfuncs, include_treat) {
#' Get the permutation distribution of the test stat
#' assuming equal probability of treatment
#' @param metadata with treatment time
#' @param fitfunc Partially applied fitting function which takes in
#' the number of the treated unit
#' @param units Numbers of the units to permute treatment around
#' @param trt_unit Treated unit
#' @param pretimes Vector of times in pre-period
#' @param posttimes Vector of times in post-period
#' @param statfuncs Function to compute test stats
#' @param include_treat Whether to include the treated unit
#'
#' @return att estimates, test statistics, p-values
## compute atts
atts <- bind_rows(lapply(units, function(u) est_att(metadata, fitfunc, u)))
## compute test statistics
stats <- lapply(statfuncs,
function(statfunc)
by(atts, atts$unit,
function(df)
compute_stat(data.frame(df),
pretimes, posttimes,
statfunc)))
stats <- lapply(stats, function(s)
data.frame(list(unit=as.numeric(names(s)),
stat=sapply(s, function(x) x))))
## compute the "p value"
pvals <- lapply(stats,
function(s) {
est <- mean(s[s$unit == trt_unit, 2])
pval <- sum(s$stat >= est) / dim(s)[1]
})
return(list(atts=atts, stats=stats, pvals=pvals))
}
firpo_inf_synth <- function(outcomes, metadata, trt_unit,
pos=FALSE,
statfuncs=c(rmse_ratio,mean_abs, abs_tstat),
include_treat=FALSE,
cols=list(unit="unit", time="time",
outcome="outcome", treated="treated")) {
#' Get the permutation distribution of SC estimate test statistics
#' assuming equal probability of treatment
#' @param outcomes Tidy dataframe with the outcomes and meta data
#' @param metadata Dataframe of metadata
#' @param trt_unit Unit that is treated (target for regression), default: 0
#' @param pos Whether to only consider units with positive weights, default False
#' @param statfuncs Function to compute test stats
#' @param include_treat Whether to include the treated unit in randomization distribution
#' @param cols Column names corresponding to the units,
#' time variable, outcome, and treated indicator
#'
#' @return att estimates, test statistics, p-values
#' @export
synfunc <- function(u) {
if(!include_treat) {
if(u == trt_unit) {
get_synth(outcomes,
metadata, u, cols=cols)
} else {
get_synth(outcomes[outcomes[cols$unit] != trt_unit,],
metadata, u, cols=cols)
}
} else {
get_synth(outcomes,
metadata, u, cols=cols)
}
}
## ## create a fitting function for synth
## synfunc <- function(u) {
## get_synth(outcomes,
## metadata, u, cols=cols)
## }
## fit synth once
sc <- get_synth(outcomes, metadata, trt_unit, cols)
## use all pre and post periods and units
times <- unique(sc$outcomes$time)
pretimes <- times[which(times < metadata$t_int)]
posttimes <- times[which(times >= metadata$t_int)]
units <- unique(sc$outcomes$unit)
if(pos) {
ctrls <- units[which(units != trt_unit)]
ctrls <- ctrls[which(round(sc$weights, 3) > 0)]
units <- c(trt_unit, ctrls)
}
## permuate treatment label
inf <- firpo_inf(metadata, synfunc, units, trt_unit,
pretimes, posttimes, statfuncs, include_treat)
inf$sc <- sc
return(inf)
}
wpermtest <- function(metadata, fitfunc, units, weights, trt_unit,
pretimes, posttimes, statfuncs) {
#' Get the permutation distribution of the test stat
#' assuming equal probability of treatment
#' @param metadata with treatment time
#' @param fitfunc Partially applied fitting function which takes in
#' the number of the treated unit
#' @param units Numbers of the units to permute treatment around
#' @param weights Weights for the permutation distribution
#' @param trt_unit Treated unit
#' @param pretimes Vector of times in pre-period
#' @param posttimes Vector of times in post-period
#' @param statfuncs Function to compute test stats
#'
#' @return att estimates, test statistics, p-values
## compute atts
atts <- bind_rows(lapply(units, function(u) est_att(metadata, fitfunc, u)))
## compute test statistics
stats <- lapply(statfuncs,
function(statfunc)
by(atts, atts$unit,
function(df)
compute_stat(data.frame(df),
pretimes, posttimes,
statfunc)))
stats <- lapply(stats, function(s)
data.frame(list(unit=as.numeric(names(s)),
stat=sapply(s, function(x) x))))
## estimate marginal probabilities
n_sim <- 10000
sims <- sapply(1:n_sim,
function(x)
sample(units, 1, prob=weights))
probs <-sapply(units, function(x) sum(sims == x)) / n_sim
## compute the "p value"
pvals <- lapply(stats,
function(s) {
est <- mean(s[s$unit == trt_unit, 2])
pval <- sum(probs * (s$stat >= est))
})
return(list(atts=atts, stats=stats, pvals=pvals))
}
wpermtest_sc <- function(outcomes, metadata, trt_unit,
statfuncs=c(rmse_ratio, mean_abs, abs_tstat),
cols=list(unit="unit", time="time",
outcome="outcome", treated="treated")) {
#' Get the weighted permutation distribution of SC estimate test statistics
#' estimating p-scores with logit-link synth
#' @param outcomes Tidy dataframe with the outcomes and meta data
#' @param metadata Dataframe of metadata
#' @param trt_unit Unit that is treated (target for regression), default: 0
#' @param statfuncs Function to compute test stats
#' @param cols Column names corresponding to the units,
#' time variable, outcome, and treated indicator
#'
#' @return att estimates, test statistics, p-values
#' @export
ipw <- format_ipw(outcomes, metadata, cols=cols)
## create a fitting function for synth
synfunc <- function(u) {
if(u == trt_unit) {
get_synth(outcomes,
metadata, u, cols=cols)
} else {
get_synth(outcomes[outcomes[cols$unit] != trt_unit,],
metadata, u, cols=cols)
}
}
## fit synth once
sc <- get_synth(outcomes, metadata, trt_unit, cols)
## get estimated propensity scores
suppressMessages(ent <- get_entropy(outcomes, metadata, trt_unit, eps=sc$primal_obj,
cols=cols))
pscores <- 1 / (1 + exp(-ipw$X %*% ent$dual))
## use all pre and post periods and units
times <- unique(sc$outcomes$time)
pretimes <- times[which(times < metadata$t_int)]
posttimes <- times[which(times >= metadata$t_int)]
units <- unique(sc$outcomes$unit)
odds <- exp(ipw$X %*% ent$dual)
## permute treatment label
inf <- wpermtest(metadata, synfunc, units, pscores, trt_unit,
pretimes, posttimes, statfuncs)
inf$ent <- ent
inf$pscores <- pscores
return(inf)
}
wpermtest2 <- function(metadata, fitfunc, units, weights, trt_unit,
pretimes, posttimes, statfuncs) {
#' Get the permutation distribution of the test stat
#' assuming equal probability of treatment
#' @param metadata with treatment time
#' @param fitfunc Partially applied fitting function which takes in
#' the number of the treated unit
#' @param units Numbers of the units to permute treatment around
#' @param weights Weights for the permutation distribution
#' @param trt_unit Treated unit
#' @param pretimes Vector of times in pre-period
#' @param posttimes Vector of times in post-period
#' @param statfuncs Function to compute test stats
#'
#' @return att estimates, test statistics, p-values
## compute atts
atts <- bind_rows(lapply(units, function(u) est_att(metadata, fitfunc, u)))
## compute test statistics
stats <- lapply(statfuncs,
function(statfunc)
by(atts, atts$unit,
function(df)
compute_stat(data.frame(df),
pretimes, posttimes,
statfunc)))
stats <- lapply(stats, function(s)
data.frame(list(unit=as.numeric(names(s)),
stat=sapply(s, function(x) x))))
## compute the "p value"
pvals <- lapply(stats,
function(s) {
est <- mean(s[s$unit == trt_unit, 2])
if(max(s[s$unit != trt_unit,]$stat) < est) {
pval <- max(weights)
} else {
pval <- sum(weights * (s[s$unit != trt_unit,]$stat >= est))
}
})
return(list(atts=atts, stats=stats, pvals=pvals))
}
wpermtest2_sc <- function(outcomes, metadata, trt_unit,
statfuncs=c(rmse_ratio, mean_abs, abs_tstat),
cols=list(unit="unit", time="time",
outcome="outcome", treated="treated")) {
#' Get the weighted permutation distribution of SC estimate test statistics
#' estimating p-scores with logit-link synth
#' @param outcomes Tidy dataframe with the outcomes and meta data
#' @param metadata Dataframe of metadata
#' @param trt_unit Unit that is treated (target for regression), default: 0
#' @param statfuncs Function to compute test stats
#' @param cols Column names corresponding to the units,
#' time variable, outcome, and treated indicator
#'
#' @return att estimates, test statistics, p-values
#' @export
ipw <- format_ipw(outcomes, metadata, cols=cols)
## create a fitting function for synth
synfunc <- function(u) {
if(u == trt_unit) {
get_synth(outcomes,
metadata, u, cols=cols)
} else {
get_synth(outcomes[outcomes[cols$unit] != trt_unit,],
metadata, u, cols=cols)
}
}
## fit synth once
sc <- get_synth(outcomes, metadata, trt_unit, cols)
## get estimated propensity scores
suppressMessages(ent <- get_entropy(outcomes, metadata, trt_unit, eps=sc$primal_obj,
cols=cols))
pscores <- 1 / (1 + exp(-ipw$X %*% ent$dual))
## use all pre and post periods and units
times <- unique(sc$outcomes$time)
pretimes <- times[which(times < metadata$t_int)]
posttimes <- times[which(times >= metadata$t_int)]
units <- unique(sc$outcomes$unit)
ctrls <- units[which(units != trt_unit)]
posunits <- which(round(sc$weights, 3) > 0)
ctrls <- ctrls[posunits]
units <- c(trt_unit, ctrls)
odds <- exp(ipw$X %*% ent$dual)
## permute treatment label
inf <- wpermtest2(metadata, synfunc, units, ent$weights[posunits], trt_unit,
pretimes, posttimes, statfuncs)
inf$ent <- ent
inf$pscores <- pscores
return(inf)
}
importance_test <- function(metadata, fitfunc, units, probs, trt_unit,
pretimes, posttimes, statfuncs, n_sim) {
#' Estimate p-value with non-uniform probabilities of treatment with
#' importance sampling
#' @param metadata with treatment time
#' @param fitfunc Partially applied fitting function which takes in
#' the number of the treated unit
#' @param units Numbers of the units to permute treatment around
#' @param probs Propensity scores
#' @param trt_unit Treated unit
#' @param pretimes Vector of times in pre-period
#' @param posttimes Vector of times in post-period
#' @param statfuncs Function to compute test stats
#' @param n_sim Number of montecarlo samples
#'
#' @return att estimates, test statistics, p-values
## compute atts
atts <- bind_rows(lapply(units, function(u) est_att(metadata, fitfunc, u)))
## compute test statistics
stats <- lapply(statfuncs,
function(statfunc)
by(atts, atts$unit,
function(df)
compute_stat(data.frame(df),
pretimes, posttimes,
statfunc)))
stats <- lapply(stats, function(s) sapply(s, function(x) x))
## treat one unit, compute test statistic, reweight by propensities
sims <- sapply(1:n_sim,
function(x) {
tr <- sample(length(units[-trt_unit]), 1)
tvec <- numeric(length(probs))
tvec[tr] <- 1
prob <- tvec * log(probs) + (1-tvec) * log(1-probs)
prob <- exp(sum(prob - max(prob)))
c(tr, prob)
})
## normalize probabilities
sims <- t(sims)
sims[,2] <- sims[,2] / sum(sims[,2])
## compute the p value
pvals <- lapply(stats,
function(stat) {
notrt <- stat[-trt_unit]
sum((notrt[sims[,1]] > stat[trt_unit]) * sims[,2])
})
return(list(atts=atts, stats=stats, pvals=pvals))
}
importance_test_sc <- function(outcomes, metadata, trt_unit,
statfuncs=c(rmse_ratio, mean_abs, abs_tstat),
n_sim=1000,
cols=list(unit="unit", time="time",
outcome="outcome", treated="treated")) {
#' Get the weighted permutation distribution of SC estimate test statistics
#' estimating p-scores with logit-link synth
#' @param outcomes Tidy dataframe with the outcomes and meta data
#' @param metadata Dataframe of metadata
#' @param trt_unit Unit that is treated (target for regression), default: 0
#' @param statfuncs Function to compute test stats
#' @param n_sim Number of montecarlo samples
#' @param cols Column names corresponding to the units,
#' time variable, outcome, and treated indicator
#'
#' @return att estimates, test statistics, p-values
#' @export
ipw <- format_ipw(outcomes, metadata, cols=cols)
## create a fitting function for synth
synfunc <- function(u) {
if(u == trt_unit) {
get_synth(outcomes,
metadata, u, cols=cols)
} else {
get_synth(outcomes[outcomes[cols$unit] != trt_unit,],
metadata, u, cols=cols)
}
}
## fit synth once
sc <- get_synth(outcomes, metadata, trt_unit, cols)
## get estimated propensity scores
probs <- sc$weights / (1 + sc$weights)
## use all pre and post periods and units
times <- unique(sc$outcomes$time)
pretimes <- times[which(times < metadata$t_int)]
posttimes <- times[which(times >= metadata$t_int)]
units <- unique(sc$outcomes$unit)
## permute treatment label
inf <- importance_test(metadata, synfunc, units, probs, trt_unit,
pretimes, posttimes, statfuncs, n_sim)
return(inf)
}
weighted_test <- function(metadata, fitfunc, units, probs, trt_unit,
pretimes, posttimes, statfuncs) {
#' Estimate p-value with non-uniform probabilities of treatment with
#' SCM weights
#' @param metadata with treatment time
#' @param fitfunc Partially applied fitting function which takes in
#' the number of the treated unit
#' @param units Numbers of the units to permute treatment around
#' @param probs Propensity scores
#' @param trt_unit Treated unit
#' @param pretimes Vector of times in pre-period
#' @param posttimes Vector of times in post-period
#' @param statfuncs Function to compute test stats
#'
#' @return att estimates, test statistics, p-values
## compute atts
atts <- bind_rows(lapply(units, function(u) est_att(metadata, fitfunc, u)))
## compute test statistics
stats <- lapply(statfuncs,
function(statfunc)
by(atts, atts$unit,
function(df)
compute_stat(data.frame(df),
pretimes, posttimes,
statfunc)))
stats <- lapply(stats, function(s) sapply(s, function(x) x))
## treat one unit, compute test statistic, reweight by propensities
## compute and normalize probabilities
probs <- sapply(1:length(probs),
function(i) probs[i] * prod(1-probs[-i]))
probs <- probs / sum(probs)
## compute the p value
pvals <- lapply(stats,
function(stat) {
if(length(probs) == length(units) - 1) {
notrt <- stat[-trt_unit]
sum((notrt >= stat[trt_unit]) * probs)
} else {
sum((stat >= stat[trt_unit]) * probs)
}
})
return(list(atts=atts, stats=stats, pvals=pvals, probs=probs))
}
valid_test <- function(metadata, fitfunc, units, probs, trt_unit,
pretimes, posttimes, statfuncs) {
#' Estimate p-value with non-uniform probabilities of treatment with
#' SCM weights
#' @param metadata with treatment time
#' @param fitfunc Partially applied fitting function which takes in
#' the number of the treated unit
#' @param units Numbers of the units to permute treatment around
#' @param probs Propensity scores
#' @param trt_unit Treated unit
#' @param pretimes Vector of times in pre-period
#' @param posttimes Vector of times in post-period
#' @param statfuncs Function to compute test stats
#'
#' @return att estimates, test statistics, p-values
#' @export
## compute atts
atts <- bind_rows(lapply(units, function(u) est_att(metadata, fitfunc, u)))
## compute test statistics
stats <- lapply(statfuncs,
function(statfunc)
by(atts, atts$unit,
function(df)
compute_stat(data.frame(df),
pretimes, posttimes,
statfunc)))
stats <- lapply(stats, function(s) sapply(s, function(x) x))
## treat one unit, compute test statistic, reweight by propensities
## compute and normalize probabilities
probs <- sapply(1:length(probs),
function(i) probs[i] * prod(1-probs[-i]))
probs <- probs / sum(probs)
## compute the p value
pvals <- lapply(stats,
function(stat) {
sum((stat >= stat[trt_unit]) * probs)
})
return(list(atts=atts, stats=stats, pvals=pvals, probs=probs))
}
weighted_test_sc <- function(outcomes, metadata, trt_unit,
statfuncs=c(rmse_ratio, mean_abs, abs_tstat),
cols=list(unit="unit", time="time",
outcome="outcome", treated="treated")) {
#' Get the weighted permutation distribution of SC estimate test statistics
#' estimating p-scores with logit-link synth
#' @param outcomes Tidy dataframe with the outcomes and meta data
#' @param metadata Dataframe of metadata
#' @param trt_unit Unit that is treated (target for regression), default: 0
#' @param statfuncs Function to compute test stats
#' @param n_sim Number of montecarlo samples
#' @param cols Column names corresponding to the units,
#' time variable, outcome, and treated indicator
#'
#' @return att estimates, test statistics, p-values
#' @export
ipw <- format_ipw(outcomes, metadata, cols=cols)
## create a fitting function for synth
synfunc <- function(u) {
if(u == trt_unit) {
get_synth(outcomes,
metadata, u, cols=cols)
} else {
get_synth(outcomes[outcomes[cols$unit] != trt_unit,],
metadata, u, cols=cols)
}
}
## fit synth once
sc <- get_synth(outcomes, metadata, trt_unit, cols)
## get estimated propensity scores
probs <- sc$weights / (1 + sc$weights)
units <- unique(sc$outcomes$unit)
times <- unique(sc$outcomes$time)
pretimes <- times[which(times < metadata$t_int)]
posttimes <- times[which(times >= metadata$t_int)]
## permute treatment label
inf <- weighted_test(metadata, synfunc, units, probs, trt_unit,
pretimes, posttimes, statfuncs)
return(inf)
}
weighted_test_bal <- function(outcomes, metadata, trt_unit, hyperparam,
link=c("logit", "linear", "pos-linear"),
regularizer=c(NULL, "l1", "l2", "ridge", "linf"),
normalized=TRUE,
statfuncs=c(rmse_ratio, mean_abs, abs_tstat),
cols=list(unit="unit", time="time",
outcome="outcome", treated="treated")) {
#' Get the weighted permutation distribution of SC estimate test statistics
#' estimating p-scores with logit-link synth
#' @param outcomes Tidy dataframe with the outcomes and meta data
#' @param metadata Dataframe of metadata
#' @param trt_unit Unit that is treated (target for regression), default: 0
#' @param statfuncs Function to compute test stats
#' @param n_sim Number of montecarlo samples
#' @param cols Column names corresponding to the units,
#' time variable, outcome, and treated indicator
#'
#' @return att estimates, test statistics, p-values
#' @export
ipw <- format_ipw(outcomes, metadata, cols=cols)
## create a fitting function for synth
synfunc <- function(u) {
if(u == trt_unit) {
get_synth(outcomes,
metadata, u, cols=cols)
} else {
get_synth(outcomes[outcomes[cols$unit] != trt_unit,],
metadata, u, cols=cols)
}
}
## fit synth once
bal <- get_balancer(outcomes, metadata, trt_unit, hyperparam,
link, regularizer, normalized, cols=cols)
## get back the intercept
alpha <- -logsumexp(ipw$X[ipw$trt==0,] %*% bal$dual)
## get estimated p scores
probs <- 1 / (1 + exp(-alpha - ipw$X %*% bal$dual))
units <- unique(bal$outcomes$unit)
times <- unique(bal$outcomes$time)
pretimes <- times[which(times < metadata$t_int)]
posttimes <- times[which(times >= metadata$t_int)]
## permute treatment label
inf <- weighted_test(metadata, synfunc, units, probs, trt_unit,
pretimes, posttimes, statfuncs)
return(inf)
}
#' test stat is ratio of RMSEs in post and pre
#' @export
rmse_ratio <- function(pre, post) {
sqrt(mean(post^2)) /
sqrt(mean(pre^2))
}
#' mean absolute difference
#' @export
mean_abs <- function(pre, post) {
mean(abs(post))
}
#' absolute value of the t statistic
#' @export
abs_tstat <- function(pre, post) {
abs( mean(post) / (sd(post) / sqrt(length(post))))
}
## test stat that is diff in diff
diff_n_diff <- function(pre, post) {
return(mean(post) - mean(pre))
}
## test stat which is mean of absolute differences in post - pre
abs_diff <- function(pre, post) {
return(mean(post) - mean(abs(pre)))
}
## test stat which is difference in means / rmse in pre period
norm_diff <- function(pre, post) {
return(mean(post) / sqrt(mean(pre^2)))
}
## test stat which is the difference in mse in post and pre
mse_diff <- function(pre, post) {
return(mean(post^2) - mean(pre^2))
}
## test stat is last value
last <- function(pre, post) {
return(post[length(post)])
}
### Chernozhukov method
#' Weighted Least Squares Standard Errors
#' @param outcomes Tidy dataframe with the outcomes and meta data
#' @param metadata Dataframe of metadata
#' @param ns Number of samples from the permutation distribution to draw
#' @param trt_unit Treated unit
#' @param cols Column names corresponding to the units,
#' time variable, outcome, and treated indicator
#' @param order of test statistic
#'
#' @return outcomes with additional synthetic control added and weights
#' @export
cherno_test <- function(outcomes, metadata, ns=1000, q=c(2,1), trt_unit=1,
permtype=c("iid", "block"),
cols=list(unit="unit", time="time",
outcome="outcome", treated="treated")) {
## format data for synth
syn_data <- format_data(outcomes, metadata, trt_unit, cols=cols)
## pre and post outcomes
trtmat <- syn_data$synth_data$Y0plot
ctrlmat <- syn_data$synth_data$Y1plot
t0 <- nrow(syn_data$synth_data$Z0)
t_final <- nrow(syn_data$synth_data$Y0plot)
if(permtype == "iid") {
teststats <- matrix(0, nrow=ns, ncol=length(q))
for(i in 1:ns) {
## sample from permutation distribution
reorder <- sample(1:t_final, t_final)
## fit synth with reordered time periods
new_synth_data <- syn_data
new_synth_data$synth_data$X0 <- syn_data$synth_data$Y0plot[reorder,,drop=FALSE][1:t0,]
new_synth_data$synth_data$X1 <- syn_data$synth_data$Y1plot[reorder,,drop=FALSE][1:t0,]
syn <- fit_synth_formatted(new_synth_data)
## get treatment effect estimates
att <- syn_data$synth_data$Y1plot[reorder,,drop=FALSE][(t0+1):t_final,,drop=FALSE] -
syn_data$synth_data$Y0plot[reorder,,drop=FALSE][(t0+1):t_final,,drop=FALSE] %*% syn$weights
teststats[i,] <- sapply(1:length(q),
function(j) mean(abs(att)^q[j])^(1/q[j]))
}
} else if(permtype=="block") {
teststats <- matrix(0, nrow=t_final, ncol=length(q))
for(i in 1:t_final) {
## increment time by one step and wrap
reorder <- (1:t_final + i) %% t_final + 1
## fit synth with reordered time periods
new_synth_data <- syn_data
new_synth_data$synth_data$X0 <- syn_data$synth_data$Y0plot[reorder,,drop=FALSE][1:t0,]
new_synth_data$synth_data$X1 <- syn_data$synth_data$Y1plot[reorder,,drop=FALSE][1:t0,]
syn <- fit_synth_formatted(new_synth_data)
## get treatment effect estimates
att <- syn_data$synth_data$Y1plot[reorder,,drop=FALSE][(t0+1):t_final,,drop=FALSE] -
syn_data$synth_data$Y0plot[reorder,,drop=FALSE][(t0+1):t_final,,drop=FALSE] %*% syn$weights
teststats[i,] <- sapply(1:length(q),
function(j) mean(abs(att)^q[j])^(1/q[j]))
}
} else {
stop("permtype must be one of c('iid', 'block')")
}
## compute test stat for actual data
syn <- fit_synth_formatted(syn_data)
real_att <- syn_data$synth_data$Y1plot[(t0+1):t_final,,drop=FALSE] -
syn_data$synth_data$Y0plot[(t0+1):t_final,,drop=FALSE] %*% syn$weights
real_teststat <- sapply(1:length(q),
function(j) mean(abs(real_att)^q[j])^(1/q[j]))
pvals <- sapply(1:length(q), function(i) mean(teststats[,i] >= real_teststat[i]))
return(pvals)
}
##### SAMPLING INFERENCE
standard_error_ <- function(metadata, fitfunc, units, trt_unit,
posttimes, weights=NULL) {
#' Internal function to estimate the variance of the SC estimate
#' @param metadata with treatment time
#' @param fitfunc Partially applied fitting function which takes in
#' the number of the treated unit
#' @param units Numbers of the control units to include when estimating the se
#' @param trt_unit Treated unit
#' @param posttimes Vector of times in post-period
#' @param weights Weights to use in SC estimate, default: NULL (Doudchenko-Imbens 2017)
#'
#' @return att estimates, test statistics, p-values
## compute atts
atts <- lapply(c(trt_unit, units), function(u) est_att(metadata, fitfunc, u))
trt_att <- est_att(metadata, fitfunc, trt_unit)
## compute error for each time period and each unit
errs <- sapply(atts[-1], function(att) att[att$time %in% posttimes,]$att)
## get standard error estimates
if(is.null(weights)) {
comb_func <- function(x) sqrt(sum(x^2)) / length(units)
} else {
## 4 different estimators
## second moment
comb_func <- function(x) sqrt(mean(x^2)) / sqrt(length(units))
ses1 <- c(rep(NA, dim(trt_att)[1] - length(posttimes)), apply(errs, 1, comb_func))
att1 <- trt_att$att
## weighted second moment
comb_func <- function(x) sqrt(sum(x^2 * weights)) / sqrt(sum(weights)^2/sum(weights^2))
## comb_func <- function(x) sqrt(sum(x^2 * weights^2)) /
## sqrt(sum(weights^2)) * sqrt(sum(weights^2))
ses2 <- c(rep(NA, dim(trt_att)[1] - length(posttimes)), apply(errs, 1, comb_func))
att2 <- trt_att$att
## unweighted variance
## comb_func <- function(x) sqrt(mean((x - mean(x))^2)) / sqrt(length(units))
comb_func <- function(x) sqrt(mean(x^2) - mean(x)^2) / sqrt(length(units))
ses3 <- c(rep(NA, dim(trt_att)[1] - length(posttimes)), apply(errs, 1, comb_func))
## bias adjustment
att3 <- trt_att$att - c(rep(0, dim(trt_att)[1] - length(posttimes)),
apply(errs, 1, mean))
## weighted variance
## comb_func <- function(x) sqrt(sum(x^2 * weights^2) / sum(weights^2) -
## sum(x * weights)^2 / sum(weights)) * sqrt(sum(weights^2))
## comb_func <- function(x) sqrt(sum((x-sum(x*weights))^2 * weights)) / sqrt(sum(weights)^2/sum(weights^2))
comb_func <- function(x) sqrt(sum(weights * x^2) / sum(weights) -
sum(weights * x)^2 / sum(weights)^2) / sqrt(sum(weights)^2/sum(weights^2))
ses4 <- c(rep(NA, dim(trt_att)[1] - length(posttimes)), apply(errs, 1, comb_func))
## bias adjustment
att4 <- trt_att$att - c(rep(0, dim(trt_att)[1] - length(posttimes)),
apply(errs, 1, function(x) sum(x * weights) / sum(weights)))
ses <- c(ses1, ses2, ses3, ses4)
weighted <- rep(c(rep(FALSE, length(ses1)), rep(TRUE, length(ses1))), 2)
centered = c(rep(FALSE, 2 * length(ses1)), rep(TRUE, 2 * length(ses1)))
}
## combine into one dataframe
trt_att <- rbind(trt_att, trt_att, trt_att, trt_att)
trt_att$att <- c(att1, att2, att3, att4)
trt_att$se <- ses
trt_att$weighted <- weighted
trt_att$centered <- centered
return(trt_att)
}
#' Estimate the variance of the SC estimate
#' @param outcomes Tidy dataframe with the outcomes and meta data
#' @param metadata Dataframe of metadata
#' @param trt_unit Treated unit
#' @param use_weights Whether to use weights in se estimate, default: FALSE
#' @param cols Column names corresponding to the units,
#' time variable, outcome, and treated indicator
#'
#' @return att estimates, test statistics, p-values
#' @export
di_standard_error <- function(outcomes, metadata, trt_unit=1, use_weights=FALSE,
cols=list(unit="unit", time="time",
outcome="outcome", treated="treated")) {
## format data once
data_out <- format_data(outcomes, metadata, trt_unit, cols=cols)
n_c <- dim(data_out$synth_data$Z0)[2]
t0 <- dim(data_out$synth_data$Z0)[1]
t_final <- dim(data_out$synth_data$Y0plot)[1]
errs <- matrix(0, n_c, t_final - t0)
## iterate over control units
for(i in 1:n_c) {
new_data_out <- data_out
new_data_out$synth_data$Z0 <- data_out$synth_data$Z0[, -i]
new_data_out$synth_data$Y0plot <- data_out$synth_data$Y0plot[, -i]
new_data_out$synth_data$Z1 <- data_out$synth_data$Z0[, i, drop=FALSE]
new_data_out$synth_data$Y1plot <- data_out$synth_data$Y0plot[, i, drop=FALSE]
## get synth weights
syn <- fit_synth_formatted(new_data_out)
## estimate satt
errs[i,] <- new_data_out$synth_data$Y1plot[(t0+1):t_final,] -
new_data_out$synth_data$Y0plot[(t0+1):t_final,] %*% syn$weights
}
## att on actual sample
syn <- fit_synth_formatted(data_out)
att <- as.numeric(data_out$synth_data$Y1plot -
data_out$synth_data$Y0plot %*% syn$weights)
## standard errors
if(use_weights) {
se <- sqrt(t(errs^2) %*% syn$weights)
} else {
se <- sqrt(apply(errs^2, 2, mean))
}
## combine into dataframe
out <- outcomes %>% distinct(time)
out$att <- att
out$se <- c(rep(NA, t0), se)
return(out)
}
#### BOOTSTRAP
#' Bootstrap standard errors for estimated counterfactual with synth
#' @param outcomes Tidy dataframe with the outcomes and meta data
#' @param metadata Dataframe of metadata
#' @param n_boot Number of bootstrap samples
#' @param trt_unit Treated unit
#' @param pred_int Whether to add outcome control variance to make pred interval
#' @param cols Column names corresponding to the units,
#' time variable, outcome, and treated indicator
#'
#' @return att estimates, test statistics, p-values
#' @export
bootstrap_sc <- function(outcomes, metadata, n_boot, trt_unit=1, pred_int=F,
cols=list(unit="unit", time="time",
outcome="outcome", treated="treated")) {
## format data once
data_out <- format_data(outcomes, metadata, trt_unit, cols=cols)
n_c <- dim(data_out$synth_data$Z0)[2]
t0 <- dim(data_out$synth_data$Z0)[1] + 1
t_final <- dim(data_out$synth_data$Y0plot)[1]
atts <- matrix(0, n_boot, length(t0:t_final))
## fit on bootstrap samples
for(b in 1:n_boot) {
## resample controls
ctrls <- sample(1:n_c, n_c, replace=TRUE)
new_data_out <- data_out
new_data_out$synth_data$Z0 <- data_out$synth_data$Z0[, ctrls]
new_data_out$synth_data$Y0plot <- data_out$synth_data$Y0plot[, ctrls]
## get synth weights
syn <- fit_synth_formatted(new_data_out)
## estimate satt
atts[b,] <- new_data_out$synth_data$Y1plot[t0:t_final,] -
new_data_out$synth_data$Y0plot[t0:t_final,] %*% syn$weights
}
## att on actual sample
syn <- fit_synth_formatted(data_out)
att <- as.numeric(data_out$synth_data$Y1plot -
data_out$synth_data$Y0plot %*% syn$weights)
yhat <- as.numeric(data_out$synth_data$Y0plot %*% syn$weights)
## standard errors
se <- apply(atts, 2, sd)
if(pred_int) {
## estimate of control potential outcome variance
sig2 <- sapply(t0:t_final, function(i)
sum(syn$weights * (data_out$synth_data$Y0plot[i,] - yhat[i])^2))
se <- sqrt(se^2 + sig2)
}
## return(list(att, atts))
## bias
bias <- as.numeric(apply(atts, 2, mean)) - att[t0:t_final]
## combine into dataframe
out <- outcomes %>% distinct(time)
out$att <- att
out$se <- c(rep(NA, t0-1), se)
out$bias <- c(rep(NA, t0-1), bias)
## quantiles
att_post <- att[t0:t_final]
centered <- atts - t(matrix(att_post, nrow=length(att_post), ncol=dim(atts)[1]))
out$cilength <- c(rep(NA, t0-1), apply(abs(centered), 2, function(x) quantile(x, .95)))
out$lower <- c(rep(NA, t0-1), apply(atts, 2, function(x) quantile(x, .025)))
out$upper <- c(rep(NA, t0-1), apply(atts, 2, function(x) quantile(x, .975)))
out <- out %>% mutate(lower=2*att-upper, upper=2*att-lower)
return(out)
}
#' Bootstrap standard errors for estimated counterfactual with synth
#' @param outcomes Tidy dataframe with the outcomes and meta data
#' @param metadata Dataframe of metadata
#' @param n_boot Number of bootstrap samples
#' @param trt_unit Treated unit
#' @param pred_int Whether to add outcome control variance to make pred interval
#' @param lambda Ridge hyper-parameter, if NULL then CV
#' @param scm Whether to augment SCM with ridge
#' @param ridge Include ridge or not
#' @param cols Column names corresponding to the units,
#' time variable, outcome, and treated indicator
#'
#' @return att estimates, test statistics, p-values
#' @export
bootstrap_ridgeaug <- function(outcomes, metadata, n_boot, trt_unit=1, pred_int=F,
lambda=NULL, scm=T, ridge=T,
cols=list(unit="unit", time="time",
outcome="outcome", treated="treated")) {
## format data once
data_out <- format_data(outcomes, metadata, trt_unit, cols=cols)
ipw_out <- format_ipw(outcomes, metadata, cols=cols)
n_c <- dim(data_out$synth_data$Z0)[2]
t0 <- dim(data_out$synth_data$Z0)[1] + 1
t_final <- dim(data_out$synth_data$Y0plot)[1]
## att on actual sample
aug <- fit_ridgeaug_formatted(ipw_out, data_out, lambda, scm, ridge)
att <- as.numeric(data_out$synth_data$Y1plot -
data_out$synth_data$Y0plot %*% aug$weights)
yhat <- as.numeric(data_out$synth_data$Y0plot %*% aug$weights)
## use one lambda
lambda <- aug$lambda
atts <- matrix(0, n_boot, length(t0:t_final))
## fit on bootstrap samples
for(b in 1:n_boot) {
## resample controls
ctrls <- sample(1:n_c, n_c, replace=TRUE)
new_data_out <- data_out
new_data_out$synth_data$Z0 <- data_out$synth_data$Z0[, ctrls]
new_data_out$synth_data$Y0plot <- data_out$synth_data$Y0plot[, ctrls]
new_ipw_out <- ipw_out
new_ipw_out$X[ipw_out$trt==0,] <- ipw_out$X[ipw_out$trt==0,][ctrls,]
if(ncol(ipw_out$y) == 1) {
new_ipw_out$y[ipw_out$trt==0] <- ipw_out$y[ipw_out$trt==0][ctrls]
} else {
new_ipw_out$y[ipw_out$trt==0,] <- ipw_out$y[ipw_out$trt==0,][ctrls,]
}
## get synth weights
aug <- fit_ridgeaug_formatted(new_ipw_out, new_data_out, lambda, scm, ridge)
## estimate satt
atts[b,] <- new_data_out$synth_data$Y1plot[t0:t_final,] -
new_data_out$synth_data$Y0plot[t0:t_final,] %*% aug$weights
}
## standard errors
se <- apply(atts, 2, sd)
if(pred_int) {
## estimate of control potential outcome variance
sig2 <- sapply(t0:t_final, function(i)
sum(aug$weights * (data_out$synth_data$Y0plot[i,] - yhat[i])^2))
se <- sqrt(se^2 + sapply(sig2, function(x) max(x, 0)))
}
## return(list(att, atts))
## bias
bias <- as.numeric(apply(atts, 2, mean)) - att[t0:t_final]
## combine into dataframe
out <- outcomes %>% distinct(time)
out$att <- att
out$yhat <- yhat
out$se <- c(rep(NA, t0-1), se)
out$bias <- c(rep(NA, t0-1), bias)
## quantiles
att_post <- att[t0:t_final]
centered <- atts - t(matrix(att_post, nrow=length(att_post), ncol=dim(atts)[1]))
out$cilength <- c(rep(NA, t0-1), apply(abs(centered), 2, function(x) quantile(x, .95)))
out$lower <- c(rep(NA, t0-1), apply(atts, 2, function(x) quantile(x, .025)))
out$upper <- c(rep(NA, t0-1), apply(atts, 2, function(x) quantile(x, .975)))
out <- out %>% mutate(lower=2*att-upper, upper=2*att-lower)
return(out)
}
#' Bootstrap standard errors for estimated counterfactual with synth
#' @param outcomes Tidy dataframe with the outcomes and meta data
#' @param metadata Dataframe of metadata
#' @param n_boot Number of bootstrap samples
#' @param hyperparam Regularization hyperparameter
#' @param trt_unit Treated unit
#' @param link Link function for weights
#' @param regularizer Dual of balance criterion
#' @param normalized Whether to normalize the weights
#' @param outcome_col Column name which identifies outcomes, if NULL then
#' assume only one outcome
#' @param cols Column names corresponding to the units,
#' time variable, outcome, and treated indicator
#' @param opts Optimization options
#' \itemize{
#' \item{MAX_ITERS }{Maximum number of iterations to run}
#' \item{EPS }{Error tolerance}}
#'
#' @return outcomes with additional synthetic control added and weights
#' @export
bootstrap_bal <- function(outcomes, metadata, n_boot, hyperparam, trt_unit=1,
link=c("logit", "linear", "pos-linear"),
regularizer=c(NULL, "l1", "l2", "ridge", "linf"),
normalized=TRUE,
cols=list(unit="unit", time="time",
outcome="outcome", treated="treated"),
opts=list()) {
## format data once
data_out <- format_ipw(outcomes, metadata, trt_unit, cols=cols)
n_c <- sum(data_out$trt==0)
n_t <- sum(data_out$trt==1)
t0 <- dim(data_out$X)[2]+1
t_final <- t0 + dim(data_out$y)[2] - 1
atts <- matrix(0, n_boot, length(t0:t_final))
## controls and trt matrix
Xc <- data_out$X[data_out$trt==0,,drop=FALSE]
Xt <- data_out$X[data_out$trt==1,,drop=FALSE]
## outcomes
yc <- data_out$y[data_out$trt==0,,drop=FALSE]
yt <- data_out$y[data_out$trt==1,,drop=FALSE]
## fit on bootstrap samples
for(b in 1:n_boot) {
## resample controls
ctrls <- sample(1:n_c, n_c, replace=TRUE)
## resample treated units
trts <- sample(1:n_t, n_t, replace=TRUE)
new_data_out <- data_out
new_data_out$X <- rbind(Xt[trts,,drop=FALSE], Xc[ctrls,,drop=FALSE])
new_data_out$trt <- c(rep(1, n_t), rep(0,n_c))
new_data_out$y <- rbind(yt[trts,,drop=FALSE], yc[ctrls,,drop=FALSE])
## get balancer
capture.output(bal <- fit_balancer_formatted(new_data_out$X, new_data_out$trt,
link=link, regularizer=regularizer,
hyperparam=hyperparam, normalized=normalized,
opts=opts))
## estimate satt
atts[b,] <- colMeans(new_data_out$y[new_data_out$trt==1,,drop=FALSE]) -
as.numeric(t(new_data_out$y[new_data_out$trt==0,,drop=FALSE]) %*% bal$weights)
}
## att on actual sample
capture.output(bal <- fit_balancer_formatted(data_out$X, data_out$trt,
link=link, regularizer=regularizer,
hyperparam=hyperparam, normalized=normalized,
opts=opts))
att_pre <- colMeans(new_data_out$X[data_out$trt==1,,drop=FALSE]) -
as.numeric(t(data_out$X[data_out$trt==0,,drop=FALSE]) %*% bal$weights)
att_post <- colMeans(data_out$y[data_out$trt==1,,drop=FALSE]) -
as.numeric(t(data_out$y[data_out$trt==0,,drop=FALSE]) %*% bal$weights)
att <- c(att_pre, att_post)
## standard errors
se <- apply(atts, 2, sd)
## return(list(att, atts))
## bias
bias <- as.numeric(apply(atts, 2, mean)) - att[t0:t_final]
## combine into dataframe
out <- outcomes %>% distinct(time)
out$att <- att
out$se <- c(rep(NA, t0-1), se)
out$bias <- c(rep(NA, t0-1), bias)
## quantiles
centered <- atts - t(matrix(att_post, nrow=length(att_post), ncol=dim(atts)[1]))
## out$lower <- c(rep(NA, t0-1), apply(abs(centered), 2, function(x) quantile(x, .025)))
out$cilength <- c(rep(NA, t0-1), apply(abs(centered), 2, function(x) quantile(x, .95)))
out$lower <- c(rep(NA, t0-1), apply(atts, 2, function(x) quantile(x, .025)))
out$upper <- c(rep(NA, t0-1), apply(atts, 2, function(x) quantile(x, .975)))
out <- out %>% mutate(lower=2*att-upper, upper=2*att-lower)
return(out)
}
#' Compute model-based weighted least squares SE estimates of the ATT
#' @param X Matrix of pre-period outcomes
#' @param y Matrix of post-period outcomes
#' @param trt Treatment indicator
#' @param weights Balancing weights
#' @param pred_int Whether to add outcome control variance to make pred interval
#' @param hc Type of HC variance estimate (-1 for homoskedastic)
wls_se_ <- function(X=NULL, y, trt, weights, pred_int, hc) {
## ## combine back into a panel structure
## n <- nrow(y)
## ids <- 1:n
## if(is.null(X)) {
## t0 <- 0
## } else {
## t0 <- dim(X)[2]
## }
## t_final <- t0 + dim(y)[2]
## new_weights <- numeric(0)
## new_weights[trt==0] <- weights
## new_weights[trt==1] <- 1
## if(!is.null(X)) {
## pnl1 <- data.frame(X)
## colnames(pnl1) <- 1:t0
## pnl1 <- pnl1 %>% mutate(trt=trt, post=1, id=ids, weight=new_weights) %>%
## gather(time, val, -trt, -post, -id, -weight)
## } else {
## pnl1 <- data.frame(time=NULL, val=NULL, trt=NULL, post=NULL, id=NULL, weight=NULL)
## }
## pnl2 <- data.frame(y)
## colnames(pnl2) <- (t0+1):t_final
## pnl2 <- pnl2 %>% mutate(trt=trt, post=1, id=ids, weight=new_weights) %>%
## gather(time, val, -trt, -post, -id, -weight)
## pnl <- bind_rows(pnl1, pnl2) %>%
## mutate(time=factor(time,
## levels=1:t_final)) %>%
## filter(trt==0)
## ## get att estimates with WLS
## if(t_final > 1) {
## ## fit <- pnl %>%
## ## lm(val ~ time + time:trt -1,
## ## .,
## ## weights=.$weight)
## fit <- pnl %>%
## lm(val ~ time -1,
## .,
## weights=.$weight)
## } else {
## ## fit <- lm(val ~ trt,
## ## pnl,
## ## weights=pnl$weight)
## fit <- lm(val ~ 1,
## pnl,
## weights=pnl$weight)
## }
## ## att <- as.numeric(coef(fit)[(t_final+1):(2*t_final)])
## yhat <- as.numeric(coef(fit)[1:t_final])
## ## se <- as.numeric(coef(summary(fit))[,2][(t_final+1):(2*t_final)])
## ## se <- sqrt(diag(sandwich::vcovHC(fit, type="HC", sandwich=T)))[(t_final+1):(2*t_final)]
## se <- sqrt(diag(sandwich::vcovHC(fit, type="HC", sandwich=T)))
comb <- cbind(X, y)
yhat <- as.numeric(t(comb[trt==0,]) %*% weights)
att <- as.numeric(colMeans(comb[trt==1,,drop=F]) - yhat)
comb <- comb[trt==0,]
if(hc == 0) {
## heteroskedastic robust variance estimate (huber white)
se <- sqrt(sapply(1:length(att),
function(i) sum(weights^2 * (comb[,i] - yhat[i])^2)))
} else if(hc == 2) {
se <- sqrt(sapply(1:length(att),
function(i) sum(weights^2/(1-weights) * (comb[,i] - yhat[i])^2)))
} else if(hc == 3) {
se <- sqrt(sapply(1:length(att),
function(i) sum((weights/(1-weights))^2 * (comb[,i] - yhat[i])^2)))
}else if(hc == -1){
## homoskedastic estimate, different for each time
vars_t <- sapply(1:length(att), function(i) var(comb[,i]))
se <- sqrt(vars_t * sum(weights^2))
}
if(pred_int) {
sig2 <- sapply(1:length(att), function(i) sum(weights * (comb[,i] - yhat[i])^2))
## sig2 <- summary(fit)$sigma^2
} else {
sig2 <- 0
}
## add variance for prediction interval
## sig2 <- summary(fit)$sigma^2 / sum(trt==1)
se <- sqrt(se^2 + sapply(sig2, function(x) max(x,0)))
return(list(yhat=yhat, att=att, se=se))
}
#' Weighted Least Squares Standard Errors
#' @param outcomes Tidy dataframe with the outcomes and meta data
#' @param metadata Dataframe of metadata
#' @param trt_unit Treated unit
#' @param pred_int Whether to add outcome control variance to make pred interval
#' @param hc Type of HC variance estimate (-1 for homoskedastic)
#' @param cols Column names corresponding to the units,
#' time variable, outcome, and treated indicator
#'
#' @return outcomes with additional synthetic control added and weights
#' @export
wls_se_synth <- function(outcomes, metadata, trt_unit=1, pred_int=F,
hc=0,
cols=list(unit="unit", time="time",
outcome="outcome", treated="treated")) {
## fit synth
syn <- get_synth(outcomes, metadata, trt_unit, cols)
## format for WLS
ipw_dat <- format_ipw(outcomes, metadata, NULL, cols)
## get att and standard error estimates
att_se <- wls_se_(ipw_dat$X, ipw_dat$y, ipw_dat$trt, syn$weights, pred_int, hc)
##
## combine into dataframe
out <- outcomes %>% distinct(time)
out$att <- att_se$att
out$se <- att_se$se
out$yhat <- att_se$yhat
## out$att <- c(rep(NA, ncol(ipw_dat$X)),
## att_se$att)
## out$se <- c(rep(NA, ncol(ipw_dat$X)),
## att_se$se)
return(out)
}
#' Weighted Least Squares Standard Errors
#' @param outcomes Tidy dataframe with the outcomes and meta data
#' @param metadata Dataframe of metadata
#' @param trt_unit Treated unit
#' @param pred_int Whether to add outcome control variance to make pred interval
#' @param hc Type of HC variance estimate (-1 for homoskedastic)
#' @param lambda Ridge hyperparemter, default NULL
#' @param scm Include SCM or not
#' @param ridge Include ridge or not
#' @param cols Column names corresponding to the units,
#' time variable, outcome, and treated indicator
#'
#' @return outcomes with additional synthetic control added and weights
#' @export
wls_se_ridgeaug <- function(outcomes, metadata, trt_unit=1, pred_int=F, hc=0,
lambda=NULL, scm=T, ridge=T,
cols=list(unit="unit", time="time",
outcome="outcome", treated="treated")) {
## fit synth
aug <- get_ridgeaug(outcomes, metadata, trt_unit, lambda, scm, ridge, cols)
## format for WLS
ipw_dat <- format_ipw(outcomes, metadata, NULL, cols)
## get att and standard error estimates
att_se <- wls_se_(ipw_dat$X, ipw_dat$y, ipw_dat$trt, aug$weights, pred_int, hc)
##
## combine into dataframe
out <- outcomes %>% distinct(time)
out$att <- att_se$att
out$se <- att_se$se
out$yhat <- att_se$yhat
## out$att <- c(rep(NA, ncol(ipw_dat$X)),
## att_se$att)
## out$se <- c(rep(NA, ncol(ipw_dat$X)),
## att_se$se)
return(out)
}
#' Bootstrap standard errors for estimated counterfactual with synth
#' @param outcomes Tidy dataframe with the outcomes and meta data
#' @param metadata Dataframe of metadata
#' @param hyperparam Regularization hyperparameter
#' @param trt_unit Treated unit
#' @param link Link function for weights
#' @param regularizer Dual of balance criterion
#' @param normalized Whether to normalize the weights
#' @param outcome_col Column name which identifies outcomes, if NULL then
#' assume only one outcome
#' @param cols Column names corresponding to the units,
#' time variable, outcome, and treated indicator
#' @param opts Optimization options
#' \itemize{
#' \item{MAX_ITERS }{Maximum number of iterations to run}
#' \item{EPS }{Error tolerance}}
#'
#' @return outcomes with additional synthetic control added and weights
#' @export
wls_se_bal <- function(outcomes, metadata, hyperparam, trt_unit=1,
link=c("logit", "linear", "pos-linear"),
regularizer=c(NULL, "l1", "l2", "ridge", "linf"),
normalized=TRUE,
cols=list(unit="unit", time="time",
outcome="outcome", treated="treated"),
opts=list()) {
## format data once
ipw_dat <- format_ipw(outcomes, metadata, NULL, cols)
capture.output(bal <- fit_balancer_formatted(ipw_dat$X, ipw_dat$trt,
link=link, regularizer=regularizer,
hyperparam=hyperparam, normalized=normalized,
opts=opts))
## get att and standard error estimates
att_se <- wls_se_(ipw_dat$X, ipw_dat$y, ipw_dat$trt, bal$weights)
##
## combine into dataframe
out <- outcomes %>% distinct(time)
out$att <- att_se$att
out$se <- att_se$se
out$yhat <- att_se$yhat
return(out)
}
#' Compute standard errors from survey glm
#' @param X Matrix of pre-period outcomes
#' @param y Matrix of post-period outcomes
#' @param trt Treatment indicator
#' @param weights Balancing weights
#' @param pred_int Whether to add outcome control variance to make pred interval
svyglm_se_ <- function(X=NULL, y, trt, weights, pred_int) {
## combine back into a panel structure
n <- nrow(y)
ids <- 1:n
if(is.null(X)) {
t0 <- 0
} else {
t0 <- dim(X)[2]
}
t_final <- t0 + dim(y)[2]
new_weights <- numeric(0)
new_weights[trt==0] <- weights
new_weights[trt==1] <- 1
if(!is.null(X)) {
pnl1 <- data.frame(X)
colnames(pnl1) <- 1:t0
pnl1 <- pnl1 %>% mutate(trt=trt, post=1, id=ids, weight=new_weights) %>%
gather(time, val, -trt, -post, -id, -weight)
} else {
pnl1 <- data.frame(time=NULL, val=NULL, trt=NULL, post=NULL, id=NULL, weight=NULL)
}
pnl2 <- data.frame(y)
colnames(pnl2) <- (t0+1):t_final
pnl2 <- pnl2 %>% mutate(trt=trt, post=1, id=ids, weight=new_weights) %>%
gather(time, val, -trt, -post, -id, -weight)
pnl <- bind_rows(pnl1, pnl2) %>%
mutate(time=factor(time,
levels=1:t_final)) %>%
filter(trt==0)
## make desing
design <- survey::svydesign(id=~1, weights=~weight, data=pnl)
## get att estimates with WLS
if(t_final > 1) {
## fit <- pnl %>%
## lm(val ~ time + time:trt -1,
## .,
## weights=.$weight)
fit <- survey::svyglm(val ~ time -1, design)
} else {
## fit <- lm(val ~ trt,
## pnl,
## weights=pnl$weight)
fit <- survey::svyglm(val ~ 1, design)
}
## att <- as.numeric(coef(fit)[(t_final+1):(2*t_final)])
yhat <- as.numeric(coef(fit))
## se <- as.numeric(coef(summary(fit))[,2][(t_final+1):(2*t_final)])
se <- coef(summary(fit))[,2]
comb <- cbind(X, y)
att <- colMeans(comb[trt==1,,drop=F]) - yhat
comb <- comb[trt==0,]
if(pred_int) {
sig2 <- sapply(1:length(att), function(i) sum(weights * (comb[,i] - yhat[i])^2))
} else {
sig2 <- 0
}
se <- sqrt(se^2 + sig2)
return(list(att=att, se=se))
}
#' svyglm standard errors
#' @param outcomes Tidy dataframe with the outcomes and meta data
#' @param metadata Dataframe of metadata
#' @param trt_unit Treated unit
#' @param pred_int Whether to add outcome control variance to make pred interval
#' @param cols Column names corresponding to the units,
#' time variable, outcome, and treated indicator
#'
#' @return outcomes with additional synthetic control added and weights
#' @export
svyglm_se_synth <- function(outcomes, metadata, trt_unit=1, pred_int=F,
cols=list(unit="unit", time="time",
outcome="outcome", treated="treated")) {
## fit synth
syn <- get_synth(outcomes, metadata, trt_unit, cols)
## format for WLS
ipw_dat <- format_ipw(outcomes, metadata, NULL, cols)
## get att and standard error estimates
att_se <- svyglm_se_(ipw_dat$X, ipw_dat$y, ipw_dat$trt, syn$weights, pred_int)
##
## combine into dataframe
out <- outcomes %>% distinct(time)
out$att <- att_se$att
out$se <- att_se$se
## out$att <- c(rep(NA, ncol(ipw_dat$X)),
## att_se$att)
## out$se <- c(rep(NA, ncol(ipw_dat$X)),
## att_se$se)
return(out)
}
#' svyglm standard errors
#' @param outcomes Tidy dataframe with the outcomes and meta data
#' @param metadata Dataframe of metadata
#' @param trt_unit Treated unit
#' @param pred_int Whether to add outcome control variance to make pred interval
#' @param cols Column names corresponding to the units,
#' time variable, outcome, and treated indicator
#'
#' @return outcomes with additional synthetic control added and weights
#' @export
svyglm_se_synth <- function(outcomes, metadata, trt_unit=1, pred_int=F,
cols=list(unit="unit", time="time",
outcome="outcome", treated="treated")) {
## fit synth
syn <- get_synth(outcomes, metadata, trt_unit, cols)
## format for WLS
ipw_dat <- format_ipw(outcomes, metadata, NULL, cols)
## get att and standard error estimates
att_se <- svyglm_se_(ipw_dat$X, ipw_dat$y, ipw_dat$trt, syn$weights, pred_int)
##
## combine into dataframe
out <- outcomes %>% distinct(time)
out$att <- att_se$att
out$se <- att_se$se
## out$att <- c(rep(NA, ncol(ipw_dat$X)),
## att_se$att)
## out$se <- c(rep(NA, ncol(ipw_dat$X)),
## att_se$se)
return(out)
}
#' Use leave out one estimates (placebo gaps) to estimate unit-level variance
#' Do this for ridge-augmented synth
#' @param outcomes Tidy dataframe with the outcomes and meta data
#' @param metadata Dataframe of metadata
#' @param covs Dataframe of covariates if not null then uses
#' covariates in outcome model; default NULL
#' @param trt_unit Treated unit
#' @param lambda Ridge hyper-parameter, if NULL use CV
#' @param scm Include SCM or not
#' @param ridge Include ridge or not
#' @param use_weights Whether to use weights in se estimate, default: FALSE
#' @param hc Type of HC variance estimate (-1 for homoskedastic)
#' @param trt_effect Whether to compute standard errors for treatment effect
#' or counterfactual mean
#' @param cols Column names corresponding to the units,
#' time variable, outcome, and treated indicator
#'
#' @return att estimates, test statistics, p-values
#' @export
loo_se_ridgeaug <- function(outcomes, metadata, covs=NULL,
trt_unit=1, lambda=NULL,
scm=T, ridge=T, use_weights=T, hc=-1,
trt_effect=T,
cols=list(unit="unit", time="time",
outcome="outcome", treated="treated")) {
## format data once
data_out <- format_data(outcomes, metadata, trt_unit, cols=cols)
ipw_dat <- format_ipw(outcomes, metadata, cols=cols)
if(!is.null(covs)) {
Z <- as.matrix(covs)
} else {
Z <- NULL
}
n_c <- dim(data_out$synth_data$Z0)[2]
t0 <- dim(data_out$synth_data$Z0)[1]
t_final <- dim(data_out$synth_data$Y0plot)[1]
errs <- matrix(0, n_c, t_final - t0)
## att on actual sample
if(is.null(Z)) {
aug_t <- fit_ridgeaug_formatted(ipw_dat, data_out, lambda, scm, ridge)
} else {
aug_t <- fit_ridgeaug_cov_formatted(ipw_dat, data_out, Z, lambda, scm)
}
att <- as.numeric(data_out$synth_data$Y1plot -
data_out$synth_data$Y0plot %*% aug_t$weights)
lam <- aug_t$lambda
## iterate over control units
for(i in 1:n_c) {
## reset synth data to make a control a treated
new_data_out <- data_out
new_data_out$synth_data$Z0 <- data_out$synth_data$Z0[, -i]
new_data_out$synth_data$Y0plot <- data_out$synth_data$Y0plot[, -i]
new_data_out$synth_data$Z1 <- data_out$synth_data$Z0[, i, drop=FALSE]
new_data_out$synth_data$Y1plot <- data_out$synth_data$Y0plot[, i, drop=FALSE]
## reset ipw data to change treatment assignment
new_ipw_dat <- ipw_dat
new_ipw_dat$X <- ipw_dat$X[ipw_dat$trt==0,,drop=F]
new_ipw_dat$y <- ipw_dat$y[ipw_dat$trt==0,,drop=F]
new_ipw_dat$trt <- numeric(nrow(new_ipw_dat$X))
new_ipw_dat$trt[i] <- 1
if(is.null(Z)) {
## get ridge_aug weights
aug <- fit_ridgeaug_formatted(new_ipw_dat, new_data_out, lam, scm, ridge)
} else {
new_Z <- Z[ipw_dat$trt==0,,drop=F]
aug <- fit_ridgeaug_cov_formatted(new_ipw_dat, new_data_out, new_Z, lam, scm)
}
## estimate satt
errs[i,] <- new_data_out$synth_data$Y1plot[(t0+1):t_final,] -
new_data_out$synth_data$Y0plot[(t0+1):t_final,] %*% aug$weights
}
## standard errors
if(use_weights) {
if(hc == 0) {
se <- sqrt(t(errs^2) %*% aug_t$weights^2)
} else if(hc ==-1) {
if(trt_effect) {
se <- sqrt((1 / sum(ipw_dat$trt==1) + ## contribution from treated unit
sum(aug_t$weights^2)) * ## contribution from weights
apply(errs^2, 2, mean)) ## estimate of variance
} else {
se <- sqrt(apply(errs^2, 2, mean)) * sqrt(sum(aug_t$weights^2))
}
} else if(hc == "scm") {
## use synth weights on DI
sc <- fit_synth_formatted(data_out)
se <- sqrt(t(errs^2) %*% sc$weights)
}
} else {
se <- sqrt(apply(errs^2, 2, mean))
}
## combine into dataframe
out <- outcomes %>% distinct(time)
out$yhat <- as.numeric(data_out$synth_data$Y0plot %*% aug_t$weights)
out$att <- att
out$se <- c(rep(NA, t0), se)
return(out)
}
#' Use closed form weighted variance estiamte for ridge-augmented
#' @param outcomes Tidy dataframe with the outcomes and meta data
#' @param metadata Dataframe of metadata
#' @param trt_unit Treated unit
#' @param lambda Ridge hyper-parameter, if NULL use CV
#' @param scm Include SCM or not
#' @param ridge Include ridge or not
#' @param cols Column names corresponding to the units,
#' time variable, outcome, and treated indicator
#'
#' @return att estimates, test statistics, p-values
#' @export
wvar_se_ridgeaug <- function(outcomes, metadata, trt_unit=1, lambda=NULL,
scm=T, ridge=T,
cols=list(unit="unit", time="time",
outcome="outcome", treated="treated")) {
## format data once
data_out <- format_data(outcomes, metadata, trt_unit, cols=cols)
ipw_dat <- format_ipw(outcomes, metadata, cols=cols)
n_c <- dim(data_out$synth_data$Z0)[2]
t0 <- dim(data_out$synth_data$Z0)[1]
t_final <- dim(data_out$synth_data$Y0plot)[1]
errs <- matrix(0, n_c, t_final - t0)
## att on actual sample
aug_t <- fit_ridgeaug_formatted(ipw_dat, data_out, lambda, scm, ridge)
att <- as.numeric(data_out$synth_data$Y1plot -
data_out$synth_data$Y0plot %*% aug_t$weights)
## weighted variance estimate
se <- apply(ipw_dat$y, 2, function(y) sd(aug_t$weights * y[ipw_dat$trt==0]))
## combine into dataframe
out <- outcomes %>% distinct(time)
out$yhat <- as.numeric(data_out$synth_data$Y0plot %*% aug_t$weights)
out$att <- att
out$se <- c(rep(NA, t0), se)
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.