# load packages
library(tidyverse)
library(MatchIt)
#' Calculate block bootstrap confidence interval
#' @param match GroupMatch output
#' @param dat Dataset GroupMatch was performed on
#' @param treat_variable Name of the treatment variable in the dataset
#' @param outcome_variable Name of the outcome variable in the dataset
#' @param trajectory_id Name of the column that identifies trajectories
#' @param ci_level Confidence level
#' @param reps Number of bootstrap reps
#' @param outcome_model Outcome model, if bias correction is used
#' @return Vector with confidence interval endpoints
bootstrap_ci <- function(match, dat, treat_variable = "treat",
outcome_variable = "y_obs",
trajectory_id = "personID",
ci_level = 0.95,
reps = 1000, outcome_model = NA){
# add matching weights to data
dat <- matrix_to_weights(match$cells, dat)
# Sample only matched data
matched_dat <- filter(dat, match_wt != 0)
# get number of matched treated units
num_matched_treated <- nrow(filter(matched_dat, !!sym(treat_variable) == 1))
# initialize boostrap estimates
boots_bias_correct <- rep(0, reps)
# unique trajectory IDs
matched_trajectory_ids <- unique(matched_dat[[trajectory_id]])
# iterate over bootstrap replicates
for(i in 1:reps){
sample_trajectory_ids <- data.frame(trajectory_id = sample(matched_trajectory_ids,
length(matched_trajectory_ids),
replace = T))
names(sample_trajectory_ids)[1] <- trajectory_id
sample_dat <- left_join(sample_trajectory_ids, matched_dat, by = trajectory_id)
# get bias correction values
if(is.na(outcome_model) == F){
preds <- predict(outcome_model, sample_dat)
} else {
preds <- rep(0, nrow(sample_dat))
}
# get bootstrap value
boots_bias_correct[i] <- (1/num_matched_treated) * sum((2*sample_dat[[treat_variable]] - rep(1, nrow(sample_dat)))*(sample_dat$match_wt*(sample_dat[[outcome_variable]] - preds)))
}
# get confidence interval
alpha <- 1 - ci_level
boot_bias_correct_ci <- quantile(boots_bias_correct, c(alpha / 2, 1 - alpha / 2))
# return confidence interval
return(boot_bias_correct_ci)
}
#' Run falsification test
#' @param control_data Dataset with only control units
#' @param control_variables Vector of variables to match on
#' @param outcome_model Outcome model, if bias correction is used
#' @param outcome_variable Name of the outcome variable in the dataset
#' @param time_variable Name of the time variable in the dataset
#' @param trajectory_id Name of the column that identifies trajectories
#' @param time_points Vector of two time points to run falsification test on
#' @param rand_reps Number of reps to randomly divide dataset
#' @param caliper Matching caliper
#' @param reps Number of permutation test reps
#' @return List with p-value from test and mahalanobis distance of dataset for random split
control_test <- function(control_data, control_variables, outcome_model = NA,
outcome_variable, time_variable, trajectory_id,
time_points, rand_reps = 100,
caliper = 0.2, reps = 1000){
# filter down to just the time points
control_data <- control_data %>%
filter(!!sym(time_variable) %in% time_points)
# apply bias correction, if applicable
if(is.na(outcome_model) == F){
control_data$bias_correct <- predict(outcome_model, control_data)
} else {
control_data$bias_correct <- 0
}
# create matching formula
matching_formula <- paste("new_treat ~ ", paste(control_variables, collapse = ' + '))
# randomly split the data many times and take split with best balance
balance_value <- 100000000
for(i in 1:rand_reps){
# get control timepoints
new_control_ids <- sample(unique(control_data[[trajectory_id]]),
length(unique(control_data[[trajectory_id]])) / 2, replace = F)
# get treated timepoints
new_treat_ids <- setdiff(unique(control_data[[trajectory_id]]), new_control_ids)
# second timepoint is new control and first timepoint is new treatment
new_control_df <- control_data %>%
filter(!!sym(trajectory_id) %in% new_control_ids,
!!sym(time_variable) == time_points[2]) %>%
mutate(new_treat = 0)
new_treat_df <- control_data %>%
filter(!!sym(trajectory_id) %in% new_treat_ids,
!!sym(time_variable) == time_points[1]) %>%
mutate(new_treat = 1)
# Calculate mahalonbis distance
X1 <- new_control_df[control_variables]
X2 <- new_treat_df[control_variables]
X <- rbind(X1, X2)
n <- nrow(X)
p <- nrow(X2) / nrow(X)
X_mean_diff <- colMeans(X2) - colMeans(X1)
mahalanobis_value <- n * p * (1 - p) * t(X_mean_diff) %*% solve(cov(X)) %*% (X_mean_diff)
# check if balanced improved
if(mahalanobis_value < balance_value){
# run match
control_data_temp <- bind_rows(new_control_df, new_treat_df)
mout <- matchit(formula(matching_formula),
control_data_temp, replace = F, caliper = caliper)
new_control_data <- control_data_temp
balance_value <- mahalanobis_value
}
}
control_data <- new_control_data
matched_diffs <- (control_data[[outcome_variable]][control_data["new_treat"] == 1] -
control_data$bias_correct[control_data["new_treat"] == 1]) -
(control_data[[outcome_variable]][as.numeric(mout$match.matrix)] -
control_data$bias_correct[as.numeric(mout$match.matrix)])
matched_diffs <- matched_diffs[is.na(matched_diffs) == F]
ATT.match <- mean(matched_diffs)
# permutation test
perm_coefs <- rep(0, reps)
for(j in 1:reps){
perm_coefs[j] <- mean(matched_diffs * sample(c(-1, 1), length(matched_diffs), replace = T))
}
# calculate p-value
p_value <- mean(abs(perm_coefs) >= abs(ATT.match))
return(list(p_value, balance_value))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.