#-----------------------------------------------------------------------------#
#' Estimate Causal Effects in presence of interference
#'
#' @param formula The formula used to define the causal model. Has a minimum
#' of 4 parts, separated by \code{|} and \code{~} in a specific structure:
#' \code{outcome | exposure ~ propensity covariates | group}. The order matters,
#' and the pipes split the data frame into corresponding pieces. The part
#' separated by \code{~} is passed to the chosen \code{model_method} used to
#' estimate or fix propensity parameters.
#' @param propensity_integrand A function, which may be created by the user,
#' used to compute the IP weights. This defaults to \code{logit_integrand},
#' which calculates the product of inverse logits for individuals in a group:
#' \eqn{\prod_{j = 1}^{n_i} \{r \times h_{ij}(b_i)^{A_{ij}}\}\{1 - r \times
#' h_{ij}(b_i)\}^{1 - A_{ij}} f_b(b_i; \theta_s)}{prod(r * plogis(X * fixef + b)^A *
#' (1 - r * plogis(X * fixef+ b))^(1 - A)) *
#' dnorm(sd = sqrt(ranef))} where \deqn{h_{ij}(b_i) = logit^{-1}
#' (\mathbf{X}_{ij}\theta_a + b_i)} and \eqn{b_i} is a group-level random effect,
#' \eqn{f_b} is a \eqn{N(0, \theta_s)} density, and \eqn{r} is a known
#' randomization probability which may be useful if a participation vector is
#' included in the \code{formula}. If no random effect was included in the
#' \code{formula}, \code{logit_integrand} essentially ignores the random effect
#' and \eqn{f_b(b_i, \theta_s)} integrates to 1. See details for arguments that
#' can be passed to \code{logit_integrand}
#' @param loglihood_integrand A function, which may be created by the user, that
#' defines the log likelihood of the logit model used for \code{robust} variance
#' estimation. Generally, this will be the same function as
#' \code{propensity_integrand}. Indeed, this is the default.
#' @param allocations a vector of values in (0, 1). Increasing the number of
#' elements of the allocation vector greatly increases computation time; however,
#' a larger number of allocations will make plots look nicer. A minimum of two
#' allocations is required.
#' @param data the analysis data frame. This must include all the variables
#' defined in the \code{formula}.
#' @param model_method the method used to estimate or set the propensity model
#' parameters. Must be one of \code{'glm'}, \code{'glmer'}, \code{'HLfit'},
#' or \code{'oracle'}. Defaults to \code{'glmer'}. For a fixed effects only model
#' use \code{'glm'}, and to include random effects use\code{'glmer'} or \code{'HLfit'}.
#' \code{logit_integrand} only supports a single random effect for the grouping variable,
#' so if more random effects are included in the model, different \code{propensity_integrand}
#' and \code{loglihood_integrand} functions should be defined. When the propensity
#' parameters are known (as in simulations) or if estimating parameters by
#' other methods, use the \code{'oracle'} option. See \code{model_options} for
#' details on how to pass the oracle parameters.
#' @param model_options a list of options passed to the function in
#' \code{model_method}. Defaults to \code{list(family = binomial(link = 'logit'))}.
#' When \code{model_method = 'oracle'}, the list must have two elements (1)
#' \code{fixed_effects} and (2) \code{random_effects}. If the model did not include
#' random effects, set \code{random.effects = NULL}.
#' @param causal_estimation_method currently only supports \code{'ipw'}.
#' @param causal_estimation_options A list. Current options are: (1) \code{variance_estimation} is
#' either \code{'naive'} or \code{'robust'}. See details. Defaults to \code{'robust'}.
#' @param conf.level level for confidence intervals. Defaults to \code{0.95}.
#' @param rescale.factor a scalar multiplication factor by which to rescale outcomes
#' and effects. Defaults to \code{1}.
#' @param runSilent if FALSE, status of computations are printed to console. Defaults to TRUE.
#' @param ... Used to pass additional arguments to internal functions such as
#' \code{numDeriv::grad()} or \code{integrate()}. Additionally, arguments can be
#' passed to the \code{propensity_integrand} and \code{loglihood_integrand} functions.
#' @inheritParams wght_calc
#'
#' @details The following formula includes a random effect for the group: \code{outcome |
#' exposure ~ propensity covariates + (1|group) | group}. In this instance, the
#' group variable appears twice. If the study design includes a "participation"
#' variable, this is easily added to the formula: \code{outcome | exposure |
#' participation ~ propensity covariates | group}.
#'
#' \code{logit_integrand} has two options that can be passed via the \code{...}
#' argument:
#' \itemize{
#' \item \code{randomization}: a scalar. This is the \eqn{r} in the formula just
#' above. It defaults to 1 in the case that a \code{participation} vector is not
#' included. The vaccine study example demonstrates use of this argument.
#' \item \code{integrate_allocation}: \code{TRUE/FALSE}. When group sizes grow
#' large (over 1000), the product term of \code{logit_integrand} tends quickly to 0.
#' When set to \code{TRUE}, the IP weights tend less quickly to 0.
#' Defaults to \code{FALSE}.
#' }
#'
#' If the true propensity model is known (e.g. in simulations) use
#' \code{variance_estimatation = 'naive'}; otherwise, use the default
#' \code{variance_estimatation = 'robust'}. Refer to the web appendix of
#' Heydrich-Perez et al. (2014) (\doi{10.1111/biom.12184})
#' for complete details.
#' @references Saul, B. and Hugdens, M. G. (2017).
#' A Recipe for {inferference}: {S}tart with Causal Inference. {A}dd Interference. {M}ix Well with {R}.
#' Journal of Statistical Software, 82(2), 1-21. \doi{10.18637/jss.v082.i02}
#'
#' Perez-Heydrich, C., Hudgens, M. G., Halloran, M. E., Clemens, J. D., Ali, M., & Emch, M. E. (2014).
#' Assessing effects of cholera vaccination in the presence of interference. Biometrics, 70(3), 731-741.
#'
#' Tchetgen Tchetgen, E. J., & VanderWeele, T. J. (2012). On causal inference in the presence of interference.
#' Statistical Methods in Medical Research, 21(1), 55-75.
#' @return Returns a list of overall and group-level IPW point estimates, overall
#' and group-level IPW point estimates (using the weight derivatives), derivatives
#' of the loglihood, the computed weight matrix, the computed
#' weight derivative array, and a summary.
#'
#' @export
#'
#-----------------------------------------------------------------------------#
interference <- function(formula,
propensity_integrand = 'logit_integrand',
loglihood_integrand = propensity_integrand,
allocations,
data,
model_method = "glmer",
model_options = list(family = stats::binomial(link = 'logit')),
causal_estimation_method = 'ipw',
causal_estimation_options = list(variance_estimation = 'robust'),
conf.level = 0.95,
rescale.factor = 1,
integrate_allocation = TRUE,
runSilent = TRUE,
...)
{
## Necessary bits ##
dots <- list(...)
integrandFUN <- match.fun(propensity_integrand)
likelihoodFUN <- match.fun(loglihood_integrand)
oracle <- model_method == 'oracle'
cformula <- Formula::Formula(formula)
len_lhs <- length(cformula)[1]
len_rhs <- length(cformula)[2]
data <- as.data.frame(data) # make sure data is data.frame not data.table, etc.
## For the sake of consistency downstream, reorder data frame by groups ##
group_var <- attr(stats::terms(cformula, lhs = 0, rhs = len_rhs), 'term.labels')
data <- data[order(data[ , group_var]), ]
## Parse out the formula into necessary pieces ##
Y <- Formula::model.part(cformula, data = data, lhs = 1, drop = TRUE)
A <- Formula::model.part(cformula, data = data, lhs = 2, drop = TRUE)
G <- Formula::model.part(cformula, data = data, rhs = len_rhs, drop = TRUE)
# Used when there is 'participation' variable
if(len_lhs > 2){
B <- Formula::model.part(cformula, data = data, lhs = len_lhs, drop = TRUE)
} else {
B <- A
}
propensity_formula <- formula(stats::terms(cformula, lhs = len_lhs, rhs = -2))
random.count <- length(lme4::findbars(propensity_formula))
trt_lvls <- sort(unique(A))
N <- length(unique(G))
k <- length(allocations)
l <- length(trt_lvls)
## Warnings ##
if(model_method == 'glm' & random.count > 0 ){
stop('propensity_formula appears to include a random effect when "glm" was chosen \n
for parameter estimation. Set model_method to "glmer" to include a random effect')
}
if(propensity_integrand == "logit_integrand" & random.count > 1){
stop('Logit integrand is designed to handle only 1 random effect.')
}
if(min(allocations) < 0 | max(allocations) > 1){
stop('Allocations must be between 0 and 1')
}
if(length(allocations) < 2){
warning('At least 2 allocations must be specified in order to estimate indirect effects')
}
if(N == 1){
stop('The group variable must have at least 2 groups (more is better).')
}
if(!(causal_estimation_options$variance_estimation %in% c('naive', 'robust'))){
stop("The variance estimation method should be 'naive' or 'robust'")
}
#### Compute Parameter Estimates ####
estimation_args <- append(list(formula = propensity_formula, data = data),
model_options)
parameters <- list()
if(model_method == "glmer"){
propensity_model <- do.call(lme4::glmer, args = estimation_args)
parameters$fixed_effects <- lme4::getME(propensity_model, 'fixef')
parameters$random_effects <- lme4::getME(propensity_model, 'theta')
X <- lme4::getME(propensity_model, "X")
if(sum(parameters$random_effects == 0) > 0){
stop('At least one random effect was estimated as 0. This will lead to a
non-invertible matrix if using robust variance estimation.')
}
} else if(model_method == "HLfit"){
propensity_model <- do.call(spaMM::HLfit, args = estimation_args)
parameters$fixed_effects <- propensity_model$fixef
parameters$random_effects <- sqrt(propensity_model$lambda[[1]])
X <- stats::model.matrix(propensity_model)
if(sum(parameters$random_effects == 0) > 0){
stop('At least one random effect was estimated as 0. This will lead to a
non-invertible matrix if using robust variance estimation.')
}
} else if(model_method == "glm"){
propensity_model <- do.call("glm", args = estimation_args)
parameters$fixed_effects <- stats::coef(propensity_model)
parameters$random_effects <- NULL
X <- stats::model.matrix(propensity_model)
} else if(model_method == "oracle"){
parameters$fixed_effects <- model_options[[1]]
parameters$random_effects <- model_options[[2]]
X <- stats::model.matrix(propensity_formula, data)
if(length(parameters$fixed_effects) != ncol(X)){
stop('oracle fixed effects vector must have length of # of columns of X')
}
}
#### Compute Effect Estimates ####
out <- list()
grid <- effect_grid(allocations = allocations, treatments = trt_lvls)
# Will have other causal estimation types in the future
if('ipw' %in% causal_estimation_method)
{
ipw_args <- append(append(dots, causal_estimation_options),
list(propensity_integrand = integrandFUN,
loglihood_integrand = likelihoodFUN,
allocations = allocations,
parameters = unlist(parameters),
runSilent = runSilent,
integrate_allocation = integrate_allocation,
Y = Y, X = X, A = A, B = B, G = G))
ipw <- do.call(ipw_interference, args = ipw_args)
out <- append(out, ipw)
if(runSilent != TRUE){print('Computing effect estimates...')} #BB 2015-06-23
estimate_args <- list(obj = ipw,
variance_estimation = causal_estimation_options$variance_estimation,
alpha1 = grid$alpha1,
trt.lvl1 = grid$trt1,
alpha2 = grid$alpha2,
trt.lvl2 = grid$trt2,
marginal = grid$marginal,
effect_type = grid$effect_type,
rescale.factor = rescale.factor,
conf.level = conf.level,
print = FALSE)
est <- do.call(ipw_effect_calc, args = estimate_args)
# 2/21/15: ipw_effect_calc returns a data.frame of lists, but these should
# be numeric columns. Here's a quick fix.
est <- apply(t(est), 2, as.numeric)
out$estimates <- cbind(grid, est)
}
#### Summary ####
# count missing weights by allocation
weights_na <- apply(out$weights, 2, function(x) sum(is.na(x)))
if(!oracle){
out$models <- list(propensity_model = propensity_model)
} else {
out$models <- list(propensity_model = model_options)
}
out$summary <- list(formula = deparse(formula),
oracle = oracle,
conf.level = conf.level,
ngroups = N,
nallocations = k,
npredictors = length(parameters$fixed_effects),
ntreatments = l,
allocations = allocations,
treatments = trt_lvls,
weights_na_count = weights_na,
variance_estimation = causal_estimation_options$variance_estimation)
class(out) <- "interference"
if(runSilent != TRUE){print('Interference complete')} #BB 2015-06-23
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.