#' @title Estimating effects of incremental propensity score interventions
#'
#' @description \code{ipsi} is used to estimate effects of incremental
#' propensity score interventions, i.e., estimates of mean outcomes if the odds
#' of receiving treatment were multiplied by a factor delta.
#'
#' @usage ipsi(y, a, x.trt, x.out, time, id, delta.seq, nsplits, ci_level = 0.95,
#' progress_bar = TRUE, return_ifvals = FALSE, fit,
#' sl.lib=c("SL.earth","SL.gam","SL.glm","SL.glmnet","SL.glm.interaction", "SL.mean","SL.ranger","rpart"))
#'
#' @param y Outcome of interest measured at end of study.
#' @param a Binary treatment.
#' @param x.trt Covariate matrix for treatment regression.
#' @param x.out Covariate matrix for outcome regression.
#' @param time Measurement time.
#' @param id Subject identifier.
#' @param delta.seq Sequence of delta increment values for incremental
#' propensity score intervention.
#' @param nsplits Integer number of sample splits for nuisance estimation. If
#' \code{nsplits = 1}, sample splitting is not used, and nuisance functions are
#' estimated n full sample (in which case validity of standard errors and
#' confidence intervals requires empirical process conditions). Otherwise must
#' have \code{nsplits > 1}.
#' @param ci_level A \code{numeric} value giving the level (1 - alpha) of the
#' confidence interval to be computed around the point estimate.
#' @param progress_bar A \code{logical} value indicating whether to print a
#' customized progress bar as various stages of computation reach completion.
#' The default is \code{TRUE}, printing a progress bar to inform the user.
#' @param return_ifvals A \code{logical} indicating whether the estimated
#' observation-level values of the influence function ought to be returned as
#' part of the output object. The default is \code{FALSE} as these values are
#' rarely of interest in standard usage.
#' @param fit How nuisance functions should be estimated. Options are "rf" for
#' random forests via the \code{ranger} package, or "sl" for super learner.
#' @param sl.lib sl.lib algorithm library for SuperLearner.
#' Default library includes "earth", "gam", "glm", "glmnet", "glm.interaction",
#' "mean", "ranger", "rpart.
#'
#' @section Details:
#' Treatment and covariates are expected to be time-varying and measured
#' throughout the course of the study. Therefore if \code{n} is the number of
#' subjects and \code{T} the number of timepoints, then \code{a}, \code{time},
#' and \code{id} should all be vectors of length \code{n}x\code{T}, and
#' \code{x.trt} and \code{x.out} should be matrices with \code{n}x\code{T} rows.
#' However \code{y} should be a vector of length \code{n} since it is only
#' measured at the end of the study. The subject ordering should be consistent
#' across function inputs, based on the ordering specified by \code{id}. See
#' example below for an illustration.
#'
#' @return A list containing the following components:
#' \item{res}{ estimates/SEs and uniform CIs for population means.}
#' \item{res.ptwise}{ estimates/SEs and pointwise CIs for population means.}
#' \item{calpha}{ multiplier bootstrap critical value.}
#'
#' @importFrom stats qnorm as.formula
#' @importFrom ranger ranger
#'
#' @export
#'
#' @examples
#' n <- 500
#' T <- 4
#'
#' time <- rep(1:T, n)
#' id <- rep(1:n, rep(T, n))
#' x.trt <- matrix(rnorm(n * T * 5), nrow = n * T)
#' x.out <- matrix(rnorm(n * T * 5), nrow = n * T)
#' a <- rbinom(n * T, 1, .5)
#' y <- rnorm(mean=1,n)
#'
#' d.seq <- seq(0.1, 5, length.out = 10)
#'
#' ipsi.res <- ipsi(y, a, x.trt, x.out, time, id, d.seq)
#' @references Kennedy EH. Nonparametric causal effects based on incremental
#' propensity score interventions.
#' \href{https://arxiv.org/abs/1704.00211}{arxiv:1704.00211}
#
ipsi <- function(y, a, x.trt, x.out, time, id, delta.seq,
nsplits = 2, ci_level = 0.95,
progress_bar = TRUE, return_ifvals = FALSE,
fit = "rf",
sl.lib = c("SL.earth", "SL.gam", "SL.glm", "SL.glm.interaction", "SL.mean", "SL.ranger", "SL.rpart")) {
require("SuperLearner")
require("earth")
require("gam")
require("ranger")
require("rpart")
# setup storage
ntimes <- length(table(time))
end <- max(time)
n <- length(unique(id))
ynew <- rep(NA, n * ntimes)
ynew[time == end] <- y
dat <- data.frame(time = time, id = id, y = ynew, a = a)
k <- length(delta.seq)
ifvals <- matrix(nrow = n, ncol = k)
est.eff <- rep(NA, k)
wt <- matrix(nrow = n * ntimes, ncol = k)
cumwt <- matrix(nrow = n * ntimes, ncol = k)
rt <- matrix(nrow = n * ntimes, ncol = k)
vt <- matrix(nrow = n * ntimes, ncol = k)
x.trt <- data.frame(x.trt)
x.out <- data.frame(x.out); x.out$a <- a
if (progress_bar) {
pb <- txtProgressBar(
min = 0, max = 2 * nsplits * length(delta.seq) + 3,
style = 3
)
}
s <- sample(rep(seq_len(nsplits), ceiling(n / nsplits))[seq_len(n)])
slong <- rep(s, rep(ntimes, n))
if (progress_bar) {
pbcount <- 0
}
for (split in seq_len(nsplits)) {
if (progress_bar) {
Sys.sleep(0.1)
setTxtProgressBar(pb, pbcount)
pbcount <- pbcount + 1
}
# fit treatment model
if (fit=="rf"){
trtmod <- ranger::ranger(stats::as.formula("a ~ ."),
dat = cbind(x.trt, a = dat$a)[slong != split, ])
dat$ps <- predict(trtmod, data = x.trt)$predictions
}
if (fit=="sl"){
trtmod <- SuperLearner(dat$a[slong!=split], x.trt[slong!=split,],
newX = x.trt, SL.library = sl.lib, family = binomial)
dat$ps <- trtmod$SL.predict
}
for (j in seq_len(k)) {
if (progress_bar) {
Sys.sleep(0.1)
setTxtProgressBar(pb, pbcount)
pbcount <- pbcount + 1
}
delta <- delta.seq[j]
# compute weights
wt[, j] <- (delta * dat$a + 1 - dat$a) / (delta * dat$ps + 1 - dat$ps)
cumwt[, j] <- as.numeric(t(aggregate(wt[, j],
by = list(dat$id),
cumprod
)[, -1]))
vt[, j] <- (1 - delta) * (dat$a * (1 - dat$ps) -
(1 - dat$a) * delta * dat$ps) / delta
# fit outcome models
outmod <- vector("list", ntimes)
rtp1 <- dat$y[dat$time == end]
if (progress_bar) {
Sys.sleep(0.1)
setTxtProgressBar(pb, pbcount)
pbcount <- pbcount + 1
}
for (i in seq_len(ntimes)) {
t <- rev(unique(dat$time))[i]
# counterfactual case for treatment: A = 1
newx1 <- x.out[dat$time == t, ]
newx1$a <- 1
# counterfactual case for no treatment: A = 0
newx0 <- x.out[dat$time == t, ]
newx0$a <- 0
if (fit=="rf"){
outmod[[i]] <- ranger::ranger(stats::as.formula("rtp1 ~ ."),
dat = cbind(x.out,rtp1)[dat$time == t & slong != split, ])
m1 <- predict(outmod[[i]], data = newx1)$predictions
m0 <- predict(outmod[[i]], data = newx0)$predictions
}
if (fit=="sl"){
print(c(i,j)); flush.console()
outmod[[i]] <- SuperLearner(rtp1[s!=split],
x.out[dat$time==t & slong!=split,],SL.library = sl.lib,
newX=rbind(newx1,newx0))
m1 <- outmod[[i]]$SL.predict[1:dim(newx1)[1]]
m0 <- outmod[[i]]$SL.predict[(dim(newx1)[1]+1):(dim(newx1)[1]+dim(newx0)[1])]
}
pi.t <- dat$ps[dat$time == t]
rtp1 <- (delta * pi.t * m1 + (1 - pi.t) * m0) /
(delta * pi.t + 1 - pi.t)
rt[dat$time == t, j] <- rtp1
}
# compute influence function values
ifvals[s == split, j] <- ((cumwt[, j] * dat$y)[dat$time == end] +
aggregate(cumwt[, j] * vt[, j] * rt[, j],
by = list(dat$id), sum
)[, -1])[s == split]
}
}
# compute estimator
for (j in seq_len(k)) {
est.eff[j] <- mean(ifvals[, j])
}
# compute asymptotic variance
sigma <- sqrt(apply(ifvals, 2, var))
ci_norm_bounds <- abs(stats::qnorm(p = (1 - ci_level) / 2))
eff.ll <- est.eff - ci_norm_bounds * sigma / sqrt(n)
eff.ul <- est.eff + ci_norm_bounds * sigma / sqrt(n)
# multiplier bootstrap
if (progress_bar) {
Sys.sleep(0.1)
setTxtProgressBar(pb, pbcount)
pbcount <- pbcount + 1
}
eff.mat <- matrix(rep(est.eff, n), nrow = n, byrow = TRUE)
sig.mat <- matrix(rep(sigma, n), nrow = n, byrow = TRUE)
ifvals2 <- (ifvals - eff.mat) / sig.mat
nbs <- 10000
mult <- matrix(2 * rbinom(n * nbs, 1, 0.5) - 1, nrow = n, ncol = nbs)
maxvals <- sapply(seq_len(nbs), function(col) {
max(abs(apply(mult[, col] * ifvals2, 2, sum) / sqrt(n)))
})
calpha <- quantile(maxvals, ci_level)
eff.ll2 <- est.eff - calpha * sigma / sqrt(n)
eff.ul2 <- est.eff + calpha * sigma / sqrt(n)
if (progress_bar) {
Sys.sleep(0.1)
setTxtProgressBar(pb, pbcount)
close(pb)
}
res <- data.frame(
increment = delta.seq, est = est.eff, se = sigma,
ci.ll = eff.ll2, ci.ul = eff.ul2
)
res2 <- data.frame(
increment = delta.seq, est = est.eff, se = sigma,
ci.ll = eff.ll, ci.ul = eff.ul
)
# output
if (return_ifvals) {
return(invisible(list(
res = res, res.ptwise = res2, calpha = calpha,
ifvals = (ifvals - est.eff)
)))
} else {
return(invisible(list(res = res, res.ptwise = res2, calpha = calpha)))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.