Nothing
############################################################################################
# iv.R
#
# Standard instrumental variable analysis (for binary data).
#
# Code by
#
# - Ricardo Silva (ricardo@stats.ucl.ac.uk)
# - Robin Evans (robin.evans@stats.ox.ac.uk)
#
# Current version: 07/05/2015
# First version: 31/03/2014
#' @title Bayesian Analysis of Binary Instrumental Variables
#'
#' @description
#' Perform Bayesian instrumental variable analysis for binary data. This assumes the
#' number of covariates is small enough. Notice that a set of size greater than
#' 20 will raise a flag and require explicit permission from the user by setting
#' \code{force_use} to \code{TRUE}.
#'
#' @references
#' \url{http://ftp.cs.ucla.edu/pub/stat_ser/r199-jasa.pdf}
#'
#' @param problem a \code{CausalFX} problem instance.
#' @param w index of instrumental variable within the data.
#' @param Z array of indices for covariate adjustment.
#' @param prior_table Dirichlet hyperparameter for contingency tables.
#' @param M number of Monte Carlo samples to compute posterior distributions.
#' @param verbose print relevant messages.
#' @param reject_level if first iteration of rejection sampling has a proportion of rejected
#' samples larger than this, reject the model and return no bounds.
#' @param force_use if TRUE, ignore any warnings about size of \code{Z}.
#'
#' @return A list containing two fields,
#' \item{\code{bound}}{the point estimate of lower and upper bounds.}
#' \item{\code{bound_post}}{samples from the posterior distribution over bounds.}
#'
#' @details
#' Given a joint distribution, this function finds a lower bound and an upper bound on the average causal effect of
#' treatment \eqn{X} on outcome \eqn{Y}, adjusting for covariate set \eqn{Z}. The joint distribution
#' is estimated by assigning a prior to joint contingency table of \code{w}, \code{Z} and the treatment/outcome pair indexed in \code{problem},
#' and ACE bounds are inferred by computing the posterior on the contingency table implied by the instrumental variable assumption.
#' The prior is proportional to a Dirichlet distribution with an effective sample size \code{prior_table}. It assigns probability
#' zero to models which do not satisfy the constraints of the (conditional)
#' instrumental variable, as described by Balke and Pearl (1997, \emph{Journal of the American Statistical Association}, vol. 92,
#' 1171--1176). Hence, the prior is not an exact Dirichlet distribution, but only proportional to one.
#' Posterior samples for the lower and upper bound are generated by rejection sampling using the unconstrained model as
#' a proposal. If the model is a bad fit to the data, this might take much computing time. Setting \code{reject_level}
#' to a level smaller than 1 may stop the rejection sampler earlier and reject the model, returning no bounds.
#'
#' @examples
#'
#' ## Generate synthetic problems until a (conditional) instrumental variable can be found
#'
#' while (TRUE) {
#' problem <- simulateWitnessModel(p = 4, q = 4, par_max = 3, M = 1000)
#' s <- covsearch(problem, pop_solve = TRUE)
#' if (length(s$witness) > 0) {
#' w <- s$witness[1]
#' Z <- s$Z[[1]]
#' break
#' }
#' }
#'
#' ## Calculate true effect for evaluation purposes
#' sol_pop <- covsearch(problem, pop_solve = TRUE)
#' effect_pop <- synthetizeCausalEffect(problem)
#' cat(sprintf("ACE (true) = %1.2f\n", effect_pop$effect_real))
#'
#' ## Binary IV bounds
#' sol_iv <- iv(problem, w, Z, prior_table = 10, M = 1000)
#' summary(sol_iv)
#'
#' @export
iv <- function(problem, w, Z, prior_table, M, verbose = FALSE, reject_level = 0.9, force_use = FALSE) {
if (class(problem) != "cfx") {
stop("a cfx object is necessary")
}
if (!validateData(problem$data)) {
stop("Data must be binary, encoded numerically as 0s and 1s")
}
num_Z <- length(Z)
if (num_Z > 20 && !force_use) {
stop("Size of covariate set very large. To force use, activate the force_use flag")
}
num_states <- 2^num_Z
dat <- problem$data[, c(Z, w, problem$X_idx, problem$Y_idx)]
counts <- dtoc(dat)
dim(counts) <- c(num_states, 2, 2, 2)
if (num_Z > 0) {
Z_totals <- marginTable(counts, 1)
c_Z <- rdirichlet(M, Z_totals + prior_table / num_states)
} else {
c_Z <- matrix(rep(1, M), ncol = 1)
}
bounds_post <- matrix(rep(0, 2 * M), ncol = 2, nrow = M)
for (i in 1:num_states) {
if (verbose) cat(sprintf("State %d out of %d \n", i, num_states))
pre_bounds <- matrix(ncol = 2, nrow = M)
num_remaining <- M
c_wxy <- as.numeric(counts[i, , , ]) + prior_table / (8 * num_states)
first_trial <- TRUE
while (num_remaining > 0) {
wxy_samples <- rdirichlet(num_remaining, c_wxy)
b <- binAnalyticalIV(wxy_samples)
num_valid <- sum(b$valid)
if (num_valid == 0) next
next_v <- (M - num_remaining + 1):(M - num_remaining + num_valid)
pre_bounds[next_v, ] <- b$bounds[b$valid == 1, ]
num_remaining <- num_remaining - num_valid
if (first_trial && num_remaining / M > reject_level) {
cat("Model falsified\n")
out <- list(bounds = NULL, bounds_post = NULL, w = w, Z = Z, problem = problem)
class(out) <- "iv"
return(out)
}
first_trial <- FALSE
if (verbose) cat(sprintf(" Remaining: %d out of %d\n", num_remaining, M))
}
bounds_post[, 1] <- bounds_post[, 1] + pre_bounds[, 1] * c_Z[, i]
bounds_post[, 2] <- bounds_post[, 2] + pre_bounds[, 2] * c_Z[, i]
}
out <- list(bounds = colMeans(bounds_post), bounds_post = bounds_post, w = w, Z = Z, problem = problem)
class(out) <- "iv"
return(out)
}
binAnalyticalIV <- function(P_ZETA) {
# binAnalyticalIV
#
# For a treatment X, outcome Y and instrument W, This uses the analytical solution for the basic IV model {W -> X, X -> Y, X <- U -> Y},
# U latent, ACE given by P(Y = 1 | do(X = 1)) - P(Y = 1 | do(X = 0)).
#
# * Input:
#
# - P_ZETA: 8-column matrix of probabilitys, where P_ZETA[,1:4] is P(YX | W = 0), and
# entries 1, 2, 3, 4 correspond to YX = {00, 01, 10, 11}. Analogously,
# P_ZETA[,1:4] is P(YX | W = 1).
#
# * Output:
#
# - bottom, upper: the bounds on the ACE
w_0_upper <- pmin(1 - P_ZETA[, 1],
1 - P_ZETA[, 5],
P_ZETA[, 2] + P_ZETA[, 3] + P_ZETA[, 7] + P_ZETA[, 8],
P_ZETA[, 3] + P_ZETA[, 4] + P_ZETA[, 6] + P_ZETA[, 7])
w_0_bottom <- pmax(P_ZETA[, 7],
P_ZETA[, 3],
P_ZETA[, 3] + P_ZETA[, 4] - P_ZETA[, 5] - P_ZETA[, 8],
-P_ZETA[, 1] - P_ZETA[, 4] + P_ZETA[, 7] + P_ZETA[, 8])
w_1_upper <- pmin(1 - P_ZETA[, 6],
1 - P_ZETA[, 2],
P_ZETA[, 3] + P_ZETA[, 4] + P_ZETA[, 5] + P_ZETA[, 8],
P_ZETA[, 1] + P_ZETA[, 4] + P_ZETA[, 7] + P_ZETA[, 8])
w_1_bottom <- pmax(P_ZETA[, 8],
P_ZETA[, 4],
-P_ZETA[, 2] - P_ZETA[, 3] + P_ZETA[, 7] + P_ZETA[, 8],
P_ZETA[, 3] + P_ZETA[, 4] - P_ZETA[, 6] - P_ZETA[, 7])
return(list(bounds = cbind(w_1_bottom - w_0_upper, w_1_upper - w_0_bottom),
valid = (w_0_upper > w_0_bottom) * (w_1_upper > w_1_bottom)))
}
#' @title Summarize Binary Instrumental Variable Analyses
#'
#' @description
#' \code{summary} method for class "\code{iv}".
#'
#' @param object an object of class "\code{iv}", usually a result of a call to \code{\link{iv}}.
#' @param ... other parameters, ignored.
#' @return Besides fields from the \code{iv} object, this includes a list summary statistics,
#' \item{\code{lq}}{an array of 5 entries corresponding to evenly space quantiles of the lower bound ("minimum", 25\%, median, 75%, "maximum") as
#' given by a Monte Carlo approximation.}
#' \item{\code{lci}}{95\% marginal posterior credible interval for lower bound.}
#' \item{\code{uq}}{an array of 5 entries corresponding to evenly space quantiles of the upper bound ("minimum", 25\%, median, 75%, "maximum") as
#' given by a Monte Carlo approximation.}
#' \item{\code{uci}}{95\% marginal posterior credible interval for upper bound.}
#'
#' @seealso
#' The model fitting function \code{\link{iv}}.
#'
#' @export
summary.iv <- function(object, ...) {
if (class(object) != "iv") {
stop("an iv object is necessary")
}
if (!is.null(object$bounds)) {
lq <- quantile(object$bounds_post[, 1], probs = seq(0, 1, 0.25), na.rm = TRUE)
lci <- quantile(object$bounds_post[, 1], probs = c(0.025, 0.975), na.rm = TRUE)
uq <- quantile(object$bounds_post[, 2], probs = seq(0, 1, 0.25), na.rm = TRUE)
uci <- quantile(object$bounds_post[, 2], probs = c(0.025, 0.975), na.rm = TRUE)
} else {
lq <- NULL
lci <- NULL
uq <- NULL
uci <- NULL
}
ans <- list(lq = lq, lcqi = lci, uq = uq, uci = uci,
X_idx = object$problem$X_idx, Y_idx = object$problem$Y_idx, varnames = object$problem$varnames,
w = object$w, Z = object$Z, bounds = object$bounds, bounds_post = object$bounds_post)
class(ans) <- "summary.iv"
return(ans)
}
#' @title Print Summaries of Binary Instrumental Variable Analyses
#'
#' @description
#' Print output of \code{summary} method for class "\code{iv}".
#'
#' @param x an object of class "\code{summary.iv}", usually a result of a call to \code{\link{summary.iv}}.
#' @param ... other parameters, ignored.
#'
#' @details
#' Variable names with more than 20 characters are truncated.
#'
#' @export
print.summary.iv <- function(x, ...) {
if (class(x) != "summary.iv") {
stop("an summary.iv object is necessary")
}
cat("\n")
cat(sprintf("BINARY INSTRUMENTAL VARIABLE ANALYSIS (Treatment %s, Outcome %s)\n",
x$varnames[[x$X_idx]], x$varnames[[x$Y_idx]]))
cat("\n")
cat(sprintf(" IV: %s\n", x$varnames[[x$w]]))
if (length(x$Z) == 0) {
cat(sprintf(" Covariate set: EMPTY\n"))
} else {
cat(sprintf(" Covariate set: %s\n", x$varnames[[x$Z[1]]]))
for (i in (1 + seq_len(length(x$Z) - 1))) {
cat(sprintf(" %s\n", x$varnames[[x$Z[i]]]))
}
}
cat("\n")
if (is.null(x$bounds)) {
cat("Model rejected.\n")
return()
}
lq <- x$lq
lci <- x$lci
uq <- x$uq
uci <- x$uci
cat(sprintf(" Estimated Interval: [% 1.2f, % 1.2f]\n\n", x$bounds[1], x$bounds[2]))
cat(sprintf(" Lower bound quantiles: (% 1.2f, % 1.2f, % 1.2f, % 1.2f, % 1.2f)\n", lq[[1]], lq[[2]], lq[[3]], lq[[4]], lq[[5]]))
cat(sprintf(" 95 per cent credible interval: [% 1.2f, % 1.2f]\n", lci[[1]], lci[[2]]))
cat(sprintf(" Upper bound quantiles: (% 1.2f, % 1.2f, % 1.2f, % 1.2f, % 1.2f)\n", uq[[1]], uq[[2]], uq[[3]], uq[[4]], uq[[5]]))
cat(sprintf(" 95 per cent credible interval: [% 1.2f, % 1.2f]\n", uci[[1]], uci[[2]]))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.