Nothing
#' Inference on Average Overall Effect Parameters
#'
#' Inference on the average overall effect of the IV on the outcome,
#' that on the treatment receipt, and the local average overall effect
#' in the presence of network spillover of unknown form
#'
#' @details
#' The `overall()` function estimates the average overall effect of the IV
#' on the outcome, that on the treatment receipt, and
#' the local average overall effect via inverse probability weighting
#' in the approximate neighborhood interference framework.
#' The function also computes the standard errors and the confidence intervals
#' for the target parameters based on the network HAC estimation and
#' the wild bootstrap.
#' For more details, see Hoshino and Yanagi (2023).
#' The lengths of `Y`, `D`, `Z`, `S` and
#' of the row and column of `A` must be the same.
#' `K` must be a positive integer.
#' `bw` must be `NULL` or a non-negative number.
#' `B` must be `NULL` or a positive number.
#' `alp` must be a positive number between 0 and 0.5.
#'
#' @param Y An n-dimensional outcome vector
#' @param D An n-dimensional binary treatment vector
#' @param Z An n-dimensional binary instrumental vector
#' @param S An n-dimensional logical vector to indicate whether each unit
#' belongs to the sub-population S
#' @param A An n times n symmetric binary adjacency matrix
#' @param K A scalar to indicate the range of neighborhood
#' used for constructing the interference set.
#' Default is 1.
#' @param bw A scalar of the bandwidth used for the HAC estimation and
#' the wild bootstrap.
#' If `bw = NULL`, the rule-of-thumb bandwidth proposed by Leung (2022) is used.
#' Default is NULL.
#' @param B The number of bootstrap repetitions.
#' If `B = NULL`, the wild bootstrap is skipped.
#' Default is NULL.
#' @param alp The significance level. Default is 0.05.
#'
#' @returns A data.frame containing the following elements:
#' \item{est}{The parameter estimate}
#' \item{HAC_SE}{The standard error computed by the network HAC estimation}
#' \item{HAC_CI_L}{The lower bound of the confidence interval computed by
#' the network HAC estimation}
#' \item{HAC_CI_U}{The upper bound of the confidence interval computed by
#' the network HAC estimation}
#' \item{wild_SE}{The standard error computed by the wild bootstrap}
#' \item{wild_CI_L}{The lower bound of the confidence interval computed by
#' the wild bootstrap}
#' \item{wild_CI_U}{The upper bound of the confidence interval computed by
#' the wild bootstrap}
#' \item{bw}{The bandwidth used for the HAC estimation
#' and the wild bootstrap}
#' \item{size}{The size of the subpopulation S}
#'
#' @examples
#' # Generate artificial data
#' set.seed(1)
#' n <- 2000
#' data <- latenetwork::datageneration(n = n)
#'
#' # Arguments
#' Y <- data$Y
#' D <- data$D
#' Z <- data$Z
#' S <- rep(TRUE, n)
#' A <- data$A
#' K <- 1
#' bw <- NULL
#' B <- NULL
#' alp <- 0.05
#'
#' # Estimation
#' latenetwork::overall(Y = Y,
#' D = D,
#' Z = Z,
#' S = S,
#' A = A,
#' K = K,
#' bw = bw,
#' B = B,
#' alp = alp)
#'
#' @references Hoshino, T., & Yanagi, T. (2023).
#' Causal inference with noncompliance and unknown interference.
#' arXiv preprint arXiv:2108.07455.
#'
#' Leung, M.P. (2022).
#' Causal inference under approximate neighborhood interference.
#' Econometrica, 90(1), pp.267-293.
#'
#' @export
#'
overall <- function(Y,
D,
Z,
S,
A,
K = 1,
bw = NULL,
B = NULL,
alp = 0.05) {
# Error handling -------------------------------------------------------------
error <- errorhandling(Y = Y,
D = D,
Z = Z,
IEM = NULL,
S = S,
A = A,
bw = bw,
B = B,
alp = alp)
# Variable definitions -------------------------------------------------------
# Distance matrix
graph0 <- igraph::graph_from_adjacency_matrix(adjmatrix = A,
mode = "undirected")
distance0 <- igraph::distances(graph = graph0,
v = igraph::V(graph0),
to = igraph::V(graph0),
algorithm = "dijkstra")
# Variable definitions
Y0 <- Y
D0 <- D
Z0 <- Z
Y <- Y[S]
D <- D[S]
Z <- Z[S]
A <- A[S, S]
graph <- igraph::graph_from_adjacency_matrix(adjmatrix = A,
mode = "undirected")
distance <- distance0[S, S]
# Size of the sub-population S
size <- sum(S)
# List of neighbors
neighbor_list <- list()
for (i in which(S)) {
neighbor <- which(distance0[i, ] >= 0 & distance0[i, ] <= K)
neighbor_list <- append(neighbor_list, list(neighbor))
}
# Bandwidth ------------------------------------------------------------------
if (is.null(bw)) {
# Find largest connected component
connect <- igraph::components(graph = graph)
connect_member <-
which(connect$membership == statip::mfv(connect$membership))
# Average path length
average_path_length <-
sum(distance[connect_member, connect_member]) /
(length(connect_member) * (length(connect_member) - 1))
# Average degree
average_degree <- sum(A) / size
# Rule-of-thumb bandwidth proposed by Leung (2022)
if (average_degree > 1) {
bw0 <- ifelse(average_path_length < 2 * log(size) / log(average_degree),
0.5 * average_path_length,
average_path_length^(1/3))
bw <- round(max(bw0, 2 * K))
} else {
bw <- 2 * K
}
}
# Causal inference -----------------------------------------------------------
# Function to compute the generalized propensity score
# Input "z": A value of IV
# Output: A value of GPS
GPS <- function(z) {
mean(Z == z)
}
# Function to compute the conditional outcome mean
# Input "z": A value of IV
# Output: A value of the conditional outcome mean
mu_Y <- function(z) {
summand <- rep(0, size)
for (i in 1:size) {
neighbor <- neighbor_list[[i]]
if (length(neighbor) > 0) {
summand[i] <- sum(Y0[neighbor]) * (Z[i] == z) / GPS(z = z)
}
}
return(mean(summand))
}
# Function to compute the conditional treatment mean for AOED
# Input "z": A value of IV
# Output: A value of the conditional treatment mean for AOED
mu_D_AOED <- function(z) {
summand <- rep(0, size)
for (i in 1:size) {
neighbor <- neighbor_list[[i]]
if (length(neighbor) > 0) {
summand[i] <- sum(D0[neighbor]) * (Z[i] == z) / GPS(z = z)
}
}
return(mean(summand))
}
# Function to compute the conditional treatment mean for ADED
# Input "z": A value of IV
# Output: A value of the conditional treatment mean for ADED
mu_D_ADED <- function(z) {
mean(D * (Z == z)) / GPS(z = z)
}
# Function to compute AOEY
# Input: Nothing
# Output: A value of AOEY
AOEY <- function() {
mu_Y(z = 1) - mu_Y(z = 0)
}
# Function to compute AOED
# Input: Nothing
# Output: A value of AOED
AOED <- function() {
mu_D_AOED(z = 1) - mu_D_AOED(z = 0)
}
# Function to compute ADED
# Input: Nothing
# Output: A value of ADED
ADED <- function() {
mu_D_ADED(z = 1) - mu_D_ADED(z = 0)
}
# Function to compute LAOE
# Input: Nothing
# Output: A value of LAOE
LAOE <- function() {
AOEY() / ADED()
}
# Estimates
est_AOEY <- AOEY()
est_AOED <- AOED()
est_ADED <- ADED()
est_LAOE <- LAOE()
# Variable definitions
W_Z1 <- (Z == 1)
W_Z0 <- (Z == 0)
W_Y <- W_D_AOED <- W_D_ADED <- rep(0, size)
for (i in 1:size) {
neighbor <- neighbor_list[[i]]
multiplier <- (Z[i] == 1) / GPS(z = 1) - (Z[i] == 0) / GPS(z = 0)
W_Y[i] <- sum(Y0[neighbor]) * multiplier
W_D_AOED[i] <- sum(D0[neighbor]) * multiplier
W_D_ADED[i] <- D[i] * multiplier
}
V_AOEY <- W_Y -
W_Z1 * mu_Y(z = 1) / GPS(z = 1) +
W_Z0 * mu_Y(z = 0) / GPS(z = 0)
V_AOED <- W_D_AOED -
W_Z1 * mu_D_AOED(z = 1) / GPS(z = 1) +
W_Z0 * mu_D_AOED(z = 0) / GPS(z = 0)
V_ADED <- W_D_ADED -
W_Z1 * mu_D_ADED(z = 1) / GPS(z = 1) +
W_Z0 * mu_D_ADED(z = 0) / GPS(z = 0)
V_LAOE <- V_AOEY / est_ADED - V_ADED * est_AOEY / est_ADED^2
# Function to compute HAC standard error and confidence interval
# Input: Nothing
# Output: A list containing HAC standard error and confidence interval
HAC_func <- function() {
# Standard error
neighbor_mat <- (distance <= bw)
SE_AOEY <- sqrt(t(V_AOEY) %*% neighbor_mat %*% V_AOEY / size^2)
SE_AOED <- sqrt(t(V_AOED) %*% neighbor_mat %*% V_AOED / size^2)
SE_ADED <- sqrt(t(V_ADED) %*% neighbor_mat %*% V_ADED / size^2)
SE_LAOE <- sqrt(t(V_LAOE) %*% neighbor_mat %*% V_LAOE / size^2)
# Critical value based on asymptotic normality
cvnorm <- stats::qnorm(1 - alp / 2)
# Confidence interval
CI_AOEY <- c(est_AOEY - cvnorm * SE_AOEY,
est_AOEY + cvnorm * SE_AOEY)
CI_AOED <- c(est_AOED - cvnorm * SE_AOED,
est_AOED + cvnorm * SE_AOED)
CI_ADED <- c(est_ADED - cvnorm * SE_ADED,
est_ADED + cvnorm * SE_ADED)
CI_LAOE <- c(est_LAOE - cvnorm * SE_LAOE,
est_LAOE + cvnorm * SE_LAOE)
# Result
HAC_AOEY <- c(SE_AOEY, CI_AOEY)
HAC_AOED <- c(SE_AOED, CI_AOED)
HAC_ADED <- c(SE_ADED, CI_ADED)
HAC_LAOE <- c(SE_LAOE, CI_LAOE)
return(list(AOEY = HAC_AOEY,
AOED = HAC_AOED,
ADED = HAC_ADED,
LAOE = HAC_LAOE))
}
# Function to compute the Omega matrix used for wild bootstrap
# Input: Nothing
# Output: the Omega matrix
Omega_func <- function() {
# Compute the numerator
neighbor_mat <- (distance <= bw)
numerator <- NULL
for (i in 1:size) {
intersect_mat <- sweep(x = neighbor_mat,
MARGIN = 2,
FUN = "*",
neighbor_mat[i, ])
numerator <- rbind(numerator,
apply(intersect_mat, MARGIN = 1, FUN = sum))
}
# Compute the denominator
denominator <- sum(neighbor_mat) / size
return(numerator/denominator)
}
# Function to compute wild bootstrap standard error and confidence interval
# Input: Nothing
# Output: A list containing bootstrap standard error and confidence interval
wild_func <- function() {
# Square root matrix of Omega
Omega <- Omega_func()
Omega <- methods::as(Omega, "dgCMatrix")
E <- eigen(Omega)
E$values[abs(E$values) < 1e-10] <- 0
sqrt_Omega <- E$vectors %*% diag(sqrt(E$values)) %*% t(E$vectors)
# Bootstrap
AOEY_boot <- AOED_boot <- ADED_boot <- LAOE_boot <- NULL
for (r in 1:B) {
zeta <- stats::rnorm(size)
R_vector <- sqrt_Omega %*% zeta
V_AOEY_boot <- V_AOEY * R_vector
V_AOED_boot <- V_AOED * R_vector
V_ADED_boot <- V_ADED * R_vector
V_LAOE_boot <- V_LAOE * R_vector
AOEY_boot <- c(AOEY_boot, mean(V_AOEY_boot))
AOED_boot <- c(AOED_boot, mean(V_AOED_boot))
ADED_boot <- c(ADED_boot, mean(V_ADED_boot))
LAOE_boot <- c(LAOE_boot, mean(V_LAOE_boot))
}
# Standard error
SE_AOEY <- stats::sd(AOEY_boot)
SE_AOED <- stats::sd(AOED_boot)
SE_ADED <- stats::sd(ADED_boot)
SE_LAOE <- stats::sd(LAOE_boot)
# Confidence interval
q_AOEY <- stats::quantile(AOEY_boot,
probs = c(alp / 2, 1 - alp / 2))
q_AOED <- stats::quantile(AOED_boot,
probs = c(alp / 2, 1 - alp / 2))
q_ADED <- stats::quantile(ADED_boot,
probs = c(alp / 2, 1 - alp / 2))
q_LAOE <- stats::quantile(LAOE_boot,
probs = c(alp / 2, 1 - alp / 2))
CI_AOEY <- c(est_AOEY + q_AOEY[1],
est_AOEY + q_AOEY[2])
CI_AOED <- c(est_AOED + q_AOED[1],
est_AOED + q_AOED[2])
CI_ADED <- c(est_ADED + q_ADED[1],
est_ADED + q_ADED[2])
CI_LAOE <- c(est_LAOE + q_LAOE[1],
est_LAOE + q_LAOE[2])
# Result
wild_AOEY <- c(SE_AOEY, CI_AOEY)
wild_AOED <- c(SE_AOED, CI_AOED)
wild_ADED <- c(SE_ADED, CI_ADED)
wild_LAOE <- c(SE_LAOE, CI_LAOE)
return(list(AOEY = wild_AOEY,
AOED = wild_AOED,
ADED = wild_ADED,
LAOE = wild_LAOE))
}
# HAC estimation
HAC <- HAC_func()
# Wild bootstrap
if (!is.null(B)) {
wild <- wild_func()
} else {
wild <- list(AOEY = rep(NA, 3),
AOED = rep(NA, 3),
ADED = rep(NA, 3),
LAOE = rep(NA, 3))
}
# Result
Estimate <- rbind(c(est_AOEY, HAC$AOEY, wild$AOEY, bw, size),
c(est_AOED, HAC$AOED, wild$AOED, bw, size),
c(est_ADED, HAC$ADED, wild$ADED, bw, size),
c(est_LAOE, HAC$LAOE, wild$LAOE, bw, size)
)
colnames(Estimate) <- c("est",
"HAC_SE",
"HAC_CI_L",
"HAC_CI_U",
"wild_SE",
"wild_CI_L",
"wild_CI_U",
"bw",
"size")
rownames(Estimate) <- c("AOEY", "AOED", "ADED", "LAOE")
Estimate <- as.data.frame(Estimate)
return(Estimate)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.