#' Confidence Intervals for Conditional Effects and Evaluation Measures
#'
#' Wald-style confidence intervals for \code{\link[data.table]{data.table}}s
#' with additional columns for estimates of the conditional average treatment
#' effect (CATE) and related segmentation evaluation measures.
#'
#' @param estimation_results A \code{\link[data.table]{data.table}} containing
#' the input data, augmented to include cross-validated nuisance parameter
#' estimates, an estimate of the CATE, and a treatment rule assigned based on
#' the estimated CATE via \code{\link{assign_rule}}. This input object should
#' be generated by successive calls to \code{\link{set_est_data}} and
#' \code{\link{est_cate}}, or through a wrapper function that composes these
#' function calls automatically. For cases in which \code{\link{assign_rule}}
#' has been called prior, any additional information is preserved.
#' @param segment_by A \code{character} vector specifying the column names in
#' \code{data_obs} that correspond to the covariates over which segmentation
#' should be performed. This should be a strict subset of \code{baseline}.
#' @param param_type A \code{character} string (of length one) indicating the
#' type of estimates for which a Wald-style confidence interval is to be
#' computed. The choice of \code{"cate"} indicates that this routine is being
#' invoked thru \code{\link{summarize_segments}} while \code{"effect_measure"}
#' indicates that the input was generated by \code{\link{est_effect}}.
#' @param outcome_type A \code{character} string (of length one) indicating
#' whether the outcome variable is binary or continuous-valued. For cases in
#' which the outcome is binary, the confidence interval for the parameter
#' estimate is constructed on the logit scale and then back-transformed, in
#' order to respect the fact that the estimate must lie in the unit interval.
#' @param coverage A \code{numeric} indicating the nominal rate at which the
#' Wald-style confidence interval is intended to cover the target parameter.
#'
#' @importFrom assertthat assert_that
#' @importFrom data.table setcolorder ":="
#' @importFrom stats qnorm qlogis plogis
#'
#' @return A \code{\link[data.table]{data.table}} matching exactly the input
#' provided in \code{estimation_results}, augmented to include the lower and
#' upper bounds from the Wald-style confidence intervals constructed.
#'
#' @keywords internal
ci_wald <- function(estimation_results,
segment_by = NULL,
param_type = c("cate", "effect_measure"),
outcome_type = c("continuous", "binary"),
coverage = getOption("sherlock.ci_covers")) {
# first, let's get Z_(1 - alpha)
wald_mult <- c(-1, 1) * abs(stats::qnorm(p = (1 - coverage) / 2))
# assume continuous outcome if more than two levels in outcome node
if (outcome_type == "continuous") {
# NOTE: standard error already scaled (i.e., from Var(D_n)/n)
if (param_type == "cate") {
# check that segmentation covariates are included
assertthat::assert_that(!is.null(segment_by))
# compute the interval around the CATE estimate
estimation_results[, lwr_ci := (wald_mult[1] * std_err + cate)]
estimation_results[, upr_ci := (wald_mult[2] * std_err + cate)]
} else if (param_type == "effect_measure") {
# compute the interval around the effect measure estimate
estimation_results[, lwr_ci := (wald_mult[1] * std_err + estimate)]
estimation_results[, upr_ci := (wald_mult[2] * std_err + estimate)]
}
} else if (outcome_type == "binary") {
# compute the gradient for ratios by application of the delta method
ratio_gradient_delta <- function(theta) {
(1 / theta) + (1 / (1 - theta))
}
# for binary outcomes, create CI on the logit scale and back-transform
if (param_type == "cate") {
theta_ratio <- estimation_results[, stats::qlogis(cate)]
ratio_grad_theta <- estimation_results[, ratio_gradient_delta(cate)]
} else if (param_type == "effect_measure") {
theta_ratio <- estimation_results[, stats::qlogis(estimate)]
ratio_grad_theta <- estimation_results[, ratio_gradient_delta(estimate)]
}
# compute SE on logit scale, back-transform, append lower/upper bounds
se_eif_logit <- estimation_results[, sqrt(ratio_grad_theta^2 * std_err^2)]
estimation_results[, lwr_ci := stats::plogis(wald_mult[1] * se_eif_logit +
theta_ratio)]
estimation_results[, upr_ci := stats::plogis(wald_mult[2] * se_eif_logit +
theta_ratio)]
}
# re-arrange columns
if (param_type == "cate") {
data.table::setcolorder(
estimation_results,
c(segment_by, "count", "cate", "lwr_ci", "upr_ci", "std_err")
)
} else if (param_type == "effect_measure") {
data.table::setcolorder(
estimation_results,
c("estimate", "lwr_ci", "upr_ci", "std_err")
)
}
# return augmented data.table
return(estimation_results[])
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.