#' Compute risk-adjusted disparate impact estimate for a policy
#'
#' @param controls character vector of additional controls to consider in the
#' second-stage model
#' @param down_sample (Optional) proportion (between 0 and 1) or number (greater
#' than 1) of rows to sample, if down sampling the (test) data; default is 1
#' (i.e., use all data)
#' @param seed random seed to set
#' @param ... additional arguments to pass to \code{fit} function from
#' \code{\link{rad_control}} for fine-tuning
#'
#' @return tidy data frame with columns \item{term}{the group members considered
#' minority} \item{estimate}{log-odds of treatment, relative to base_group
#' (equivalent to logistic regression coefficient)}
#' \item{std.error}{coefficient standard errors for \code{*_coef} methods; TO
#' BE IMPLEMENTED for \code{*_avg} methods (FOR NOW, ALL ZERO)!}
#' \item{statistic/p.value}{(for \code{*_coef} methods) corresponding values
#' from model fit} \item{ptrt_base/minor}{(for \code{*_avg} methods) estimated
#' average treatment probability for base/minority groups} \item{method}{the
#' method used} \item{controls}{features controlled for}
#'
#' @inheritParams .validate_input
#' @inheritParams rad_control
#' @export
compute_rad <-
function(pol,
controls = NULL,
base_group = NULL,
minority_groups = NULL,
fit_fn = "logit_coef",
down_sample = 1,
use_speedglm = TRUE,
seed = round(stats::runif(1)*1e4),
...) {
set.seed(seed)
# Input validation
groups <- .validate_input(pol, base_group, minority_groups)
base_group <- groups$base
minority_groups <- groups$minority
rc <- rad_control(pol, controls, fit_fn = fit_fn,
use_speedglm = use_speedglm)
d <- pol$data
# Restrict data to groups of interest
target_group_ind <- d[[pol$grouping]] %in% c(base_group, minority_groups)
d <- d[target_group_ind, ]
d[[pol$grouping]] <- forcats::fct_drop(d[[pol$grouping]])
# Down-sample and filter to test fold
test_df <- .down_sample(d[d$fold__ == "test", ], down_sample)
# Make sure that the base_group is first level
# (see @details of .compute_estimate)
test_df[[pol$grouping]] <- forcats::fct_relevel(test_df[[pol$grouping]],
base_group)
ret <- .compute_estimate(test_df, rc, ...)
return(ret)
}
#' Compute benchmark test for disparate impact of a policy
#'
#' @param controls character vector of additional controls to consider (i.e.,
#' valid benchmarks)
#' @param kitchen_sink logical; if TRUE, ignore \code{controls} argument, and
#' include all variables given with the policy, i.e., policy$features
#' (default: FALSE)
#'
#' @return tidy data frame of benchmark coefficients
#'
#' @inheritParams .validate_input
#' @export
compute_bm <-
function(pol,
controls = NULL,
kitchen_sink = FALSE,
base_group = NULL,
minority_groups = NULL) {
# Input validation
groups <- .validate_input(pol, base_group, minority_groups)
base_group <- groups$base
minority_groups <- groups$minority
if (kitchen_sink) {
controls <- pol$features
}
d <- pol$data
test_df <- d[d$fold__ == "test", ]
ret <- purrr::map_dfr(minority_groups, function(comp) {
target_group_ind <- test_df[[pol$grouping]] %in% c(base_group, comp)
tmp_df <- test_df[target_group_ind, ]
tmp_df[[pol$grouping]] <- forcats::fct_drop(tmp_df[[pol$grouping]])
tmp_df[[pol$grouping]] <- forcats::fct_relevel(tmp_df[[pol$grouping]],
base_group)
coefs <- .get_estimate(tmp_df, pol$treatment, pol$grouping,
c(controls),
fun = function(f, d, w)
stats::glm(f, d, family = stats::binomial))
coefs[grepl(pol$grouping, coefs$term), ]
})
if (kitchen_sink) ret$controls <- "kitchen sink"
return(ret)
}
#' Compute outcome tests for disparate impact of a policy
#'
#' @param controls character vector of additional controls to consider (i.e.,
#' conditional groupings)
#'
#' @return data frame, grouped by the group and additional columns specified in
#' the \code{controls} arguments and corresponding \code{hitrate}, i.e.,
#' \code{P(outcome = 1 | treatment = risk_treatment)}, where
#' \code{risk_treatment = ifelse(pol$risk_col == "resp_trt", 1, 0)}.
#'
#' @inheritParams .validate_input
#' @export
compute_ot <- function(pol,
controls = NULL) {
d <- pol$data
test_df <- d[d$fold__ == "test", ]
v_treatment <- rlang::sym(pol$treatment)
v_outcome <- rlang::sym(pol$outcome)
risk_treatment <- ifelse(pol$risk_col == "resp_trt", 1, 0)
test_df %>%
filter(!!v_treatment == risk_treatment) %>%
group_by_(.dots = c(pol$grouping, controls)) %>%
summarize(hitrate = mean(!!v_outcome), count = n())
}
#' Given a data frame and \code{\link{rad_control} object}, compute RAD estimate
#'
#' @param d data frame that has all necessary columns
#' @param rc a \code{\link{rad_control}} object
#' @param ... additional arguments passed to \code{rc$fit} function
#'
#' @details This helper method relies on the proper factoring of data \code{d}.
#' The grouping variable is extracted from \code{rc$grouping}, while the
#' \code{base_group} and \code{minority_groups} are determined by the levels
#' ordering in \code{d[[rc$grouping]]}, i.e., make sure groups are properly
#' ordered before calling!
#'
#' @return tidy dataframe of estimated rad results
.compute_estimate <- function(d, rc, weighted = FALSE, ...) {
# Fit model
groups <- .get_groups(d[[rc$grouping]])
base_group <- groups[1]
minority_groups <- groups[-1]
ret <- purrr::map_dfr(minority_groups, function(group) {
target_group_ind <- d[[rc$grouping]] %in% c(base_group, group)
tmp_df <- d[target_group_ind, ]
if (weighted) {
m <- rc$fit(tmp_df, w = tmp_df$weights__, ...)
} else {
m <- rc$fit(tmp_df, ...)
}
if (rc$method == "coef") {
coefs <- m %>%
broom::tidy(parametric = TRUE) %>%
mutate(method = rc$method, controls = rc$label)
coefs[grepl(rc$grouping, coefs$term), ]
} else if (rc$method == "avg") {
ptrt <- purrr::map_dbl(c(base_group, group), function(x) {
counter_df <- tmp_df
counter_df[[rc$grouping]] <- x
if (weighted) {
stats::weighted.mean(rc$pred(m, counter_df), w = counter_df$weights__)
} else {
mean(rc$pred(m, counter_df))
}
})
odds <- ptrt / (1 - ptrt)
or <- odds[2]/odds[1]
# TODO: estimate standard errors? (just create column of 0 for now)
tibble(term = paste0(rc$grouping, group),
estimate = log(or),
std.error = 0,
ptrt_base = ptrt[1],
ptrt_minor = ptrt[2],
method = rc$method,
controls = rc$label)
} else {
stop("Unknown method specification from rad_control:", rc$method)
}
})
}
#' Validate input for
#'
#' @param pol object of class policy
#' @param base_group (Optional) single group that acts as the pivot/base; by
#' default, if the grouping variable is a factor, set to the first level,
#' otherwise set to the first of sorted unique values
#' @param minority_groups (Optional) groups to compare to the base group; by
#' default, set to every unique value other than the base group
#'
#' @return list of validated group members \item{base}{base group
#' members}\item{minority}{minority group members}
.validate_input <- function(pol, base_group, minority_groups) {
if (!("policy" %in% class(pol))) {
stop("Expected object of class policy")
}
if (length(base_group) > 1) {
stop("Specify a single base group.\n\tGot: ", base_group)
}
d <- pol$data
group_col <- d[[pol$grouping]]
groups <- .get_groups(group_col)
check_groups <- sapply(c(base_group, minority_groups),
function(x) x %in% groups)
if (!all(check_groups)) {
stop(sprintf("%s - not member of %s",
paste0(c(base_group, minority_groups)[!check_groups],
collapse = ","),
pol$grouping))
}
if (is.null(base_group)) {
base_group <- groups[1]
}
if (is.null(minority_groups)) {
minority_groups <- groups[!(groups == base_group)]
}
list(base = base_group, minority = minority_groups)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.