.make_formula <- function(y, vars) {
stats::as.formula(paste(y, "~", paste(vars, collapse = "+")))
}
logit <- stats::binomial()$linkfun
inv_logit <- stats::binomial()$linkinv
#' Overwrites a list with named arguments
#'
#' Returns a list that is the intersection of
#' \code{defaults} and \code{...} where named
#' values in \code{...} overwrite values in \code{defaults}.
#'
#' One or more of the elements in \code{...} can be a
#' \code{pairlist} (the structure in which dotted arguments
#' are stored) and this list will get expanded and substituted
#' into \code{defaults} too. This means that the same named argument
#' could be listed more than twice, in which case the right-most value
#' is used. See below for details.
#'
#' @param defaults list of default values.
#' Argument always be specified by name not position
#' @param ... a comma separated set of named
#'
#' \code{overwrite_list(defaults = list(a = 1, b = '2'), c = 1, a = '3')} will
#' return list(a = '1', b = '2', c = 1)
#'
#' To demonstrate the operation when one of the arguments is a \code{pairlist},
#' note that both of the functions below produce the same result. You can either pass the
#' elipsis (...) directly, or you can extract the arguments from the \code{call}
#' and pass them later.
#'
#' \code{
#' f = function(...) {
#' overwrite_list(defaults = list(a = 1, b = 'b'), b = 2, ...)
#' }
#' }
#'
#' \code{
#' f = function(...) {
#' # Extract passed arguments from call
#' dot_args = match.call(expand.dots = F)$...
#' overwrite_list(defaults = list(a = 1, b = 'b'), b = 2, dot_args)
#' }
#' }
#'
#' \code{f(k = 5)} will return \code{list(a = 1, b = 2, k = 5)}.
#'
#' \code{f(b = 10)} will return \code{list(a = 1, b = 10)}. Note
#' how the named argument \code{b} appears 3 times in the \code{overwrite_list}
#' call; once in the defaults, once explicitly specified for overwriting (\code{b = 2}),
#' and once indirectly specified via the arguments to \code{f} (\code{b = 10}).
#'
#' @export
overwrite_list <- function(defaults, ...) {
dot_args <- list(...)
# Expand pairlists (ie lists of arguments)
dot_args =
lapply(seq_along(dot_args), function(i) {
if (is.pairlist(dot_args[[i]])) {
do.call(c,dot_args[i])
} else {
dot_args[i]
}}) %>%
unlist(recursive = F)
for (v in seq_along(dot_args)) {
defaults[[names(dot_args)[v]]] <- dot_args[[v]]
}
defaults
}
#' Given a policy object, compute and return the appropriate risk column
.get_risk_col <- function(pol) {
# Input validation
if (!("policy" %in% class(pol))) {
stop("Expected object of class policy")
}
logit(pol$data[[paste0(pol$risk_col, "__")]])
}
#' Fit second stage model and extract disparate impact coefficient
#'
#' Given a data frame, treatment (cn_lhs), group (cn_tgt), additional controls
#' (controls), and some function for predicting treatment given risk, group and
#' additional controls, report the average odds ratio of treatment between
#' groups. The first factor is treated as a baseline group to compare against.
#'
#' @param d data frame that has all columns referenced in cn_lhs, cn_tgt, and
#' controls
#' @param cn_lhs name of column to use as LHS
#' @param cn_tgt the target column of interest; should be a factor where the
#' first factor is considered the base group to compare against
#' @param controls character vector of additional columns to control for
#' @param fun function(data, ...) used to fit model
#'
#' @return tidy dataframe of the glm model
.get_estimate <-
function(d,
cn_lhs,
cn_tgt,
controls = NULL,
fun = NULL) {
if (is.null(fun)) {
stop("Second-stage fitting function not specified")
}
f <- .make_formula(cn_lhs, c(controls, cn_tgt))
lbl_controls <- paste(c(cn_tgt, controls), collapse = ", ")
fun(f, d) %>%
broom::tidy() %>%
mutate(controls = lbl_controls)
}
#' Extract LHS, first element of RHS, and remainder of RHS from a formula
#'
#' Used for extracting relevant column names for features/grouping
#'
#' @param formula a formula where the first element of the RHS represents a
#' grouping variable
#'
#' @return list of elements treat, group, and feats
.extract_features <- function(formula) {
treat <- as.character(lazyeval::f_lhs(formula))
group <- labels(stats::terms(formula))[1]
feats <- labels(stats::terms(formula))[-1]
list(treat = treat, group = group, feats = feats)
}
#' Expand parameters by length
#'
#' Used for determining how to apply sensitivity parameters
#'
#' @param g vector of grouping variable
#' @param p paramter in size of either 1, equal to the number of levels in
#' \code{g}, or equal to the length of \code{g}
#'
#' @return vector of parameter \code{p} appropriately expanded
.expand_params <- function(g, p) {
len <- length(p)
if (len == 1) {
return(rep(p, length(g)))
} else if (is.factor(g) & len == length(levels(g))) {
return(p[as.numeric(g)])
} else if (len == length(g)) {
return(p)
} else {
stop("Mis-specified parameter assignments to grouping variable.\n\t",
"Expecting parameters of length 1",
if(is.factor(g)) paste0(", ", length(levels(g)), ","),
" or ", length(g), "\n\t",
"Received parameter of length: ", length(p))
}
}
#' Extract sensitivity parameters from a list of parameters for optimization
#'
#' @param params vector of parameters
#' @param free_params (Optional) logical vector of length 8 indicating which
#' parameters are specified in params. \code{length(params)} should equal
#' \code{sum(free_params == TRUE)}.
#' @param fixed_param_values (Optional) vector specifing the values of the fixed
#' parameters (ie when \code{free_params == FALSE})
#' @param q_range if true, the second parameter defines the log odds ratio
#' between q for base and minority groups
#' @param allow_sgv logical; whether to allow for subgroup validity; i.e., if
#' \code{TRUE}, the delta parameters (\code{dp}, \code{d0}, \code{d1}) will be
#' allowed to vary between base/minority groups, but if \code{FALSE}, a single
#' value for each delta parameter will be used for each base/minority pair
#'
#' @return named list of parameters
.extract_params <- function(params,
free_params = rep(T, 8),
fixed_param_values = NULL,
q_range = FALSE,
allow_sgv = FALSE) {
all_params = numeric(8)
all_params[free_params] = params
all_params[!free_params] = fixed_param_values
if (q_range) {
all_params[2] = inv_logit(logit(all_params[1]) + all_params[2])
}
if (!allow_sgv) {
all_params[4] <- all_params[3]
all_params[6] <- all_params[5]
all_params[8] <- all_params[7]
}
list(qb = all_params[1],
qm = all_params[2],
ab = all_params[3],
am = all_params[4],
d0b = all_params[5],
d0m = all_params[6],
d1b = all_params[7],
d1m = all_params[8])
}
#' Compute AUC given predictions and corresponding labels
#'
#' @param pred scalar predictions
#' @param label binary labels
#' @param ret_num logical, whether to return a numerical value (TRUE) or a
#' formatted string (FALSE: default)
#'
#' @return scalar AUC
.compute_auc <- function(pred, label, ret_num = FALSE) {
if (length(unique(label)) == 2) {
p <- ROCR::prediction(pred, label)
auc <- ROCR::performance(p, "auc")
if (ret_num) {
return(unlist(auc@y.values))
} else {
return(format(unlist(auc@y.values) * 100))
}
} else {
if (ret_num) {
return(-1)
} else {
return("-")
}
}
}
#' Down-sample a data set
#'
#' @param d original data frame
#' @param n parameter for down-sampling; if between 0 and 1, treated as a
#' fraction, if larger than 1, rounded and treated as the number of rows to
#' sample
#' @param verbose whether to print down-sampling message
#'
#' @return down-sampled data frame
.down_sample <- function(d, n, verbose = TRUE) {
N <- nrow(d)
if (n == 1) {
return(d)
}
if (n <= 0) {
stop("Down sampling parameter must be greater than 0.\n\tGot: ", n)
}
if (n < 1) {
ret <- sample_frac(d, size = n)
} else {
ret <- sample_n(d, size = n)
}
if (verbose) {
message("Down-sampled data to ", nrow(ret), "/", N,
" (", format(nrow(ret)/N * 100), "%) rows")
}
return(ret)
}
#' Safely scale/center covariates, avoiding NaNs
#'
#' @param x a numeric matrix (like object)
#'
#' @return scaled matrix with attributes \code{scaled:center}
#' and \code{scaled:scale} indicating the transformation performed.
.safe_scale <- function(x) {
means = colMeans(x, na.rm = T)
sds = apply(x, 2, function(c) stats::sd(c, na.rm = T))
badcols = sds < .Machine$double.eps*2
# Don't scale constant columns
sds[badcols] = 1
means[badcols] = 0
scale(x, center = means, scale = sds)
}
#' Extract group members from a column of groups
#'
#' @param group_col column of groups
#'
#' @return \code{levels(group_col)} if \code{group_col} is a factor, and
#' \code{unique(group_col)} otherwise
.get_groups <- function(group_col) {
if (is.factor(group_col)) {
return(levels(group_col))
} else {
return(unique(group_col))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.