#' Uplift Curve
#'
#' The uplift curve is analgous to the ROC curve used to evaluate the
#' performance of binary classification models. We order the observations
#' by the predicted treatment effect and then compare the cumulative lift
#' against the observed treatment effect. The observed treatment effect is
#' the lift we would achieve using random targeting or selection.
#'
#' @param y a vector of outcomes.
#' @param tmt a vector indicating which units received treatment.
#' @param pred_te a vector of predicted treatment effects.
#' @param bins the number of bins to use for building the uplift curve. More
#' bins will result in a smoother curve, but this is limited by the number of
#' distinct values \code{pred_te} as well as the number of treatment and
#' control units falling in each bin.
#'
#' @return a \code{hete_uplift} object with:
#' \item{uplift_curve}{A \code{data.frame} with the points of the uplift curve.}
#' \item{q}{The q/qini score of the model.}
#' \item{ate_observed}{The observed average treatment effect.}
#' \item{ate_predicted}{The mean predicted treatment effect.}
#'
#' @export
uplift <- function(y, tmt, pred_te, bins = 10) {
# boolean if the user passed 1/0
tmt <- as.logical(tmt)
if (is.factor(y)) {
# TODO: should we warn user we're doing this?
ref_level <- levels(y)[1]
y <- ifelse(y == ref_level, 0, 1)
}
ate <- mean(y[tmt]) - mean(y[!tmt])
frac <- seq(0, 1, 1 / bins)
random_lift <- ate * frac
# we want to order the scores from highest to lowest
qts <- stats::quantile(pred_te, probs = rev(frac))
model_lift <- purrr::map_dbl(qts, model_lift, y = y, tmt = tmt, pred_te = pred_te)
model_lift <- model_lift * frac
# the first one must be 0
model_lift[1] <- 0
df <- tibble::tibble(frac = frac, model = model_lift, random = random_lift)
structure(
list(
uplift_curve = df,
q = qini(frac, random_lift, model_lift),
ate_observed = ate,
ate_predicted = mean(pred_te)
),
class = "hete_uplift")
}
model_lift <- function(min_val, y, tmt, pred_te) {
y_ <- y[pred_te >= min_val]
tmt_ <- tmt[pred_te >= min_val]
mean(y_[tmt_]) - mean(y_[!tmt_])
}
#' @export
plot.hete_uplift <- function(x, ...) {
df_plot <- tidyr::gather(x$uplift_curve, key = "measure", value = "value", -c("frac"))
p <- ggplot2::ggplot(df_plot, ggplot2::aes_string(x = "frac", y = "value", color = "measure")) +
ggplot2::geom_line() +
ggplot2::scale_x_continuous(label = scales::percent) +
ggplot2::xlab("Population Fraction") +
ggplot2::ylab("Cumulative Lift") +
ggplot2::guides(color = ggplot2::guide_legend(title = NULL)) +
ggplot2::theme_bw()
p
}
qini <- function(target_frac, random, model) {
random_auc <- trapz(target_frac, random)
model_auc <- trapz(target_frac, model)
model_auc - random_auc
}
trapz <- function(x, y) {
delta <- diff(x)
y1 <- y[-1]
y2 <- y[-length(y)]
sum(((y1 + y2) / 2) * delta)
}
#' @importFrom rlang .data
plot_quantiles <- function(y, tmt, pred_te, bins = 10) {
df <- tibble::tibble(y = y, tmt = tmt, pred_te = pred_te)
# TODO: NSE fix here
df <- dplyr::mutate(df,
tmt_quantile = factor(dplyr::ntile(.data$pred_te, bins)),
ctl = ifelse(.data$tmt == 0, 1, 0))
# Thanks to https://edwinth.github.io/blog/dplyr-recipes/.
qt <- dplyr::group_by(df, !!rlang::sym("tmt_quantile"))
qt <- dplyr::mutate(qt,
y_0 = ifelse(.data$tmt == 0, .data$y, 0),
y_1 = ifelse(.data$tmt == 1, .data$y, 0))
qt <- dplyr::summarize(qt,
Predicted = mean(.data$pred_te),
ctl_mean = sum(.data$y_0) / sum(.data$ctl),
tmt_mean = sum(.data$y_1) / sum(.data$tmt),
Actual = .data$tmt_mean - .data$ctl_mean)
qt <- tidyr::gather(qt, key = "series", value = "tmt_effect", "Predicted", "Actual")
p <- ggplot2::ggplot(qt, ggplot2::aes_string(x = "tmt_quantile", y = "tmt_effect", fill = "series")) +
ggplot2::geom_bar(stat = "identity", position = "dodge") +
ggplot2::xlab("Predicted Treatment Effect Quantile") +
ggplot2::ylab("Treatment Effect") +
ggplot2::guides(fill = ggplot2::guide_legend(title = NULL)) +
ggplot2::theme_bw()
p
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.