R/plot_optimaldecision.R
In aihuman: Experimental Evaluation of Algorithm-Assisted Human Decision-Making

Documented in CalOptimalDecision PlotOptimalDecision

#' Calculate optimal decision & utility
#'
#' (1) Calculate optimal decision for each observation given each of c0 (cost of an outcome) and c1 (cost of an unnecessarily harsh decision) from the lists.
#' (2) Calculate difference in the expected utility between binary version of judge's decisions and DMF recommendations given each of c0 (cost of an outcome) and c1 (cost of an unnecessarily harsh decision) from the lists.
#'
#' @param data A \code{data.frame} or \code{matrix} of which columns consists of pre-treatment covariates, a binary treatment (Z), an ordinal decision (D), and an outcome variable (Y). The column names of the latter three should be specified as "Z", "D", and "Y" respectively.
#' @param mcmc.re A \code{mcmc} object generated by \code{AiEvalmcmc()} function.
#' @param c0.ls The list of cost of an outcome. See Section 3.7 for more details.
#' @param c1.ls The list of cost of an unnecessarily harsh decision. See Section 3.7 for more details.
#' @param dmf A numeric vector of binary DMF recommendations. If \code{null}, use judge's decisions (0 if the decision is 0 and 1 o.w; e.g., signature or cash bond).
#' @param rho A sensitivity parameter. The default is  \code{0} which implies the unconfoundedness assumption (Assumption 4).
#' @param burnin A proportion of burnin for the Markov chain. The default is \code{0}.
#' @param out.length An integer to specify the progress on the screen. Every \code{out.length}-th iteration is printed on the screen. The default is \code{500}.
#' @param ZX The data matrix for interaction terms. The default is the interaction between Z and all of the pre-treatment covariates (X).
#' @param size The number of parallel computing. The default is \code{5}.
#' @param include.utility.diff.mcmc A logical argument specifying whether to save \code{Utility.diff.control.mcmc} and \code{Utility.diff.treated.mcmc} for Figure S17. The default is \code{FALSE}.
#'
#' @return A \code{data.frame} of (1) the probability that the optimal decision for each observation being d in (0,1,...,k), (2) expected utility of binary version of judge's decision (g_d), (3) expected utility of binary DMF recommendation, and (4) the difference between (2) and (3). If \code{include.utility.diff.mcmc = TRUE}, returns a list of such \code{data.frame} and a \code{data.frame} that includes the result for mean and quantile of \code{Utility.diff.control.mcmc} and \code{Utility.diff.treated.mcmc} across mcmc samples.
#'
#' @examples
#' \donttest{
#' data(synth)
#' sample_mcmc <- AiEvalmcmc(data = synth, n.mcmc = 10)
#' sample_optd <- CalOptimalDecision(
#'   data = synth, mcmc.re = sample_mcmc,
#'   c0.ls = seq(0, 5, 1), c1.ls = seq(0, 5, 1),
#'   size = 1
#' ) # adjust the size
#' }
#'
#' @useDynLib aihuman, .registration=TRUE
#' @export
#'
CalOptimalDecision <- function(data,
                               mcmc.re,
                               c0.ls,
                               c1.ls,
                               dmf = NULL,
                               rho = 0,
                               burnin = 0,
                               out.length = 500,
                               ZX = NULL,
                               size = 5,
                               include.utility.diff.mcmc = FALSE) {
  if (size == 1) {
    message("Increase the size.")
    k <- length(unique(data$D)) - 1

    idx <- 1:nrow(data)

    if (include.utility.diff.mcmc) {
      dat <- dat.mcmc <- data.frame()
      for (i in c0.ls) {
        for (j in c1.ls) {
          apce <- CalAPCE(data,
            mcmc.re,
            subgroup = list(1:nrow(data), 1:nrow(data), 1:nrow(data), 1:nrow(data), 1:nrow(data)),
            rho = rho,
            burnin = burnin,
            out.length = out.length,
            ZX = ZX,
            c0 = i, c1 = j,
            save.individual.optimal.decision = TRUE,
            optimal.decision.only = TRUE,
            dmf = dmf
          )
          optimal <- apce$Optimal.D.mcmc
          g_d <- apce$Utility.g_d.mcmc
          g_dmf <- apce$Utility.g_dmf.mcmc
          control_utility_diff <- apce$Utility.diff.control.mcmc
          treated_utility_diff <- apce$Utility.diff.treated.mcmc
          dat <- rbind(dat, cbind(
            optimal,
            g_d, g_dmf, g_d - g_dmf,
            i, j, idx
          ))
          dat.mcmc <- rbind(dat.mcmc, cbind(
            mean(control_utility_diff), quantile(control_utility_diff, probs = 0.025), quantile(control_utility_diff, probs = 0.975),
            mean(treated_utility_diff), quantile(treated_utility_diff, probs = 0.025), quantile(treated_utility_diff, probs = 0.975),
            i, j
          ))
        }
      }
      colnames(dat) <- c(paste0("d", 0:k), "g_d", "g_dmf", "diff_utility", "c0", "c1", "idx")
      colnames(dat.mcmc) <- c(
        "mean_control_utility_diff", "lb_control_utility_diff", "ub_control_utility_diff",
        "mean_treated_utility_diff", "lb_treated_utility_diff", "ub_treated_utility_diff",
        "c0", "c1"
      )
      row.names(dat.mcmc) <- 1:nrow(dat.mcmc)

      res <- list(
        res.i = dat,
        res.mcmc = dat.mcmc
      )
    } else {
      dat <- data.frame()
      for (i in c0.ls) {
        for (j in c1.ls) {
          apce <- CalAPCE(data,
            mcmc.re,
            subgroup = list(1:nrow(data), 1:nrow(data), 1:nrow(data), 1:nrow(data), 1:nrow(data)),
            rho = rho,
            burnin = burnin,
            out.length = out.length,
            ZX = ZX,
            c0 = i, c1 = j,
            save.individual.optimal.decision = TRUE,
            optimal.decision.only = TRUE,
            dmf = dmf
          )
          optimal <- apce$Optimal.D.mcmc
          g_d <- apce$Utility.g_d.mcmc
          g_dmf <- apce$Utility.g_dmf.mcmc
          dat <- rbind(dat, cbind(
            optimal,
            g_d, g_dmf, g_d - g_dmf,
            i, j, idx
          ))
        }
      }
      colnames(dat) <- c(paste0("d", 0:k), "g_d", "g_dmf", "diff_utility", "c0", "c1", "idx")
      res <- dat
    }
  } else {
    k <- length(unique(data$D)) - 1

    idx <- 1:nrow(data)

    if (include.utility.diff.mcmc) {
      dat <- dat.mcmc <- data.frame()
      for (i in c0.ls) {
        for (j in c1.ls) {
          apce <- CalAPCEparallel(data,
            mcmc.re,
            subgroup = list(1:nrow(data), 1:nrow(data), 1:nrow(data), 1:nrow(data), 1:nrow(data)),
            rho = rho,
            burnin = burnin,
            out.length = out.length,
            ZX = ZX,
            c0 = i, c1 = j,
            save.individual.optimal.decision = TRUE,
            optimal.decision.only = TRUE,
            dmf = dmf,
            size = size
          )
          optimal <- apce$Optimal.D.mcmc
          g_d <- apce$Utility.g_d.mcmc
          g_dmf <- apce$Utility.g_dmf.mcmc
          control_utility_diff <- apce$Utility.diff.control.mcmc
          treated_utility_diff <- apce$Utility.diff.treated.mcmc
          dat <- rbind(dat, cbind(
            optimal,
            g_d, g_dmf, g_d - g_dmf,
            i, j, idx
          ))
          dat.mcmc <- rbind(dat.mcmc, cbind(
            mean(control_utility_diff), quantile(control_utility_diff, probs = 0.025), quantile(control_utility_diff, probs = 0.975),
            mean(treated_utility_diff), quantile(treated_utility_diff, probs = 0.025), quantile(treated_utility_diff, probs = 0.975),
            i, j
          ))
        }
      }
      colnames(dat) <- c(paste0("d", 0:k), "g_d", "g_dmf", "diff_utility", "c0", "c1", "idx")
      colnames(dat.mcmc) <- c(
        "mean_control_utility_diff", "lb_control_utility_diff", "ub_control_utility_diff",
        "mean_treated_utility_diff", "lb_treated_utility_diff", "ub_treated_utility_diff",
        "c0", "c1"
      )
      row.names(dat.mcmc) <- 1:nrow(dat.mcmc)

      res <- list(
        res.i = dat,
        res.mcmc = dat.mcmc
      )
    } else {
      dat <- data.frame()
      for (i in c0.ls) {
        for (j in c1.ls) {
          apce <- CalAPCEparallel(data,
            mcmc.re,
            subgroup = list(1:nrow(data), 1:nrow(data), 1:nrow(data), 1:nrow(data), 1:nrow(data)),
            rho = rho,
            burnin = burnin,
            out.length = out.length,
            ZX = ZX,
            c0 = i, c1 = j,
            save.individual.optimal.decision = TRUE,
            optimal.decision.only = TRUE,
            dmf = dmf,
            size = size
          )
          optimal <- apce$Optimal.D.mcmc
          g_d <- apce$Utility.g_d.mcmc
          g_dmf <- apce$Utility.g_dmf.mcmc
          dat <- rbind(dat, cbind(
            optimal,
            g_d, g_dmf, g_d - g_dmf,
            i, j, idx
          ))
        }
      }
      colnames(dat) <- c(paste0("d", 0:k), "g_d", "g_dmf", "diff_utility", "c0", "c1", "idx")
      res <- dat
    }
  }
  return(res)
}
#' Plot optimal decision
#'
#' See Figure 6 for example.
#'
#' @param res The data frame generated from \code{CalOptimalDecision}.
#' @param colname.d The column name of decision to be plotted.
#' @param idx The row index of observations to be included. The default is all the observations from the data.
#'
#' @return A ggplot.
#'
#' @examples
#' \donttest{
#' data(synth)
#' sample_mcmc <- AiEvalmcmc(data = synth, n.mcmc = 10)
#' sample_optd <- CalOptimalDecision(
#'   data = synth, mcmc.re = sample_mcmc,
#'   c0.ls = seq(0, 5, 1), c1.ls = seq(0, 5, 1),
#'   size = 1
#' ) # adjust the size
#' sample_optd$cash <- sample_optd$d1 + sample_optd$d2 + sample_optd$d3
#' PlotOptimalDecision(sample_optd, "cash")
#' }
#'
#' @import ggplot2
#' @importFrom metR geom_text_contour
#'
#' @useDynLib aihuman, .registration=TRUE
#' @export
#'
PlotOptimalDecision <- function(res,
                                colname.d,
                                idx = NULL) {
  c0 <- c1 <- decision <- NULL

  if (!is.null(idx)) {
    res <- res[res$idx %in% idx, ]
  }

  colnames(res)[colnames(res) == colname.d] <- "decision"

  dat <- res %>%
    group_by(c0, c1) %>%
    summarise(decision = mean(decision))

  p <- ggplot(res, aes(c0, c1, z = decision, fill = decision)) +
    geom_tile(alpha = 0.8) +
    scale_fill_distiller(
      limits = c(0, 1),
      palette = "Greys", direction = +1
    ) +
    stat_contour(color = "black", alpha = 0.8) +
    geom_text_contour(color = "black") +
    theme_bw() +
    theme(
      axis.ticks.x = element_blank(),
      panel.grid.major = element_blank(), panel.border = element_blank(),
      legend.position = "none",
      panel.background = element_blank(),
      axis.text = element_text(size = 18),
      legend.title = element_text(size = 12),
      legend.text = element_text(size = 12),
      plot.title = element_text(face = "bold", size = 20, hjust = 0.5),
      plot.subtitle = element_text(face = "bold", size = 15, hjust = 0.5),
      axis.title = element_text(size = 18)
    ) +
    labs(
      x = expression("Cost of outcome (" * c[0] * ")"),
      y = expression("Cost of unnecessarily harsh decision (" * c[1] * ")")
    )

  return(p)
}