R/autoplot.R
In predictNMB: Evaluate Clinical Prediction Models by Net Monetary Benefit

Documented in autoplot.predictNMBscreen autoplot.predictNMBsim theme_sim

#' @importFrom ggplot2 autoplot
#' @export
ggplot2::autoplot


#' @srrstats {EA5.0, EA5.0a, EA5.0b, EA5.1} \code{autoplot.predictNMBsim()}
#' applies two colours for the fill aesthetic by default (blue and grey) and
#' these can be controlled by the user by the \code{fill_cols} argument.
#' These colours were selected with appropriate interpretation of the interval
#' and accessibility in mind. All fonts and other colour schemes
#' (including for \code{autoplot.predictNMBscreen()}) use the defaults by
#' \code{ggplot} by default.
#' @srrstats {EA5.4, EA5.5} Units are included on all axes and use
#' \code{ggplot2} to produce sensibly rounded values.
#'
#' Whether or not to use linewidth vs size argument in ggplot2.
#' @return Returns logical - whether or not to use linewidth based on ggplot2.
#' version.
#' @noRd
use_linewidth <- function() {
  # if the available ggplot2 version is ahead of 3.4.0, then use `linewidth`
  # argument, otherwise will use size
  utils::compareVersion(
    as.character(utils::packageVersion("ggplot2")),
    "3.4.0"
  ) %in% c(0, 1)
}


#' Gather and wrangle appropriate data for plotting from simulation(s) object.
#'
#' @param x A \code{predictNMBsim} or \code{predictNMBscreen} object.
#' @param what What to plot: one of "nmb", "inb" or "cutpoints".
#' @param methods_order The order (left to right) to display the cutpoint
#' methods.
#' @param rename_vector A named vector for renaming the methods in the summary.
#' The values of the vector are the default names and the names given are the
#' desired names in the output.
#' @param inb_ref_col Which cutpoint method to use as the reference strategy
#' when calculating the incremental net monetary benefit. See \code{do_nmb_sim}
#' for more information.
#' @param conf.level The confidence level of the interval.
#'
#' @return Returns a \code{data.frame}.
#' @noRd
get_plot_data <- function(x,
                          what,
                          methods_order,
                          rename_vector,
                          inb_ref_col,
                          conf.level) {
  # select relevant data from predictNMBsim object
  if (what == "cutpoints") {
    data <- x$df_thresholds
  } else if (what == "qalys") {
    stopifnot(x$meta_data$track_qalys)
    data <- x$df_qalys
  } else if (what == "costs") {
    stopifnot(x$meta_data$track_qalys)
    data <- x$df_costs
  } else {
    data <- x$df_result
  }

  rename_vector <- update_rename_vector(rename_vector)
  data <- dplyr::rename(data, dplyr::any_of(rename_vector))

  if (what == "inb" | !is.na(inb_ref_col)) {
    data <-
      data %>%
      dplyr::mutate(dplyr::across(
        !n_sim,
        function(x) x - !!rlang::sym(inb_ref_col)
      )) %>%
      dplyr::select(-dplyr::all_of(inb_ref_col))
  }

  # pivot data to long format
  pivoted_data <- tidyr::pivot_longer(data, !n_sim)

  if (is.null(methods_order)) {
    methods_order <- data %>%
      dplyr::select(-n_sim) %>%
      names()
  }
  pivoted_data$name <- factor(pivoted_data$name, levels = methods_order)
  pivoted_data <- dplyr::filter(pivoted_data, !is.na(name))

  # add label (in_interval) for whether the observation is within the interval
  add_interval(pivoted_data, conf.level = ifelse(missing(conf.level), NA, conf.level))
}


#' Add column to plot data that indicates whether it is contained within the
#' confidence interval (indicated by conf.level).
#'
#' @param data \code{data.frame} with a grouping column, \code{name}, and
#' numeric outcome column, \code{value}.
#' @param conf.level The confidence level of the interval.
#'
#' @return Returns a \code{data.frame}.
#' @noRd
add_interval <- function(data, conf.level) {
  if (missing(conf.level) | is.na(conf.level)) {
    return(dplyr::mutate(data, percentile = NA, in_interval = NA))
  }

  probs <- c((1 - conf.level) / 2, 1 - (1 - conf.level) / 2)

  data %>%
    dplyr::group_by(name) %>%
    dplyr::arrange(value) %>%
    dplyr::mutate(percentile = dplyr::row_number() / dplyr::n()) %>%
    dplyr::ungroup() %>%
    dplyr::mutate(in_interval = percentile > probs[1] & percentile < probs[2])
}


#' Create plots of from predictNMB simulations.
#'
#' @param object A \code{predictNMBsim} object.
#' @param what What to summarise: one of "nmb", "inb", "cutpoints", "qalys" or
#' "costs". Defaults to "nmb".
#' @param inb_ref_col Which cutpoint method to use as the reference strategy
#' when calculating the incremental net monetary benefit.
#' See \code{do_nmb_sim} for more information.
#' @param conf.level The confidence level of the interval.
#' Defaults to 0.95 (coloured area of distribution represents 95% CIs).
#' @param methods_order The order (left to right) to display the
#' cutpoint methods.
#' @param n_bins The number of bins used when constructing histograms.
#' Defaults to 40.
#' @param label_wrap_width The number of characters in facet labels at which
#' the label is wrapped. Default is 12.
#' @param fill_cols Vector containing the colours used for fill aesthetic of
#' histograms. The first colour represents the area outside of the confidence
#' region, second colour shows the confidence region. Defaults to
#' \code{c("grey50", "#ADD8E6")}.
#' @param median_line_size Size of line used to represent the median of
#' distribution. Defaults to 2.
#' @param median_line_alpha Alpha (transparency) for line used to represent the median of
#' distribution. Defaults to 0.5.
#' @param median_line_col Colour of line used to represent the median of
#' distribution. Defaults to \code{"black"}.
#' @param rename_vector A named vector for renaming the methods in the summary.
#' The values of the vector are the default names and the names given are the
#' desired names in the output.
#' @param ... Additional (unused) arguments.
#'
#' @details
#' This plot method works with \code{predictNMBsim} objects that are created
#' using \code{do_nmb_sim()}. Can be used to visualise distributions from
#' simulations for different cutpoint methods.
#'
#' @srrstats {G2.4d} `as.factor()` used when necessary.
#'
#' @return Returns a \code{ggplot} object.
#' @export
#'
#' @examples
#' \donttest{
#' get_nmb <- function() c("TP" = -3, "TN" = 0, "FP" = -1, "FN" = -4)
#' sim_obj <- do_nmb_sim(
#'   sample_size = 200, n_sims = 50, n_valid = 10000, sim_auc = 0.7,
#'   event_rate = 0.1, fx_nmb_training = get_nmb, fx_nmb_evaluation = get_nmb,
#'   cutpoint_methods = c("all", "none", "youden", "value_optimising")
#' )
#'
#' autoplot(
#'   sim_obj,
#'   rename_vector = c(
#'     "Value- Optimising" = "value_optimising",
#'     "Treat- None" = "none",
#'     "Treat- All" = "all",
#'     "Youden Index" = "youden"
#'   )
#' ) + theme_sim()
#' }
autoplot.predictNMBsim <- function(object,
                                   what = c("nmb", "inb", "cutpoints", "qalys", "costs"),
                                   inb_ref_col = NA,
                                   conf.level = 0.95,
                                   methods_order = NULL,
                                   n_bins = 40,
                                   label_wrap_width = 12,
                                   fill_cols = c("grey50", "#ADD8E6"),
                                   median_line_size = 2,
                                   median_line_alpha = 0.5,
                                   median_line_col = "black",
                                   rename_vector,
                                   ...) {
  what <- match.arg(what)

  p_data <- get_plot_data(
    x = object,
    what = what,
    methods_order = methods_order,
    rename_vector = rename_vector,
    inb_ref_col = inb_ref_col,
    conf.level = conf.level
  )

  df_agg <-
    p_data %>%
    dplyr::group_by(name) %>%
    dplyr::summarize(m = stats::median(value))

  p <-
    p_data %>%
    ggplot2::ggplot(ggplot2::aes(value, fill = in_interval)) +
    ggplot2::geom_histogram(bins = n_bins) +
    ggplot2::coord_flip() +
    ggplot2::facet_wrap(
      ~name,
      labeller = ggplot2::label_wrap_gen(width = label_wrap_width),
      nrow = 1
    ) +
    ggplot2::guides(fill = "none") +
    ggplot2::scale_fill_manual(values = c(NA, fill_cols))

  my_plot_innards <- ggplot2::ggplot_build(p)

  extracted_points <- data.frame(
    outcome = my_plot_innards[["data"]][[1]][["x"]],
    count = my_plot_innards[["data"]][[1]][["y"]],
    in_interval = as.factor(my_plot_innards[["data"]][[1]][["group"]]),
    method = my_plot_innards[["data"]][[1]][["PANEL"]]
  )

  heights <-
    df_agg %>%
    tibble::rownames_to_column(var = "method_n") %>%
    dplyr::left_join(
      extracted_points,
      by = c("method_n" = "method"),
      multiple = "all"
    ) %>%
    dplyr::mutate(diff = abs(m - outcome)) %>%
    dplyr::group_by(name) %>%
    dplyr::arrange(diff) %>%
    dplyr::slice(1) %>%
    dplyr::ungroup()


  x_axis_title <- get_axis_label(what)

  if (use_linewidth()) {
    segment_layer <- ggplot2::geom_segment(
      data = heights,
      ggplot2::aes(x = m, xend = m, y = 0, yend = count),
      linewidth = median_line_size,
      alpha = median_line_alpha,
      col = median_line_col
    )
  } else {
    segment_layer <- ggplot2::geom_segment(
      data = heights,
      ggplot2::aes(x = m, xend = m, y = 0, yend = count),
      size = median_line_size,
      alpha = median_line_alpha,
      col = median_line_col
    )
  }

  p <-
    p +
    segment_layer +
    ggplot2::labs(
      x = x_axis_title,
      y = ""
    )

  p
}

#' Returns a \code{ggplot2} theme that reduces clutter in an \code{autoplot()}
#' of a \code{predictNMBsim} object.
#'
#' @return Returns a \code{ggplot2} theme.
#' @export
#'
#' @examples
#' \donttest{
#' get_nmb <- function() c("TP" = -3, "TN" = 0, "FP" = -1, "FN" = -4)
#' sim_obj <- do_nmb_sim(
#'   sample_size = 200, n_sims = 50, n_valid = 10000, sim_auc = 0.7,
#'   event_rate = 0.1, fx_nmb_training = get_nmb, fx_nmb_evaluation = get_nmb
#' )
#'
#' autoplot(sim_obj) + theme_sim()
#' }
#'
theme_sim <- function() {
  ggplot2::theme(
    panel.spacing = ggplot2::unit(0, "lines"),
    axis.ticks.x = ggplot2::element_blank(),
    axis.text.x = ggplot2::element_blank(),
    strip.background = ggplot2::element_rect(colour = "grey20"),
    panel.border = ggplot2::element_rect(fill = NA, colour = "grey20"),
  )
}

#' Create plots of from screened predictNMB simulations.
#'
#' @param object A \code{predictNMBscreen} object.
#' @param x_axis_var The desired screened factor to be displayed along the
#' x axis. For example, if the simulation screen was used with many values for
#' event rate, this could be "event_rate". Defaults to the first detected,
#' varied input.
#' @param constants Named vector. If multiple inputs were screened in this
#' object, this argument can be used to modify the selected values for all
#' those except the input that's varying along the x-axis. See the
#' \href{https://docs.ropensci.org/predictNMB/articles/summarising-results-with-predictNMB.html}{summarising methods vignette}.
#' @param what What to summarise: one of "nmb", "inb", "cutpoints", "qalys" or
#' "costs". Defaults to "nmb".
#' @param inb_ref_col Which cutpoint method to use as the reference strategy
#' when calculating the incremental net monetary benefit.
#' See \code{do_nmb_sim} for more information.
#' @param plot_range \code{logical}. Whether or not to plot the range of the
#' distribution as a thin line. Defaults to TRUE.
#' @param plot_conf_level \code{logical}. Whether or not to plot the confidence
#' region of the distribution as a thicker line. Defaults to TRUE.
#' @param plot_line \code{logical}. Whether or not to connect the medians of
#' the distributions for each method along the x-axis. Defaults to TRUE.
#' @param plot_alpha Alpha value (transparency) of all plot elements. Defaults to 0.5.
#' @param dodge_width The dodge width of plot elements. Can be used to avoid
#' excessive overlap between methods. Defaults to 0.
#' @param conf.level The confidence level of the interval.
#' Defaults to 0.95 (coloured area of distribution represents 95% CIs).
#' @param methods_order The order (left to right) to display the cutpoint
#' methods.
#' @param rename_vector A named vector for renaming the methods in the summary.
#' The values of the vector are the default names and the names given are the
#' desired names in the output.
#' @param ... Additional (unused) arguments.
#'
#' @details
#' This plot method works with \code{predictNMBscreen} objects that are
#' created using \code{screen_simulation_inputs()}. Can be used to visualise
#' distributions from many different simulations and assign a varying input
#' to the x-axis of the plot.
#'
#' @return Returns a \code{ggplot} object.
#' @export
#'
#' @examples
#' \donttest{
#' get_nmb <- function() c("TP" = -3, "TN" = 0, "FP" = -1, "FN" = -4)
#' sim_screen_obj <- screen_simulation_inputs(
#'   n_sims = 50, n_valid = 10000, sim_auc = seq(0.7, 0.9, 0.1),
#'   event_rate = c(0.1, 0.2, 0.3),
#'   fx_nmb_training = get_nmb, fx_nmb_evaluation = get_nmb,
#'   cutpoint_methods = c("all", "none", "youden", "value_optimising")
#' )
#'
#' autoplot(sim_screen_obj)
#'
#' autoplot(
#'   sim_screen_obj,
#'   x_axis_var = "event_rate",
#'   constants = c(sim_auc = 0.8),
#'   dodge_width = 0.02,
#'   rename_vector = c(
#'     "Value-Optimising" = "value_optimising",
#'     "Treat-None" = "none",
#'     "Treat-All" = "all",
#'     "Youden Index" = "youden"
#'   )
#' )
#' }
autoplot.predictNMBscreen <- function(object,
                                      x_axis_var = NULL,
                                      constants = list(),
                                      what = c("nmb", "inb", "cutpoints", "qalys", "costs"),
                                      inb_ref_col = NA,
                                      plot_range = TRUE,
                                      plot_conf_level = TRUE,
                                      plot_line = TRUE,
                                      plot_alpha = 0.5,
                                      dodge_width = 0,
                                      conf.level = 0.95,
                                      methods_order = NULL,
                                      rename_vector,
                                      ...) {
  what <- match.arg(what)

  if (is.null(x_axis_var)) {
    message("No value for 'x_axis_var' given.")
    if (length(object$screen_meta) == 1) {
      message(paste0("Screening over ", names(object$screen_meta), " by default"))
    } else {
      message(
        paste0(
          "Screening over ",
          names(object$screen_meta)[1],
          " by default. Specify the variable in the 'x_axis_var' argument",
          " if you want to plot changes over:\n",
          paste0(names(object$screen_meta)[-1], collapse = ", ")
        )
      )
    }
    x_axis_var <- names(object$screen_meta)[1]
  }

  grid_lookup <- object$summary_grid
  sim.id_ignore <- c()

  if (length(names(object$screen_meta)) > 1) {
    message(
      paste0(
        "\n\nVarying simulation inputs, other than ",
        x_axis_var,
        ", are being held constant:\n"
      )
    )
    non_x_axis_vars <- object$screen_meta
    non_x_axis_vars[[x_axis_var]] <- NULL
    for (i in length(non_x_axis_vars)) {
      if (names(non_x_axis_vars)[i] %in% names(constants)) {
        v <- constants[[names(non_x_axis_vars)[i]]]
        possible_values <- sort(
          unique(grid_lookup[[names(non_x_axis_vars)[i]]])
        )

        if (!any(approx_match(vec = possible_values, val = v))) {
          stop(
            paste0(
              "A ",
              names(non_x_axis_vars)[i],
              " value of [",
              v,
              "] was not included in the screened inputs and cannot be used as",
              " a specified constant.",
              "\n\nScreened input values that can be used for ",
              names(non_x_axis_vars)[i],
              " are: ",
              paste0(possible_values, collapse = ", ")
            )
          )
        }
      } else {
        v <- non_x_axis_vars[[i]][[1]]
        if (is.function(v)) {
          v <- names(non_x_axis_vars[[i]])[1]
          if (object$pair_nmb_train_and_evaluation_functions &
            x_axis_var %in% c("fx_nmb_training", "fx_nmb_evaluation")) {
            next
          }
        }
      }

      message(paste0(
        names(non_x_axis_vars)[i],
        ": ",
        v
      ))

      sim.id_ignore <- append(
        sim.id_ignore,
        which(
          !approx_match(
            vec = grid_lookup[[names(non_x_axis_vars)[i]]],
            val = v
          )
        )
      )
    }
    sim.id_ignore <- unique(sim.id_ignore)

    # filter the grid of simulations to only those which will be in the plot
    # (keeping the non-{x_axis_var}'s constant)
    grid_lookup <- grid_lookup[!grid_lookup$.sim_id %in% sim.id_ignore, ]
  }

  p_data_full <- NULL
  for (s in grid_lookup$.sim_id) {
    p_data <- get_plot_data(
      x = object$simulations[[s]],
      what = what,
      methods_order = methods_order,
      rename_vector = rename_vector,
      inb_ref_col = inb_ref_col,
      conf.level = conf.level
    )
    p_data$.sim_id <- s
    p_data_full <- rbind(p_data_full, stats::na.omit(p_data))
  }

  p_data_full <-
    dplyr::left_join(p_data_full, grid_lookup, by = ".sim_id") %>%
    dplyr::rename(x_axis_var = dplyr::all_of(x_axis_var))

  p_data_range <-
    p_data_full %>%
    dplyr::group_by(x_axis_var, name) %>%
    dplyr::summarize(
      m = stats::median(value),
      ymin = min(value),
      ymax = max(value),
      .groups = "drop"
    ) %>%
    dplyr::ungroup()

  p_data_interval <-
    p_data_full %>%
    dplyr::filter(in_interval) %>%
    dplyr::group_by(x_axis_var, name) %>%
    dplyr::summarize(
      ymin = min(value),
      ymax = max(value),
      .groups = "drop"
    ) %>%
    dplyr::ungroup()

  y_axis_title <- get_axis_label(what)

  x_axis_title <- get_axis_label(x_axis_var)

  if (dodge_width == 0) {
    position <- "identity"
  } else {
    position <- ggplot2::position_dodge(width = dodge_width)
  }

  p <-
    ggplot2::ggplot() +
    ggplot2::geom_point(
      data = p_data_range,
      ggplot2::aes(x = x_axis_var, y = m, col = name, group = name),
      size = 3, alpha = plot_alpha,
      position = position
    ) +
    ggplot2::labs(
      x = x_axis_title,
      y = y_axis_title,
      col = "Cutpoint Methods"
    )

  if (plot_line) {
    p <-
      p +
      ggplot2::geom_line(
        data = p_data_range,
        ggplot2::aes(x = x_axis_var, y = m, col = name, group = name),
        alpha = plot_alpha,
        position = position
      )
  }

  if (plot_conf_level) {
    if (use_linewidth()) {
      ci_linerange_layer <- ggplot2::geom_linerange(
        data = p_data_interval,
        ggplot2::aes(x = x_axis_var, col = name, ymin = ymin, ymax = ymax),
        linewidth = 1.2, alpha = plot_alpha,
        position = position
      )
    } else {
      ci_linerange_layer <- ggplot2::geom_linerange(
        data = p_data_interval,
        ggplot2::aes(x = x_axis_var, col = name, ymin = ymin, ymax = ymax),
        size = 1.2, alpha = plot_alpha,
        position = position
      )
    }
    p <- p + ci_linerange_layer
  }

  if (plot_range) {
    p <-
      p +
      ggplot2::geom_linerange(
        data = p_data_range,
        ggplot2::aes(x = x_axis_var, col = name, ymin = ymin, ymax = ymax),
        alpha = plot_alpha,
        position = position
      )
  }

  if (!inherits(p_data_range$x_axis_var, "character")) {
    p <-
      p +
      ggplot2::scale_x_continuous(
        breaks = unique(p_data_range$x_axis_var)
      )
  }
  p
}
Any scripts or data that you put into this service are public.
predictNMB documentation built on June 7, 2023, 6:31 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
predictNMB
Evaluate Clinical Prediction Models by Net Monetary Benefit

R/autoplot.R
In predictNMB: Evaluate Clinical Prediction Models by Net Monetary Benefit

Defines functions autoplot.predictNMBscreen theme_sim autoplot.predictNMBsim add_interval get_plot_data use_linewidth

Documented in autoplot.predictNMBscreen autoplot.predictNMBsim theme_sim

Try the predictNMB package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

predictNMB Evaluate Clinical Prediction Models by Net Monetary Benefit

R/autoplot.R In predictNMB: Evaluate Clinical Prediction Models by Net Monetary Benefit

Defines functions autoplot.predictNMBscreen theme_sim autoplot.predictNMBsim add_interval get_plot_data use_linewidth

Documented in autoplot.predictNMBscreen autoplot.predictNMBsim theme_sim

Try the predictNMB package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

predictNMB
Evaluate Clinical Prediction Models by Net Monetary Benefit

R/autoplot.R
In predictNMB: Evaluate Clinical Prediction Models by Net Monetary Benefit