# R/class-npv.R
#
# Defines functions: npv, npv.data.frame, npv.table, npv.matrix, npv_vec,
# npv_table_impl, npv_binary, npv_multiclass
#
# Documented in: npv, npv.data.frame, npv_vec

#' Negative predictive value
#'
#' These functions calculate the [npv()] (negative predictive value) of a
#' measurement system compared to a reference result (the "truth" or gold standard).
#' Highly related functions are [spec()], [sens()], and [ppv()].
#'
#' The positive predictive value ([ppv()]) is defined as the percent of
#' predicted positives that are actually positive while the
#' negative predictive value ([npv()]) is defined as the percent of
#' predicted negatives that are actually negative.
#'
#' @family class metrics
#' @family sensitivity metrics
#' @seealso [All class metrics][class-metrics]
#' @templateVar fn npv
#' @template event_first
#' @template multiclass
#' @template return
#' @inheritParams ppv
#'
#' @details
#' Suppose a 2x2 table with notation:
#'
#' \tabular{rcc}{ \tab Reference \tab \cr Predicted \tab Positive \tab Negative
#' \cr Positive \tab A \tab B \cr Negative \tab C \tab D \cr }
#'
#' The formulas used here are:
#'
#' \deqn{\text{Sensitivity} = \frac{A}{A + C}}
#'
#' \deqn{\text{Specificity} = \frac{D}{B + D}}
#'
#' \deqn{\text{Prevalence} = \frac{A + C}{A + B + C + D}}
#'
#' \deqn{\text{NPV} = \frac{\text{Specificity} \cdot (1 - \text{Prevalence})}{((1 - \text{Sensitivity}) \cdot \text{Prevalence}) + (\text{Specificity} \cdot (1 - \text{Prevalence}))}}
#'
#' NPV is a metric that should be `r attr(npv, "direction")`d. The output
#' ranges from `r metric_range_chr(npv, 1)` to `r metric_range_chr(npv, 2)`, with
#' `r metric_optimal(npv)` indicating all predicted negatives are true
#' negatives.
#'
#' @author Max Kuhn
#'
#' @references
#'
#' Altman, D.G., Bland, J.M. (1994) ``Diagnostic tests 2:
#' predictive values,'' *British Medical Journal*, vol 309,
#' 102.
#'
#' @template examples-class
#' @examples
#' # Using a different value of 'prevalence'... if you are adding the metric to a
#' # metric set, you can create a new metric function with the updated argument
#' # value:
#'
#' npv_alt_prev  <- metric_tweak("npv_alt_prev", npv, prevalence = 0.40)
#' multi_metrics <- metric_set(npv, npv_alt_prev)
#' multi_metrics(two_class_example, truth, estimate = predicted)
#' @export
npv <- function(data, ...) {
  UseMethod("npv")
}
# Wrap the generic with metric metadata (optimization direction and the
# closed range of possible estimates) so metric tooling can introspect it.
npv <- new_class_metric(
  npv,
  direction = "maximize",
  range = c(0, 1)
)

#' @rdname npv
#' @export
#' @rdname npv
#' @export
npv.data.frame <- function(
  data,
  truth,
  estimate,
  prevalence = NULL,
  estimator = NULL,
  na_rm = TRUE,
  case_weights = NULL,
  event_level = yardstick_event_level(),
  ...
) {
  # Defuse the column selections so they can be evaluated later inside the
  # summarizer's data mask.
  truth_quo <- enquo(truth)
  estimate_quo <- enquo(estimate)
  weights_quo <- enquo(case_weights)

  class_metric_summarizer(
    name = "npv",
    fn = npv_vec,
    data = data,
    truth = !!truth_quo,
    estimate = !!estimate_quo,
    estimator = estimator,
    na_rm = na_rm,
    case_weights = !!weights_quo,
    event_level = event_level,
    # `prevalence` is not a shared summarizer argument; forward it to the
    # vector implementation through `fn_options`.
    fn_options = list(prevalence = prevalence)
  )
}

#' @export
#' @export
npv.table <- function(
  data,
  prevalence = NULL,
  estimator = NULL,
  event_level = yardstick_event_level(),
  ...
) {
  # Validate the confusion matrix, then resolve the averaging scheme
  # (binary vs. a multiclass estimator) from its dimensions.
  check_table(data)
  estimator <- finalize_estimator(data, estimator)

  estimate <- npv_table_impl(
    data,
    estimator,
    event_level,
    prevalence = prevalence
  )

  metric_tibbler(
    .metric = "npv",
    .estimator = estimator,
    .estimate = estimate
  )
}

#' @export
#' @export
npv.matrix <- function(
  data,
  prevalence = NULL,
  estimator = NULL,
  event_level = yardstick_event_level(),
  ...
) {
  # A confusion matrix given as a plain matrix is coerced to a table and
  # handled by the table method.
  npv.table(
    as.table(data),
    prevalence = prevalence,
    estimator = estimator,
    event_level = event_level
  )
}

#' @export
#' @rdname npv
#' @export
#' @rdname npv
npv_vec <- function(
  truth,
  estimate,
  prevalence = NULL,
  estimator = NULL,
  na_rm = TRUE,
  case_weights = NULL,
  event_level = yardstick_event_level(),
  ...
) {
  # Validate scalar arguments up front.
  check_bool(na_rm)
  check_number_decimal(prevalence, min = 0, max = 1, allow_null = TRUE)

  # `truth` may not be a `class_pred`; `estimate` may be, so coerce it to a
  # plain factor.
  abort_if_class_pred(truth)
  estimate <- as_factor_from_class_pred(estimate)

  estimator <- finalize_estimator(truth, estimator)
  check_class_metric(truth, estimate, case_weights, estimator)

  if (!na_rm) {
    # Propagate missingness: any NA anywhere yields an NA estimate.
    if (yardstick_any_missing(truth, estimate, case_weights)) {
      return(NA_real_)
    }
  } else {
    completed <- yardstick_remove_missing(truth, estimate, case_weights)
    truth <- completed$truth
    estimate <- completed$estimate
    case_weights <- completed$case_weights
  }

  xtab <- yardstick_table(truth, estimate, case_weights = case_weights)
  npv_table_impl(xtab, estimator, event_level, prevalence = prevalence)
}

# Dispatch on the estimator: binary NPV directly, otherwise a weighted
# average of the per-class (one-vs-all) NPV values.
npv_table_impl <- function(data, estimator, event_level, prevalence = NULL) {
  if (is_binary(estimator)) {
    return(npv_binary(data, event_level, prevalence))
  }

  class_weights <- get_weights(data, estimator)
  per_class <- npv_multiclass(data, estimator, prevalence)
  stats::weighted.mean(per_class, class_weights)
}

# Binary NPV computed via Bayes' rule from sensitivity, specificity, and
# prevalence. When `prevalence` is NULL it is estimated from the table as
# the event column's share of the grand total.
npv_binary <- function(data, event_level, prevalence = NULL) {
  event_col <- pos_val(data, event_level)

  if (is.null(prevalence)) {
    prevalence <- sum(data[, event_col]) / sum(data)
  }

  sens_val <- sens_binary(data, event_level)
  spec_val <- spec_binary(data, event_level)

  # NPV = P(true negative) / P(predicted negative)
  true_neg_mass <- spec_val * (1 - prevalence)
  false_neg_mass <- (1 - sens_val) * prevalence

  true_neg_mass / (false_neg_mass + true_neg_mass)
}

# One-vs-all NPV for each class. When `prevalence` is NULL it defaults to
# the per-class column totals over the grand total (pooled into a single
# ratio under micro averaging).
npv_multiclass <- function(data, estimator, prevalence = NULL) {
  if (is.null(prevalence)) {
    class_totals <- colSums(data)
    grand_totals <- rep(sum(data), times = nrow(data))

    if (is_micro(estimator)) {
      class_totals <- sum(class_totals)
      grand_totals <- sum(grand_totals)
    }

    prevalence <- class_totals / grand_totals
  }

  sens_vals <- recall_multiclass(data, estimator)
  spec_vals <- spec_multiclass(data, estimator)

  numer <- spec_vals * (1 - prevalence)
  denom <- (1 - sens_vals) * prevalence + spec_vals * (1 - prevalence)

  # Guard against division by a non-positive denominator: the NPV is
  # undefined there, so return NA for those classes.
  denom[denom <= 0] <- NA_real_

  numer / denom
}

# Source note: this file was extracted from a package-browser listing.
# Original footer text:
#   "Try the yardstick package in your browser.
#    Any scripts or data that you put into this service are public.
#    yardstick documentation built on April 8, 2026, 1:06 a.m."