R/num-ccc.R

Defines functions ccc_impl ccc_vec ccc.data.frame ccc

Documented in ccc ccc.data.frame ccc_vec

#' Concordance correlation coefficient
#'
#' Calculate the concordance correlation coefficient.
#'
#' [ccc()] is a metric of both consistency/correlation and accuracy,
#' while metrics such as [rmse()] are strictly for accuracy and metrics
#' such as [rsq()] are strictly for consistency/correlation
#'
#' @family numeric metrics
#' @family consistency metrics
#' @family accuracy metrics
#' @seealso [All numeric metrics][numeric-metrics]
#' @templateVar fn ccc
#' @template return
#'
#' @inheritParams rmse
#'
#' @param bias A `logical`; should the biased estimate of variance
#' be used (as is Lin (1989))?
#'
#' @details
#' CCC is a metric that should be `r attr(ccc, "direction")`d. The output
#' ranges from `r metric_range_chr(ccc, 1)` to `r metric_range_chr(ccc, 2)`, with
#' `r metric_optimal(ccc)` indicating perfect agreement.
#'
#' The formula for CCC is:
#'
#' \deqn{\text{CCC} = \frac{2 \cdot \text{cov}(\text{truth}, \text{estimate})}{\text{var}(\text{truth}) + \text{var}(\text{estimate}) + (\bar{\text{truth}} - \bar{\text{estimate}})^2}}
#'
#' @author Max Kuhn
#'
#' @references
#'
#' Lin, L. (1989). A concordance correlation
#'  coefficient to evaluate reproducibility. _Biometrics_, 45 (1),
#'  255-268.
#'
#' Nickerson, C. (1997). A note on "A concordance correlation
#'  coefficient to evaluate reproducibility". _Biometrics_, 53(4),
#'  1503-1507.
#'
#'
#' @template examples-numeric
#' @examples
#' # Using a different value of 'bias'... if you are adding the metric to a
#' # metric set, you can create a new metric function with the updated argument
#' # value:
#'
#' ccc_bias <- metric_tweak("ccc_bias", ccc, bias = TRUE)
#' multi_metrics <- metric_set(ccc, ccc_bias)
#' multi_metrics(solubility_test, solubility, prediction)
#'
#' @export
#'
ccc <- function(data, ...) {
  UseMethod("ccc")
}
ccc <- new_numeric_metric(
  ccc,
  direction = "maximize",
  range = c(-1, 1)
)

#' @rdname ccc
#' @export
ccc.data.frame <- function(
  data,
  truth,
  estimate,
  bias = FALSE,
  na_rm = TRUE,
  case_weights = NULL,
  ...
) {
  numeric_metric_summarizer(
    name = "ccc",
    fn = ccc_vec,
    data = data,
    truth = !!enquo(truth),
    estimate = !!enquo(estimate),
    na_rm = na_rm,
    case_weights = !!enquo(case_weights),
    # Extra argument for ccc_impl()
    fn_options = list(bias = bias)
  )
}

#' @export
#' @rdname ccc
ccc_vec <- function(
  truth,
  estimate,
  bias = FALSE,
  na_rm = TRUE,
  case_weights = NULL,
  ...
) {
  check_bool(na_rm)
  check_bool(bias)
  check_numeric_metric(truth, estimate, case_weights)

  if (na_rm) {
    result <- yardstick_remove_missing(truth, estimate, case_weights)

    truth <- result$truth
    estimate <- result$estimate
    case_weights <- result$case_weights
  } else if (yardstick_any_missing(truth, estimate, case_weights)) {
    return(NA_real_)
  }

  ccc_impl(truth, estimate, bias, case_weights)
}

ccc_impl <- function(truth, estimate, bias, case_weights) {
  case_weights <- vec_cast(case_weights, to = double())

  truth_mean <- yardstick_mean(truth, case_weights = case_weights)
  estimate_mean <- yardstick_mean(estimate, case_weights = case_weights)

  truth_variance <- yardstick_var(truth, case_weights = case_weights)
  estimate_variance <- yardstick_var(estimate, case_weights = case_weights)

  covariance <- yardstick_cov(truth, estimate, case_weights = case_weights)

  if (bias) {
    # Assumes `case_weights` are frequency weights so we can generate an
    # appropriate `n`
    if (is.null(case_weights)) {
      n <- length(estimate)
    } else {
      n <- sum(case_weights)
    }

    estimate_variance <- estimate_variance * (n - 1) / n
    truth_variance <- truth_variance * (n - 1) / n
    covariance <- covariance * (n - 1) / n
  }

  numerator <- 2 * covariance
  denominator <- truth_variance +
    estimate_variance +
    (truth_mean - estimate_mean)^2

  numerator / denominator
}

Try the yardstick package in your browser

Any scripts or data that you put into this service are public.

yardstick documentation built on April 8, 2026, 1:06 a.m.