#' Determine an optimal cutpoint maximizing the Youden-Index based on kernel smoothed densities
#'
#' Instead of searching for an optimal cutpoint to maximize (sensitivity +
#' specificity - 1) on the ROC curve, this function first smoothes the empirical
#' distributions of \code{x} per class. The smoothing is done using a binned kernel
#' density estimate. The bandwidth is automatically selected using the direct
#' plug-in method.
#'
#' The functions for calculating the kernel density estimate and the bandwidth
#' are both from \pkg{KernSmooth} with default parameters, except for
#' the bandwidth selection, which uses the standard deviation as scale estimate.
#'
#' The cutpoint is estimated as the cutpoint that maximizes the Youden-Index
#' given by \eqn{J = max_c {F_N(c) - G_N(c) }} where
#' \eqn{J} and \eqn{G} are the smoothed distribution functions.
#'
#' @inheritParams oc_youden_normal
#' @source Fluss, R., Faraggi, D., & Reiser, B. (2005). Estimation of the
#' Youden Index and its associated cutoff point. Biometrical Journal, 47(4), 458–472.
#' @source Matt Wand (2015). KernSmooth: Functions for Kernel Smoothing
#' Supporting Wand & Jones (1995). R package version 2.23-15.
#' https://CRAN.R-project.org/package=KernSmooth
#' @examples
#' data(suicide)
#' if (require(KernSmooth)) {
#' oc_youden_kernel(suicide, "dsi", "suicide", oc_metric = "Youden",
#' pos_class = "yes", neg_class = "no", direction = ">=")
#' ## Within cutpointr
#' cutpointr(suicide, dsi, suicide, method = oc_youden_kernel)
#' }
#' @family method functions
#' @export
oc_youden_kernel <- function(data, x, class, pos_class, neg_class,
direction, ...) {
stopifnot(is.character(x))
stopifnot(is.character(class))
iv <- unlist(data[, x])
cla <- unlist(data[, class])
if (direction %in% c(">", ">=")) {
x_pos <- iv[cla == pos_class]
x_neg <- iv[cla == neg_class]
} else {
x_neg <- iv[cla == pos_class]
x_pos <- iv[cla == neg_class]
}
bw_n <- KernSmooth::dpik(x_neg, "stdev")
neg_k <- KernSmooth::bkde(x_neg, bandwidth = bw_n)
bw_p <- KernSmooth::dpik(x_pos, "stdev")
pos_k <- KernSmooth::bkde(x_pos, bandwidth = bw_p)
oc <- suppressWarnings(stats::optimize(
f = youden_kern,
interval = c(min(c(x_pos, x_neg)), max(c(x_pos, x_neg))),
maximum = TRUE,
neg_k = neg_k,
pos_k = pos_k
)$maximum)
# Extremely high or low cutoffs can result in some scenarios
if (oc < min(c(x_neg, x_pos))) {
warning(paste("Cutpoint", oc, "was restricted to range of independent variable"))
oc <- min(c(x_neg, x_pos))
} else if (oc > max(c(x_neg, x_pos))) {
warning(paste("Cutpoint", oc, "was restricted to range of independent variable"))
oc <- max(c(x_neg, x_pos))
}
return(data.frame(optimal_cutpoint = oc))
}
youden_kern <- function(threshold = NULL, neg_k, pos_k) {
youden <- vector_auc(neg_k$x, neg_k$y, to = threshold) -
vector_auc(pos_k$x, pos_k$y, to = threshold)
return(youden)
}
#' @source Forked from MESS
vector_auc <- function (x, y, from = min(x), to = max(x), ...) {
if (length(x) != length(y))
stop("x and y must have the same length")
if (length(unique(x)) < 2)
return(NA)
if (to > max(x)) to <- max(x)
values <- stats::approx(x, y, xout = sort(unique(c(from, to,
x[x > from & x < to]))), ...)
res <- 0.5 * sum(diff(values$x) * (values$y[-1] + values$y[-length(values$y)]))
res
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.