R/oc_youden_normal.R

Defines functions oc_youden_normal

Documented in oc_youden_normal

#' Determine an optimal cutpoint for the Youden-Index assuming normal distributions
#'
#' An optimal cutpoint maximizing the Youden- or J-Index
#' (sensitivity + specificity - 1) is calculated parametrically assuming
#' normal distributions per class.
#'
#' @param data A data frame or tibble in which the columns that are given in x
#' and class can be found.
#' @param x (character) The variable name to be used for classification,
#' e.g. predictions or test values.
#' @param class (character) The variable name indicating class membership.
#' @param pos_class The value of class that indicates the positive class.
#' @param neg_class The value of class that indicates the negative class.
#' @param direction (character) Use ">=" or "<=" to select whether an x value
#' >= or <= the cutoff predicts the positive class.
#' @param ... To capture further arguments that are always passed to the method
#' function by cutpointr. The cutpointr function passes data, x, class,
#' metric_func, direction, pos_class and neg_class to the method function.
#' @examples
#' data(suicide)
#' oc_youden_normal(suicide, "dsi", "suicide",
#'   pos_class = "yes", neg_class = "no", direction = ">=")
#' cutpointr(suicide, dsi, suicide, method = oc_youden_normal)
#' @family method functions
#' @export
oc_youden_normal <- function(data, x, class, pos_class = NULL, neg_class = NULL,
                             direction, ...) {
    stopifnot(is.character(x))
    stopifnot(is.character(class))
    iv <- unlist(data[, x])
    if (any_inf(iv)) stop("Only finite values allowed in oc_youden_normal")
    cla <- unlist(data[, class])
    if (direction %in% c(">", ">=")) {
        patients <- iv[cla == pos_class]
        controls <- iv[cla == neg_class]
    } else if (direction %in% c("<", "<=")) {
        patients <- iv[cla == neg_class]
        controls <- iv[cla == pos_class]
    }
    m_h <- mean(controls)
    sd_h <- stats::sd(controls)
    m_d <- mean(patients)
    sd_d <- stats::sd(patients)
    if (sd_h == sd_d) {
        c <- (m_h+m_d)/2
    } else if (any(sd_h == 0, sd_d == 0)) {
        # if sd_h = 0 and/or sd_d = 0 the cutoff would be NaN
        c <- (m_h+m_d)/2
    } else {
        c <- ((m_d*sd_h^2 - m_h*sd_d^2) - sd_h*sd_d*(sqrt((m_h-m_d)^2 + (sd_h^2-sd_d^2) * log(sd_h^2/sd_d^2)))) /
            (sd_h^2-sd_d^2)
    }

    # Extremely high or low cutoffs can result if m_d < m_h and direction = ">="
    if (c < min(c(controls, patients))) {
        warning(paste("Cutpoint", c, "was restricted to range of independent variable"))
        c <- min(c(controls, patients))
    } else if (c > max(c(controls, patients))) {
        warning(paste("Cutpoint", c, "was restricted to range of independent variable"))
        c <- max(c(controls, patients))
    }
    return(data.frame(optimal_cutpoint = c))
}
Thie1e/cutpointr documentation built on March 7, 2020, 3:25 a.m.