R/errordist.ROC.R

Defines functions errordist.ROC

errordist.ROC <- function(dataG, dataB, varname,
                          Hypothesis = "g > b", nBoot = 250,
                          conf.level = 0.95) {
  # selection of the data by variable character
  varname2 <- enquo(varname)

  dataG <- dataG %>%
    select(!!varname2) %>%
    pull()

  dataB <- dataB %>%
    select(!!varname2) %>%
    pull()

  # PD of observed sample
  gesamt <- length(dataG) + length(dataB)
  meanPortfolioPD <- length(dataB) / gesamt

  # computing the area under the curve (AUC) with bootstrapping
  numB <- rbinom(nBoot, gesamt, meanPortfolioPD)
  numG <- gesamt - numB

  sampling <- seq(1, nBoot) %>%
    purrr::map_dbl(
      ~ roc.curve(
        sample(dataG, numG[.x], replace = T),
        sample(dataB, numB[.x], replace = T)
      )$auc
    )

  # Calculating gini coefficients
  Gini.vec <- ifelse(Hypothesis == "g > b", 1, -1) * 2 * (abs(sampling) - 0.5)
  Gini <- median(Gini.vec, na.rm = T)
  Gini.low <- unname(quantile(Gini.vec, probs = (1 - conf.level) / 2, na.rm = T) - Gini)
  Gini.high <- unname(quantile(Gini.vec, probs = (1 + conf.level) / 2, na.rm = T) - Gini)

  # Calculating gini coefficients without bootstrapping
  Gini.boot <- ifelse(Hypothesis == "g > b", 1, -1) * 2 * (abs(roc.curve(dataG, dataB)$auc) - 0.5)

  result <- data.frame(
    Gini.boot = Gini.boot,
    Gini = Gini,
    Gini.low = Gini.low,
    Gini.high = Gini.high
  )

  return(result)
}
irisweyermenkhoff/toyota-idv-functions documentation built on March 4, 2020, 9:57 a.m.