calibration: Calibration Performance Evaluation and Recalibration

Documented in isotonic_calibration

#' @title Isotonic probability calibration
#' @description Performs an isotonic regression calibration of posterior probability to minimize log loss.
#'
#' @param y                Binomial response variable used to fit model
#' @param p                Estimated probabilities from fit model
#' @param regularization   (\code{FALSE}/\code{TRUE}) should regularization be performed on the probabilities? (see notes)
#'
#' @return a vector of calibrated probabilities
#'
#' @note Isotonic calibration can correct for monotonic distortions.
#' @note regularization defines new minimum and maximum bound for the probabilities using:
#' @note   pmax = (n1 + 1) / (n1 + 2), pmin = 1 / (n0 + 2); where n1 = number of positive (y = 1) observations and n0 = number of negative (y = 0) observations
#'
#' @author Jeffrey S. Evans    <jeffrey_evans<at>tnc.org>
#'
#' @references Platt, J. (1999) Probabilistic outputs for support vector machines and comparison to regularized likelihood methods. Advances in Large Margin Classifiers (pp 61-74).
#' @references Niculescu-Mizil, A., & R. Caruana (2005) Obtaining calibrated probabilities from boosting. Proc. 21st Conference on Uncertainty in Artificial Intelligence (UAI 2005). AUAI Press.
#'
#' @examples
#'  library(randomForest)
#'  data(iris)
#'  iris$Species <- ifelse( iris$Species == "versicolor", 1, 0 )
#'
#'  # Add some noise
#'  idx1 <- which(iris$Species %in% 1)
#'  idx0 <- which( iris$Species %in% 0)
#'  iris$Species[sample(idx1, 2)] <- 0
#'  iris$Species[sample(idx0, 2)] <- 1
#'
#'  # Specify model
#'  y = iris[,"Species"]
#'  x = iris[,1:4]
#'  set.seed(4364)
#'  (rf.mdl <- randomForest(x=x, y=factor(y)))
#'  y.hat <- predict(rf.mdl, iris[,1:4], type="prob")[,2]
#'
#'  # Calibrate probabilities
#'  calibrated.y.hat <- isotonic_calibration(y, y.hat, regularization = TRUE)
#'
#'  # Plot calibrated against original probability estimate
#'  plot(density(y.hat), col="red", xlim=c(0,1), ylab="Density", xlab="probabilities",
#'       main="Calibrated probabilities" )
#'         lines(density(calibrated.y.hat), col="blue")
#'           legend("topright", legend=c("original","calibrated"),
#'  	            lty = c(1,1), col=c("red","blue"))
#'
#' @export



isotonic_calibration <- function(y, p, regularization = FALSE) {
  # Calibrate estimated probabilities `p` against the observed binary response
  # `y` by fitting an isotonic regression (stats::isoreg) of y on the distinct
  # values of p, then linearly interpolating between the knots of the fitted
  # step function. Returns a numeric vector with one value per element of `p`.
  #
  # y:              numeric 0/1 vector, or a factor whose levels are "0"/"1".
  # p:              numeric vector of estimated probabilities, same length as y.
  # regularization: if TRUE, p is first clipped to
  #                 [1 / (n0 + 2), (n1 + 1) / (n1 + 2)], where n1 and n0 are
  #                 the counts of y == 1 and y == 0.

  # ---- Input validation ----
  if (length(p) != length(y)) stop("Vectors do not match")
  if (!is.numeric(y)) {
    if (!is.factor(y)) stop("y is not valid binomial vector")
    y <- as.numeric(as.character(y))
  }
  # Empty or NA-containing responses cannot be a valid 0/1 vector; fail with a
  # clear message instead of an obscure error further down.
  if (length(y) == 0L || anyNA(y)) stop("y is not a valid binomial vector")
  if (length(unique(y)) > 2) stop("y is not a valid binomial vector")
  if (min(y) != 0) stop("y is not a valid binomial vector")
  if (max(y) != 1) stop("y is not a valid binomial vector")
  if (!is.numeric(p)) stop("p arguments must be numeric")
  if (anyNA(p)) stop("p must not contain missing values")

  # ---- Optional regularization of the probability bounds ----
  if (isTRUE(as.logical(regularization))) {
    n_pos <- sum(y == 1)
    n_neg <- sum(y == 0)
    p_max <- (n_pos + 1) / (n_pos + 2)
    p_min <- 1 / (n_neg + 2)
    # Raise to the lower bound first, then cap at the upper bound.
    p <- pmin(pmax(p, p_min), p_max)
  }

  # ---- Fit on the distinct probabilities ----
  # Only the first observation of each distinct probability is kept. A logical
  # mask is used on purpose: `p[-which(duplicated(p))]` returns an EMPTY vector
  # when there are no duplicates, because `-integer(0)` selects nothing.
  keep <- !duplicated(p)
  if (sum(keep) < 2L) {
    stop("p must contain at least two distinct values")
  }
  iso_mdl <- stats::isoreg(p[keep], y[keep])

  # Evaluate the fitted isotonic model at `x0` by linear interpolation between
  # consecutive retained knots of the step function.
  interpolate_fit <- function(iso, x0) {
    # `ord` is NULL when isoreg() found x already strictly increasing; in that
    # case the identity permutation over the *fitted* x values applies.
    ord <- iso$ord
    if (is.null(ord)) ord <- seq_along(iso$x)
    x <- iso$x[ord]   # fitted x values in increasing order
    yf <- iso$yf      # fitted (monotone) responses, aligned with x
    bin <- cut(x0, breaks = x, labels = FALSE, include.lowest = TRUE)
    min_x <- min(x)
    max_x <- max(x)
    # Keep the first knot plus every knot whose fitted value is positive.
    adjusted_knots <- iso$iKnots[c(1, which(yf[iso$iKnots] > 0))]

    calibrate_one <- function(i) {
      j <- bin[i]
      if (is.na(j)) {
        # Value outside the fitted range: snap to the nearest end.
        if (x0[i] > max_x) {
          j <- length(x)
        } else if (x0[i] < min_x) {
          j <- 1
        }
      }
      above <- which(adjusted_knots > j)
      # No retained knot to the right of this bin: use the same fallback value
      # the NA path produced before, without min() warning on an empty set.
      if (length(above) == 0L) return(max_x)
      upper_n <- above[1L]
      upper <- adjusted_knots[upper_n]
      lower <- if (upper_n == 1L) 1L else adjusted_knots[upper_n - 1L]
      denom <- x[upper] - x[lower]
      if (denom == 0) denom <- 1   # guard against a zero-width step
      val <- yf[lower] +
        (yf[upper] - yf[lower]) * (x0[i] - x[lower]) / denom
      if (is.na(val)) return(max_x)
      # NOTE(review): out-of-range values are replaced by the largest/smallest
      # fitted x rather than by 1/0. Kept as in the original implementation;
      # confirm this is the intended bound.
      if (val > 1) val <- max_x
      if (val < 0) val <- min_x
      val
    }

    vapply(seq_along(x0), calibrate_one, numeric(1), USE.NAMES = FALSE)
  }

  interpolate_fit(iso_mdl, p)
}