R/brierCurve.R

Defines functions brierCurve

Documented in brierCurve

#' Calculates Brier Curve
#'
#' This function calculates the Brier curve (both in terms of cost and skew) based on a set of predictions generated by a binary classifier. Brier curves allow an evaluation of classifier performance in cost space. This code is an adapted version from the authors' original implementation, available through http://dmip.webs.upv.es/BrierCurves/BrierCurves.R.
#'
#' @param preds Vector with predictions (real-valued or discrete)
#' @param labels Vector with true class labels
#' @param resolution Value for the determination of percentile intervals. Defaults to 1/1000.
#' @export
#' @import zoo
#' @importFrom data.table as.data.table
#' @return object of the class \code{brierCurve} which is a list with the following components:
#' \item{brierCurveCost}{Cost-based Brier curve, represented as (cost,loss) coordinates}
#' \item{brierCurveSkew}{Skew-based Brier curve, represented as (skew,loss) coordinates}
#' \item{auc_brierCurveCost}{Area under the cost-based Brier curve.}
#' \item{auc_brierCurveSkew}{Area under the skew-based Brier curve.}
#' @author Koen W. De Bock, \email{kdebock@@audencia.com}
#' @references Hernandez-Orallo, J., Flach, P., & Ferri, C. (2011). Brier Curves: a New Cost-Based Visualisation of Classifier Performance. Proceedings of the 28th International Conference on Machine Learning (ICML-11), 585–592.
#' @seealso \code{\link{plotBrierCurve}}, \code{\link{CSMES.ensNomCurve}}
#' @examples
#' ##load data
#' library(rpart)
#' data(BFP)
#' ##generate random order vector
#' BFP_r<-BFP[sample(nrow(BFP),nrow(BFP)),]
#' size<-nrow(BFP_r)
#' ##size<-300
#' train<-BFP_r[1:floor(size/3),]
#' val<-BFP_r[ceiling(size/3):floor(2*size/3),]
#' test<-BFP_r[ceiling(2*size/3):size,]
#' ##train CART decision tree model
#' model=rpart(as.formula(Class~.),train,method="class")
#' ##generate predictions for the tes set
#' preds<-predict(model,newdata=test)[,2]
#' ##calculate brier curve
#' bc<-brierCurve(test[,"Class"],preds)

brierCurve<-function(labels,preds,resolution=0.001){

  inp <- cbind((labels==sort(unique(labels))[2])*1,preds)
  bfactor <- 2
  n0n1 <- nrow(inp)
  x <- t(inp)
  zord <- order(x[2,])
  sc <- x[,zord]
  n1 <- sum(sc[1,])
  n0 <- n0n1 - n1
  pi0 <- n0/n0n1
  pi1 <- n1/n0n1

  zord <- order(x[2,])
  zordrev <- rev(zord)
  screv <- x[,zordrev]
  inp <- t(screv) #Decreasing order

  if (n0 == 0)
    warning("No elements of class 0")
  if (n1 == 0)
    warning("No elements of class 1")

  sc <- cbind(sc,sc[,n0n1])
  F0 <- c(0:n0n1)
  F1 <- c(0:n0n1)

  K1 <- 1
  k <- 2
  for (i in 1:n0n1) {
    F0[k] <- F0[K1]+(1-sc[1,i])
    F1[k] <- F1[K1]+sc[1,i]
    K1 <- k
    k <- if (sc[2,i+1] == sc[2,i]) (k) else (k+1)
  }
  F0 <- F0[1:K1]
  F1 <- F1[1:K1]
  G0nomin <- F0 / n0
  G1nomin <- F1 / n1

  inpnorep <- 1:n0n1
  j <- 1
  olda <- -1
  for (i in 1:n0n1) {
    a <- inp[i,2]
    if ((a != olda) || (i == 1)) {
      inpnorep[j] <- a
      olda <- a
      j <- j+1
    }
  }
  # j-1 should be equal to K1 here
  inpnorep <- inpnorep[1:(K1-1)]
  costprobnorep <- c(1:(K1+1))
  costprobnorep[1] <- 0
  costprobnorep[K1+1] <- 1
  for (i in 2:K1)
  {
    costprobnorep[i] <- 1 * inpnorep[K1-i+1]
    # costprobnorep[i] <- 1-inpnorep[i-1]
  }

  ######## Expected cost Qprobnew (Brier) ####### for COST
  K1new <- K1*2
  costprobnew <- c(1:K1new)
  Qprobnew <- c(1:K1new)

  Qprobnew0 <- c(1:K1new)
  Qprobnew1 <- c(1:K1new)

  for (i in 2:(K1new-1))
  {
    costprobnew[i] <- costprobnorep[trunc(i/2)+1]

    prova <- costprobnew[i]
    prova0 <- G0nomin[i]
    prova1 <- G1nomin[i]
    Qprobnew[i] <- bfactor * (prova*pi0*(1-G0nomin[trunc((i+1)/2)]) + (1-prova)*pi1*G1nomin[trunc((i+1)/2)])
    Qprobnew0[i] <- bfactor * (prova*pi0*(1-G0nomin[trunc((i+1)/2)]))
    Qprobnew1[i] <- bfactor * (1-prova)*pi1*G1nomin[trunc((i+1)/2)]
  }
  Qprobnew[1] <- 0
  Qprobnew[K1new] <- 0
  Qprobnew0[1] <- 0
  Qprobnew0[K1new] <- 0
  Qprobnew1[1] <- 0
  Qprobnew1[K1new] <- 0
  costprobnew[1] <- 0
  costprobnew[K1new] <- 1

  ##### Expected cost Qprobnewnorm (Brier) ###### for SKEW
  K1new <- K1*2
  costprobnewnorm <- c(1:K1new)
  Qprobnewnorm <- c(1:K1new)
  Qprobnewnorm0 <- c(1:K1new)
  Qprobnewnorm1 <- c(1:K1new)

  for (i in 2:(K1new-1))
  {
    p <- costprobnorep[trunc(i/2)+1]
    costprobnewnorm[i] <- p
    prova <- costprobnewnorm[i]
    Qprobnewnorm[i] <- bfactor * 0.5 * (prova*(1-G0nomin[trunc((i+1)/2)]) + (1-prova)*G1nomin[trunc((i+1)/2)])
    Qprobnewnorm0[i] <- bfactor * 0.5 * (prova*(1-G0nomin[trunc((i+1)/2)]))
    Qprobnewnorm1[i] <- bfactor * 0.5 * ((1-prova)*G1nomin[trunc((i+1)/2)])
  }
  Qprobnewnorm[1] <- 0
  Qprobnewnorm[K1new] <- 0

  Qprobnewnorm0[1] <- 0
  Qprobnewnorm0[K1new] <- 0
  Qprobnewnorm1[1] <- 0
  Qprobnewnorm1[K1new] <- 0

  costprobnewnorm[1] <- 0
  costprobnewnorm[K1new] <- 1
  y<-NA #test
  x.values=costprobnew
  y.values=Qprobnew

  group<-as.data.table(data.frame(x=x.values,y=y.values))
  a<-group[group[, .I[y == max(y)], by=x]$V1]
  x.values<-as.numeric(unlist(a[,1]))
  y.values<-as.numeric(unlist(a[,2]))

  nr_intervals=1/resolution
  seqs<-seq(from=0,to=1,by=1/nr_intervals)
  values<-approx(x.values, y.values,xout=seqs)[[2]]
  lower_env_coordinates_cost<-rbind(seqs,values)

  x.values=costprobnewnorm
  y.values=Qprobnewnorm

  group<-as.data.table(data.frame(x=x.values,y=y.values))
  a<-group[group[, .I[y == max(y)], by=x]$V1]
  x.values<-as.numeric(unlist(a[,1]))
  y.values<-as.numeric(unlist(a[,2]))

  values<-array(0,c(length(x.values),nr_intervals+1))
  seqs<-seq(from=0,to=1,by=1/nr_intervals)
  values<-approx(x.values, y.values,xout=seqs)[[2]]
  lower_env_coordinates_skew<-rbind(seqs,values)

  Briercurve_cost_nods<-rbind(costprobnew,Qprobnew)
  Briercurve_skew_nods<-rbind(costprobnewnorm,Qprobnewnorm)
  rownames(Briercurve_cost_nods)<-c("cost","loss")
  rownames(Briercurve_skew_nods)<-c("skew","loss")
  Briercurve_cost<-lower_env_coordinates_cost
  Briercurve_skew<-lower_env_coordinates_skew
  rownames(Briercurve_cost)<-c("cost","loss")
  rownames(Briercurve_skew)<-c("skew","loss")

  x2 <- Briercurve_cost[1,]
  y2 <- Briercurve_cost[2,]
  id <- order(x2)
  auc_brierCurveCost<- sum(diff(x2[id])*rollmean(y2[id],2))

  x2 <- Briercurve_skew[1,]
  y2 <- Briercurve_skew[2,]
  id <- order(x2)
  auc_brierCurveSkew<- sum(diff(x2[id])*rollmean(y2[id],2))

  ans<-list(brierCurveCost=t(Briercurve_cost),brierCurveSkew=t(Briercurve_skew),auc_brierCurveCost=auc_brierCurveCost,auc_brierCurveSkew=auc_brierCurveSkew)
  class(ans) <- "brierCurve"
  ans
}

Try the CSMES package in your browser

Any scripts or data that you put into this service are public.

CSMES documentation built on Feb. 16, 2023, 10:09 p.m.