R/ClassificationPlot.R

#' Classification plot
#'
#' This function can be used to visualize the performance measures and create a custom classification plot of a model or two competing models.
#' An object is also returned containing the performance measures of the model(s).
#'
#' @param model1 Variable with the risks of the (baseline) model.
#' @param model2 Variable with the risks of the competing model.
#' @param outcome Response variable.
#' @param data Dataframe in which the predicted risks are to be found
#' @param cutoffs A vector containing the cutoffs for which the performance measures need to be calculated.
#' @param col1 Color for the lines of the TPR model(s).
#' @param col2 Color for the lines of the FPR of the model(s).
#' @param lwd Linewidth of the lines in the plot.
#' @param TreatAll Logical, indicates if treat all has to be shown on the plot. Default is \code{TRUE}.
#' @param colTreatAll Color of treat all.
#' @param TreatNone Logical, indicates if treat none has to be shown on the plot. Default is \code{TRUE}.
#' @param colTreatNone Color of treat none.
#' @param SNBpl Logical, indicates if SNB has to be shown on the plot. Default is \code{TRUE}.
#' @param colSNB Color of SNB.
#' @param axes Axes command of \code{\link{plot}}, default is \code{TRUE}. If set to \code{FALSE}, only the y- and x-axis
#' will be shown on the plot.
#' @param ShowAUC Logical, indicates if the AUC has to be shown on the plot. Default is \code{TRUE}.
#' @param AUCcoord Vector of length two with the coordinates of the AUCs on the plot. \code{\link{locator}} can also be used.
#' @param LegCoord Vector of length two with the coordinates of the legend on the plot. \code{\link{locator}} can also
#'  be used.
#' @param y.intersp Character interspecing factor for vertical distance.
#' @param RiskSet Indicates if performance measures have to be shown (see UtilityMeasures for further specification) below
#' the plot. Specify \code{'model1'} for the performance measures of model 1, \code{'model2'}
#' for those of model 2 and \code{'both'} if the performance measures for both models have to be shown..
#' \code{'none'} suppresses the printing. Default is \code{'model1'}.
#' @param cex.leg Size of the legend.
#' @param cex.auc Size of the AUCs shown on the plot.
#' @param GraphSettings List with graphical settings. See \code{\link{par}}.
#' @param PrintMessages Logical, indicates whether messages have to be printed while the function is calculating the
#' performance measures and preparing the plot. Default is \code{TRUE}.
#' @param UtilityMeasures Indicates which performance measures have to be shown for each of the cutoffs. Specify
#' \code{'default'} for the true and false positive rate (TPR and FPR, respectively) and
#' \code{'accuracy'} to additionally show the positive likelihood ratio (PLR), negative likelihood ratio (NLR),
#'  positive predictive value (PPV) and negative predictive value (NPV).
#' \code{'Utility'} shows the TPR, FPR, standardized net benefit (SNB), net TP per 100 and NNT for 1 TP if the
#' predicted risks of one model are given. In case of 2 models, the TPR and FPR for both models are given
#' as well as the difference in SNB and the difference in net TP per 100. Only possible for the default cutoffs.
#' Default is \code{'proportions'}.
#' @param loc Specifies the x-coordinates for the titles of the RiskSet.
#' @param LabelsModels The labels for the model(s) when \code{Riskset!='none'}.
#' @param ylab Label for the y-axis.
#' @param ... Arguments to be passed to \code{\link{plot}}, see \code{\link{par}}.
#' @param pointwiseCI Can be used to plot the pointwise confidence intervals of the TPR and/or FPR at specific cutoffs.
#' \code{"TPR"} when this needs to be plotted for the TPR, \code{"FPR"} when this only needs to be plotted for the FPR
#' and \code{"both"} when it has to be plotted for both the FPR and TPR.
#' @param pointwiseCIcutoff A vector of cutoffs for which the pointwise confidence intervals have to be calculated.
#'
#' @return The classification plot is plotted and an object containing the performance measures of the model(s) is returned.
#'
#' @references Verbakel JY, Steyerberg EW, Uno H, De Cock B, Collins G, Van Calster B. From ROC curves to classification plots for markers 
#' and models: from wast of ink towards useful insights. \emph{Submitted}.
#' @references Vickers AJ, Elkin EB. Decision Curve Analysis: A Novel Method for Evaluating Prediction Models.
#' \emph{Medical Decision Making} 2006, 26(6): 565-574
#' @references Van Calster B, Vickers A, Pencina M, Baker S, Timmerman D, Steyerberg EW.
#' Evaluation of Markers and Risk Prediction Models: Overview of Relationships between NRI and Decision-Analytic
#'  easures. \emph{Medical Decision Making} 2013, 33(4): 490-501.
#' @references Vickers AJ, Van Calster B, Steyerberg EW. Net benefit approaches to the evaluation
#' of prediction models, molecular markers and diagnostic tests. \emph{British Medical Journal} 2016, 352
#'
#' @examples
#' #---------#
#' # 1 model #
#' #---------#
#'
#' # simulated data
#' X      = replicate(4, rnorm(5e2))
#' p0true = binomial()$linkinv(cbind(1, X)%*%c(0.1, 0.5, 1.2, -0.75, 0.8))
#' y      = rbinom(5e2, 1, p0true)
#' Df     = data.frame(y,X)
#'
#' # fit logistic model
#' FitLog = glm(y~., Df, family=binomial)
#' Pred   = binomial()$linkinv(cbind(1, X)%*%coef(FitLog))
#' Df2    = cbind.data.frame(Pred = Pred, Outcome = y)
#'
#' # Classification plot
#' ClassificationPlot(Pred, outcome = Outcome, data=Df2)
#'
#' #----------#
#' # 2 models #
#' #----------#
#'
#' # Fit second model
#' FitLog2 = glm(y~., Df[,1:3], family=binomial)
#' Pred2   = binomial()$linkinv(cbind(1, X[,1:2])%*%coef(FitLog2))
#' Df3    = cbind.data.frame(Model1 = Pred, Model2 = Pred2, Outcome = y)
#'
#' # Classification plot
#' ClassificationPlot(Model1, Model2, Outcome, Df3)
#' ClassificationPlot(Model1, Model2, Outcome, Df3, RiskSet = "both")
#' ClassificationPlot(Model1, Model2, Outcome, Df3, RiskSet = "both", UtilityMeasures = "utility")

ClassificationPlot <- function(model1, model2, outcome, data, cutoffs = seq(0, 0.9, by = 0.1),
                               pointwiseCI = c("none", "TPR", "FPR", "both"),
                               pointwiseCIcutoff = NULL,
                               col1 = "darkgreen", col2 = "red",
                               lwd = 3, TreatAll = F, colTreatAll = "grey", TreatNone = F,
                               colTreatNone = "black",
                               SNBpl = F, colSNB = "blue",
                               axes = T, ShowAUC = T, AUCcoord = c(0.625, 0.95),
                               LegCoord = c(0.75, 1.05),
                               y.intersp = 0.75, RiskSet = c("model1", "model2", "both","none"),
                               cex.leg=0.75, cex.auc = 0.85,
                               GraphSettings = NULL, PrintMessages = T,
                               UtilityMeasures = c("default", "accuracy", "utility"),
                               loc = -0.2,
                               LabelsModels = c("Model 1", "Model 2"),
                               ylab = "Proportion",
                               ...){
  Argz = as.list(match.call())[-1]
  RiskSet = match.arg(RiskSet)
  UtilityMeasures = match.arg(UtilityMeasures)
  pointwiseCI = match.arg(pointwiseCI)
  if(pointwiseCI != "none" & is.null(pointwiseCIcutoff))
    stop("Please specify the cutoff that has to be used.")
  if(pointwiseCI != "none" & !is.numeric(pointwiseCIcutoff))
    stop("pointwiseCI has to be of the type vector and has to be numeric.")
  if(pointwiseCI != "none" & (any(pointwiseCIcutoff <= 0) | any(pointwiseCIcutoff >= 1)))
     stop("Values must be > 0 and < 1.")
  
  if(missing(model1) & !missing(model2))
    stop("Please specify model 1.")  
  if(!missing(data)) {
  if(!is.data.frame(data))
    stop("Has to be of type dataframe.")
  } else {
    if(!is.vector(model1))
      stop("model 1 has to be of type vector.")
    if(!missing(model2))
      if(!is.vector(model2))
        stop("model2 has to be of type vector.")
    data = data.frame(model1 = model1, outcome = outcome)
    if(!missing(model2))
      data$model2 = model2
  }

  if(min(cutoffs)<0 | max(cutoffs)>1)
    stop("Cutoffs < 0 or > 1 are not possible.")
  if(!is.character(LabelsModels))
    stop("Only character strings are allowed.")
  if(length(LabelsModels)>2)
    stop("Only one or two character strings are permitted.")
  if(RiskSet=="both" & UtilityMeasures=="accuracy")
    stop("The accuracy options is not possible for both models.")
  if(!"character" %in% class(ylab))
    stop("Has to be of type character.")

  n  = nrow(data)
  Df = data
  Df$m1    = eval(Argz$model1, data)
  if(any(Df$m1<=0) | any(Df$m1>=1))
    stop("Predicted probabilities have to be given for model1 (0 < risk < 1).")
  if(!missing(model2))
    Df$m2 = eval(Argz$model2, data)
  if(!missing(model2))
    if(any(Df$m2<=0) | any(Df$m2>=1)) stop("Predicted probabilities have to be given for model1 (0 < risk < 1).")
  if(missing(model2) & RiskSet=="model2") stop("RiskSet cannot be calculated when model2 is not given.")
  Df$Outc  = eval(Argz$outcome, data)
  if(!all(Df$outcome %in% 0:1)) stop("The response variable can only contain 0 or 1.")
  Prev     = sum(Df$Outc)/nrow(Df)
  op = par("mfrow", "oma", "mai")
  if(RiskSet!="none"){
    if(is.null(GraphSettings)){
      par(mfrow=c(1,1), oma=c(if(RiskSet!="both") 5 else 10,5,0,0),
          mgp = c(2.5, 1, 0),
          mai=c(0.5,0.7,0.2,0.2))
    }else{
      if(!is.list(GraphSettings)) stop("Has to be of type list.")
      NamesPar = names(par())
      if(!all(names(GraphSettings)%in%NamesPar)){
        NoGraphSett = names(GraphSettings)[!names(GraphSettings)%in%NamesPar]
        cat(paste("\n\nThe following are not recognized as graphical parameters:\n",
                  paste(" - ",NoGraphSett, collapse="\n", sep=""), sep=""))
        stop()
        op = par()
        par(GraphSettings)
      }
    }
  }

  if(PrintMessages) cat("\n\nComputing performance measures for the model(s).\n\n")
  ft  = roc(Outc, m1, Df)
  if(!missing(model2)) ft1 = roc(Outc, m2, Df)

  NNT = TP = TN = FP = FN = SE = SP = rep(0,length(cutoffs))
  lambda = NB_model = NB_nomodel = rep(0,length(cutoffs))

  if(PrintMessages) cat("\n\nComputing performance measures for each of the cutoffs.\n\n")
  Results = list()
  Results$Model1 = PerfMeas(ft, cutoffs)
  Results$Model1$AUC     = ft$AUC
  list2env(as.list(cbind(Results$Model1$Summary[, , drop = F],
                         Results$Model1$AddInfo[, , drop = F])), envir = environment())
  if(!missing(model2)){
    Results$Model2 = PerfMeas(ft1, cutoffs)
    Results$Model2$AUC     = ft1$AUC
    if(RiskSet == "model2") list2env(as.list(cbind(Results$Model1$Summary[,,drop=F],
                                                 Results$Model1$AddInfo[,,drop=F])
                                           ), envir = environment())
  }

  netSE = SNB = ft$SE-ft$nFP/(ft$nTP+ft$nFN)*(ft$dscore/(1-ft$dscore))
  # netSE  = ifelse(SNB<0,NA,SNB)

  if(!missing(model2)){
    netSE1 = SNB1 = ft1$SE-ft1$nFP/(ft1$nTP+ft1$nFN)*(ft1$dscore/(1-ft1$dscore))
    # netSE1 = ifelse(SNB1<0,NA,SNB1)
  }

  lambda2 = ft$dscore/(1-ft$dscore)
  P = (ft$nTP+ft$nFN)/(ft$nTP+ft$nFN+ft$nFP+ft$nTN)

  SNBTA = SNBtreat_all = 1-(1/P)*lambda2*(1-P)
  # SNBTA = ifelse(SNBtreat_all<0,NA,SNBtreat_all)

  NNT_extra[0] = NNT_nomodel[0.5] = NNT_model[0.5] = netTP_nomodel[0.5] = netTP_model[0.5] = NA
  # netTP_nomodel[netTP_nomodel<0] = 0

  idx = ft$dscore<=1

  if(PrintMessages) cat("\n\nPreparing the plot\n\n")
  plot(ft$dscore, ft$SE, col=col1, type="l",xlab="", ylab = ylab, lab=c(10, 10, 7), axes=axes, las=1, lwd=lwd, ...)
  lines(ft$dscore, 1-ft$SP, col=col2, lwd=lwd)
  if(SNBpl) lines(ft$dscore, netSE, col=colSNB, lty=1, lwd=lwd)
  if(TreatAll) lines(ft$dscore, SNBTA, col=colTreatAll, lty=4, lwd=lwd)
  if(TreatNone) abline(h=0, lty=2, col=colTreatNone)
  
  if(pointwiseCI != "none"){
    pointwiseCIcutoff = sort(pointwiseCIcutoff)
    pointwiseResults  = matrix(NA, length(pointwiseCIcutoff), 6)
    rownames(pointwiseResults) = paste("cutoff", pointwiseCIcutoff)
    colnames(pointwiseResults) = c("TPR", "TPRlcl", "TPRucl", "FPR", "FPRlcl", "FPRucl")
    for(i in pointwiseCIcutoff){
      Df$bin = sapply(Df$m1, 
                   function(x) {
                     if(is.na(x))
                       NA
                     else if(x < i)
                       0
                     else
                       1
                   })
      tp  = sum(Df$bin == 1 & Df$Outc == 1)
      fn  = sum(Df$bin == 0 & Df$Outc == 1)
      tn  = sum(Df$bin == 0 & Df$Outc == 0)
      fp  = sum(Df$bin == 1 & Df$Outc == 0)
      TPRci = binci.desmon(tp, tp + fn)$out
      FPRci = binci.desmon(fp, tn + fp)$out
      if(pointwiseCI %in% c("TPR", "both"))
        arrows(i, TPRci[1], i, TPRci[2], angle = 90, col = col1, code = 3, lwd = lwd * 0.75, length = 0.1)
      if(pointwiseCI %in% c("FPR", "both"))
        arrows(i, FPRci[1], i, FPRci[2], angle = 90, col = col2, code = 3, lwd = lwd * 0.75, length = 0.1)
      pointwiseResults[which(pointwiseCIcutoff == i), ] = c(tp / (tp + fn), TPRci, fp / (fp + tn), FPRci)
    }
    pointwiseCIm1 = pointwiseResults
  }

  if(!axes){
    axis(side=1, pos=0, at=seq(0,1, 0.1), labels=seq(0,1,0.1))
    axis(side=2, pos=0, at=seq(0, 1, 0.2), las=1)
  }
  if(!missing(model2)){
    lines(ft1$dscore, ft1$SE, col=col1, lty=2, lwd=lwd)
    lines(ft1$dscore, 1-ft1$SP, col=col2, lty=2, lwd=lwd)
    if(SNBpl) lines(ft1$dscore, netSE1, col=colSNB, lty=2, lwd=lwd)
    if(ShowAUC)
      text(AUCcoord[1], AUCcoord[2], labels=paste("AUC ", LabelsModels[1]," = ",round(ft$AUC[1],digits = 2)," \n",
                                            paste("AUC ", LabelsModels[2], " = ",round(ft1$AUC[1],digits = 2)), sep=""), pos=4,
                     cex = cex.auc)
    LegText = c(paste("TPR", LabelsModels),paste("FPR", LabelsModels))
    LegCol  = c(rep(col1, 2), rep(col2, 2))
    LegLty  = c(rep(c(1,2), 2))
    
    if(pointwiseCI != "none"){
      pointwiseCIcutoff = sort(pointwiseCIcutoff)
      pointwiseResults  = matrix(NA, length(pointwiseCIcutoff), 6)
      rownames(pointwiseResults) = paste("cutoff", pointwiseCIcutoff)
      colnames(pointwiseResults) = c("TPR", "TPRlcl", "TPRucl", "FPR", "FPRlcl", "FPRucl")
      for(i in pointwiseCIcutoff){
        Df$bin = sapply(Df$m2, 
                     function(x) {
                       if(is.na(x))
                         NA
                       else if(x < i)
                         0
                       else
                         1
                     })
        tp  = sum(Df$bin == 1 & Df$Outc == 1)
        fn  = sum(Df$bin == 0 & Df$Outc == 1)
        tn  = sum(Df$bin == 0 & Df$Outc == 0)
        fp  = sum(Df$bin == 1 & Df$Outc == 0)
        TPRci = binci.desmon(tp, tp + fn)$out
        FPRci = binci.desmon(fp, tn + fp)$out
        if(pointwiseCI %in% c("TPR", "both"))
          arrows(i, TPRci[1], i, TPRci[2], angle = 90, col = col1, code = 3, lwd = lwd * 0.75, length = 0.1)
        if(pointwiseCI %in% c("FPR", "both"))
          arrows(i, FPRci[1], i, FPRci[2], angle = 90, col = col2, code = 3, lwd = lwd * 0.75, length = 0.1)
        pointwiseResults[which(pointwiseCIcutoff == i), ] = c(tp / (tp + fn), TPRci, fp / (fp + tn), FPRci)
      }
      pointwiseCIm2 = pointwiseResults
    }


  }else{
    LegText = c("TPR", "FPR")
    LegCol  = c(col1, col2)
    LegLty  = c(rep(1,2))
    if(ShowAUC) text(AUCcoord[1], AUCcoord[2], labels = paste("AUC ", LabelsModels[1]," = ",
                                                            round(ft$AUC[1],digits=2)), cex = cex.auc)
  }
  if(SNBpl){
    LegText =
      if(missing(model2)) c(LegText, "SNB")
        else c(LegText, paste("SNB", LabelsModels))
    LegCol  = c(LegCol, rep(colSNB, if(missing(model2)) 1 else 2))
    LegLty  =
      if(missing(model2)) c(LegLty, 1)
        else c(LegLty, 1:2)
  }
  if(TreatAll){
    LegText = c(LegText, "SNB treat all")
    LegCol  = c(LegCol, colTreatAll)
    LegLty  = c(LegLty, 2)
  }
  if(TreatNone){
    LegText = c(LegText, "SNB treat none")
    LegCol  = c(LegCol, colTreatNone)
    LegLty  = c(LegLty, 2)
  }

  mtext(side = 1, at = 0.5,  line = 2, text = "Risk Threshold", col = "black", cex=1)

  legend(LegCoord[1], LegCoord[2],LegText,
         lwd=2, lty=LegLty,col=LegCol,
         y.intersp=y.intersp, box.col="transparent", cex=cex.leg, bg="transparent")

  #--- risk set --
  if(RiskSet!="none"){
    CEX=0.7 ; ATT=seq(0,0.9,0.1)
    TmpTxt = if(RiskSet%in%c("model1", "both")) LabelsModels[1] else if(RiskSet=="model2") LabelsModels[2]

    mtext(side = 1, at = loc, line = 3, text = TmpTxt,  col = "black", cex=CEX, font = 2)
    mtext(side = 1, at = loc, line = 4, text = "True positive rate",  col = "black", cex=CEX)
    mtext(side = 1, at = loc, line = 5, text = "False positive rate",  col = "black", cex=CEX)
    mtext(side = 1, at = ATT, line = 4, text = round(SE,digits=2), col = "black", cex=CEX)
    mtext(side = 1, at = ATT, line = 5, text = round(1-SP,digits=2), col = "black", cex=CEX)
    # Utility
    if(UtilityMeasures=="utility" & RiskSet!="both"){
      mtext(side = 1, at = loc, line = 6, text = "Std Net benefit",  col = "black", cex=CEX)
      mtext(side = 1, at = loc, line = 7, text = "net TP per 100",  col = "black", cex=CEX)
      mtext(side = 1, at = loc, line = 8, text = "NNT for 1 TP",  col = "black", cex=CEX)

      mtext(side = 1, at = ATT, line = 6, text = round(StdNB,digits=2), col = "black", cex=CEX)
      mtext(side = 1, at = ATT, line = 7, text = round(NB_model * 100,digits=1), col = "black", cex=CEX)
      mtext(side = 1, at = ATT, line = 8, text = round(NNT_extra,digits=0), col = "black", cex=CEX)
    }
    if(RiskSet=="both"){
      # Default model2
      mtext(side = 1, at = loc, line = 6, text = LabelsModels[2],  col = "black", cex=CEX, font = 2)
      mtext(side = 1, at = loc, line = 7, text = "True positive rate",  col = "black", cex=CEX)
      mtext(side = 1, at = loc, line = 8, text = "False positive rate",  col = "black", cex=CEX)
      SE2 = Results$Model2$Summary$SE
      SP2 = Results$Model2$Summary$SP
      mtext(side = 1, at = ATT, line = 7, text = round(SE2,digits=2), col = "black", cex=CEX)
      mtext(side = 1, at = ATT, line = 8, text = round(1-SP2,digits=2), col = "black", cex=CEX)
    if(UtilityMeasures=="utility"){
      mtext(side = 1, at = loc, line = 9, text = "Comparison models",  col = "black", cex=CEX, font = 2)
      mtext(side = 1, at = loc, line = 10, text = expression(paste(Delta, " Std NB")), col = "black", cex=CEX)
      mtext(side = 1, at = loc, line = 11, text = expression(paste(Delta, " net TP per 100")),
            col = "black", cex=CEX)
      DiffSNB   = Results$Model2$Summary$StdNB - StdNB
      DiffNetTP = (Results$Model2$Summary$NB_model - NB_model)*100
      mtext(side = 1, at = ATT, line = 10, text = round(DiffSNB,digits=2), col = "black", cex=CEX)
      mtext(side = 1, at = ATT, line = 11, text = round(DiffNetTP,digits=0), col = "black", cex=CEX)
    }
    }
    if(UtilityMeasures=="accuracy"){
      mtext(side = 1, at = loc, line = 6, text = "Positive LR",  col = "black", cex=CEX)
      mtext(side = 1, at = loc, line = 7, text = "Negative LR",  col = "black", cex=CEX)
      mtext(side = 1, at = loc, line = 8, text = "PPV",  col = "black", cex=CEX)
      mtext(side = 1, at = loc, line = 9, text = "NPV",  col = "black", cex=CEX)
      mtext(side = 1, at = ATT, line = 6, text = round(plr,digits=2), col = "black", cex=CEX)
      mtext(side = 1, at = ATT, line = 7, text = round(nlr,digits=2), col = "black", cex=CEX)
      mtext(side = 1, at = ATT, line = 8, text = round(ppv,digits=2), col = "black", cex=CEX)
      mtext(side = 1, at = ATT, line = 9, text = round(npv,digits=2), col = "black", cex=CEX)
    }
  }
  if(pointwiseCI != "none") {
    Results$pointwiseCIs = if (exists("pointwiseCIm2")){
      list(model1 = pointwiseCIm1, model2 = pointwiseCIm2)
    }else{
      pointwiseCIm1
    }
  }
  par(op)
  Results = structure(Results, class = "ClassificationPlot")
  return(Results)
}
BavoDC/ClassificationPlot documentation built on June 26, 2019, 4:40 a.m.