properROC: Display and Analyze Proper ROC Curves

Documented in aucfromdor rocauc roccoord rocdor roctable

#-----------------------------------------#
#                 roctable                #
#          making a ROC Table (S3)        #
# (ROC coordinates and accuracy indices)  #
#-----------------------------------------#
#
#' @title Make a ROC Table
#'
#' @description Make a ROC Table (S3 object derived from a R matrix).
#' The ROC Table includes seven columns, corresponding to the following
#' variables: sensitivity ("1-Sens"), 1 - specificity ("1-Spec"),
#' Youden index ("J"), Global Diagnostic Accuracy ("GDA"), Positive
#' Likelihood Ratio ("LR+"), Negative Likelihood Ratio ("LR-"), and
#' the corresponding cut-off values ("Cut-off")
#'
#' @param marker The disease marker (a continuous variable)
#' @param status Patient status: 0 = control,  1 = case
#'
#' @return rocmatr, the ROC Table Matrix (S3 object)
#'
#' @importFrom stats na.omit
#'
#' @export
#'
#' @examples
#' ## Not Run:
#' RocCa125 <- roctable(PancreaticData$Ca125, PancreaticData$Status)
#' head(RocCa125)
#'
roctable <- function(marker, status) {

  # Make a temporary dataframe for sorting, excluding missing values
  data <- data.frame(Marker = marker, Status = status)
  data <- na.omit(data)
  data <- data[order(data$Marker),]  #Sorting on marker values

  nsam <- length(data$Status)
  ncases <- sum(data$Status)
  ncontr <- nsam - ncases
  prev <- ncases/nsam # Disease "prevalence"

  # rocmatr includes: Sens, 1-Spec, J, Acc, LR+, LR-, cut-offs

  rocmatr <- matrix(nrow=nsam+1, ncol= 7) # Empty table
  rocmatr[1,1] <- 1    # First value of sensitivity
  rocmatr[1,2] <- 1    # First value of 1 - specificity
  rocmatr[1,3] <- 0.0  # First value of Youden Index
  rocmatr[1,4] <- prev # First value of GDA (glob. diag. accuracy)
  # rocmatr[1,5] LR+; the first value (1/0) is already set to NA
  rocmatr[1,6] <- 0.0  # First value of LR-
  rocmatr[1,7] <- data$Marker[1]  # First cut-off
  colnames(rocmatr) <- c("Sens", "1-Spec", "J", "GDA", "LR+", "LR-", "Cut-off")

  for (i in 1:nsam) {

    sens <- spec <- 0.0
    for (j in 1:nsam) {
      if ((data$Marker[j] > data$Marker[i]) &&
           data$Status[j] == 1) {
        sens <- sens + 1
      }  else if ((data$Marker[j] <= data$Marker[i]) &&
                   data$Status[j] == 0) {
        spec = spec + 1
      }
    }
    sens <- sens/ncases
    spec <- spec/ncontr
    rocmatr[i+1,1] <- sens
    rocmatr[i+1,2] <- 1-spec
    rocmatr[i+1,3] <- sens + spec - 1 # Youden Index
    rocmatr[i+1,4] <- sens * prev + spec *(1-prev) # Global accuracy
    if (1-spec > 0) {
      rocmatr[i+1,5] = sens / (1 - spec) # LR+
    }
    if (spec > 0) {
      rocmatr[i+1,6] <- (1-sens) / spec # LR+
    }
    if (i < nsam) {
      rocmatr[i+1,7] <- (data$Marker[i+1] + data$Marker[i]) /2
    } else {
      rocmatr[i+1,7] <- data$Marker[i]
    }
  }

  # Transforming rocmatr in an S3 object
  class(rocmatr) <- c("roctab", "matrix")
  return(rocmatr)

}

#-----------------------------------------#
#                roccoord                 #
#        coordinates of a ROC plot        #
#  (quick routine for simulations only)   #
#-----------------------------------------#
#' @title Calculate the Coordinates of a ROC curve
#'
#' @description Calculate the coordinates of a ROC curve
#' (called by: pvaltprc(). For internal use only)
#'
#' @param marker The disease marker (a continuous variable)
#' @param status Patient status: 0 = control,  1 = case
#'
#' @return matrix, a matrix containing values of sens. and 1-spec.
roccoord <- function(marker, status) {

  # Calculate coordinates (sens and 1 - spec)
  # for simulated ROC curves only
  # Missing values are not allowed

  data <- matrix(c(marker, status), ncol = 2)
  data <- data[sort.list(data[,1]),]  #Sorting on marker values

  nsam <- length(status)
  ncases <- sum(status)
  ncontr <- nsam - ncases
  sens <- array(dim=nsam+1)
  onespec <- sens

  sens[1] <- ncases    # First value of sensitivity
  onespec[1] <- ncontr # First value of 1 - specificity

  for (i in 1:nsam) {
    sensi <- speci <- 0.0
    for (j in 1:nsam) {
      if ((data[j,1] > data[i,1]) && (data[j,2] == 1)) {
        sensi <- sensi + 1
      }  else if ((data[j,1] <= data[i,1]) && (data[j,2] == 0)) {
        speci <- speci + 1
      }
    }
    sens[i+1] <- sensi
    onespec[i+1] <- ncontr-speci
  }
  sens <- sens/ncases
  onespec <- onespec/ncontr

  return(matrix(c(sens,onespec), ncol = 2))

}


#---------------------------#
#            rocauc         #
#        estimate AUC       #
#      from a ROC Table     #
#---------------------------#
#
#' @title Estimate AUC from a ROC Table
#'
#' @description Calculate the area under a ROC curve from a ROC Table.
#' ROC Table is an S3 object derived from an R matrix and generated by
#' the roctable() function
#'
#' @param roc A ROC Table (S3 object)
#'
#' @return auc, the area under the ROC curve whose coordinates are in the
#' first two columns of the ROC table
#'
#' @export
#'
#' @examples
#' ROCCa125 <- roctable(PancreaticData$Ca125, PancreaticData$Status)
#' AUCCa125 <- rocauc(ROCCa125)
#' print(paste("AUC for Ca125 tumour marker: ", round(AUCCa125,4)))
#'
#'
rocauc <- function(roc) {

  # Estimate of AUC by the trapezoidal rule
  # (roc is an S3 ROC Table)

  auc <- 0
  n <- length(roc[,1])
  for (i in 2 : n) {
    auc <- auc + 0.5 * (roc[i-1,1] + roc[i,1]) *
          (roc[i-1,2] - roc[i,2])
  }

  return(auc)

}


#----------------------------#
#           rocdor           #
#  estimating DOR from AUC   #
#   in a proper ROC model    #
#----------------------------#
#' @title Estimate DOR from an AUC under a Proper Model Assumption
#'
#' @description Estimate the Diagnostic Odds Ratio (DOR) corresponding
#' to the area under a ROC curve (AUC), under the assumption of a proper
#' model.
#'
#' @param auc The area under a ROC curve
#' @param eps The precision required for DOR estimate (default: 0.0001)
#' @param MAXITER the maximum number of iterations for the DOR estimate
#' (default: 1000)
#'
#' @return dor, the Diagnostic Odds Ratio (the parameter defining a proper
#' ROC curve)
#'
#' @export
#'
#' @examples
#' ROCCa125 <- roctable(PancreaticData$Ca125, PancreaticData$Status)
#' DORCa125 <- rocdor(rocauc(ROCCa125))
#' print(paste("DOR for the proper ROC curve for Ca125 tumor marker: ",
#'       round(DORCa125,4)))
#'
rocdor <- function(auc, eps = 0.0001, MAXITER = 1000L) {

  #Estimate of DOR under a proper model assumption

  MAXDOR <- 100000  # Maximum value admitted for DOR
  eauc <- 0.5       # Estimated AUC
  incr <- 1.0       # Increment for DOR estimate

  if (auc < 0.5) {
    # The curve is not proper (AUC < 0.5)
    return(-1)
  } else if (auc > 1) {
    print("AUC cannot be > 1.0")
    return(-1)
  } else if (auc == 0.5000) {
    return(1)
  } else if (auc == 1.0000) {
    print("DOR is infinite. DOR = 10000 is returned")
    return(10000)
  } else if (auc > 0.9999) {
    return(MAXDOR)
  }

  # Early approximated estimate of DOR
  # from Taylor series expansion
  if (auc < 0.6137) {                     # DOR range: about 1-2
    dor <- 29 - sqrt(16.3 - 25.4*auc)
    incr <- 0.5
  } else if (auc < 0.7172) {              # DOR range: about 2-4
    dor <- 27.8 - 20*sqrt(3.2 - 2.5*auc)
    incr <- 1.0
  } else if (auc < 0.8268) {             # DOR range: about 4-10
    dor <- 12.2 - 25*sqrt(0.83 - auc)
    incr <- 3.0
  } else if (auc < 0.8867) {              # DOR range: about 10-20
    dor <- 179.6 * auc - 139
    incr <- 5.0
  } else if (auc < 0.9286) {              # DOR range: about 20-40
    dor <- 515 * auc - 438
    incr <- 10.0
  } else if (auc < 0.9464) {              # DOR range: about 40-60
    dor <- 1159 * auc - 1038
    incr <- 10.0
  } else if (auc < 0.9631) {              # DOR range: about 60-100
    dor <- 2504 * auc - 2314
    incr <- 20.0
  } else if (auc < 0.9764) {              # DOR range: about 100 - 180
    dor <- 6420 * auc - 6090
    incr <- 40.0
  } else if (auc < 0.9818) {              # DOR range: about 180 - 250
    dor <- 13947 * auc - 13445
    incr <- 35.0
  } else {                                # DOR > 250
    dor <- 530 # AUC = 0.9900
    incr <- 50
  }

  # Refining DOR estimate by a divide and conquer approach
  niter <- 0L
  eauc <- aucfromdor(dor)
  delta <- auc - eauc
  direc <- sign(delta) # Direction for iteration (-1 decrement)

  while((abs(delta) > eps) || (niter == MAXITER)){
    niter <- niter + 1
    dor <- dor +direc*incr
    if (dor < 1.0) {
      dor <- 1.0
      incr <- incr/2
      direc <- +1 # Summing increment
    }
    # Control for DOR < 1.0 here
    eauc <- aucfromdor(dor)
    if (sign(auc - eauc) != direc) {
      incr <- incr/2
      direc <- -direc
    }
    delta = auc - eauc
  }

  return(dor)

}


#----------------------------#
#         aucfromdor         #
#   estimate AUC from DOR    #
#   in a proper ROC model    #
#----------------------------#
#
#' @title Estimate AUC from a Diagnostic Odds Ratio
#'
#' @description Estimate the Area Under a ROC curve (AUC) under the
#' assumption of a proper model using the corresponding Diagnostic
#' Odds Ratio (the parameter of the theoretical ROC curve with the
#' same AUC). In a proper ROC curve DOR must be >= 1.0.
#'
#' @param dor A Diagnostic Odds Ratio
#'
#' @return auc, the area under the theoretical proper ROC curve
#'
#' @export
#'
#' @examples
#' aucfromdor(1.0)
#' # AUC = 0.5 corresponds to a non informative ROC curve
#'
#' aucfromdor(8.0)
#' # A Diagnostic Odds Ratio >= 0.8 is needed to obtain an AUC of at least 0.8
#'
#' aucfromdor(0.9)
#' # A Diagnostic Odds Ratio > 1.0 corresponds to a non-proper ROC curve
#'
aucfromdor <- function(dor) {

  # Estimate of AUC in a proper ROC from DOR
  if (dor == 1) {
    return(0.5)
  } else {
    return(dor/(dor-1)-dor*log(dor)/(dor-1)^2)
  }

}