R/overallConfusionMetrics.R


#' Confusion Matrix metrics for Cross-validation
#'
#' A simple function to generate confusion matrix metrics for cross-validated analyses
#' 
#' @param confusionMat      (confusion matrix or a list of them) A confusion matrix, or a list of confusion matrices, as generated by the confusionMatrix function from the caret library
#' @details 
#'  A function to output a confusion matrix and related metrics (sensitivity, specificity, precision, recall and others) for cross-validated analyses.
#'  There are multiple documented ways of calculating confusion matrix metrics for cross-validation
#'  (see Forman and Scholz 2010 for details on the F1 score). The current procedure pools the fold-wise results into
#'  a single, larger confusion matrix and calculates sensitivity, specificity etc. on this pooled matrix (instead of
#'  averaging the sensitivities and specificities obtained in each fold), as sketched at the end of these details.
#'  
#'  The intuition, from Kelleher, Mac Namee and D'Arcy (2015), is:
#'  
#'  "When we have a small dataset (introducing the possibility of a lucky split) measuring
#'  aggregate performance using a set of models gives a better estimate of post-deployment performance than
#'  measuring performance using a single model."
#'  
#'  In addition, Forman and Scholz (2010) show, using simulation studies, that F1 values calculated this way are 
#'  less biased. 
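#'  
#'  A minimal sketch of the pooling step (assuming \code{foldConfMats} is a hypothetical list of per-fold 
#'  confusionMatrix results from caret; the function below performs the equivalent computation internally):
#'  \preformatted{
#'  # sum the fold-wise confusion tables into one pooled table
#'  pooledTable <- Reduce('+', lapply(foldConfMats, function(x) x$table))
#'  # compute all metrics once on the pooled table
#'  caret::confusionMatrix(as.table(pooledTable))
#'  }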
#' 
#' 
#' @return A list with metrics as generated by the confusionMatrix function in the caret library. 
#'   
#' @examples
#' # Result from a confusion matrix 
#' confusionMat <- list(table = matrix(c(110,29,80,531),ncol = 2,
#' dimnames = list(Prediction = c(1,2),Reference = c(1,2))))
#' overallConfusionMetrics(confusionMat)
#' 
#' # Output:
#' #
#' # Confusion Matrix and Statistics
#' #           Reference
#' # Prediction  1   2
#' #          1 110  80
#' #          2  29 531
#' # Accuracy : 0.8547          
#' # 95% CI : (0.8274, 0.8791)
#' # No Information Rate : 0.8147          
#' # P-Value [Acc > NIR] : 0.002214        
#' # 
#' # Kappa : 0.5785          
#' # Mcnemar's Test P-Value : 1.675e-06       
#' # 
#' # Sensitivity : 0.7914          
#' # Specificity : 0.8691          
#' # Pos Pred Value : 0.5789          
#' # Neg Pred Value : 0.9482          
#' # Prevalence : 0.1853          
#' # Detection Rate : 0.1467          
#' # Detection Prevalence : 0.2533          
#' # Balanced Accuracy : 0.8302          
#' # 
#' # 'Positive' Class : 1         
#'
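#' # Pooling a list of fold-wise results (illustrative only: the two fold tables
#' # below are hypothetical counts that sum to the single matrix above)
#' foldConfMats <- list(
#' list(table = as.table(matrix(c(50,10,15,260),ncol = 2,
#' dimnames = list(Prediction = c(1,2),Reference = c(1,2))))),
#' list(table = as.table(matrix(c(60,19,65,271),ncol = 2,
#' dimnames = list(Prediction = c(1,2),Reference = c(1,2))))))
#' overallConfusionMetrics(foldConfMats)
#'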
#' # Alternative (realistic) examples
#' Results <- classifyFun(Data = KinData,classCol = 1,
#' selectedCols = c(1,2,12,22,32,42,52,62,72,82,92,102,112),cvType = "folds",
#' extendedResults = TRUE)
#' 
#' overallConfusionMetrics(Results$ConfMatrix)
#'
#'
#'
#' @import caret
#' @author
#' Atesh Koul, C'MON unit, Istituto Italiano di Tecnologia
#'
#' \email{atesh.koul@@iit.it}
#' 
#' 
#' @references 
#' Kelleher, J. D., Mac Namee, B. & D'Arcy, A. Fundamentals of Machine Learning for Predictive Data Analytics. (The MIT Press, 2015). Section 8.4.1.2.
#' 
#' Elkan, C. Evaluating Classifiers. (2012). p. 4. https://pdfs.semanticscholar.org/2bdc/61752a02783aa0e69e92fe6f9b449916a095.pdf
#' 
#' Forman, G. & Scholz, M. Apples-to-apples in cross-validation studies. ACM SIGKDD Explor. Newsl. 12, 49 (2010).
#' 
#' @export
overallConfusionMetrics <- function(confusionMat){
  
  if(is.null(names(confusionMat))){
    
    mergeConfTable <- function(listOfConfMat){
      
      confTable <- list()
      for(i in seq_along(listOfConfMat)){
        # collect only the confusion table from each fold
        # so that Reduce can sum them element-wise
        confTable[[i]] <- listOfConfMat[[i]]$table
      }
      MergedConfTable <- Reduce('+',confTable)
      return(MergedConfTable)
      
    }
    confusionMatrixOverallTable <- mergeConfTable(confusionMat)
  } else {
    # ensure that a plain matrix of values is coerced to a table
    # (by default this is already a table, as returned by confusionMatrix)
    confusionMatrixOverallTable <- as.table(confusionMat$table)
  }
  
  
  
  # use the confusionMatrix function with data as table
  confusionMatrixOverallResults <- confusionMatrix(confusionMatrixOverallTable)
  return(confusionMatrixOverallResults)
  
  
  # Legacy implementation below (works only for 2-class problems)
  # confusionMatrices <- unlist(confusionMat)
  # 
  # 
  # if(is.null(names(confusionMat))){
  #   # matrices produced by the confusionMatrix function are by nature named lists
  #   # However, the results produced by cross-validation are unnamed lists
  #   # Using this property to segregate the input matrices
  #   
  #   # take the dimension names from first confusion matrix
  #   # The dimnames should be the same across the cross-validation folds
  #   dimenNames <- dimnames(confusionMat[[1]]$table)
  # } else {
  #   
  #   
  #   dimenNames <- dimnames(confusionMat$table)
  # }
  # 
  # # construct the final bigger confusion matrix for 2 class classification problem.
  # tableA <- sum(as.numeric(confusionMatrices[grep("table1",names(confusionMatrices))]))
  # tableC <- sum(as.numeric(confusionMatrices[grep("table2",names(confusionMatrices))]))
  # tableB <- sum(as.numeric(confusionMatrices[grep("table3",names(confusionMatrices))]))
  # tableD <- sum(as.numeric(confusionMatrices[grep("table4",names(confusionMatrices))]))
  # 
  # # get it in line with confusion matrix function
  # confusionMatrixOverallTable <- matrix(c(tableA,tableC,tableB,tableD),ncol = 2,dimnames = dimenNames)
  # 
  # # coerce this as a table
  # confusionMatrixOverallTable <- as.table(confusionMatrixOverallTable)
  
  
  
}