#' Confusion Matrix Metrics for Cross-validation
#'
#' A simple function to generate confusion matrix metrics for cross-validated analyses
#'
#' @param confusionMat (confusion matrix or a list of confusion matrices) Input confusion matrix generated by the confusionMatrix function from the caret library, or a list of such matrices (one per cross-validation fold)
#' @details
#' A function to output confusion matrices and related metrics such as sensitivity, specificity, precision and recall for cross-validated analyses.
#' There are multiple documented ways of calculating confusion matrix metrics for cross-validation
#' (see Forman and Scholz 2010 for details on the F1 score). The procedure implemented here sums the per-fold
#' confusion matrices into a single overall confusion matrix and calculates sensitivity, specificity, etc. on
#' this matrix (instead of averaging the per-fold sensitivities and specificities).
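#'
#' For example (with made-up per-fold counts), merging two folds amounts to an element-wise sum of their confusion tables:
#'
#' \preformatted{
#' fold1 <- as.table(matrix(c(55, 14, 40, 265), ncol = 2))
#' fold2 <- as.table(matrix(c(55, 15, 40, 266), ncol = 2))
#' Reduce('+', list(fold1, fold2))  # the merged 110/29/80/531 table used in the examples below
#' }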
#'
#' The intuition from Kelleher, Mac Namee and D'Arcy (2015) is:
#'
#' "When we have a small dataset (introducing the possibility of a lucky split) measuring
#' aggregate performance using a set of models gives a better estimate of post-deployment performance than
#' measuring performance using a single model."
#'
#' In addition, Forman and Scholz (2010) show, using simulation studies, that F1 values calculated this way are
#' less biased.
#'
#'
#' @return A list with metrics as generated by the confusionMatrix function in the caret library.
#'
#' @examples
#' # Result from a confusion matrix
#' confusionMat <- list(table = matrix(c(110,29,80,531),ncol = 2,
#' dimnames = list(Prediction = c(1,2),Reference = c(1,2))))
#' overallConfusionMetrics(confusionMat)
#'
#' # Output:
#' #
#' # Confusion Matrix and Statistics
#' # Reference
#' # Prediction 1 2
#' # 1 110 80
#' # 2 29 531
#' # Accuracy : 0.8547
#' # 95% CI : (0.8274, 0.8791)
#' # No Information Rate : 0.8147
#' # P-Value [Acc > NIR] : 0.002214
#' #
#' # Kappa : 0.5785
#' # Mcnemar's Test P-Value : 1.675e-06
#' #
#' # Sensitivity : 0.7914
#' # Specificity : 0.8691
#' # Pos Pred Value : 0.5789
#' # Neg Pred Value : 0.9482
#' # Prevalence : 0.1853
#' # Detection Rate : 0.1467
#' # Detection Prevalence : 0.2533
#' # Balanced Accuracy : 0.8302
#' #
#' # 'Positive' Class : 1
#'
#' # Alternative (realistic) examples
#' Results <- classifyFun(Data = KinData,classCol = 1,
#' selectedCols = c(1,2,12,22,32,42,52,62,72,82,92,102,112),cvType = "folds",
#' extendedResults = TRUE)
#'
#' overallConfusionMetrics(Results$ConfMatrix)
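#'
#' # A further sketch with made-up per-fold counts: an unnamed list of
#' # confusion matrices (e.g. one per cross-validation fold) is merged by
#' # summing the fold tables before the statistics are computed
#' foldConfMat <- list(
#'   list(table = as.table(matrix(c(55, 14, 40, 265), ncol = 2,
#'     dimnames = list(Prediction = c(1, 2), Reference = c(1, 2))))),
#'   list(table = as.table(matrix(c(55, 15, 40, 266), ncol = 2,
#'     dimnames = list(Prediction = c(1, 2), Reference = c(1, 2))))))
#'
#' overallConfusionMetrics(foldConfMat)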
#'
#'
#'
#' @import caret
#' @author
#' Atesh Koul, C'MON unit, Istituto Italiano di Tecnologia
#'
#' \email{atesh.koul@@gmail.com}
#'
#'
#' @references
#' Kelleher, J. D., Mac Namee, B. & D'Arcy, A. Fundamentals of Machine Learning for Predictive Data Analytics. (The MIT Press, 2015). Section 8.4.1.2.
#'
#' Elkan, C. Evaluating Classifiers. (2012). pp. 4. https://pdfs.semanticscholar.org/2bdc/61752a02783aa0e69e92fe6f9b449916a095.pdf
#'
#' Forman, G. & Scholz, M. Apples-to-apples in cross-validation studies. ACM SIGKDD Explor. Newsl. 12, 49 (2010).
#'
#' @export
overallConfusionMetrics <- function(confusionMat){
  if(is.null(names(confusionMat))){
    # an unnamed list is assumed to hold one confusion matrix per cross-validation fold
    mergeConfTable <- function(listOfConfMat){
      confTable <- list()
      for(i in seq_along(listOfConfMat)){
        # collect only the confusion tables
        # This is to ensure that Reduce works
        confTable[[i]] <- listOfConfMat[[i]]$table
      }
      # sum the per-fold tables element-wise into one bigger table
      MergedConfTable <- Reduce('+', confTable)
      # coerce to a table in case plain matrices were supplied
      return(as.table(MergedConfTable))
    }
    confusionMatrixOverallTable <- mergeConfTable(confusionMat)
  } else {
    # ensure that a plain matrix of values is coerced to a table
    # by default this should already be a table (from the function confusionMatrix)
    confusionMatrixOverallTable <- as.table(confusionMat$table)
  }
  # use the confusionMatrix function with the merged table as data
  confusionMatrixOverallResults <- confusionMatrix(confusionMatrixOverallTable)
  return(confusionMatrixOverallResults)
# Works for only 2-class problems
# confusionMatrices <- unlist(confusionMat)
#
#
# if(is.null(names(confusionMat))){
# # confusionMat produced by confusionMatrix function are by nature lists
# # However, the results produced by cross-validation are unnamed lists
# # Using this property to segregate the input matrices
#
# # take the dimension names from first confusion matrix
# # The dimnames should be the same across the cross-validation folds
# dimenNames <- dimnames(confusionMat[[1]]$table)
# } else {
#
#
# dimenNames <- dimnames(confusionMat$table)
# }
#
# # construct the final bigger confusion matrix for 2 class classification problem.
# tableA <- sum(as.numeric(confusionMatrices[grep("table1",names(confusionMatrices))]))
# tableC <- sum(as.numeric(confusionMatrices[grep("table2",names(confusionMatrices))]))
# tableB <- sum(as.numeric(confusionMatrices[grep("table3",names(confusionMatrices))]))
# tableD <- sum(as.numeric(confusionMatrices[grep("table4",names(confusionMatrices))]))
#
# # get it in line with confusion matrix function
# confusionMatrixOverallTable <- matrix(c(tableA,tableC,tableB,tableD),ncol = 2,dimnames = dimenNames)
#
# # coerce this as a table
# confusionMatrixOverallTable <- as.table(confusionMatrixOverallTable)
}