#' getConfusion
#'
#' Builds the 2x2 confusion matrix of a record-linkage result. Two records are
#' a \emph{match} when they carry the same id (they represent the same entity);
#' they are \emph{linked} when the linking rule reported them as a pair in
#' \code{inds}. Ids are compared with join semantics: a missing id only matches
#' another missing id.
#'
#' @usage getConfusion(dfA, dfB, inds, idColA, idColB = NULL)
#'
#' @param dfA First data frame being linked
#' @param dfB Second data frame being linked
#' @param inds A nx2 index pair matrix indicating the detected matches. Column 1 holds row numbers of dfA, column 2 holds row numbers of dfB
#' @param idColA The column name in dfA of the shared id across dfA and dfB
#' @param idColB The column name in dfB of the shared id across dfA and dfB. If NULL, it assumes the column in B has the same name as the column in A
#'
#'
#' @return \code{getConfusion} returns a 2x2 data frame of pair counts (stored
#'   as doubles so that large pair counts do not overflow). Rows are
#'   "Detected Link" and "Detected Non-Link"; columns are \code{True_match} and
#'   \code{True_Nonmatch}. Cell [1,1] is the true positives, [2,1] the false
#'   negatives, [1,2] the false positives and [2,2] the true negatives.
#'
#' @author Sam Murray<slmurray@andrew.cmu.edu>
#' @export
#' @import tidyverse
getConfusion <- function(dfA, dfB, inds, idColA, idColB = NULL) {
  if (is.null(idColB)) {
    idColB <- idColA
  }

  # Fail early with a clear message: `[[` on a missing column returns NULL and
  # would otherwise silently produce an all-zero matrix.
  if (!idColA %in% names(dfA)) {
    stop("Column '", idColA, "' not found in dfA", call. = FALSE)
  }
  if (!idColB %in% names(dfB)) {
    stop("Column '", idColB, "' not found in dfB", call. = FALSE)
  }
  if (length(dim(inds)) != 2L || ncol(inds) < 2L) {
    stop("inds must be a two-column matrix of (dfA row, dfB row) index pairs",
         call. = FALSE)
  }

  ids_a <- dfA[[idColA]]
  ids_b <- dfB[[idColB]]

  # Map every id to the position of its value among the distinct ids of dfB.
  # Two records match exactly when they map to the same key; ids of dfA that
  # never occur in dfB map to NA.
  keys <- unique(ids_b)
  key_of_a <- match(ids_a, keys)
  key_of_b <- match(ids_b, keys)
  b_per_key <- tabulate(key_of_b, nbins = length(keys))

  # Number of matches: each record of dfA contributes one pair per record of
  # dfB sharing its id (the row count of an inner join, without building it).
  n_true_matches <- sum(as.numeric(b_per_key[key_of_a]), na.rm = TRUE)
  # Number of nonmatch pairs; as.numeric() avoids integer overflow when
  # nrow(dfA) * nrow(dfB) exceeds .Machine$integer.max.
  n_true_nonmatches <- as.numeric(nrow(dfA)) * nrow(dfB) - n_true_matches

  # True positives: linked pairs whose two records share an id. na.rm = TRUE
  # counts pairs with an id absent from dfB as nonmatches instead of letting
  # NA propagate into every cell.
  n_linked_matches <- sum(key_of_a[inds[, 1]] == key_of_b[inds[, 2]],
                          na.rm = TRUE)
  # False negatives
  n_unlinked_matches <- n_true_matches - n_linked_matches
  # False positives
  n_linked_nonmatches <- nrow(inds) - n_linked_matches
  # True negatives
  n_unlinked_nonmatches <- n_true_nonmatches - n_linked_nonmatches

  confusion_matrix <- data.frame(
    True_match = c(n_linked_matches, n_unlinked_matches),
    True_Nonmatch = c(n_linked_nonmatches, n_unlinked_nonmatches)
  )
  rownames(confusion_matrix) <- c("Detected Link", "Detected Non-Link")
  confusion_matrix
}
#' getConfusionInfo
#'
#' Derives the usual summary statistics from a 2x2 confusion matrix laid out
#' as [1,1] true positives, [2,1] false negatives, [1,2] false positives and
#' [2,2] true negatives.
#'
#' @usage getConfusionInfo(conmat)
#'
#' @param conmat The confusion matrix generated by getConfusion
#'
#' @return \code{getConfusionInfo} Returns a dataframe of information on the given confusion table, including sensitivity, specificity, PPV, NPV, etc.
#'   The data frame has a single column \code{info} and one named row per
#'   statistic. A statistic whose denominator is zero is returned as NaN.
#'
#' @author Sam Murray<slmurray@andrew.cmu.edu>
#' @export
#' @import tidyverse
getConfusionInfo <- function(conmat) {
  # We call 2 records matches if they represent the same entity
  # We call 2 records linked if they are detected as a match by the linking rule
  TruePositive <- conmat[1, 1]
  FalseNegative <- conmat[2, 1]
  FalsePositive <- conmat[1, 2]
  TrueNegative <- conmat[2, 2]

  TotalPositive <- TruePositive + FalseNegative
  TotalNegative <- TrueNegative + FalsePositive

  sensitivity <- TruePositive / TotalPositive
  specificity <- TrueNegative / TotalNegative
  # Positive Predictive Value
  PPV <- TruePositive / (TruePositive + FalsePositive)
  # Negative Predictive Value
  NPV <- TrueNegative / (TrueNegative + FalseNegative)
  # False Negative Rate
  FNR <- 1 - sensitivity
  # False Positive Rate
  FPR <- 1 - specificity
  # False Discovery Rate
  FDR <- 1 - PPV
  # False Omission Rate
  FOR <- 1 - NPV
  # Threat Score (critical success index): TP / (TP + FN + FP).
  # TotalPositive already contains TP + FN, so only FalsePositive is added.
  TS <- TruePositive / (TotalPositive + FalsePositive)
  # Accuracy
  ACC <- (TruePositive + TrueNegative) / (TotalNegative + TotalPositive)
  # F1 Score
  F1 <- 2 * (sensitivity * PPV) / (sensitivity + PPV)
  # Informedness
  BM <- sensitivity + specificity - 1
  # Markedness
  MK <- PPV + NPV - 1

  # NOTE: the misspelled row name "Total Postive" is kept as-is so that
  # existing look-ups by row name keep working.
  out <- data.frame(
    info = c(sensitivity, specificity, PPV, NPV, FNR, FPR, FDR, FOR, TS, ACC,
             F1, BM, MK, TotalPositive, TotalNegative),
    row.names = c("sensitivity", "specificity", "Positive Predictive Value",
                  "Negative Predictive Value", "False Negative Rate",
                  "False Positive Rate", "False Discovery Rate",
                  "False Omission Rate", "Threat Score", "Accuracy",
                  "F1 Score", "Informedness", "Markedness", "Total Postive",
                  "Total Negative")
  )
  out
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.