Nothing
#' External validity indices
#'
#' \strong{E}xternal \strong{v}alidity indices compare a predicted clustering
#' result with a reference class or gold standard.
#'
#' `ev_nmi` calculates the normalized mutual information
#'
#' @param pred.lab predicted labels generated by classifier
#' @param ref.lab reference labels for the observations
#' @param method method of computing the entropy. Can be any one of "emp", "mm",
#' "shrink", or "sg".
#'
#' @return `ev_nmi` returns the normalized mutual information.
#' @references Strehl A, Ghosh J. Cluster ensembles: a knowledge reuse framework
#' for combining multiple partitions. J. Mach. Learn. Res. 2002;3:583-617.
#' @note `ev_nmi` is adapted from [infotheo::mutinformation()]
#' @author Johnson Liu, Derek Chiu
#' @name external_validity
#' @export
#'
#' @examples
#' set.seed(1)
#' E <- matrix(rep(sample(1:4, 1000, replace = TRUE)), nrow = 100, byrow =
#' FALSE)
#' x <- sample(1:4, 100, replace = TRUE)
#' y <- sample(1:4, 100, replace = TRUE)
#' ev_nmi(x, y)
#' ev_confmat(x, y)
ev_nmi <- function(pred.lab, ref.lab, method = "emp") {
U <- data.frame(ref.lab, pred.lab)
Hyx <- infotheo::entropy(U, method)
Hx <- infotheo::entropy(pred.lab, method)
Hy <- infotheo::entropy(ref.lab, method)
I <- ifelse(Hx + Hy - Hyx < 0, 0, Hx + Hy - Hyx)
NMI <- I / sqrt(Hx * Hy)
NMI
}
#' @details `ev_confmat` calculates a variety of statistics associated with
#' confusion matrices. Accuracy, Cohen's kappa, and Matthews correlation
#' coefficient have direct multiclass definitions, whereas all other
#' metrics use macro-averaging.
#'
#' @return `ev_confmat` returns a tibble of the following summary statistics using [yardstick::summary.conf_mat()]:
#' * `accuracy`: Accuracy
#' * `kap`: Cohen's kappa
#' * `sens`: Sensitivity
#' * `spec`: Specificity
#' * `ppv`: Positive predictive value
#' * `npv`: Negative predictive value
#' * `mcc`: Matthews correlation coefficient
#' * `j_index`: Youden's J statistic
#' * `bal_accuracy`: Balanced accuracy
#' * `detection_prevalence`: Detection prevalence
#' * `precision`: alias for `ppv`
#' * `recall`: alias for `sens`
#' * `f_meas`: F Measure
#' @rdname external_validity
#' @export
ev_confmat <- function(pred.lab, ref.lab) {
if (!all(unique(pred.lab) %in% unique(ref.lab))) {
stop("Cluster labels should be the same in the predicted and reference
classes.")
}
# Relabel predicted classes
pred.relab <- relabel_class(pred.lab, ref.lab) %>%
factor(levels = sort(unique(ref.lab)))
# Confusion matrix and summary statistics
CM <- table(pred.relab, ref.lab)
summary(yardstick::conf_mat(CM))
}
#' Concordance counts for all pairs of points between two cluster partitions
#' @return A list with 4 elements
#' * yy: number of points belonging to the same cluster both in cl1 and cl2
#' * yn: number of points belonging to the same cluster both in cl1 but not in cl2
#' * ny: number of points belonging to the same cluster both in cl2 but not in cl1
#' * nn: number of points _not_ belonging to the same cluster both in cl1 and cl2
#' @noRd
concordance_counts <- function(cl1, cl2) {
n <- length(cl1)
yy <- yn <- ny <- nn <- 0
for (i in 1:(n - 1)) {
for (j in (i + 1):n) {
if (cl1[[i]] == cl1[[j]]) {
if (cl2[[i]] == cl2[[j]]) {
yy <- yy + 1
} else {
yn <- yn + 1
}
} else {
if (cl2[[i]] == cl2[[j]]) {
ny <- ny + 1
} else {
nn <- nn + 1
}
}
}
}
return(list(yy = yy, yn = yn, ny = ny, nn = nn))
}
#' Hubert index
#' @noRd
ev_hubert <- function(cl1, cl2) {
cc <- concordance_counts(cl1, cl2)
Nt <- Reduce(`+`, cc)
yy <- cc[["yy"]]
yn <- cc[["yn"]]
ny <- cc[["ny"]]
nn <- cc[["nn"]]
(Nt * yy - (yy + yn) * (yy + ny)) /
sqrt((yy + yn) * (yy + ny) * (nn + yn) * (nn + ny))
}
#' Jaccard index
#' @noRd
ev_jaccard <- function(cl1, cl2) {
cc <- concordance_counts(cl1, cl2)
cc[["yy"]] / c(cc[["yy"]] + cc[["yn"]] + cc[["ny"]])
}
#' McNemar index
#' @noRd
ev_mcnemar <- function(cl1, cl2) {
cc <- concordance_counts(cl1, cl2)
(cc[["yn"]] - cc[["ny"]]) / sqrt(cc[["yn"]] + cc[["ny"]])
}
#' Rand index
#' @noRd
ev_rand <- function(cl1, cl2) {
cc <- concordance_counts(cl1, cl2)
Nt <- Reduce(`+`, cc)
(cc[["yy"]] + cc[["nn"]]) / Nt
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.