R/confusion.R

Defines functions print.confusion confusion.mlearning confusion.default .confusion confusion

Documented in confusion confusion.default confusion.mlearning print.confusion

#' Construct and analyze confusion matrices
#'
#' @description
#' Confusion matrices compare two classifications (usually one done
#' automatically using a machine learning algorithm versus the true
#' classification done by a specialist... but one can also compare two automatic
#' or two manual classifications against each other).
#'
#' @param x an object with a `confusion()` method implemented.
#' @param y another object, from which to extract the second classification, or
#'   `NULL` if not used.
#' @param vars the variables of interest in the first and second classification
#'   in the case the objects are lists or data frames. Otherwise, this argument
#'   is ignored and `x` and `y` must be factors with same length and same levels.
#' @param labels labels to use for the two classifications. By default, they are
#'   the same as `vars`, or the one in the confusion matrix.
#' @param merge.by a character string with the name of variables to use to merge
#'   the two data frames, or `NULL`.
#' @param useNA do we keep `NA`s as a separate category? The default `"ifany"`
#'   creates this category only if there are missing values. Other possibilities
#'   are `"no"`, or `"always"`.
#' @param prior class frequencies to use for first classifier that is tabulated
#'   in the rows of the confusion matrix. For its value, see here under, the
#'   `value=` argument.
#' @param ... further arguments passed to the method.
#'
#' @return A confusion matrix in a **confusion** object.
#' @export
#' @seealso [mlearning()], [plot.confusion()], [prior()]
#' @examples
#' data("Glass", package = "mlbench")
#' # Use a little bit more informative labels for Type
#' Glass$Type <- as.factor(paste("Glass", Glass$Type))
#'
#' # Use learning vector quantization to classify the glass types
#' # (using default parameters)
#' summary(glass_lvq <- ml_lvq(Type ~ ., data = Glass))
#'
#' # Calculate cross-validated confusion matrix
#' (glass_conf <- confusion(cvpredict(glass_lvq), Glass$Type))
#' # Raw confusion matrix: no sort and no margins
#' print(glass_conf, sums = FALSE, sort = FALSE)
#'
#' summary(glass_conf)
#' summary(glass_conf, type = "Fscore")
confusion <- function(x, ...)
  UseMethod("confusion")

# TODO: implement weights
.confusion <- function(classes, labels, useNA, prior, ...) {
  # useNA can be "no", "always" or "ifany", but with the later value
  # one takes the risk to get non square matrix if there are NAs in only
  # one vector of classes => change to "no" or "always", depending if there
  # are missing data or not
  if (useNA == "ifany")
    if (any(is.na(classes))) useNA <- "always" else useNA <- "no"
  res <- table(classes, dnn = labels, useNA = useNA)
  total <- sum(res)
  truePos <- sum(diag(res))
  row.freqs <- rowSums(res)

  # Additional data as attributes
  attr(res, "row.freqs") <- row.freqs
  attr(res, "col.freqs") <- colSums(res)
  attr(res, "levels") <- levels(classes[1, ]) # These are *initial* levels!
  # Final levels may differ if there are empty levels, or NAs!
  attr(res, "prior") <- row.freqs # Initial prior are row.freqs
  attr(res, "stats") <- c(total = total, truepos = truePos,
    error = 1 - (truePos / total))

  # This is a confusion object, inheriting from table
  class(res) <- c("confusion", "table")

  # Do we rescale the confusion matrix?
  if (!missing(prior)) prior(res) <- prior

  res
}

#' @rdname confusion
#' @export
#' @method confusion default
confusion.default <- function(x, y = NULL, vars = c("Actual", "Predicted"),
labels = vars, merge.by = "Id", useNA = "ifany", prior, ...) {
  # If the object is already a 'confusion' object, return it
  if (inherits(x, "confusion")) {
    if (!missing(y))
    warning("you cannot provide 'y' when 'x' is a 'confusion' object")
    # Possibly rescale it
    if (!missing(prior)) prior(x) <- prior
    x
  }

  # Idem if there is a 'confusion' attribute and no y
  conf <- attr(x, "confusion")
  if (!is.null(conf) && missing(y)) {
    # Possibly reweight it
    if (!missing(prior)) prior(conf) <- prior
    conf
  }

  # Reworks and check arguments
  vars <- as.character(vars)
  if (length(vars) != 2)
    stop("You must provide exactly 2 strings for 'vars'")
  merge.by <- as.character(merge.by)

  # There are three possibilities:
  # 1) A single data frame => use vars
  if (missing(y)) {
    # Special case of a data frame or list of two factors: keep as it is
    if (is.list(x) && length(x) == 2 && is.null(vars)) {
      clCompa <- as.data.frame(x)
      if (missing(labels)) labels <- names(clCompa)
    } else {
      x <- as.data.frame(x)
      # Check that vars exist
      if (is.null(names(x)) || !all(vars %in% names(x)))
        stop("'vars' are not among column names of 'x'")
      # Check that levels of two vars do match
      lev1 <- levels(x[[vars[1]]])
      lev2 <- levels(x[[vars[2]]])
      if (!all(lev1 == lev2)) {
        # If difference is only in the order of both levels, reorder #2
        if (!all(sort(lev1) == sort(lev2))) {
          stop("levels of the two variables in 'x' do not match")
        } else x[[vars[2]]] <- factor(as.character(x[[vars[2]]]),
          levels = lev1)
      }
      clCompa <- data.frame(class1 = x[[vars[1]]], class2 = x[[vars[2]]])
    }
  } else {# y is provided
    # 2) Two vectors of factors (must have same length/same levels)
    if (is.factor(x) && is.factor(y)) {
      # Check length match
      if (length(x) != length(x))
        stop("lengths of 'x' and 'y' are not the same")
      # Check levels match
      lev1 <- levels(x)
      lev2 <- levels(y)
      if (!all(lev1  == lev2)) {
        # If difference is only in the order of both levels, reorder #2
        if (!all(sort(lev1)  == sort(lev1))) {
          stop("'x' and 'y' levels do not match")
        } else {
          y <- factor(as.character(y), levels = lev1)
        }
      }
      clCompa <- data.frame(class1 = y, class2 = x)
    } else {
      # 3) Two data frames => merge first, then use vars
      # Check that vars exists
      if (is.null(names(x)) || !(vars[1] %in% names(x)))
        stop("first item of 'vars' is not among names of 'x'")
      if (is.null(names(y)) || !(vars[2] %in% names(y)))
        stop("second item of 'vars' is not among names of 'y'")
      # Check that levels of two vars do match
      lev1 <- levels(x[[vars[1]]])
      lev2 <- levels(y[[vars[2]]])
      if (!all(lev1  == lev2)) {
        # If difference is only in the order of both levels, reorder #2
        if (!all(sort(lev1)  == sort(lev2))) {
          stop("levels of the variables in 'x' and 'y' do not match")
        } else {
          x[[vars[2]]] <- factor(as.character(x[[vars[2]]]), levels = lev1)
        }
      }
      # Merge data according to merge.by
      clCompa <- merge(y[, c(vars[2], merge.by)],
        x[, c(vars[1], merge.by)], by = merge.by)
      nc <- ncol(clCompa)
      clCompa <- clCompa[, c(nc - 1, nc)]
      # Are there common objects left?
      if (!nrow(clCompa)) stop("no common objects between 'x' and 'y'")
    }
  }

  # Construct the confusion object
  if (missing(prior)) {
    .confusion(classes = clCompa, labels = labels, useNA = useNA, ...)
  } else {
    .confusion(classes = clCompa, labels = labels, useNA = useNA,
      prior = prior, ...)
  }
}

#' @rdname confusion
#' @export
#' @method confusion mlearning
confusion.mlearning <- function(x, y = response(x),
labels = c("Actual", "Predicted"), useNA = "ifany", prior, ...) {
  # Check labels
  labels <- as.character(labels)
  if (length(labels) != 2)
    stop("You must provide exactly 2 character strings for 'labels'")

  # Extract class2 by using predict on the mlearning object
  class2 <- predict(x, ...)

  # Check that both variables are of same length and same levels
  if (length(y) != length(class2))
    stop("lengths of 'x' and 'y' are not the same")
  lev1 <- levels(y)
  lev2 <- levels(class2)
  if (!all(lev1  == lev2)) {
    # If difference is only in the order of both levels, reorder #2
    if (!all(sort(lev1)  == sort(lev2))) {
      stop("levels of 'x' and 'y' do not match")
    } else {
      class2 <- factor(as.character(class2), levels = lev1)
    }
  }

  # Construct the confusion object
  if (missing(prior)) {
    .confusion(data.frame(class1 = y, class2 = class2),
      labels = labels, useNA = useNA, ...)
  } else {
    .confusion(data.frame(class1 = y, class2 = class2),
      labels = labels, useNA = useNA, prior = prior, ...)
  }
}

#' @rdname confusion
#' @export
#' @method print confusion
#' @param sums is the confusion matrix printed with rows and columns sums?
#' @param error.col is a column with class error for first classifier added
#'   (equivalent to false negative rate of FNR)?
#' @param digits the number of digits after the decimal point to print in the
#'   confusion matrix. The default or zero leads to most compact presentation
#'   and is suitable for frequencies, but not for relative frequencies.
#' @param sort are rows and columns of the confusion matrix sorted so that
#'   classes with larger confusion are closer together? Sorting is done
#'   using a hierarchical clustering with [hclust()]. The clustering method
#'   is `"ward.D2"` by default, but see the [hclust()] help for other options).
#'   If `FALSE` or `NULL`, no sorting is done.
print.confusion <- function(x, sums = TRUE, error.col = sums, digits = 0,
sort = "ward.D2", ...) {
  # General stats on the confusion matrix
  Stats <- attr(x, "stats")
  Error <- round(Stats["error"] * 100, 1)
  cat(Stats["total"], " items classified with ", Stats["truepos"],
    " true positives (error rate = ", Error, "%)\n", sep = "")
  row.freqs <- attr(x, "row.freqs")
  if (!all(attr(x, "prior") == row.freqs)) {
    cat("with initial row frequencies:\n")
    print(row.freqs)
    cat("Rescaled to:\n")
  }

  # Print the confusion matrix itself
  X <- x
  class(X) <- "table"

  n <- ncol(X)

  # Do we sort items?
  if (length(sort) && any(!is.na(sort)) && any(sort != FALSE) &&
    any(sort != "")) {
    # Grouping of items
    confuSim <- X + t(X)
    confuSim <- 1 - (confuSim / sum(confuSim) * 2)
    confuDist <- structure(confuSim[lower.tri(confuSim)], Size = n,
      Diag = FALSE, Upper = FALSE, method = "confusion", call = "",
      class = "dist")
    order <- hclust(confuDist, method = sort)$order
    X <- X[order, order]
  }

  # Change row and column names to a more compact representation
  nbrs <- formatC(1:ncol(X), digits = 1, flag = "0")
  colnames(X) <- nbrs
  rownames(X) <- paste(nbrs, rownames(X))

  # Add sums?
  if (isTRUE(as.logical(sums))) {
    # Calculate error (%)
    ErrorTot <- (1 - (sum(diag(x)) / sum(x))) * 100
    Errors <- as.integer(round(c((1 - diag(X) / apply(X, 1, sum)) * 100,
      ErrorTot), 0))
    # ... and add row and column sums
    X <- addmargins(X, FUN = list(`(sum)` = sum), quiet = TRUE)
  } else {
    Errors <- as.integer(round((1 - diag(X) / apply(X, 1, sum)) * 100, 0))
  }

  # Add class errors?
  if (isTRUE(as.logical(error.col))) {
    X <- as.table(cbind(X, `(FNR%)` = Errors))
    dn <- dimnames(X)
    names(dn) <- names(dimnames(x))
    dimnames(X) <- dn
  }
  print(round(X, digits))

  # Return the original object invisibly
  invisible(x)
}

#' @rdname confusion
#' @export
#' @method summary confusion
#' @param object a **confusion** object
#' @param type either `"all"` (by default), or considering `TP` is the true
#' positives, `FP` is the false positives, `TN` is the true negatives and `FN`
#' is the false negatives, one can also specify: `"Fscore"` (F-score = F-measure
#' = F1 score = harmonic mean of Precision and recall), `"Recall"`
#' (TP / (TP + FN) = 1 - FNR), `"Precision"` (TP / (TP + FP) = 1 - FDR),
#' `"Specificity"` (TN / (TN + FP) = 1 - FPR), `"NPV"` (Negative predicted value
#' = TN / (TN + FN) = 1 - FOR), `"FPR"` (False positive rate = 1 - Specificity
#' = FP / (FP + TN)), `"FNR"` (False negative rate = 1 - Recall = FN / (TP + FN)),
#' `"FDR"` (False Discovery Rate = 1 - Precision = FP / (TP + FP)), `"FOR"`
#' (False omission rate = 1 - NPV = FN / (FN + TN)), `"LRPT"` (Likelihood Ratio
#' for Positive Tests = Recall / FPR = Recall / (1 - Specificity)), `"LRNT"`
#' Likelihood Ratio for Negative Tests = FNR / Specificity = (1 - Recall) /
#' Specificity, `"LRPS"` (Likelihood Ratio for Positive Subjects = Precision /
#' FOR = Precision / (1 - NPV)), `"LRNS"` (Likelihood Ratio Negative Subjects =
#' FDR / NPV = (1 - Precision) / (1 - FOR)), `"BalAcc"` (Balanced accuracy
#' = (Sensitivity + Specificity) / 2), `"MCC"` (Matthews correlation coefficient),
#'   `"Chisq"` (Chisq metric), or `"Bray"` (Bray-Curtis metric)
#' @param sort.by the statistics to use to sort the table (by default, Fmeasure,
#' the F1 score for each class = 2 * recall * precision / (recall + precision)).
#' @param decreasing do we sort in increasing or decreasing order?
summary.confusion <- function(object, type = "all", sort.by = "Fscore",
decreasing = TRUE, ...) {
  # Check objects
  if (!inherits(object, "confusion"))
    stop("'object' must be a 'confusion' object")

  # General parameters
  ## Number of groups
  Ngp <- nrow(object)

  # Total : TP + TN + FP + FN
  Tot <- sum(object)

  # TP : True positive item : All items on diagonal
  TP <- diag(object)

  # TP + TN : sum of diagonal = All correct identification
  TP_TN <- sum(TP)

  # TP + FP : sum of columns : Automatic classification
  TP_FP <- colSums(object)

  # TP + FN : sum of rows : Manual classification
  TP_FN <- rowSums(object)

  # FP : False positive items
  FP <- TP_FP - TP

  # FN : False negative item
  FN <- TP_FN - TP

  # TN : True Negative = Total - TP - FP - FN
  TN <- rep(Tot, Ngp) - TP - FP - FN

  # The 8 basic ratios
  # Recall = TP / (TP + FN) = 1 - FNR
   Recall <- TP / (TP_FN)

  # Specificity = TN / (TN + FP) = 1 - FPR
  Specificity <- TN / (TN + FP)

  # Precision = TP / (TP + FP) = 1 - FDR
  Precision <- TP / (TP_FP)

  # NPV : Negative predicted value = TN / (TN + FN) = 1 - FOR
  NPV <- TN / (TN + FN)

  # FPR : False positive rate = 1 - Specificity = FP / (FP + TN)
  FPR <- FP / (FP + TN) #1 - Specificity

  # FNR : False negative rate = 1 - Recall = FN / (TP + FN)
  FNR <- FN / (TP + FN) #1 - Recall

  # FDR : False Discovery Rate = 1 - Precision = FP / (TP + FP)
  FDR <- FP / (TP_FP) #1 - Precision

  # FOR : False omission rate = 1 - NPV = FN / (FN + TN)
  FOR <- FN / (FN + TN) #1 - NPV

  # The 4 ratios of ratios
  # LRPT = Likelihood Ratio for Positive Tests = Recall / FPR = Recall /
  # (1 - Specificity)
  LRPT <- Recall / (FPR)

  # LRNT = Likelihood Ratio for Negative Tests = FNR / Specificity =
  # (1 - Recall) / Specificity
  LRNT <- FNR / (Specificity)

  # LRPS : Likelihood Ratio for Positive Subjects = Precision / FOR =
  # Precision / (1 - NPV)
  LRPS <- Precision / (FOR)

  # LRNS : Likelihood Ratio Negative Subjects = FDR / NPV = (1 - Precision) /
  # (1 - FOR)
  LRNS <- FDR / (NPV)

  # Additional statistics
  # F-score = F-measure = F1 score = Harmonic mean of Precision and recall
  Fscore <- 2 * ((Precision * Recall) / (Precision + Recall))
  # F-score is also TP/(TP + (FP + FN) / 2). As such, if TP is null but
  # at least one of FP or FN is not null, F-score = 0. In this case,
  # as both Recall and Precision equal zero, we got NaN => do the correction!
  Fscore[is.nan(Fscore)] <- 0

  # Balanced accuracy = (Sensitivity + Specificity) / 2
  BalAcc <- (Recall + Specificity) / 2

  # MCC : Matthews correlation coefficient
  Sum1 <- TP + FP
  Sum2 <- TP + FN
  Sum3 <- TN + FP
  Sum4 <- TN + FN
  Denominator <- sqrt(Sum1 * Sum2 * Sum3 * Sum4)
  ZeroIdx <- Sum1 == 0 | Sum2 == 0 | Sum3 == 0 | Sum4 == 0
  if (any(!is.na(ZeroIdx)) && any(ZeroIdx))
    Denominator[ZeroIdx] <- 1
  MCC <- ((TP * TN) - (FP * FN)) / Denominator

  # Chisq : Significance
  Chisq <- (((TP * TN) - (FP * FN))^2 * (TP + TN + FP + FN)) /
    ((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN))

  # Automatic classification - Manual classification
  Auto_Manu <- TP_FP - TP_FN

  # Bray-Curtis dissimilarity index
  Bray <- abs(Auto_Manu) / (sum(TP_FP) + sum(TP_FN))

  # General statistics
  # Error = 1 - Accuracy = 1 - ((TP + TN) / (TP + TN + FP + FN))
  Error <- 1 - (TP_TN / Tot)
  # Micro and macro-averaged F-score
  meanRecall <- sum(Recall, na.rm = TRUE) / Ngp
  meanPrecision <- sum(Precision, na.rm = TRUE) / Ngp
  Fmicro <- 2 * meanRecall * meanPrecision / (meanRecall + meanPrecision)
  Fmacro <- sum(Fscore, na.rm = TRUE) / Ngp

  # Take care to avoid missing data for data frame rownames!
  nms <- names(Fscore)
  nms[is.na(nms)] <- "<NA>"
  names(Fscore) <- nms

  # Create a data frame with all results
  res <- data.frame(
    Fscore = Fscore, Recall = Recall, Precision = Precision,
    Specificity = Specificity, NPV = NPV, FPR = FPR, FNR = FNR, FDR = FDR,
    FOR = FOR, LRPT = LRPT, LRNT = LRNT, LRPS = LRPS, LRNS = LRNS,
    BalAcc = BalAcc, MCC = MCC, Chisq = Chisq, Bray = Bray, Auto = TP_FP,
    Manu = TP_FN, A_M = Auto_Manu, TP = TP, FP = FP, FN = FN, TN = TN)

  lev <- rownames(object)
  lev[is.na(lev)] <- "<NA>"
  rownames(res) <- lev

  # Sort the table in function of one parameter... by default Fscore
  if (length(sort.by) && sort.by[1] != FALSE) {
    if (sort.by[1] %in% names(res)) {
      ord <- order(res[, sort.by], decreasing = decreasing)
      res <- res[ord, ]
      lev <- lev[ord]
    } else {
      warning("wrong sort.by: ignored and no sort performed")
    }
  }

  # What type of results should we return?
  if (length(type) && type[1] != "all") {
    okType <- type[type %in% names(res)]
    if (!length(okType))
      stop("Wrong type specified")
    if (length(okType) < length(type))
      warning("one or more wrong types are ignored")
    res <- res[, okType]
    # If the data are reduced to a numeric vector, reinject names
    # and return only this vector
    if (is.numeric(res)) {
      res <- as.numeric(res)
      names(res) <- lev
      attr(res, "stat.type") <- okType
      return(res)
    }
  }

  attr(res, "stats") <- attr(object, "stats")
  attr(res, "stats.weighted") <-
    c(error = Error, Fmicro = Fmicro, Fmacro = Fmacro)

  class(res) <- c("summary.confusion", class(res))
  res
}

#' @rdname confusion
#' @export
#' @method print summary.confusion
print.summary.confusion <- function(x, ...) {
  # General stats on the confusion matrix
  Stats <- attr(x, "stats")
  Error <- round(Stats["error"] * 100, 1)
  cat(Stats["total"], " items classified with ", Stats["truepos"],
    " true positives (error = ", Error, "%)\n",
    sep = "")
  cat("\nGlobal statistics on reweighted data:\n")
  Stats2 <- attr(x, "stats.weighted")
  cat("Error rate: ", round(Stats2["error"] * 100, digits = 1),
    "%, F(micro-average): ", round(Stats2["Fmicro"], digits = 3),
    ", F(macro-average): ", round(Stats2["Fmacro"], digits = 3), "\n\n",
    sep = "")
  X <- x
  class(X) <- class(X)[-1]
  print(X)

  invisible(x)
}

Try the mlearning package in your browser

Any scripts or data that you put into this service are public.

mlearning documentation built on Aug. 31, 2023, 1:09 a.m.