R/confusion.R
In mlearning: Machine learning algorithms with unified interface and confusion matrices

confusion <- function (x, ...)
	UseMethod("confusion")

## TODO: implement weights
.confusion <- function (classes, labels, useNA, prior, ...)
{
	## useNA can be "no", "always" or "ifany", but with the later value
	## one takes the risk to get non square matrix if there are NAs in only
	## on vector of classes => change to "no" or "always", depending if there
	## are missing data or not
	if (useNA == "ifany")
		if (any(is.na(classes))) useNA <- "always" else useNA <- "no"
	res <- table(classes, dnn = labels, useNA = useNA)
	total <- sum(res)
	truePos <- sum(diag(res))
	row.freqs <- rowSums(res)	
	
	## Additional data as attributes
	attr(res, "row.freqs") <- row.freqs
	attr(res, "col.freqs") <- colSums(res)
	attr(res, "levels") <- levels(classes[1, ]) # These are *initial* levels!
	## Final levels may differ if there are empty levels, or NAs!
	attr(res, "prior") <- row.freqs # Initial prior are row.freqs
	attr(res, "stats") <- c(total = total, truepos = truePos,
		error = 1 - (truePos / total))
	
	## This is a confusion object, inheriting from table
	class(res) <- c("confusion", "table")
	
	## Do we rescale the confusion matrix?
	if (!missing(prior)) prior(res) <- prior
	
	res
}
	
confusion.default <- function (x, y = NULL, vars = c("Actual", "Predicted"),
labels = vars, merge.by = "Id", useNA = "ifany", prior, ...)
{	
	## If the object is already a 'confusion' object, return it
	if (inherits(x, "confusion")) {
		if (!missing(y))
			warning("you cannot provide 'y' when 'x' is a 'confusion' object")
		## Possibly rescale it
		if (!missing(prior)) prior(x) <- prior		
		return(x)
	}
	
	## Idem if there is a 'confusion' attribute and no y
	conf <- attr(x, "confusion")
	if (!is.null(conf) && missing(y)) {
		## Possibly reweight it
		if (!missing(prior)) prior(conf) <- prior
		return(conf)
	}	

	## Reworks and check arguments
	vars <- as.character(vars)
	if (length(vars) != 2)
		stop("You must provide exactly 2 strings for 'vars'")
	merge.by <- as.character(merge.by)
	
	## There are three possibilities:
	## 1) A single data frame => use vars
	if (missing(y)) {
		## Special case of a data frame or list of two factors: keep as it is
		if (is.list(x) && length(x) == 2 && is.null(vars)) {
			clCompa <- as.data.frame(x)
			if (missing(labels)) labels <- names(clCompa)
		} else {
			x <- as.data.frame(x)
			## Check that vars exist
			if (is.null(names(x)) || !all(vars %in% names(x)))
				stop("'vars' are not among column names of 'x'")
			## Check that levels of two vars do match
			lev1 <- levels(x[[vars[1]]])
			lev2 <- levels(x[[vars[2]]])
			if (!all(lev1 == lev2)) {
				## If difference is only in the order of both levels, reorder #2
				if (!all(sort(lev1) == sort(lev2))) {
					stop("levels of the two variables in 'x' do not match")
				} else x[[vars[2]]] <- factor(as.character(x[[vars[2]]]),
					levels = lev1)
			}
			clCompa <- data.frame(class1 = x[[vars[1]]], class2 = x[[vars[2]]])
		}
	} else { # y is provided
		## 2) Two vectors of factors (must have same length/same levels)
		if (is.factor(x) && is.factor(y)) {
			## Check length match
			if (length(x) != length(x))
				stop("lengths of 'x' and 'y' are not the same")
			## Check levels match
			lev1 <- levels(x)
			lev2 <- levels(y)
			if (!all(lev1  == lev2)) {
				## If difference is only in the order of both levels, reorder #2
				if (!all(sort(lev1)  == sort(lev1))) {
					stop("'x' and 'y' levels do not match")
				} else y <- factor(as.character(y), levels = lev1)
			}
			clCompa <- data.frame(class1 = y, class2 = x)
		} else {
			## 3) Two data frames => merge first, then use vars
			## Check vars exist
			if (is.null(names(x)) || !(vars[1] %in% names(x)))
				stop("first item of 'vars' is not among names of 'x'")
			if (is.null(names(y)) || !(vars[2] %in% names(y)))
				stop("second item of 'vars' is not among names of 'y'")
			## Check that levels of two vars do match
			lev1 <- levels(x[[vars[1]]])
			lev2 <- levels(y[[vars[2]]])
			if (!all(lev1  == lev2)) {
				## If difference is only in the order of both levels, reorder #2
				if (!all(sort(lev1)  == sort(lev2))) {
					stop("levels of the variables in 'x' and 'y' do not match")
				} else x[[vars[2]]] <- factor(as.character(x[[vars[2]]]),
					levels = lev1)
			}
			## Merge data according to merge.by
			clCompa <- merge(y[, c(vars[2], merge.by)],
				x[, c(vars[1], merge.by)], by = merge.by)
			nc <- ncol(clCompa)
			clCompa <- clCompa[, c(nc - 1, nc)]
			## Are there common objects left?
			if (!nrow(clCompa)) stop("no common objects between 'x' and 'y'")
		}
	}
	
	## Construct the confusion object
	if (missing(prior)) {
		.confusion(classes = clCompa, labels = labels, useNA = useNA, ...)
	} else {
		.confusion(classes = clCompa, labels = labels, useNA = useNA,
			prior = prior, ...)
	}
}

confusion.mlearning <- function (x, y = response(x),
labels = c("Actual", "Predicted"), useNA = "ifany", prior, ...) {
	## Check labels
	labels <- as.character(labels)
	if (length(labels) != 2)
		stop("You must provide exactly 2 character strings for 'labels'")
	
	## Extract class2 by using predict on the mlearning object
	class2 <- predict(x, ...)
	
	## Check that both variables are of same length and same levels
	if (length(y) != length(class2))
		stop("lengths of 'x' and 'y' are not the same")
	lev1 <- levels(y)
	lev2 <- levels(class2)
	if (!all(lev1  == lev2)) {
		## If difference is only in the order of both levels, reorder #2
		if (!all(sort(lev1)  == sort(lev2))) {
			stop("levels of 'x' and 'y' do not match")
		} else class2 <- factor(as.character(class2), levels = lev1)
	}
	
	## Construct the confusion object
	if (missing(prior)) {
		.confusion(data.frame(class1 = y, class2 = class2),
			labels = labels, useNA = useNA, ...)
	} else {
		.confusion(data.frame(class1 = y, class2 = class2),
			labels = labels, useNA = useNA, prior = prior, ...)
	}
}

prior <- function (object, ...)
	UseMethod("prior")

prior.confusion <- function (object, ...)
	attr(object, "prior")

`prior<-`<- function (object, ..., value)
	UseMethod("prior<-")

`prior<-.confusion`<- function (object, ..., value)
{
	rsums <- rowSums(object)
	if (!length(value)) { # value is NULL or of zero length
		## Reset prior to original frequencies
		value <- attr(object, "row.freqs")
		res <- round(object / rsums * value)
	
	} else if (is.numeric(value)) { # value is numeric
		
		if (length(value) == 1) { # value is a single number
			if (is.na(value) || !is.finite(value) || value <= 0)
				stop("value must be a finite positive number")
			res <- object / rsums * as.numeric(value)
		
		} else { # value is a vector of numerics
			## It must be either of the same length as nrow(object) or of
			## levels(objects)
			l <- length(value)
			n <- names(value)
			l2 <- levels(object)
			
			if (l == nrow(object)) {
				## If the vector is named, check names and possibly reorder it
				if (length(n))
					if (all(n %in% rownames(object))) {
						value <- value[rownames(object)]
					} else stop("Names of the values do not match levels in the confusion matrix")
			
			} else if (l == length(l2)) {
				## Assume names as levels(object), if they are not provides
				if (!length(n)) names(value) <- n <- l2
				
				## If the vector is named, check names match levels
				if (length(n))
					if (all(n %in% l2)) {
						## Extract levels used in the confusion matrix
						value <- value[rownames(object)]
					} else stop("Names of the values do not match levels in the confusion matrix")

			} else stop("length of 'value' do not match the number of levels in the confusion matrix")	
			
			res <- object / rsums * as.numeric(value)
		}
		
	} else stop("value must be a numeric vector, a single number or NULL")
	
	attr(res, "prior") <- value
	## Take care to rows with no items! => put back zeros!
	res[rsums == 0] <- 0
	res
}

print.confusion <- function (x, sums = TRUE, error.col = sums, digits = 0,
sort = "ward", ...)
{
	## General stats on the confusion matrix
	Stats <- attr(x, "stats")
	Error <- round(Stats["error"] * 100, 1)
	cat(Stats["total"], " items classified with ", Stats["truepos"],
		" true positives (error rate = ", Error, "%)\n",
		sep = "")
	row.freqs <- attr(x, "row.freqs")
	if (!all(attr(x, "prior") == row.freqs)) {
		cat("with initial row frequencies:\n")
		print(row.freqs)
		cat("Rescaled to:\n")
	}
	
	## Print the confusion matrix itself
	X <- x
	class(X) <- "table"

    n <- ncol(X)

	## Do we sort items?
	if (length(sort) && !is.na(sort) && sort != FALSE && sort != "") {
		## Grouping of items
		confuSim <- X + t(X)
		confuSim <- 1 - (confuSim / sum(confuSim) * 2)
		confuDist <- structure(confuSim[lower.tri(confuSim)], Size = n,
			Diag = FALSE, Upper = FALSE, method = "confusion", call = "",
			class = "dist")
		order <- hclust(confuDist, method = sort)$order
		X <- X[order, order]
	}
	
	## Change row and column names to a more compact representation
	nbrs <- formatC(1:ncol(X), digits = 1, flag = "0")
	colnames(X) <- nbrs
	rownames(X) <- paste(nbrs, rownames(X))
	
	## Add sums?
	if (isTRUE(as.logical(sums))) {
		## Calculate error (%)
		ErrorTot <- (1 - (sum(diag(x)) / sum(x))) * 100
		Errors <- as.integer(round(c((1 - diag(X) / apply(X, 1, sum)) * 100,
			ErrorTot), 0))
		## ... and add row and column sums
		X <- addmargins(X, FUN = list(`(sum)` = sum), quiet = TRUE)
	} else Errors <- as.integer(round((1 - diag(X) / apply(X, 1, sum)) * 100, 0))
	
	## Add class errors?
	if (isTRUE(as.logical(error.col))) {
		X <- as.table(cbind(X, `(FNR%)` = Errors))
		dn <- dimnames(X)
		names(dn) <- names(dimnames(x))
		dimnames(X) <- dn
	}
	print(round(X, digits))
	
	## Return the original object invisibly
	invisible(x)
}

plot.confusion <- function (x, y = NULL,
type = c("image", "barplot", "stars", "dendrogram"), stat1 = "Recall",
stat2 = "Precision", names, ...)
{
	if (is.null(y)) type <- match.arg(type)[1] else type <- "stars"
	if (missing(names)) names <- c(substitute(x), substitute(y))
	res <- switch(type,
		image = confusionImage(x, y, ...),
		barplot = confusionBarplot(x, y, ...),
		stars = confusionStars(x, y, stat1 = stat1, stat2 = stat2, names, ...),
		dendrogram = confusionDendrogram(x, y, ...),
		stop("'type' must be 'image', 'barplot', 'stars' or 'dendrogram'"))
	invisible(res)
}

## Representation of the confusion matrix
confusionImage <- function (x, y = NULL, labels = names(dimnames(x)),
sort = "ward", numbers = TRUE, digits = 0, mar = c(3.1, 10.1, 3.1, 3.1),
cex = 1, asp = 1, colfun, ncols = 41, col0 = FALSE, grid.col = "gray", ...)
{
	if (!inherits(x, "confusion"))
        stop("'x' must be a 'confusion' object")

	if (!is.null(y))
		stop("cannot use a second classifier 'y' for this plot")
	
	## Default labels in case none provided
	if (is.null(labels)) labels <- c("Actual", "Predicted")
	
	## Default color function
	## (greens for correct values, reds for errors, white for zero)
	if (missing(colfun)) colfun <- function (n, alpha = 1, s = 0.9, v = 0.9) {
		if ((n <- as.integer(n[1L])) <= 0) return(character(0L))
		## Initial (red) and final (green) colors with white in between
		cols <- c(hsv(h = 0, s = s, v = v, alpha = alpha),   # Red
				  hsv(h = 0, s = 0, v = v, alpha = alpha),   # White
				  hsv(h = 2/6, s = s, v = v, alpha = alpha)) # Green
		## Use a color ramp from red to white to green
		return(colorRampPalette(cols)(n))
	}
	
    n <- ncol(x)

	## Do we sort items?
	if (length(sort) && !is.na(sort) && sort != FALSE && sort != "") {
		## Grouping of items
		confuSim <- x + t(x)
		confuSim <- 1 - (confuSim / sum(confuSim) * 2)
		confuDist <- structure(confuSim[lower.tri(confuSim)], Size = n,
			Diag = FALSE, Upper = FALSE, method = "confusion", call = "",
			class = "dist")
		order <- hclust(confuDist, method = sort)$order
		x <- x[order, order]
	}
	
	## Recode row and column names for more compact display
	colnames(x) <- names2 <- formatC(1:n, digits = 1, flag = "0")
	rownames(x) <- names1 <- paste(rownames(x), names2)
	
	## Transform for better colorization
	## (use a transfo to get 0, 1, 2, 3, 4, 7, 10, 15, 25+)
	confuCol <- x
	confuCol <- log(confuCol + .5) * 2.33
	confuCol[confuCol < 0] <- if (isTRUE(as.logical(col0))) 0 else NA
	confuCol[confuCol > 10] <- 10
	
	## Negative values (in green) on the diagonal (correct IDs)
	diag(confuCol) <- -diag(confuCol)	
	
	## Make an image of this matrix
	opar <- par(no.readonly = TRUE)
	on.exit(par(opar))
	par(mar = mar, cex = cex)
	image(1:n, 1:n, -t(confuCol[nrow(confuCol):1, ]), zlim = c(-10, 10),
		asp = asp, bty = "n", col = colfun(ncols), xaxt = "n", yaxt = "n",
		xlab = "", ylab = "", ...)
	
	## Indicate the actual numbers
	if (isTRUE(as.logical(numbers))) {
		confuTxt <- as.character(round(x[n:1, ], digits = digits))
		confuTxt[confuTxt == "0"] <- ""
		text(rep(1:n, each = n), 1:n, labels = confuTxt)
	}
	
	## Add the grid
	if (length(grid.col)) {
		abline(h = 0:n + 0.5, col = grid.col)
		abline(v = 0:n + 0.5, col = grid.col)
	}
	
	## Add the axis labels
	axis(1, 1:n, labels = names2, tick =  FALSE, padj = 0)
	axis(2, 1:n, labels = names1[n:1], tick =  FALSE, las = 1, hadj = 1)
	axis(3, 1:n, labels = names2, tick =  FALSE)
	axis(4, 1:n, labels = names2[n:1], tick =  FALSE, las = 1, hadj = 0)
	
	## Add labels at top-left
	if (length(labels)) {
		if (length(labels) != 2) stop("You must provide two labels")
		mar[2] <- 1.1
		par (mar = mar, new = TRUE)
		plot(0, 0, type = "n", xaxt = "n", yaxt = "n", bty = "n")
		mtext(paste(labels, collapse = " // "), adj = 0, line = 1, cex = cex)
	}
	
	## Return the confusion matrix, as displayed, in text format
	invisible(x)
}

## Confusion barplot with recall and precision in green bars
## TODO: various bar rescaling possibilities!!!
confusionBarplot <- function (x, y = NULL,
col = c("PeachPuff2", "green3", "lemonChiffon2"), mar = c(1.1, 8.1, 4.1, 2.1),
cex = 1, cex.axis = cex, cex.legend = cex,
main = "F-score (precision versus recall)", numbers = TRUE, min.width = 17, ...)
{
    if (!inherits(x, "confusion"))
        stop("'x' must be a 'confusion' object")
		
	if (!is.null(y))
		stop("cannot use a second classifier 'y' for this plot")
	
	## F-score is 2 * recall * precision / (recall + precision), ... but also
	## F-score = TP / (TP + FP/2 + FN/2). We represent this in a barplot
	TP <- tp <- diag(x)
	FP <- fp <- colSums(x) - tp
	FN <- fn <- rowSums(x) - tp
	## In case we have missing data...
	fn[is.na(tp)] <- 50
    fp[is.na(tp)] <- 50
    tp[is.na(tp)] <- 0

	## We scale these values, so that the sum fp/2 + tp + fn/2 makes 100
	scale <- fp/2 + tp + fn/2
    res <- matrix(c(fp/2 / scale * 100, tp / scale * 100, fn/2 / scale * 100),
		ncol = 3)
    colnames(res) <- c("FPcontrib", "Fscore", "FNcontrib") # In %
    Labels <- names(attr(x, "col.freqs"))
    
	## The graph is ordered in decreasing F-score values
	pos <- order(res[, 2], decreasing = TRUE)
    res <- res[pos, ]
    FN <- FN[pos]
    FP <- FP[pos]
    TP <- TP[pos]
    Labels <- Labels[pos]
    l <- length(FN)
    
	## Plot the graph
	omar <- par("mar")
    on.exit(par(omar))
    par(mar = mar)
    ## The barplot
	barplot(t(res), horiz = TRUE, col = col, xaxt = "n", las = 1, space = 0,
		main = main, ...)
    ## The line that shows where symmetry is
	lines(c(50, 50), c(0, l), lwd = 1)
    
	## Do we add figures into the plot?
	if (isTRUE(as.logical(numbers))) {
		## F-score is written in the middle of the central bar
		xpos <- res[, 1] + res[, 2] / 2
		text(xpos, 1:l - 0.5, paste("(", round(res[, 2]), "%)", sep = ""),
			adj = c(0.5, 0.5), cex = cex)
		
		## Add the number of FP and FN to the left and right, respectively
		text(rep(1, l), 1:l - 0.5, round(FP), adj = c(0, 0.5), cex = cex)
		text(rep(99, l), 1:l - 0.5, round(FN), adj = c(1, 0.5), cex = cex)
	}

    ## Add a legend (if cex.legend is not NULL)
	if (length(cex.legend)) {
		legend(50, l * 1.05, legend = c("False Positives",
			"2*TP (F-score %)", "False Negatives"), cex = cex.legend, xjust = 0.5, yjust = 1,
			fill = col, bty = "n", horiz = TRUE)
	}
    
	## Add axes if cex.axis is not NULL
	if (length(cex.axis))
		axis(2, 1:l - 0.5, tick = FALSE, las = 1, cex.axis = cex.axis,
			labels = Labels)
    
    invisible(res)
}

## TODO: check the box around the legend
confusionStars <- function(x, y = NULL, stat1 = "Recall", stat2 = "Precision",
names, main, col = c("green2", "blue2", "green4", "blue4"), ...)
{
    ## Check objects
	if (!inherits(x, "confusion"))
        stop("'x' must be a 'confusion' object")
    if (!is.null(y) && !inherits(x, "confusion"))
        stop("'y' must be NULL or a 'confusion' object")
	
	## Check stats
	SupportedStats <- c("Recall", "Precision", "Specificity",
        "NPV", "FPR", "FNR", "FDR", "FOR")
    if (!stat1 %in% SupportedStats)
        stop("stats1 must be one of Recall, Precision, Specificity, NPV, FPR, FNR, FDR, FOR")
    if (!stat2 %in% SupportedStats)
        stop("stats2 must be one of Recall, Precision, Specificity, NPV, FPR, FNR, FDR, FOR")
	
	## Choose colors TODO: add a colors argument!
	Blue <- topo.colors(16)
    Green <- terrain.colors(16)
    Stat <- summary(x)
    if (!is.null(y)) { # Comparison of two confusion matrices
		Stat2 <- summary(y)
		Data <- data.frame(Stat2[, stat1], Stat[, stat1], Stat[, stat2],
			Stat2[, stat2])
		Data <- rbind(Data, rep(0, 4))
		colnames(Data) <- paste(rep(c(stat1, stat2), each = 2), c(2, 1, 1, 2))
		if (missing(main)) { # Calculate a suitable title
			if (missing(names)) {
				names <- c(substitute(x), substitute(y))
			} else if (length(names) != 2)
				stop("you must provide two nmaes for the two compared classifiers")
			names <- as.character(names)
			main <- paste("Groups comparison (1 =", names[1], ", 2 =",
			names[2], ")")
		}
		if (length(col) >= 4) {
			col <- col[c(3, 1, 2, 4)]
		} else stop("you must provide four colors for the two statistics and the two classifiers")
	} else { # Single confusion matrix
		Data <- data.frame(Stat[, stat1], Stat[, stat2])
		Data <- rbind(Data, rep(0, 2))
		colnames(Data) <- c(stat1, stat2)
		if (missing(main))
			main <- paste("Groups comparison")
		if (length(col) >= 2) {
			col <- col[1:2]
		} else stop("you must provide two colors for the two statistics")
	}
    rownames(Data) <- c(rownames(Stat), " ")
	## Note: last one is empty box for legend
	
	## Save graph parameters and restore on exit
	opar <- par(no.readonly = TRUE)
	on.exit(par(opar))
	
	## Calculate key location
	kl <- stars(Data, draw.segments = TRUE, scale = FALSE,
		len = 0.8, main = main, col.segments = col, plot = FALSE, ...)
	kcoords <- c(max(kl[, 1]), min(kl[, 2]))
	kspan <- apply(kl, 2, min) / 1.95
	
	## Draw the plot	
	res <- stars(Data, draw.segments = TRUE, scale = FALSE, key.loc = kcoords,
		len = 0.8, main = main, col.segments = col, ...)
	
	## Draw a rectangle around key to differentiate it from the rest
	rect(kcoords[1] - kspan[1], kcoords[2] - kspan[2], kcoords[1] + kspan[1],
		kcoords[2] + kspan[2])
	
	res
}

## Representation of the confusion matrix as a dendrogram
confusionDendrogram <- function (x, y = NULL, labels = rownames(x),
sort = "ward", main = "Groups clustering", ...)
{
    ## Check objects
	if (!inherits(x, "confusion"))
        stop("'x' must be a 'confusion' object")
	if (!is.null(y))
		stop("cannot use a second classifier 'y' for this plot")	
	
    ## Transform the confusion matrix into a symmetric matrix
    ConfuSim <- x + t(x)
    ConfuSim <- 1 - (ConfuSim / sum(ConfuSim) * 2)
	
	
	## Create the structure of a "dist" object
    ConfuDist <- structure(ConfuSim[lower.tri(ConfuSim)], Size = nrow(x),
        Diag = FALSE, Upper = FALSE, method = "confusion", call = "",
        class = "dist")
    
	## method :"ward", "single", "complete", "average", "mcquitty",
	## "median" or "centroid"
    HC <- hclust(ConfuDist, method = as.character(sort)[1])
    plot(HC, labels = labels, main = main, ...)
    
	invisible(HC)
}

## Table with stats per groupe precision, recall, etc
summary.confusion <- function(object, type = "all", sort.by = "Fscore",
decreasing = TRUE, ...)
{
    ## Check objects
	if (!inherits(object, "confusion"))
        stop("'object' must be a 'confusion' object")
	
	## General parameters
    ## Number of groups
    Ngp <- nrow(object)
    
    ## Total : TP + TN + FP + FN
    Tot <- sum(object)
    
    ## TP : True positive item : All items on diagonal
    TP <- diag(object)
    
    ## TP + TN : sum of diagonal = All correct identification
    TP_TN <- sum(TP)
    
    ## TP + FP : sum of columns : Automatic classification
    TP_FP <- colSums(object)
    
    ## TP + FN : sum of rows : Manual classification
    TP_FN <- rowSums(object)
    
    ## FP : False positive items
    FP <- TP_FP - TP    

    ## FN : False negative item
    FN <- TP_FN - TP

    ## TN : True Negative = Total - TP - FP - FN
    TN <- rep(Tot, Ngp) - TP - FP - FN

    ## The 8 basic ratios
    ## Recall = TP / (TP + FN) = 1 - FNR
    Recall <- TP / (TP_FN)

    ## Specificity = TN / (TN + FP) = 1 - FPR
    Specificity <- TN / (TN + FP)

    ## Precision = TP / (TP + FP) = 1 - FDR
    Precision <- TP / (TP_FP)
    
    ## NPV : Negative predicted value = TN / (TN + FN) = 1 - FOR
    NPV <- TN / (TN + FN)
    
    ## FPR : False positive rate = 1 - Specificity = FP / (FP + TN) 
    FPR <- FP / (FP + TN) #1 - Specificity
    
    ## FNR : False negative rate = 1 - Recall = FN / (TP + FN)
    FNR <- FN / (TP + FN) #1 - Recall

    ## FDR : False Discovery Rate = 1 - Precision = FP / (TP + FP)
    FDR <- FP / (TP_FP) #1 - Precision
    
    ## FOR : False omission rate = 1 - NPV = FN / (FN + TN)
    FOR <- FN / (FN + TN) #1 - NPV

    ## The 4 ratios of ratios
    ## LRPT = Likelihood Ratio for Positive Tests = Recall / FPR = Recall /
	## (1 - Specificity)
    LRPT <- Recall / (FPR)
    
    ## LRNT = Likelihood Ratio for Negative Tests = FNR / Specificity =
	## (1 - Recall) / Specificity
    LRNT <- FNR / (Specificity)
    
    ## LRPS : Likelihood Ratio for Positive Subjects = Precision / FOR =
	## Precision / (1 - NPV)
    LRPS <- Precision / (FOR)
    
    ## LRNS : Likelihood Ratio Negative Subjects = FDR / NPV = (1 - Precision) /
	## (1 - FOR)
    LRNS <- FDR / (NPV)
    
	## Additional statistics
    ## F-score = F-measure = F1 score = Harmonic mean of Precision and recall
    Fscore <- 2 * ((Precision * Recall) / (Precision + Recall))
	## F-score is also TP/(TP + (FP + FN) / 2). As such, if TP is null but
	## at least one of FP or FN is not null, F-score = 0. In this case,
	## as both Recall and Precision equal zero, we got NaN => do the correction!
	Fscore[is.nan(Fscore)] <- 0
    
    ## Balanced accuracy = (Sensitivity + Specificity) / 2
    BalAcc <- (Recall + Specificity) / 2

    ## MCC : Matthews correlation coefficient
    Sum1 <- TP + FP
    Sum2 <- TP + FN
    Sum3 <- TN + FP
    Sum4 <- TN + FN
    Denominator <- sqrt(Sum1 * Sum2 * Sum3 * Sum4)
    ZeroIdx <- Sum1 == 0 | Sum2 == 0 | Sum3 == 0 | Sum4 == 0
    if (any(ZeroIdx)) Denominator[ZeroIdx] <- 1
    MCC <- ((TP * TN) - (FP * FN)) / Denominator
    
    ## Chisq : Significance
    Chisq <- (((TP * TN) - (FP * FN))^2 * (TP + TN + FP + FN)) /
		((TP + FP) * (TP + FN) * (TN + FP) * (TN + FN))

    ## Automatic classification - Manual classification
    Auto_Manu <- TP_FP - TP_FN
    
    ## Bray-Curtis dissimilarity index
    Bray <- abs(Auto_Manu) / (sum(TP_FP) + sum(TP_FN))
	
	## General statistics
    ## Error = 1 - Accuracy = 1 - ((TP + TN) / (TP + TN + FP + FN))
    Error <- 1 - (TP_TN / Tot)
	## Micro and macro-averaged F-score
	meanRecall <- sum(Recall, na.rm = TRUE) / Ngp
	meanPrecision <- sum(Precision, na.rm = TRUE) / Ngp
	Fmicro <- 2 * meanRecall * meanPrecision / (meanRecall + meanPrecision)
	Fmacro <- sum(Fscore, na.rm = TRUE) / Ngp
    
    ## Take care to avoid missing data for data frame rownames!
	nms <- names(Fscore)
	nms[is.na(nms)] <- "<NA>"
	names(Fscore) <- nms
	
	## Create a data frame with all results
	res <- data.frame(
	   Fscore = Fscore, Recall = Recall, Precision = Precision,
	   Specificity = Specificity, NPV = NPV, FPR = FPR, FNR = FNR, FDR = FDR,
	   FOR = FOR, LRPT = LRPT, LRNT = LRNT, LRPS = LRPS, LRNS = LRNS, 
	   BalAcc = BalAcc, MCC = MCC, Chisq = Chisq, Bray = Bray, Auto = TP_FP,
	   Manu = TP_FN, A_M = Auto_Manu, TP = TP, FP = FP, FN = FN, TN = TN)

    lev <- rownames(object)
    lev[is.na(lev)] <- "<NA>"
	rownames(res) <- lev
	
	## Sort the table in function of one parameter... by default Fscore
	if (length(sort.by) && sort.by != FALSE) {
		if (sort.by %in% names(res)) {
			ord <- order(res[, sort.by], decreasing = decreasing)
			res <- res[ord, ]
			lev <- lev[ord]
		} else warning("wrong sort.by: ignored and no sort performed")
	}
	
	## What type of results should we return?
	if (length(type) && type != "all") {
		okType <- type[type %in% names(res)]
		if (!length(okType)) stop("Wrong type specified")
		if (length(okType) < length(type))
			warning("one or more wrong types are ignored")
		res <- res[, okType]
		## If the data are reduced to a numeric vector, reinject names
		## and return only this vector
		if (is.numeric(res)) {
			res <- as.numeric(res)
			names(res) <- lev
			attr(res, "stat.type") <- okType
			return(res)
		}
	}

    attr(res, "stats") <- attr(object, "stats")
    attr(res, "stats.weighted") <-
		c(error = Error, Fmicro = Fmicro, Fmacro = Fmacro)
    
    class(res) <- c("summary.confusion", class(res))
	res
}

print.summary.confusion <- function (x, ...)
{
	## General stats on the confusion matrix
	Stats <- attr(x, "stats")
	Error <- round(Stats["error"] * 100, 1)
	cat(Stats["total"], " items classified with ", Stats["truepos"],
		" true positives (error = ", Error, "%)\n",
		sep = "")
	cat("\nGlobal statistics on reweighted data:\n")
	Stats2 <- attr(x, "stats.weighted")
	cat("Error rate: ", round(Stats2["error"] * 100, digits = 1),
		"%, F(micro-average): ", round(Stats2["Fmicro"], digits = 3),
		", F(macro-average): ", round(Stats2["Fmacro"], digits = 3), "\n\n",
		sep = "")
	X <- x
	class(X) <- class(X)[-1]
	print(X)
	
	invisible(x)
}
Any scripts or data that you put into this service are public.
mlearning documentation built on May 2, 2019, 6:05 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
mlearning
Machine learning algorithms with unified interface and confusion matrices

R/confusion.R
In mlearning: Machine learning algorithms with unified interface and confusion matrices

Try the mlearning package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

mlearning Machine learning algorithms with unified interface and confusion matrices

R/confusion.R In mlearning: Machine learning algorithms with unified interface and confusion matrices

Try the mlearning package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

mlearning
Machine learning algorithms with unified interface and confusion matrices

R/confusion.R
In mlearning: Machine learning algorithms with unified interface and confusion matrices