Nothing
#' @title Logarithmic loss (logLoss)
#' @description Evaluation of estimate quality in binomial models using cross-entropy or log likelihood loss
#'
#' @param p vector of predicted probabilities {0-1}
#' @param y vector of observed binomial values {0,1}
#' @param likelihood (FALSE/TRUE) return log likelihood loss, default is (FALSE) for log loss
#' @param global (TRUE/FALSE) return local or global log loss values, if FALSE local values are returned
#' @param eps epsilon scaling factor to avoid NaN values
#'
#' @return If likelihood TRUE the log likelihood loss will be returned. If global FALSE, a list with
#' observed (y), probability (p) and log loss (log.loss) otherwise, a vector of global log loss value
#'
#' @note The log loss metric, based on cross-entropy, measures the quality of predictions rather than
#' the accuracy. Effectively, the log loss is a measure that gages additional error coming the
#' estimates as opposed to the true values.
#'
#' @note As the estimated probability diverges from its observed value the log loss increases with an
#' expected of [0-1] where 0 would be a perfect model. For a single sample with true value yt
#' in {0,1} and estimated probability yp that yt = 1, the log loss is derived as:
#' -log P(yt | yp) = -(yt log(yp) + (1 - yt) log(1 - yp)) eps is used where log loss is undefined
#' for p=0 or p=1, so probabilities are clipped to: max(eps, min(1 - eps, p)) If likelihood is output,
#' the eps and local arguments are ignored.
#'
#' @author Jeffrey S. Evans <jeffrey_evans<at>tnc.org>
#'
#' @references C.M. Bishop (2006). Pattern Recognition and Machine Learning. Springer, p. 209.
#'
#' @examples
#'
#' require(randomForest)
#' data(iris)
#' iris$Species <- ifelse( iris$Species == "versicolor", 1, 0 )
#' # Add some noise
#' idx1 <- which(iris$Species %in% 1)
#' idx0 <- which( iris$Species %in% 0)
#' iris$Species[sample(idx1, 2)] <- 0
#' iris$Species[sample(idx0, 2)] <- 1
#'
#' ( mdl <- randomForest(x=iris[,1:4], y=as.factor(iris[,"Species"])) )
#'
#' # Global log loss
#' logLoss(y = iris$Species, p = predict(mdl, iris[,1:4], type="prob")[,2])
#'
#' # Local log loss
#' ( ll <- logLoss(y = iris$Species, p = predict(mdl, iris[,1:4],
#' type="prob")[,2], global = FALSE) )
#'
#' # Log likelihood loss
#' logLoss(y = iris$Species, p = predict(mdl, iris[,1:4],
#' type="prob")[,2], likelihood = TRUE)
#'
#' @export
logLoss <- function(y, p, likelihood = FALSE, global = TRUE, eps = 1e-15) {
if( length(p) != length(y) ) stop("Error: y and p are not the same length")
if( min(p) < 0 | max(p) > 1) stop("p is out of {0-1} probability range")
if( !is.numeric(y) ) y <- as.numeric(as.character(y))
if( min(y) < 0 | max(y) > 1) stop("y is not binomial {0,1}")
loglike.loss <- function(y, x) {
if(class(y) == "factor" | class(y) == "character")
y <- as.numeric(as.character(y))
x <- 1/(1 + exp(-x))
grad <- x - y
hess <- x * (y - x)
return(hess)
}
if( likelihood == FALSE) {
if( global == TRUE) {
p <- matrix(sapply(p, function(x) max(eps, x)), nrow = length(p))
p <- matrix(sapply(p, function(x) min(1 - eps, x)), nrow = length(p))
ll <- sum(y * log(p) + (1 - y) * log(1 - p))
ll <- ll * -1 / length(y)
class(ll) <- "global.log.loss"
} else {
ll <- vector()
for( i in 1:length(p)) {
ll <- append(ll, pmin(pmax(p[i], eps), 1 - eps) - (sum(y[i] * log(p[i]) +
(1 - y[i]) * log(1 - p[i]))) / length(y[i]) )
}
ll[is.nan(ll)] <- 0
ll <- list(y = y, p = p, log.loss = ll)
class(ll) <- "local.log.loss"
}
} else {
ll <- loglike.loss(y, p)
}
return(ll)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.