#' @title Builds the ROC movie (ROCM) an animated sequence of ROC curves.
#' @description This function computes the sequence of ROC curves which form the ROC Movie and produces a GIF animated ROCM.
#' @details The ROC movie can be used to visualize the performance of a real valued foreacsting problem. Therefore, a sequence of ROC curves is generated which can than be combined into a GIF animation. Each entry of the list consist of two vectors of length 1000 containing the values of farate (1-Specificity) and hitrate (sensitivity) and three values, namely the associated auc value, the weight and the threshold.
#' @param response a numeric vector of real valued responses.
#' @param predictor a numeric vector of the same length than \code{response}, containing real valued predictions for each observation.
#' @param a selects a subset of all ROC curves for the ROC movie with at least \code{a} and at most \code{a+b} ROC curves.
#' @param b selects a subset of all ROC curves for the ROC movie with at least \code{a} and at most \code{a+b} ROC curves.
#' @param object if TRUE a list of ROC curves is returned (default \code{object = TRUE}).
#' @param gif if TRUE a gif animation is created.
#' @param movie.name name of the movie (with extension).
#' @param ... parameters to control the behavior of the GIF animation using the external function ani.option from \link{animation}.
#'
#' @importFrom stats approx
#' @importFrom animation ani.options saveGIF
#' @importFrom graphics plot
#'
#' @return if \code{object = TRUE}, this function returns a list of ROC curves.
#' @export
#'
#' @examples
#' \dontrun{
#' data(longley)
#' response = longley$Employed
#' predictor = longley$GNP
#' rocm(response, predictor)}
rocm <- function(response,
predictor,
a = NULL,
b = NULL,
object = FALSE,
gif = TRUE,
movie.name = "animation.gif",
...) {
if (!is.vector(predictor) || !is.vector(response)) {
stop("Input must be a vector")
}
if (!(is.numeric(predictor) || is.logical(predictor))) {
stop("predictor must be numeric")
}
if (!(is.numeric(response) || is.logical(response))) {
stop("response must be numeric")
}
if (anyNA(response) || anyNA(predictor)) {
stop("missing values in the data")
}
if (!is.logical(object)) {
stop("invalid input for object")
}
if (!is.logical(gif)) {
stop("invalid input for object")
}
n <- length(response)
if (n != length(predictor)) {
stop("response and predictor should have the same length")
}
response_order <- order(response, decreasing=FALSE)
response <- response[response_order]
predictor <- predictor[response_order]
# Lenght encoding
Encoding = rle(response)
thresholds <- Encoding$values[-1]
N <- length(thresholds)
if (N == 0) {
stop("response must have more than one level")
}
if (is.null(a) && N <= 400) {
a = N
} else if (is.null(a) && N > 400) {
a = 400
}
if (is.null(b)) {
b = 1
}
if (a < 2 || a > N || round(a)!=a) {
stop("invalid value for a")
}
if (b <= 0 || b > n || round(b)!=b) {
stop("invalid value for b")
}
# find set C of ROC curves
class_length <- Encoding$lengths[-1]
s <- floor((N - 1) / (a - 1))
indx_setCa <- seq(1, (1 + (a - 1) * s), s)
indx_setCb <- which(class_length>n/b)
indxsetC <- sort(unique(c(indx_setCa, indx_setCb)))
# get number of zeros and ones for each binary problem included in the set C
ncontrols <- (which(duplicated(response) == FALSE) - 1)[-1]
ncontrols_split <- ncontrols[indxsetC]
thresholds_split <- thresholds[indxsetC]
group_length <- c(ncontrols_split) - c(0, ncontrols_split[-length(ncontrols_split)])
groups <- rep(seq(1, length(group_length),1), group_length)
# compute weight for each cnsidered binary problem
weights <- (ncontrols_split * (n - ncontrols_split))
weights_scaled <- weights/ max(ncontrols*(n-ncontrols))
# compute classes of predictor
Ranking_predictor_1 <- rank(predictor, ties.method = "first")
Classes_predictor <- cumsum(duplicated(sort(predictor))==FALSE)[Ranking_predictor_1]
# create list where each list element corresponds to a binary problem.
# Each list element contains only classes of predictor that correspond to a respond value of zero
# In each list element the class values are sorted and the difference between neighbouring values is computed
Split_classes_predictor <- split(Classes_predictor[1:length(groups)], groups)
Split_classes_predictor_ordered <- lapply(Split_classes_predictor, function(x){c(0, sort(x))})
rm(Split_classes_predictor)
Split_classes_predictor_ordered_diff <- lapply(Split_classes_predictor_ordered, function(x){x[-1] - x[-length(x)]})
rm(Split_classes_predictor_ordered)
# compute first roc curve
order_predictor <- order(predictor, decreasing = TRUE)
first_threshold <- thresholds_split[1]
response_binary <- response[order_predictor] >= first_threshold
dups <- rev(duplicated(rev(predictor[order_predictor])))
tp <- c(0, cumsum(response_binary == 1)[!dups])
fp <- c(0, cumsum(response_binary == 0)[!dups])
truepositive <- rev(tp)
falsepositive <- rev(fp)
# roc curve is only computed on "space_size=1000" equidistant values in the interval [0,1]
space_size <- 1000
interpoint <- seq(0, 1, (1 / space_size))
first_roc_curve <- approx(x = fp/ncontrols_split[1], y = tp/(n-ncontrols[1]), xout = interpoint, method = "linear", ties = "ordered")
hitrate <- first_roc_curve$y
# save plot as first element in a list of roc curves. Include information such as auc, weight and threshold.
sum_tp_fp <- falsepositive + truepositive
name <- paste("roc_curve_",1,sep="")
auc <- round(trap(interpoint, hitrate),2)
w <- round(weights_scaled[1],2)
z <- round(thresholds_split[1],2)
rocm_list = list()
roc_single <- list(farate = c(0,interpoint), hitrate = c(0,hitrate), auc = auc, weight = w, threshold = z)
rocm_list[[name]] <- roc_single
# Use for-loop to compute the rest of the ROC curves.
for (i in 2:length(indxsetC)) {
m <- length(Split_classes_predictor_ordered_diff[[i]])
sum_indicator <- rep(seq(m,1,-1), Split_classes_predictor_ordered_diff[[i]])
sequence_to_change <- length(sum_indicator)
truepositive[1:sequence_to_change] <- truepositive[1:sequence_to_change] - sum_indicator
farate <- (sum_tp_fp - truepositive) / ncontrols_split[i]
roc_approx <- approx(x = rev(farate), y = rev(truepositive) / (n-ncontrols_split[i]), xout = interpoint, method = "linear", ties = "ordered")
hitrate <- roc_approx$y
auc <- round(trap(interpoint, hitrate),2)
w <- round(weights_scaled[i],2)
z <- round(thresholds_split[i],2)
name <- paste("roc_curve_",i,sep="")
roc_single <- list(farate = c(0,interpoint), hitrate = c(0,hitrate), auc = auc, weight = w, threshold = z)
rocm_list[[name]] <- roc_single
}
# If gif is not working the list of ROC curves can be returned
if(object == TRUE) {
return(rocm_list)
}
# Directly oputputs a gif animation by transforming the list of ROC curves into a sequence of plots.
if(gif == TRUE) {
uroc_object <- uroc(response, predictor)
auc <- round(trap(uroc_object$farate, uroc_object$hitrate), 2)
rocm_list[["uroc"]] <- list(farate = uroc_object$farate, hitrate = uroc_object$hitrate, auc = auc)
ani.options(loop = 1, interval = 0.1, ...)
saveGIF({
for(i in 1:length(rocm_list)) {
plot(rocm_list[[i]]$farate, rocm_list[[i]]$hitrate, type="l", xlab = "1-Specificity", ylab = "Sensitivity")
lines(x = c(0,1), y = c(0,1))
if(i < length(rocm_list)) {text(x=0.75, y=0.2, labels = paste("AUC:", rocm_list[[i]]$auc), adj = 0)
text(x = 0.0, y = 0.95, labels = paste("z =", rocm_list[[i]]$threshold), adj = 0)
text(x = 0.2, y = 0.95, labels = paste("w =", rocm_list[[i]]$weight), adj = 0)}
if(i == length(rocm_list)) {text(x = 0.75, y = 0.2, labels = paste("CPA:", rocm_list[[i]]$auc), adj = 0)
text(x = 0, y = 0.95, labels = "UROC curve", adj = 0)}
}}, movie.name = movie.name)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.