#' plot_ROC
#'
#' @description
#' Returns a plot showing the ROC curve of one or more models compared to the null model
#'
#' #' Note: Predictions should be annualised (independent of exposure)
#'
#' @param actual Array[Numeric] - 0 or 1 - Values we are aiming to predict.
#' @param predicted Array[Numeric] / DataFrame[Numeric] - Between 0 and 1 - Values that we have predicted.
#' @param weight Optional: Array[Numeric] - Weighting of predictions. If NULL even weighting is used.
#' @param na.rm logical. Should missing values be removed?
#' @param use_plotly Optional: boolean - If TRUE plotly object is returned else ggplot2 object
#'
#'
#' @return plotly object of showing ROC curve for all predictions given
#' @export
#'
#' @examples
#'
#' data <- data.frame(x1=runif(100), x2=runif(100), noise=rnorm(100, sd=0.2)) %>%
#' mutate(target=ifelse(x1 + noise>0.5, 1, 0))
#'
#' plot_ROC(actual=data$target, predicted=data$x1)
#' plot_ROC(actual=data$target, predicted=data[c("x1","x2")])
#'
plot_ROC <- function(actual, predicted, weight=NULL, na.rm=FALSE, use_plotly=TRUE){
checkmate::assert_numeric(actual, lower=0)
checkmate::assert_logical(use_plotly, len=1)
# Use no weighting if none given
if (is.null(weight)){
weight <- rep(1, length(actual))
}else{
checkmate::assert_numeric(weight, len=length(actual), lower=0)
}
# Deal with predicted as dataframe or numeric vector
if (is.numeric(predicted)){
checkmate::assert_numeric(predicted, len=length(actual))
predicted_df <- data.frame(predicted=predicted)
pred_cols <- "predicted"
}else{
checkmate::assert_data_frame(predicted, nrows=length(actual))
predicted_df <- predicted
pred_cols <- colnames(predicted)
}
# Build base plot
if(use_plotly==TRUE){
roc_plot <- plotly::plot_ly() %>%
plotly::add_trace(x=c(0,1), y=c(0,1), type="scatter", mode="lines", name="NULL - 0.5", line=list(dash="dot"))
}else{
roc_plot <- ggplot2::ggplot() +
ggplot2::geom_line(ggplot2::aes(x=c(0,1), y=c(0,1), linetype="Null", color="NULL - 0.5"))
}
# For every prediction
for (ii in 1:ncol(predicted_df)){
# Build empty dataframe to hold results
roc_df <- null_ROC(actual=actual, predicted=predicted_df[[ii]], weight=weight, na.rm=na.rm)
# Calculate area under the curve
# trapezium rule for area under a curve then subtract 0.5 for area above y=x line
# trapezium rule - sum up. Average height ((y1+y2)/2) between two points * width between points (x2-x1)
roc_auc_score <- sum(((dplyr::lag(roc_df["TP_Rate"]) + roc_df["TP_Rate"]) * (dplyr::lag(roc_df["FP_Rate"]) - roc_df["FP_Rate"]))/2, na.rm=TRUE)
if (any(is.na(roc_df["TP_Rate"]), is.na(roc_df["FP_Rate"]))){
warning("NA found in inputs. Plotting skipped")
next
}
# Add curve to plot
if(use_plotly==TRUE){
roc_plot <- roc_plot %>%
plotly::add_trace(data=roc_df, x=~FP_Rate, y=~TP_Rate, type="scatter", mode="lines", name=paste(pred_cols[ii], round(roc_auc_score,4), sep=" - "))
}else{
roc_df_temp <- roc_df %>%
dplyr::arrange(TP_Rate, desc(FP_Rate)) %>%
dplyr::mutate(type=paste(colnames(predicted_df)[ii], round(roc_auc_score,4), sep=" - "))
roc_plot <- roc_plot +
ggplot2::geom_line(data=roc_df_temp, ggplot2::aes(x=FP_Rate, y=TP_Rate, color=type, linetype="Prediction"))
}
}
if(use_plotly==TRUE){
#Clean plot and return
roc_plot %>% plotly::layout(xaxis=list(title="False Positive Rate", range=c(-.01,1.01)),
yaxis=list(title="True Positive Rate", range=c(-.01,1.01)),
legend = list(x = 0.6, y = 0.4)) %>%
return()
}else{
roc_plot <- roc_plot +
ggplot2::labs(x="False Positive Rate", y="True Positive Rate", color="Model", linetype="") +
ggplot2::scale_linetype_manual(values =c("dashed", "solid"))
return(roc_plot)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.