#' Functions for plotting cumulative gains, lift and response charts
#'
#' @param H2OAutoML_object An object containing multiple models trained in H2O.
#' @param response_ref (Optional) You can include a reference line in the response chart by providing the rate of occurence of the target class, i.e. the proportion of the target class in the data.
#' @param save_pngs (Optional) Whether to save a png files with ggsave(). Default is FALSE.
#' @param n_models (Optional) The number of trained models to include.
#' @param explain (Optional) Whether to include a subtitle explaining each plot.
#'
#' @export
#' @import purrr
#' @import dplyr
#' @import ggplot2
#' @import h2o
#'
# best models
lift4gains2 <- function(H2OAutoML_object, response_ref = NULL, save_pngs = F,
n_models = 5, explain = F) {
models <- as.vector(as.character(H2OAutoML_object@leaderboard$model_id))[1:n_models]
df <- models %>% map(h2o.getModel) %>% map(h2o.gainsLift) %>% reduce(rbind) %>% as_tibble()
lengths <- models %>% map(h2o.getModel) %>% map(h2o.gainsLift) %>% map(dim) %>% map(1) %>% unlist()
df$model_id <- factor(rep(models,lengths))
df$model_rank <- rep(1:length(lengths),lengths)
df$model_id1 <- str_split(df$model_id, "_AutoML") %>%
map_chr(1) %>%
paste0(df$model_rank,": ",.)
df$model_id2 <- str_split(df$model_id,"(?<=_)(?=[_model])") %>%
map(2) %>%
paste("_",.) %>%
str_remove(" ")
df$model_id <- paste0(df$model_id1,df$model_id2)
df$model_id <- str_remove(df$model_id,"_NULL")
# Gains
p1 <- df %>%
ggplot(aes(x=cumulative_data_fraction,y=cumulative_capture_rate, colour = reorder(model_id,model_rank))) +
geom_line(size = .8) +
geom_point(size = 1) +
geom_segment(aes(x=0,y=0,xend = 1, yend = 1),size = 1,linetype = 2,col='grey')+
scale_colour_viridis_d("Model") +
labs(x = "Data fraction",
y = "Cumulative gains") +
theme_light() +
theme(plot.title = element_text(size = 16),
plot.subtitle = element_text(size = 14, face = "italic",vjust=-1))
if (explain == T) {
p1 <- p1 +
ggtitle("Gains chart",
subtitle = "When we apply the models and select x % of customers,\nwhat % of the target class observations can we expect to hit?\n")
} else {
p1 <- p1 +
ggtitle("Gains chart")
}
print(p1)
if (save_pngs == T) {
ggsave("gains.png")
}
# Lift
p2 <- df %>%
ggplot(aes(x=cumulative_data_fraction,y=cumulative_lift, colour = reorder(model_id,model_rank))) +
geom_line(size = .8) +
geom_point(size = 1) +
geom_segment(aes(x=0,y=1,xend = 1, yend = 1),size = 1,linetype = 2,col='grey')+
scale_colour_viridis_d("Model") +
labs(x = "Data fraction",
y = "Cumulative lift") +
theme_light() +
theme(plot.title = element_text(size = 16),
plot.subtitle = element_text(size = 14, face = "italic",vjust=-1))
if (explain == T) {
p2 <- p2 +
ggtitle("Lift chart",
subtitle = "When we apply the models and select x % of customers,\nhow many times better is that than using no model?\n")
} else {
p2 <- p2 +
ggtitle("Lift chart")
}
print(p2)
if (save_pngs == T) {
ggsave("lift.png")
}
# Response
p3 <- df %>%
ggplot(aes(x=cumulative_data_fraction,y=cumulative_response_rate, colour = reorder(model_id,model_rank))) +
geom_line(size = .8) +
geom_point(size = 1) +
ylim(c(0,1)) +
scale_colour_viridis_d("Model",alpha=.6) +
labs(x = "Data fraction",
y = "Cumulative response") +
theme_light() +
theme(plot.title = element_text(size = 16),
plot.subtitle = element_text(size = 14, face = "italic",vjust=-1))
if (!is.null(response_ref)) {
p3 <- p3 + geom_segment(aes(x=0,y = response_ref,xend = 1, yend = response_ref),size = 1,linetype = 2,col='grey')
}
if (explain == T) {
p3 <- p3 +
ggtitle("Response chart",
subtitle = "When we apply the model and select x % of customers, \nwhat is the expected % of target class observations in the selection?\n")
} else {
p3 <- p3 +
ggtitle("Response chart")
}
print(p3)
if (save_pngs == T) {
ggsave("response.png")
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.