tile.plot <- function (prob = 'p_', y, measure = NULL, df_train = ds1, df_test = ds2,
model = NULL, model_var = 'X1', n = 10,
nudge_train = 1.5, nudge_test = 1.5)
{
#-- Packages
require(tidyverse)
require(lemon)
#-- Data
df_train <- as.data.frame(df_train)
if (!is.null(df_test)) {
df_test <- as.data.frame(df_test)
}
#-- Assign Predictions
if (class(model)[1] == 'glm') {
df_train[,prob] <- predict(model, df_train, type = 'response')
if (!is.null(df_test)) {
df_test[,prob] <- predict(model, df_test, type = 'response')
}
}
if (class(model)[1] == 'train') {
df_train[,prob] <- predict(model, df_train, type = 'prob')[, model_var]
if (!is.null(df_test)) {
df_test[,prob] <- predict(model, df_test, type = 'prob')[, model_var]
}
}
#================================================================================#
#-- Assign Tiles
df_train$Tile <- ntile(df_train[,prob], n)
if (!is.null(df_test)) {
df_test[, 'Tile'] <- 0
df_test[df_test[, prob] %> =<% c(-Inf,
max(df_train[df_train[,'Tile'] == 1, prob])),
'Tile'] <- 1
for (i in 2:(n-1)) {
df_test[df_test[, prob] %> =<% c(max(df_train[df_train[,'Tile'] == i - 1, prob]),
max(df_train[df_train[, 'Tile'] == i, prob])),
'Tile'] <- i
}
df_test[df_test[, prob] %> =<% c(max(df_train[df_train[,'Tile'] == (n-1), prob]),
Inf),
'Tile'] <- n
}
#================================================================================#
#----- Lift Tables
#-- Train
df_train$Actual_Train <- df_train[,y]
df_train$Pred_Train <- df_train[,prob]
lift_train <- df_train %>%
mutate(Count = 1) %>%
group_by(Tile) %>%
summarise(
Actual_Train = mean(Actual_Train),
Pred_Train = mean(Pred_Train),
Count = sum(Count)
)
if (!is.null(measure)) {
df_train$Measure_Mean <- df_train[,measure]
lift_train <- lift_train %>%
left_join(
(
df_train %>%
group_by(Tile) %>%
summarise(Measure_Mean = mean(Measure_Mean))
),
by = 'Tile'
)
a_train <- sum(lift_train$Measure_Mean[1:3])/sum(lift_train$Measure_Mean)
b_train <- sum(lift_train$Measure_Mean[(nrow(lift_train)-2):nrow(lift_train)])/sum(lift_train$Measure_Mean)
}
print(lift_train)
if (!is.null(measure)) {
print(sprintf('Train Bottom 3 = %.*f', 3, a_train))
print(sprintf('Train Top 3 = %.*f', 3, b_train))
}
#-- Test
if (!is.null(df_test)) {
df_test$Actual_Test <- df_test[,y]
df_test$Pred_Test <- df_test[,prob]
lift_test <- df_test %>%
mutate(Count = 1) %>%
group_by(Tile) %>%
summarise(
Actual_Test = mean(Actual_Test),
Pred_Test = mean(Pred_Test),
Count = sum(Count)
)
if (!is.null(measure)) {
df_test$Measure_Mean <- df_test[,measure]
lift_test <- lift_test %>%
left_join(
(
df_test %>%
group_by(Tile) %>%
summarise(Measure_Mean = mean(Measure_Mean))
),
by = 'Tile'
)
a_test <- sum(lift_test$Measure_Mean[1:3])/sum(lift_test$Measure_Mean)
b_test <- sum(lift_test$Measure_Mean[(nrow(lift_test)-2):nrow(lift_test)])/sum(lift_test$Measure_Mean)
}
print(lift_test)
if (!is.null(measure)) {
print(sprintf('Train Bottom 3 = %.*f', 3, a_test))
print(sprintf('Train Top 3 = %.*f', 3, b_test))
}
}
#================================================================================#
#----- Lift Charts
#-- Train
gg_train <- lift_train %>%
ggplot(aes(x = Tile)) +
geom_line(aes(y = Actual_Train, color = 'Actual')) +
geom_point(aes(y = Actual_Train, color = 'Actual'), size = 2) +
geom_line(aes(y = Pred_Train, color = 'Predict')) +
geom_point(aes(y = Pred_Train, color = 'Predict'), size = 2) +
ggtitle('Lift Chart - Train', subtitle = measure) +
ylab(prob) +
scale_colour_manual(values = as.vector(unlist(MixPalette))) +
Mix_Theme
if (!is.null(measure)) {
t_train <- lift_train$Actual_Train
v_train <- t_train[2]
w_train <- t_train[nrow(lift_train)-1]
gg_train <- gg_train +
geom_label(mapping = aes(2, v_train),
label = sprintf('Bottom 3 = %.*f%s', 1, a_train * 100, '%'),
color = MixPalette$Green,
nudge_x = nudge_train + 0.3) +
geom_label(mapping = aes(max(lift_train$Tile)-1, w_train),
label = sprintf('Top 3 = %.*f%s', 1, b_train * 100, '%'),
color = MixPalette$Green,
nudge_x = - nudge_train)
}
reposition_legend(aplot = gg_train, position = 'top left')
#-- Test
if (!is.null(df_test)) {
gg_test <- lift_test %>%
ggplot(aes(x = Tile)) +
geom_line(aes(y = Actual_Test, color = 'Actual')) +
geom_point(aes(y = Actual_Test, color = 'Actual'), size = 2) +
geom_line(aes(y = Pred_Test, color = 'Predict')) +
geom_point(aes(y = Pred_Test, color = 'Predict'), size = 2) +
ggtitle('Lift Chart - Test', subtitle = measure) +
ylab(prob) +
scale_colour_manual(values = as.vector(unlist(MixPalette))) +
Mix_Theme
if (!is.null(measure)) {
t_test <- lift_test$Actual_Test
v_test <- t_test[2]
w_test <- t_test[nrow(lift_test)-1]
gg_test <- gg_test +
geom_label(mapping = aes(2, v_test),
label = sprintf('Bottom 3 = %.*f%s', 1, a_test * 100, '%'),
color = MixPalette$Green,
nudge_x = nudge_test + 0.3) +
geom_label(mapping = aes(max(lift_test$Tile)-1, w_test),
label = sprintf('Top 3 = %.*f%s', 1, b_test * 100, '%'),
color = MixPalette$Green,
nudge_x = - nudge_test)
}
reposition_legend(aplot = gg_test, position = 'top left')
}
#================================================================================#
# Invisible Lists
if (is.null(df_test)) {
list(Lift_Train = lift_train, Tiles_Train = df_train[, 'Tile']) %>% invisible()
}
else if (!is.null(df_test)) {
list(Lift_Train = lift_train, Lift_Test = lift_test,
Tiles_Train = df_train[, 'Tile'], Tiles_Test = df_test[, 'Tile']) %>% invisible()
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.