bin.valid <- function(x, y, data, p = 0.05, q = 0.01, s = 0.1, abbrev_length = 20, event_class = 1,
train_samples = 10, n_tests = 10, prop = 0.6, seed = 123) {
require(woeBinning)
data <- data[, c(y,x)] %>% as.data.frame()
#----- Define Variables
woe_train <- NULL
woe_tree_train <- NULL
woe_mse <- NULL
woe_tree_mse <- NULL
#----- Set Seeds for Train
set.seed(seed = seed)
train_seeds <- sample.int(1000, train_samples)
#----- Create Train WoE(s) through random sampling
for (i in 1:train_samples) {
set.seed(train_seeds[i])
TrainIndex <- createDataPartition(as.factor(data[, y]),
p = prop, list = F)
df_train <- data[TrainIndex,]
# df_test <- data[-TrainIndex,]
woe_train[[i]] <- woe.binning(
df = df_train,
target.var = y,
pred.var = x,
min.perc.total = p,
min.perc.class = q,
stop.limit = s,
event.class = event_class,
abbrev.fact.levels = abbrev_length
)
woe_tree_train[[i]] <- woe.tree.binning(
df = df_train,
target.var = y,
pred.var = x,
min.perc.total = p,
min.perc.class = q,
stop.limit = s,
event.class = event_class,
abbrev.fact.levels = abbrev_length
)
}
#----- Apply Train WoE on random test samples
for (j in 1:length(woe_train)) {
woe_test <- NULL
woe_tree_test <- NULL
set.seed(seed = seed)
test_seeds <- sample.int(1000, n_tests)
for (k in 1:n_tests) {
set.seed(test_seeds[k])
TrainIndex <- createDataPartition(as.factor(data[, y]),
p = prop, list = F)
df_test <- data[-TrainIndex,]
df_test_woe <- woe.binning.deploy(df = df_test, woe_train[[j]])
names(df_test_woe) <- gsub(pattern = '\\.', replacement = '_', x = names(df_test_woe))
df_test_woe[, paste(x, '_binned', sep = '')] <- gsub(pattern = '\\,', replacement = '_', x = df_test_woe[, paste(x, '_binned', sep = '')])
df_test_woe_tree <- woe.binning.deploy(df = df_test, woe_tree_train[[j]])
names(df_test_woe_tree) <- gsub(pattern = '\\.', replacement = '_', x = names(df_test_woe_tree))
df_test_woe_tree[, paste(x, '_binned', sep = '')] <- gsub(pattern = '\\,', replacement = '_', x = df_test_woe_tree[, paste(x, '_binned', sep = '')])
woe_test[k] <- bins(x = paste(x, '_binned', sep = ''), y = y, data = df_test_woe, type = 'f')$info$iv
woe_tree_test[k] <- bins(x = paste(x, '_binned', sep = ''), y = y, data = df_test_woe_tree, type = 'f')$info$iv
}
#-- Calculate MSE for each Train WoE
woe_mse[j] <- (woe_train[[1]][[3]] - woe_test)^2 %>% mean(na.rm = T)
woe_tree_mse[j] <- (woe_tree_train[[1]][[3]] - woe_tree_test)^2 %>% mean(na.rm = T)
}
#----- Output
ds_results <- tibble(
WoE = woe_train,
WoE_MSE = woe_mse
) %>%
arrange(WoE_MSE) %>%
slice(1)
ds_tree_results <- tibble(
WoE = woe_tree_train,
WoE_MSE = woe_tree_mse
) %>%
arrange(WoE_MSE) %>%
slice(1)
output <- list(WoE = ds_results$WoE[1][[1]], WoE_MSE = ds_results$WoE_MSE,
WoE_Tree = ds_tree_results$WoE[1][[1]], WoE_Tree_MSE = ds_tree_results$WoE_MSE)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.