stacking_weights <- function(preds, ebma_out, L2.unit, k.folds, cores) {
# bind globals used in non-standard evaluation to avoid R CMD check notes
stack_lme4 <- stack_nlm <- y <- stack_of_stacks <- ebma_preds <- k <- NULL
id <- NULL
# check whether more than one classifier was tuned
if (ncol(preds) <= 3) {
message("Skipping stacking, only one classifier was tuned.")
# return the stacked predictions and weights
return(list(stack_preds = NULL, stack_weights = NULL))
} else {
# log loss
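# (binary cross-entropy): -mean(y * log(p) + (1 - y) * log(1 - p))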
log_loss_fun <- function(true_labels, predictions) {
# a small constant guards against log(0)
l_loss <- -mean(
true_labels * log(predictions + 1e-15) + (1 - true_labels) *
log(1 - predictions + 1e-15)
)
return(l_loss)
}
# objective function to minimize: log loss of the sigmoid-transformed
# weighted predictions; takes the prediction matrix and labels as
# arguments so it is self-contained when shipped to parallel workers;
# adds a small constant to avoid log(0)
objective <- function(weights, stack_predictions, true_labels) {
weighted_preds <- stack_predictions %*% weights
# apply sigmoid
weighted_preds <- 1 / (1 + exp(-weighted_preds))
# log loss
-mean(
true_labels * log(weighted_preds + 1e-15) +
(1 - true_labels) * log(1 - weighted_preds + 1e-15)
)
}
# add IDs
preds <- preds %>%
dplyr::mutate(id = seq_len(nrow(.)))
ebma_out$individual_level_predictions <-
ebma_out$individual_level_predictions %>%
dplyr::mutate(id = seq_len(nrow(.)))
# generate folds for stacking
cv_folds <- cv_folding(
data = preds,
L2.unit = L2.unit,
k.folds = k.folds,
cv.sampling = "L2 units"
)
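# cv.sampling = "L2 units" assigns whole L2 units to folds, so each
# test fold holds out units unseen during training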
# is the dependent variable binary?
binary_dv <- length(unique(preds$y)) == 2
# stacking for binary dependent variables
if (binary_dv) {
#--------------------------------------------------
# cross-validation stacking
#--------------------------------------------------
# Register cores
cl <- multicore(cores = cores, type = "open", cl = NULL)
# loop over k folds
k_weights <- foreach::foreach(
k = seq_along(cv_folds), .packages = "autoMrP"
) %dorng% {
`%>%` <- magrittr::`%>%`
# generate weights based on training data
data_train <- dplyr::bind_rows(cv_folds[-k])
stack_predictions <- data_train %>%
dplyr::select(-y, -!!rlang::sym(L2.unit), -id) %>%
as.matrix()
# evaluate based on test data
data_test <- cv_folds[[k]] %>%
dplyr::select(-y, -!!rlang::sym(L2.unit), -id) %>%
as.matrix()
#--------------------------------------------------
# non-negative least squares
#--------------------------------------------------
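# nnls solves min ||Xw - y||^2 subject to w >= 0, where X is the
# matrix of fold-level classifier predictions; the solution is then
# normalized to sum to 1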
ols_coefs <- nnls::nnls(
stack_predictions, data_train$y
)$x
names(ols_coefs) <- colnames(stack_predictions)
# normalize the weights to sum to 1
ols_coefs <- ols_coefs / sum(ols_coefs)
# weights for base models (NaNs from an all-zero solution become 0)
stacking_weights <- ols_coefs
stacking_weights[is.na(ols_coefs)] <- 0
# test data log loss
test_preds <- data_test %*% stacking_weights
test_error <- log_loss_fun(cv_folds[[k]]$y, test_preds)
# store results
nnls_out <- list(
weights = stacking_weights,
test_error = test_error
)
#--------------------------------------------------
# optim
#--------------------------------------------------
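# direct minimization of the log loss with the derivative-free COBYLA
# algorithm, with weights bounded below at 0 and normalized afterwards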
# true labels
true_labels <- data_train$y
# Initial guess for weights
initial_weights <- rep(
1 / ncol(stack_predictions), ncol(stack_predictions)
)
# Define bounds for weights
lower_bounds <- rep(0, ncol(stack_predictions))
upper_bounds <- rep(Inf, ncol(stack_predictions))
# run the optimization (the extra arguments are passed through to the
# objective by nloptr)
result <- nloptr::nloptr(
x0 = initial_weights,
eval_f = objective,
lb = lower_bounds,
ub = upper_bounds,
opts = list(
algorithm = "NLOPT_LN_COBYLA",
xtol_rel = 1.0e-8,
maxeval = 1000
),
stack_predictions = stack_predictions,
true_labels = true_labels
)
# Optimal weights
stacking_weights <- result$solution
names(stacking_weights) <- colnames(stack_predictions)
# normalize the weights
stacking_weights <- stacking_weights / sum(stacking_weights)
# test data log loss
test_preds <- data_test %*% stacking_weights
test_error <- log_loss_fun(cv_folds[[k]]$y, test_preds)
# store results
optim_out <- list(
weights = stacking_weights,
test_error = test_error
)
#--------------------------------------------------
# quadratic programming
#--------------------------------------------------
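# least squares cast as a quadratic program: minimize
# 0.5 * w'Dw - d'w with D = X'X and d = X'y, subject to sum(w) = 1
# (equality) and w >= 0; a small ridge on D ensures positive
# definiteness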
d_m <- t(stack_predictions) %*% stack_predictions
d_v <- t(stack_predictions) %*% data_train$y
# regularize
epsilon <- 1e-8
d_m <- d_m + epsilon * diag(ncol(d_m))
# constraints (non-negative weights)
a_m <- rbind(
rep(1, ncol(stack_predictions)), diag(ncol(stack_predictions))
)
b_v <- c(1, rep(0, ncol(stack_predictions)))
# solve the quadratic programming problem
qp_solution <- quadprog::solve.QP(
Dmat = d_m,
dvec = d_v,
Amat = t(a_m),
bvec = b_v,
meq = 1 # Equality constraint for sum(weights) = 1
)
# get weights
stacking_weights <- qp_solution$solution
names(stacking_weights) <- colnames(stack_predictions)
# evaluate based on test data
test_preds <- data_test %*% stacking_weights
test_error <- log_loss_fun(cv_folds[[k]]$y, test_preds)
# store results
qp_out <- list(
weights = stacking_weights,
test_error = test_error
)
#--------------------------------------------------
# Ornstein hillclimbing
#--------------------------------------------------
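# greedy hill climbing: every model starts with weight 1; each
# iteration adds 1 to whichever model's weight most reduces the
# training log loss, stopping once no addition helps or the weights
# sum to 1000; the normalized counts are the stacking weights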
# initial weights for hill climbing
weights_ornstein <- rep(x = 1, times = ncol(stack_predictions))
# predictions based on hillclimbing weights
stack_ornstein <- stack_predictions %*% weights_ornstein /
sum(weights_ornstein)
# compute log loss
c_log_loss <- -mean(
true_labels * log(stack_ornstein) + (1 - true_labels) *
log(1 - stack_ornstein)
)
# container for best log loss
best_log_loss <- c_log_loss
# while loop control
keep_going <- TRUE
# hill climbing until number of iterations is reached or no improvement
# through addition of models
while (keep_going) {
# keep track of which model would be best 'greedy' addition to
# ensemble
best_addition <- 0L
# iterate over all models
for (j in seq_along(weights_ornstein)) {
# increase model weight by 1
weights_ornstein[j] <- weights_ornstein[j] + 1L
# generate predictions with updated weights
preds_update <- stack_predictions %*% weights_ornstein /
sum(weights_ornstein)
# compute log loss of updated predictions
updated_log_loss <- -mean(
true_labels * log(preds_update) + (1 - true_labels) *
log(1 - preds_update)
)
# check whether updated log loss is better than current best
if (updated_log_loss < best_log_loss) {
best_log_loss <- updated_log_loss
# model addition that best improves log loss
best_addition <- j
}
# reset weights
weights_ornstein[j] <- weights_ornstein[j] - 1L
} # end of loop over all models
# if we found an improvement (and we're below the cutoff number of
# iterations), then add the best model
if (sum(weights_ornstein) < 1000 && best_log_loss < c_log_loss) {
weights_ornstein[best_addition] <- weights_ornstein[best_addition] +
1L
c_log_loss <- best_log_loss
} else {
# stop the while loop if no model addition improves log loss
keep_going <- FALSE
}
} # end of while loop
# normalize the weights
weights_ornstein <- weights_ornstein %>%
magrittr::divide_by(sum(weights_ornstein))
names(weights_ornstein) <- colnames(stack_predictions)
# evaluate based on test data
test_preds <- data_test %*% weights_ornstein
test_error <- log_loss_fun(cv_folds[[k]]$y, test_preds)
# store results
ornstein_out <- list(
weights = weights_ornstein,
test_error = test_error
)
return(
list(
nnls = nnls_out,
optim = optim_out,
qp = qp_out,
ornstein = ornstein_out
)
)
}
# De-register cluster
multicore(cores = cores, type = "close", cl = cl)
# all data
stack_predictions <- cv_folds %>%
dplyr::bind_rows() %>%
dplyr::select(-y, -!!rlang::sym(L2.unit), -id) %>%
as.matrix()
# non-negative least squares stacking
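# fold-level weights are averaged into one weight vector, with each
# fold weighted by its inverse test log loss so that folds that
# generalize better contribute more; the same scheme is used for all
# stacking methods below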
nnls_out <- do.call(rbind, k_weights)[, "nnls"]
# weights
nnls_w <- do.call(rbind, nnls_out)[, "weights"] %>%
dplyr::bind_rows()
# test error
nnls_e <- do.call(rbind, nnls_out)[, "test_error"] %>%
unlist()
nnls_e <- 1 / nnls_e
nnls_e <- nnls_e / sum(nnls_e)
# generate a weighted mean of stacking_weights based on test error
nnls_w <- apply(nnls_w, 2, function(x) {
weighted.mean(x = x, w = nnls_e)
})
# individual level predictions
final_prediction <- stack_predictions %*% nnls_w
# stacked predictions output
stack_out <- tibble::tibble(
id = cv_folds %>%
dplyr::bind_rows() %>%
dplyr::pull(id),
y = cv_folds %>%
dplyr::bind_rows() %>%
dplyr::pull(y),
!!rlang::sym(L2.unit) := cv_folds %>%
dplyr::bind_rows() %>%
dplyr::pull(!!rlang::sym(L2.unit)),
stack_nnls = as.numeric(final_prediction)
)
# stacking weights container
stack_weights <- list(
stack_nnls = nnls_w
)
# optim stacking weights
optim_out <- do.call(rbind, k_weights)[, "optim"]
# weights
optim_w <- do.call(rbind, optim_out)[, "weights"] %>%
dplyr::bind_rows()
# test error
optim_e <- do.call(rbind, optim_out)[, "test_error"] %>%
unlist()
optim_e <- 1 / optim_e
optim_e <- optim_e / sum(optim_e)
# generate a weighted mean of stacking_weights based on test error
optim_w <- apply(optim_w, 2, function(x) {
weighted.mean(x = x, w = optim_e)
})
# individual level predictions
final_prediction <- stack_predictions %*% optim_w
# stacked predictions output
stack_out <- stack_out %>%
dplyr::mutate(
stack_optim = as.numeric(final_prediction)
)
# stacking weights container
stack_weights$stack_optim <- optim_w
# quadratic programming stacking
qp_out <- do.call(rbind, k_weights)[, "qp"]
# weights
qp_w <- do.call(rbind, qp_out)[, "weights"] %>%
dplyr::bind_rows()
# test error
qp_e <- do.call(rbind, qp_out)[, "test_error"] %>%
unlist()
qp_e <- 1 / qp_e
qp_e <- qp_e / sum(qp_e)
# generate a weighted mean of stacking_weights based on test error
qp_w <- apply(qp_w, 2, function(x) {
weighted.mean(x = x, w = qp_e)
})
# individual level predictions
final_prediction <- stack_predictions %*% qp_w
# stacked predictions output
stack_out <- stack_out %>%
dplyr::mutate(
stack_qp = as.numeric(final_prediction)
)
# stacking weights container
stack_weights$stack_qp <- qp_w
# Ornstein stacking
ornstein_out <- do.call(rbind, k_weights)[, "ornstein"]
# weights
ornstein_w <- do.call(rbind, ornstein_out)[, "weights"] %>%
dplyr::bind_rows()
# test error
ornstein_e <- do.call(rbind, ornstein_out)[, "test_error"] %>% unlist()
ornstein_e <- 1 / ornstein_e
ornstein_e <- ornstein_e / sum(ornstein_e)
# generate a weighted mean of stacking_weights based on test error
ornstein_w <- apply(ornstein_w, 2, function(x) {
weighted.mean(x = x, w = ornstein_e)
})
# individual level predictions
final_prediction <- stack_predictions %*% ornstein_w
# stacked predictions output
stack_out <- stack_out %>%
dplyr::mutate(
stack_ornstein = as.numeric(final_prediction)
)
# stacking weights container
stack_weights$stack_ornstein <- ornstein_w
# sort the stacked predictions by id
stack_out <- stack_out %>%
dplyr::arrange(id)
#----------------------------------------------------------------
# 2nd round of stacking: stack of stacking algorithms
#----------------------------------------------------------------
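# the second-level stack combines the first-round stacking predictions
# (nnls, optim, qp, ornstein) into a single prediction, again using
# Ornstein hill climbing with k-fold cross-validation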
# generate folds
cv_folds <- cv_folding(
data = stack_out,
L2.unit = L2.unit,
k.folds = k.folds,
cv.sampling = "L2 units"
)
# Register cores
cl <- multicore(cores = cores, type = "open", cl = NULL)
# loop over k folds
k_weights <- foreach::foreach(
k = seq_along(cv_folds), .packages = "autoMrP"
) %dorng% {
`%>%` <- magrittr::`%>%`
# generate weights based on training data
data_train <- dplyr::bind_rows(cv_folds[-k])
stack_predictions <- data_train %>%
dplyr::select(-y, -!!rlang::sym(L2.unit), -id) %>%
as.matrix()
# evaluate based on test data
data_test <- cv_folds[[k]] %>%
dplyr::select(-y, -!!rlang::sym(L2.unit), -id) %>%
as.matrix()
true_labels <- data_train$y
#--------------------------------------------------
# Stack of stacks (without EBMA) via Ornstein hill climbing
#--------------------------------------------------
# initial weights for hill climbing
weights_ornstein <- rep(x = 1, times = ncol(stack_predictions))
# predictions based on hillclimbing weights
stack_ornstein <- stack_predictions %*% weights_ornstein /
sum(weights_ornstein)
# compute log loss
c_log_loss <- -mean(
true_labels * log(stack_ornstein) + (1 - true_labels) *
log(1 - stack_ornstein)
)
# container for best log loss
best_log_loss <- c_log_loss
# while loop control
keep_going <- TRUE
# hill climbing until number of iterations is reached or no improvement
# through addition of models
while (keep_going) {
# keep track of which model would be best 'greedy' addition to
# ensemble
best_addition <- 0L
# iterate over all models
for (j in seq_along(weights_ornstein)) {
# increase model weight by 1
weights_ornstein[j] <- weights_ornstein[j] + 1L
# generate predictions with updated weights
preds_update <- stack_predictions %*% weights_ornstein /
sum(weights_ornstein)
# compute log loss of updated predictions
updated_log_loss <- -mean(
true_labels * log(preds_update) + (1 - true_labels) *
log(1 - preds_update)
)
# check whether updated log loss is better than current best
if (updated_log_loss < best_log_loss) {
best_log_loss <- updated_log_loss
# model addition that best improves log loss
best_addition <- j
}
# reset weights
weights_ornstein[j] <- weights_ornstein[j] - 1L
} # end of loop over all models
# if we found an improvement (and we're below the cutoff number of
# iterations), then add the best model
if (sum(weights_ornstein) < 1000 && best_log_loss < c_log_loss) {
weights_ornstein[best_addition] <- weights_ornstein[best_addition] +
1L
c_log_loss <- best_log_loss
} else {
# stop the while loop if no model addition improves log loss
keep_going <- FALSE
}
} # end of while loop
# normalize the weights
weights_ornstein <- weights_ornstein %>%
magrittr::divide_by(sum(weights_ornstein))
names(weights_ornstein) <- colnames(stack_predictions)
# evaluate based on test data
test_preds <- data_test %*% weights_ornstein
test_error <- log_loss_fun(cv_folds[[k]]$y, test_preds)
# store results
return(
list(
weights = weights_ornstein,
test_error = test_error
)
)
}
# De-register cluster
multicore(cores = cores, type = "close", cl = cl)
# all data
stack_predictions <- cv_folds %>%
dplyr::bind_rows() %>%
dplyr::select(-y, -!!rlang::sym(L2.unit), -id) %>%
as.matrix()
# 2nd round of stacking output
# weights
stack2_w <- do.call(rbind, k_weights)[, "weights"] %>%
dplyr::bind_rows()
# test error
stack2_e <- do.call(rbind, k_weights)[, "test_error"] %>%
unlist()
stack2_e <- 1 / stack2_e
stack2_e <- stack2_e / sum(stack2_e)
# generate a weighted mean of stacking_weights based on test error
stack2_w <- apply(stack2_w, 2, function(x) {
weighted.mean(x = x, w = stack2_e)
})
# individual level predictions
final_prediction <- stack_predictions %*% stack2_w
# stack of stacks output
stack_of_stacks <- tibble::tibble(
id = cv_folds %>%
dplyr::bind_rows() %>%
dplyr::pull(id),
stack_of_stacks = as.numeric(final_prediction)
) %>%
dplyr::arrange(id)
# merge predictions into stack_out
stack_out <- stack_out %>%
dplyr::left_join(stack_of_stacks, by = "id")
# stacking weights container
stack_weights$stack_of_stacks <- stack2_w
#----------------------------------------------------------------
# 3rd round of stacking: stack of stacks with ebma
#----------------------------------------------------------------
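# the third-level stack combines the stack-of-stacks prediction with
# the EBMA predictions via the same hill-climbing procedure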
# data for third round of stacking
stack_data3 <- stack_out %>%
dplyr::select(id, y, !!rlang::sym(L2.unit), stack_of_stacks)
# ebma predictions
ebma_preds <- ebma_out$individual_level_predictions %>%
dplyr::select(id, ebma_preds)
# join ebma predictions
stack_data3 <- stack_data3 %>%
dplyr::left_join(ebma_preds, by = "id")
# generate folds
cv_folds <- cv_folding(
data = stack_data3,
L2.unit = L2.unit,
k.folds = k.folds,
cv.sampling = "L2 units"
)
# Register cores
cl <- multicore(cores = cores, type = "open", cl = NULL)
# loop over k folds
k_weights <- foreach::foreach(
k = seq_along(cv_folds), .packages = "autoMrP"
) %dorng% {
`%>%` <- magrittr::`%>%`
# generate weights based on training data
data_train <- dplyr::bind_rows(cv_folds[-k])
stack_predictions <- data_train %>%
dplyr::select(-y, -!!rlang::sym(L2.unit), -id) %>%
as.matrix()
# evaluate based on test data
data_test <- cv_folds[[k]] %>%
dplyr::select(-y, -!!rlang::sym(L2.unit), -id) %>%
as.matrix()
true_labels <- data_train$y
#--------------------------------------------------
# Stack of stacks with EBMA via Ornstein hill climbing
#--------------------------------------------------
# initial weights for hill climbing
weights_ornstein <- rep(x = 1, times = ncol(stack_predictions))
# predictions based on hillclimbing weights
stack_ornstein <- stack_predictions %*% weights_ornstein /
sum(weights_ornstein)
# compute log loss
c_log_loss <- -mean(
true_labels * log(stack_ornstein) + (1 - true_labels) *
log(1 - stack_ornstein)
)
# container for best log loss
best_log_loss <- c_log_loss
# while loop control
keep_going <- TRUE
# hill climbing until number of iterations is reached or no improvement
# through addition of models
while (keep_going) {
# keep track of which model would be best 'greedy' addition to
# ensemble
best_addition <- 0L
# iterate over all models
for (j in seq_along(weights_ornstein)) {
# increase model weight by 1
weights_ornstein[j] <- weights_ornstein[j] + 1L
# generate predictions with updated weights
preds_update <- stack_predictions %*% weights_ornstein /
sum(weights_ornstein)
# compute log loss of updated predictions
updated_log_loss <- -mean(
true_labels * log(preds_update) + (1 - true_labels) *
log(1 - preds_update)
)
# check whether updated log loss is better than current best
if (updated_log_loss < best_log_loss) {
best_log_loss <- updated_log_loss
# model addition that best improves log loss
best_addition <- j
}
# reset weights
weights_ornstein[j] <- weights_ornstein[j] - 1L
} # end of loop over all models
# if we found an improvement (and we're below the cutoff number of
# iterations), then add the best model
if (sum(weights_ornstein) < 1000 && best_log_loss < c_log_loss) {
weights_ornstein[best_addition] <- weights_ornstein[best_addition] +
1L
c_log_loss <- best_log_loss
} else {
# stop the while loop if no model addition improves log loss
keep_going <- FALSE
}
} # end of while loop
# normalize the weights
weights_ornstein <- weights_ornstein %>%
magrittr::divide_by(sum(weights_ornstein))
names(weights_ornstein) <- colnames(stack_predictions)
# evaluate based on test data
test_preds <- data_test %*% weights_ornstein
test_error <- log_loss_fun(cv_folds[[k]]$y, test_preds)
# store results
return(
list(
weights = weights_ornstein,
test_error = test_error
)
)
}
# De-register cluster
multicore(cores = cores, type = "close", cl = cl)
# all data
stack_predictions <- cv_folds %>%
dplyr::bind_rows() %>%
dplyr::select(-y, -!!rlang::sym(L2.unit), -id) %>%
as.matrix()
# 3rd round of stacking output
# weights
stack3_w <- do.call(rbind, k_weights)[, "weights"] %>%
dplyr::bind_rows()
# test error
stack3_e <- do.call(rbind, k_weights)[, "test_error"] %>%
unlist()
stack3_e <- 1 / stack3_e
stack3_e <- stack3_e / sum(stack3_e)
# generate a weighted mean of stacking_weights based on test error
stack3_w <- apply(stack3_w, 2, function(x) {
weighted.mean(x = x, w = stack3_e)
})
# individual level predictions
final_prediction <- stack_predictions %*% stack3_w
# stack of stacks with EBMA output
stack_of_stacks_ebma <- tibble::tibble(
id = cv_folds %>%
dplyr::bind_rows() %>%
dplyr::pull(id),
stack_of_stacks_ebma = as.numeric(final_prediction)
) %>%
dplyr::arrange(id)
# merge predictions into stack_out
stack_out <- stack_out %>%
dplyr::left_join(stack_of_stacks_ebma, by = "id")
# stacking weights container
stack_weights$stack_of_stacks_ebma <- stack3_w
} # end if binary_dv
# stacking for continuous dependent variables
if (!binary_dv) {
stop("Stacking for continuous dependent variables not implemented yet.")
}
# return the stacked predictions and weights
return(list(stack_preds = stack_out, stack_weights = stack_weights))
}
}
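# ----------------------------------------------------------------------
# Minimal usage sketch (hypothetical, not part of the package): assumes
# this file is sourced inside autoMrP so that `cv_folding()` and
# `multicore()` are available; the data, L2 unit, and classifier columns
# below are synthetic stand-ins.
if (FALSE) {
set.seed(42)
n <- 500
# synthetic individual-level data: binary outcome, L2 unit, and
# predicted probabilities from three hypothetical classifiers
toy_preds <- tibble::tibble(
y = stats::rbinom(n, 1, 0.5),
state = sample(paste0("S", 1:10), n, replace = TRUE),
best_subset = stats::runif(n),
lasso = stats::runif(n),
gb = stats::runif(n)
)
# synthetic EBMA output with individual-level predictions
toy_ebma <- list(
individual_level_predictions = dplyr::mutate(
toy_preds, ebma_preds = stats::runif(n)
)
)
res <- stacking_weights(
preds = toy_preds,
ebma_out = toy_ebma,
L2.unit = "state",
k.folds = 5,
cores = 1
)
str(res$stack_weights)
}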