#' Apply PCA classifier to MrP.
#'
#' \code{run_deep_pca} is a wrapper function that applies the PCA classifier to
#' data provided by the user, evaluates prediction performance, and chooses the
#' best-performing model. It differs from \code{run_best_subset} in that it
#' includes L1.x interactions.
#'
#' @inheritParams auto_MrP
#' @param data Data for cross-validation. A \code{list} of \eqn{k}
#' \code{data.frames}, one for each fold to be used in \eqn{k}-fold
#' cross-validation.
#'
#' @return A model formula of the winning best subset classifier model.
run_deep_pca <- function(
y, L1.x, L2.x, L2.unit, L2.reg, loss.unit, loss.fun, deep.splines, data,
cores, verbose
) {
# Determine type of dependent variable
if (
data[[1]] %>%
dplyr::pull(!!y) %>%
unique() %>%
length() == 2
) {
dv_type <- "binary"
} else {
dv_type <- "linear"
}
# List of all models to be evaluated
models <- model_list_pca(
y = y,
L1.x = L1.x,
L2.x = L2.x,
L2.unit = L2.unit,
L2.reg = L2.reg
)
# no nesting with deep interactions
if (!is.null(L2.reg)) {
models <- lapply(models, function(x) {
# model formula to character
m_form <- as.character(x)
# replace (1 | region/state) with (1 | region) + (1 | state)
m_form <- stringr::str_replace_all(
string = m_form,
pattern = "\\(1 \\| region/state\\)",
replacement = "\\(1 | state\\) + \\(1 | region\\)"
)
# character to formula
m_form <- as.formula(sprintf("%s%s%s", m_form[2], m_form[1], m_form[3]))
})
}
# add interactions to the models
models <- lapply(models, function(x) {
# get all level 1 variables in the current model
c_l1_x <- x %>%
as.character() %>%
.[3] %>%
stringr::str_extract_all(pattern = "L1x\\d+") %>%
unlist()
# generate all interactions of L1.x
l1_comb <- unlist(lapply(2:length(c_l1_x), function(x) {
apply(combn(L1.x, x), 2, paste, collapse = ".")
}))
# generate all interactions of L1.x with L2.unit
l1_state <- paste(L1.x, L2.unit, sep = ".")
# generate all interactions of L1.x with L2.reg
if (!is.null(L2.reg)) {
l1_region <- paste(L1.x, L2.reg, sep = ".")
} else {
l1_region <- NULL
}
# interactions
add_interactions <- paste0(
# interactions of L1x
paste("(1 | ", l1_comb, ")", collapse = " + "), " + ",
# interactions of L1x with L2.unit
paste("(1 | ", l1_state, ")", collapse = " + "), " + ",
# interactions of L1x with L2.reg
if (any(!is.null(l1_region))) {
paste("(1 | ", l1_region, ")", collapse = " + ")
}
)
# remove trailing " + " from interactions
add_interactions <- stringr::str_extract(
string = add_interactions,
pattern = "^.*\\)"
)
# character to formula
add_interactions <- as.formula(paste("~ . +", add_interactions))
# update formula with interactions
x <- update(x, add_interactions)
# add splines to context level variables
if (deep.splines) {
# formula to character
char_form <- as.character(x)
char_form <- sprintf("%s %s %s", char_form[2], char_form[1], char_form[3])
# get all context level variables in the current model
c_l2_x <- char_form %>%
stringr::str_extract_all(pattern = "L2\\.x\\d+") %>%
unlist()
# replace in string
for (i in seq_along(c_l2_x)) {
char_form <- stringr::str_replace(
string = char_form,
pattern = c_l2_x[i],
replacement = sprintf("v_s(%s)", c_l2_x[i])
)
}
# character to formula
x <- as.formula(char_form)
}
return(x)
})
# Register cores
cl <- multicore(cores = cores, type = "open", cl = NULL)
# Train and evaluate each model
m_errors <- foreach::foreach(
m = seq_along(models), .packages = "autoMrP",
.export = c("deep_mrp_classifier", "loss_function")
) %dorng% {
`%>%` <- magrittr::`%>%`
# Loop over each fold
k_errors <- lapply(seq_along(data), function(k) {
# Split data in training and validation sets
data_train <- dplyr::bind_rows(data[-k])
data_valid <- dplyr::bind_rows(data[k])
# Train mth model on kth training set
model_m <- deep_mrp_classifier(
form = models[[m]],
y = y,
data = data_train,
verbose = TRUE
)
# predictions based on DV type (binary or continuous)
if (dv_type == "binary") {
# use trained model to make predictions for kth validation set
pred_m <- vglmer::predict_MAVB(
samples = 1000,
model_m,
newdata = data_valid,
allow_missing_levels = TRUE
)[["mean"]]
# convert to response probabilities
pred_m <- stats::plogis(pred_m)
} else if (dv_type == "linear") {
# Use trained model to make predictions for kth validation set
pred_m <- predict(
samples = 1000,
object = model_m,
newdata = data_valid,
allow_missing_levels = TRUE
)[["mean"]]
}
# evaluate predictions based on loss function
perform_m <- loss_function(
pred = pred_m,
data.valid = data_valid,
loss.unit = loss.unit,
loss.fun = loss.fun,
y = y,
L2.unit = L2.unit
)
})
# Mean over loss functions
k_errors <- dplyr::bind_rows(k_errors) %>%
dplyr::group_by(measure) %>%
dplyr::summarise(value = mean(value), .groups = "drop") %>%
dplyr::mutate(model = m)
}
# De-register cluster
multicore(cores = cores, type = "close", cl = cl)
# Extract best tuning parameters
grid_cells <- dplyr::bind_rows(m_errors)
best_params <- dplyr::slice(
loss_score_ranking(score = grid_cells, loss.fun = loss.fun), 1
)
# Choose best-performing model
out <- models[[dplyr::pull(.data = best_params, var = model)]]
# Function output
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.