#=====================================================
# Neural Network Model Selector
#=====================================================
#' Neural Network Model Selector
#'
#' Fits single-hidden-layer neural networks (via \code{nnet}) for every
#' non-empty combination of predictor variables and every hidden-layer size
#' from 1 to \code{max_nodes}, then selects the model with the lowest RMSE
#' on a held-out test set.
#'
#' @param data A data frame containing the response and predictor variables.
#' @param response_var Character string specifying the response variable name.
#' @param train_ratio Proportion of data used for training (default = 0.75).
#' @param max_nodes Maximum number of hidden layer nodes to evaluate (default = 10).
#' @param maxit Maximum number of iterations for neural network training (default = 500).
#' @param seed Random seed for reproducibility (default = 123).
#'
#' @return A list containing:
#' \itemize{
#' \item best_predictors: Predictor variables of the best model
#' \item best_hidden_nodes: Optimal number of hidden nodes
#' \item best_performance: RMSE and MAPE of the best model
#' \item performance_table: Performance metrics for all model combinations
#' \item fitted: Actual vs fitted values for training data
#' \item forecast: Actual vs forecasted values for test data
#' }
#'
#' @details
#' Predictors are centered and scaled with \code{scale()} before the
#' train/test split; the response is left on its original scale.
#' Model performance is evaluated on the held-out test set using RMSE and MAPE.
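#' Here RMSE = sqrt(mean((actual - predicted)^2)) and
#' MAPE = 100 * mean(abs((actual - predicted) / actual)), matching the
#' calculations in the function body; MAPE is undefined when the test-set
#' response contains zeros.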
#'
#' @examples
#' data_nn <- data.frame(
#' y = c(
#' 239.7255591, 239.6504622, 239.5848569, 239.5296290,
#' 239.4858835, 239.4547257, 239.4372607, 239.4345936,
#' 239.4478298, 239.4780743, 239.5264322, 239.5940089,
#' 239.6819094, 239.7912389, 239.9231027, 240.0786057,
#' 240.2588534, 240.4649507, 240.6980029, 240.9591152,
#' 241.2493927, 241.5699405, 241.9218640, 242.3062682
#' ),
#' x1 = c(
#' 9.968768102, 9.160298963, 7.294994564, 5.374395163,
#' 4.640671747, 5.495752064, 7.155488888, 8.532368787,
#' 8.032804811, 10.32506916, 12.17319856, 0.571302071,
#' 12.20714387, 27.13871523, 35.05310057, 42.40476672,
#' 46.28262184, 3.089076495, 40.31650327, 20.83471700,
#' 25.71428597, 21.06398002, 20.26911914, 22.17299909
#' ),
#' x2 = c(
#' 0.929946922, 4.246863796, 2.895052481, 6.827712819,
#' 11.53788333, 5.688668709, 26.08913871, 30.14926832,
#' 22.77412794, 4.519550904, 18.38195203, 40.50655053,
#' 58.61381025, 69.95404513, 76.08779720, 86.86779542,
#' 79.92326273, 32.26071629, 27.67652481, 66.80672448,
#' 86.54120883, 97.53881465, 95.49058569, 43.06666626
#' ),
#' x3 = c(
#' 143.7114315, 153.7664088, 158.5007862, 158.7973830,
#' 155.8340003, 150.2453258, 142.4471949, 132.8380705,
#' 121.6890278, 108.8662730, 94.52734991, 78.93448337,
#' 62.31616514, 44.76595425, 26.34367655, 7.109157889,
#' 12.72227903, 32.31332405, 50.67117014, 66.80301029,
#' 79.71603746, 88.41744464, 92.01533759, 90.21350491
#' )
#' )
#'
#' result <- nn_model_selector(
#' data = data_nn,
#' response_var = "y",
#' train_ratio = 0.75,
#' max_nodes = 5,
#' seed = 123
#' )
#'
#' result$best_performance
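#' result$best_predictors
#' result$performance_table
#'
#' ## Illustrative only: plot held-out actual vs forecasted values
#' plot(result$forecast$actual, type = "l", ylab = "y")
#' lines(result$forecast$forecasted, col = 2)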
#'
#' @import nnet
#' @importFrom stats predict
#' @importFrom utils combn
#' @export
nn_model_selector <- function(data,
response_var,
train_ratio = 0.75,
max_nodes = 10,
maxit = 500,
seed = 123) {
set.seed(seed)
if (!is.character(response_var) || length(response_var) != 1) {
stop("response_var must be a single column name as a character string")
}
  if (!response_var %in% names(data)) {
    stop("response_var must be the name of a column in 'data'")
  }
  predictor_vars <- setdiff(names(data), response_var)
  if (!is.numeric(data[[response_var]])) {
    stop("Response variable must be numeric")
  }
if (any(!vapply(data[predictor_vars], is.numeric, logical(1)))) {
stop("All predictors must be numeric")
}
  # Standardize predictors (center and scale); the response is left unscaled
data[predictor_vars] <- scale(data[predictor_vars])
  # Random train/test split of the rows
n <- nrow(data)
train_idx <- sample(seq_len(n), floor(train_ratio * n))
train_data <- data[train_idx, , drop = FALSE]
test_data <- data[-train_idx, , drop = FALSE]
  # All non-empty predictor subsets (2^p - 1 combinations for p predictors)
predictor_sets <- unlist(
lapply(
seq_along(predictor_vars),
function(k) combn(predictor_vars, k, simplify = FALSE)
),
recursive = FALSE
)
results <- data.frame(
predictors = character(),
hidden_nodes = integer(),
RMSE = numeric(),
MAPE = numeric(),
stringsAsFactors = FALSE
)
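  # Track the best model seen so far (lowest RMSE on the held-out test set)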
best_model <- NULL
best_rmse <- Inf
best_spec <- NULL
for (preds in predictor_sets) {
x_train <- as.matrix(train_data[, preds, drop = FALSE])
y_train <- train_data[[response_var]]
x_test <- as.matrix(test_data[, preds, drop = FALSE])
y_test <- test_data[[response_var]]
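    # Try every hidden-layer size from 1 to max_nodes; fits that error out are skipped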
for (nodes in seq_len(max_nodes)) {
model <- try(
nnet(
x = x_train,
y = y_train,
size = nodes,
linout = TRUE,
maxit = maxit,
trace = FALSE
),
silent = TRUE
)
if (inherits(model, "try-error")) next
pred_test <- predict(model, x_test)
rmse_val <- sqrt(mean((y_test - pred_test)^2))
mape_val <- mean(abs((y_test - pred_test) / y_test)) * 100
results <- rbind(
results,
data.frame(
predictors = paste(preds, collapse = "+"),
hidden_nodes = nodes,
RMSE = rmse_val,
MAPE = mape_val,
stringsAsFactors = FALSE
)
)
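      # Keep whichever fit has achieved the lowest test RMSE so far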
if (rmse_val < best_rmse) {
best_rmse <- rmse_val
best_model <- model
best_spec <- list(
predictors = preds,
nodes = nodes,
x_train = x_train,
y_train = y_train,
x_test = x_test,
y_test = y_test
)
}
}
}
  if (is.null(best_model)) {
    stop("No neural network model could be fitted; check the input data")
  }
  list(
best_predictors = best_spec$predictors,
best_hidden_nodes = best_spec$nodes,
best_performance = results[which.min(results$RMSE), ],
performance_table = results,
fitted = data.frame(
actual = best_spec$y_train,
fitted = predict(best_model, best_spec$x_train)
),
forecast = data.frame(
actual = best_spec$y_test,
forecasted = predict(best_model, best_spec$x_test)
)
)
}