#' @importFrom tensorflow set_random_seed
#' @include utils.R
#' @include validator.R
#' @include deep_learning_model.R
#' @title Fit a Deep Learning Model
#'
#' @templateVar ClassName DeepLearningModel
#' @templateVar XType `matrix`
#' @templateVar YType `vector` or `matrix`
#' @templateVar refFunction keras::keras_model_sequential()
#'
#' @description
#' `deep_learning()` is a wrapper of the [keras::keras_model_sequential()]
#' function to fit a deep learning model with the ability to tune the
#' hyperparameters with grid search or Bayesian optimization in a simple way.
#' You can fit univariate and multivariate models for numeric and/or
#' categorical response variables.
#' @template tunable-description
#'
#' @template x-matrix-param
#' @param y (`data.frame` | `vector` | `matrix`) The response (dependent)
#' variable(s). If it is a `data.frame` or a `matrix` with 2 or more columns,
#' a multivariate model is assumed, otherwise a univariate model. In
#' univariate models, if `y` is `character`, `logical` or `factor`, a
#' categorical response is assumed; with only two classes a binary response is
#' assumed, and with more than two classes a categorical response. When the
#' response variable is numeric with only integer values greater than or equal
#' to zero, a count response is assumed, and a continuous response otherwise.
#' In multivariate models the response variables can be of different types,
#' which results in mixed models.
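#'
#' A hypothetical illustration of the response types implied by different `y`
#' values (the vectors below are made-up examples):
#'
#' ```r
#' y_continuous <- c(1.53, 0.24, 2.71)            # numeric, non-integer: continuous
#' y_counts <- c(0L, 3L, 1L, 5L)                  # non-negative integers: count
#' y_binary <- factor(c("Yes", "No", "Yes"))      # two classes: binary
#' y_categorical <- factor(c("A", "B", "C", "A")) # more than two classes: categorical
#' ```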
#' @param learning_rate (`numeric`) (__tunable__) This hyperparameter controls
#' how much to change the model in response to the estimated error each time
#' the model weights are updated. 0.001 by default.
#' @param epochs_number (`numeric`) (__tunable__) An arbitrary cutoff, generally
#' defined as "one pass over the entire dataset", used to separate training
#' into distinct phases, which is useful for logging and periodic evaluation.
#' 500 by default.
#' @param batch_size (`numeric`) (__tunable__) A hyperparameter of gradient
#' descent that controls the number of training samples to work through before
#' the model's internal parameters are updated. 32 by default.
#' @param layers (`list`) The hidden layers. It must be a `list` of `list`s
#' where each entry represents a hidden layer in the neural network. Each
#' inner `list` can contain the following fields with a vector of values:
#'
#' * `"neurons_number"`: (`numeric`) (__tunable__) The number of neurons in that
#' layer. 50 by default.
#' * `"neurons_proportion"`: (`numeric`) (__tunable__) Similar to
#' `"neurons_number"` but the provided values will be the proportion specified
#' times the number of columns in `x`, so a value of 1 means "use as many
#' neurons as columns in `x`", 0.5 means use as neurons number the half of
#' number of columns in `x`. This is combined with the values of
#' `"neurons_number"` for tuning. `NULL` by default.
#' * `"activation"`: (`character`) (__tunable__) (case not sensitive) The name
#' of the activation function to apply in this layer. The available activation
#' functions are `"linear"`, `"relu"`, `"elu"`, `"selu"`, `"hard_sigmoid"`,
#' `"sigmoid"`, `"softmax"`, `"softplus"`, `"softsign"`, `"tanh"`,
#' `"exponential"`. This hyperparameter can only be tuned with grid search
#' tuning, with bayesian optimization a fixed value have to be provided.
#' `"relu"` by default.
#' * `"dropout"`: (`numeric`) (__tunable__) The proportion of neurons randomly
#' selected and set to 0 at each step during training process, which helps
#' prevent overfitting. 0 by default.
#' * `"lasso_penalty"`: (`numeric`) (__tunable__) The regularization value
#' between \[0, 1\] for penalizing that layer with Lasso (a.k.a L1) penalty. 0
#' by default (no penalty).
#' * `"ridge_penalty"`: (`numeric`) (__tunable__) The regularization value
#' between \[0, 1\] for penalizing that layer with Ridge (a.k.a L2) penalty.
#' Note that if both penalization params (Ridge and Lasso) are sent, the
#' ElasticNet penalization is implemented, that is a combination of both of
#' them. 0 by default (no penalty).
#'
#' You can provide as many `list`s as you want, each of them representing a
#' hidden layer, and you do not need to provide all the fields. If a field is
#' not provided, the default value described above is used. By default the
#' following `list` is used:
#'
#' `list(
#' list(
#' neurons_number = 50,
#' neurons_proportion = NULL,
#' activation = "relu",
#' dropout = 0,
#' ridge_penalty = 0,
#' lasso_penalty = 0
#' )
#' )`
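#'
#' As a hypothetical tuning example, each field accepts a vector of values to
#' be evaluated during tuning, so a two-hidden-layer specification could look
#' like the following (the specific values are illustrative only):
#'
#' ```r
#' layers <- list(
#'   # First hidden layer: tune the number of neurons and the activation
#'   list(neurons_number = c(50, 100), activation = c("relu", "tanh")),
#'
#'   # Second hidden layer: tune the dropout, keep the other fields at their defaults
#'   list(neurons_number = 20, dropout = c(0, 0.2))
#' )
#' ```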
#' @param output_penalties (`list`) The penalty values for the output layer. The
#' list can contain the following two fields:
#'
#' * `"lasso_penalty"`: (`numeric`) (__tunable__) The regularization value
#' between \[0, 1\] for penalizing that layer with Lasso (a.k.a L1) penalty. 0
#' by default (no penalty).
#' * `"ridge_penalty"`: (`numeric`) (__tunable__) The regularization value
#' between \[0, 1\] for penalizing that layer with Ridge (a.k.a L2) penalty.
#' Note that if both penalization params (Ridge and Lasso) are sent, the
#' ElasticNet penalization, is implemented that is a combination of both of
#' them. 0 by default (no penalty).
#'
#' You do not have to provide both values; if one of them is not provided, the
#' default value is used. By default the following `list` is used:
#'
#' `list(
#' ridge_penalty = 0,
#' lasso_penalty = 0
#' )`
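#'
#' For example, to tune the output layer penalties you could provide vectors of
#' candidate values (the values below are illustrative only):
#'
#' ```r
#' output_penalties <- list(
#'   lasso_penalty = c(0, 0.1),
#'   ridge_penalty = c(0, 0.1)
#' )
#' ```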
#' @template cv-tune-params
#' @param optimizer (`character(1)`) (case insensitive) Algorithm used to
#' reduce the loss function and update the weights in backpropagation. The
#' available options are `"adadelta"`, `"adagrad"`, `"adamax"`, `"adam"`,
#' `"nadam"`, `"rmsprop"` and `"sgd"`. `"adam"` by default.
#' @param loss_function (`character(1)`) (case insensitive) The name of the
#' loss function the model will seek to minimize during training and tuning.
#' You can find the complete list of available loss functions in the Details
#' section below. This parameter can be used only in univariate analysis.
#' `NULL` by default, which selects one automatically based on the type of the
#' response variable `y`: `"mean_squared_error"` for continuous, `"poisson"`
#' for counts, `"binary_crossentropy"` for binary and
#' `"categorical_crossentropy"` for categorical.
#' @param with_platt_scaling (`logical(1)`) Should Platt scaling be used to fit
#' the model and adjust the predictions? Only available for univariate models
#' with a numeric or binary response variable. For more information, see the
#' Details section below. `FALSE` by default.
#' @param platt_proportion (`numeric(1)`) The proportion of individuals used to
#' fit the linear model required for Platt scaling. Note that this parameter
#' is used only when `with_platt_scaling` is `TRUE`. 0.3 by default.
#' @param shuffle (`logical(1)`) Should the training data be shuffled before
#' each epoch? `TRUE` by default.
#' @param early_stop (`logical(1)`) Should the model stop training when the loss
#' function has stopped improving? `FALSE` by default.
#' @param early_stop_patience (`numeric(1)`) The number of epochs with no
#' improvement after which training will be stopped. Note that this parameter
#' is used only when `early_stop` is `TRUE`. 50 by default.
#' @template other-base-params
#'
#' @template details-no-variance
#' @template details-remove-nas
#' @template details-tuning
#' @details
#' __Important:__ Unlike the other models, when tuning deep learning models
#' steps 6 and 7 of the algorithm are omitted. Instead, the `train` and `test`
#' datasets are sent to `keras`, the first one to fit the model and the second
#' one to compute the loss function at the end of each epoch, so the value
#' saved in step 8 is the validation loss returned by `keras` in the last
#' epoch. The `tune_loss_function` parameter cannot be used in the
#' `deep_learning` function since the same loss function evaluated at each
#' epoch and specified in the `loss_function` parameter is used for tuning too.
#'
#' ## Last (output) layer
#'
#' By default this function selects the activation function and the number of
#' neurons of the last layer of the model based on the type(s) of the response
#' variable(s). For continuous responses the `"linear"` (identity) activation
#' function is used with one neuron, for count responses `"exponential"` with
#' one neuron, for binary responses `"sigmoid"` with one neuron and for
#' categorical responses `"softmax"` with as many neurons as there are
#' categories.
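#'
#' As a conceptual sketch only (not the internal implementation), the last
#' layer built for a categorical response with 3 classes, assuming 10 input
#' columns and the default hidden layer, would be equivalent to something like:
#'
#' ```r
#' library(keras)
#'
#' # Hypothetical network: one hidden layer plus an output layer that uses
#' # "softmax" with as many neurons as categories.
#' sketch <- keras_model_sequential() %>%
#'   layer_dense(units = 50, activation = "relu", input_shape = c(10)) %>%
#'   layer_dense(units = 3, activation = "softmax")
#' ```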
#'
#' ## Loss functions
#'
#' The available options for the `loss_function` parameter are:
#'
#' _Probabilistic losses_
#'
#' * `"binary_crossentropy"`
#' * `"categorical_crossentropy"`
#' * `"sparse_categorical_crossentropy"`
#' * `"poisson"`
#' * `"kl_divergence"`
#'
#' _Regression losses_
#'
#' * `"mean_squared_error"`
#' * `"mean_absolute_error"`
#' * `"mean_absolute_percentage_error"`
#' * `"mean_squared_logarithmic_error"`
#' * `"cosine_similarity"`
#' * `"huber"`
#' * `"log_cosh"`
#'
#' _Hinge losses for "maximum-margin" classification_
#'
#' * `"hinge"`
#' * `"squared_hinge"`
#' * `"categorical_hinge"`
#'
#' ## Platt scaling
#'
#' Platt scaling is a way of improving the predictions of deep learning models
#' that uses a calibration based on a model that is already trained, applied
#' via a post-processing operation.
#'
#' After tuning, Platt scaling divides the dataset into `Training` and
#' `Calibration` sets. It then uses `Training` to fit the deep learning model
#' with the best hyperparameter combination and, with this model, computes the
#' predictions for the `Calibration` set. Finally, with the predicted and
#' observed values, a linear model is fitted (observed as a function of
#' predicted); this linear model is the calibration. When a new prediction is
#' made, the deep learning model is used first and the resulting predicted
#' value is then calibrated with the linear model.
#'
#' Note that Platt scaling calibration only works for numeric and binary
#' response variables of univariate models.
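#'
#' The following is a minimal sketch of the calibration idea with simulated
#' values; it is illustrative only and does not reproduce the internal
#' implementation:
#'
#' ```r
#' set.seed(7)
#'
#' # Hypothetical predictions of the fitted model on the Calibration set and
#' # the corresponding observed values.
#' predicted <- runif(100)
#' observed <- 0.8 * predicted + rnorm(100, sd = 0.05)
#'
#' # The calibration is a linear model of observed as a function of predicted.
#' calibration <- lm(observed ~ predicted)
#'
#' # A new prediction from the deep learning model is then calibrated.
#' new_prediction <- 0.42
#' predict(calibration, data.frame(predicted = new_prediction))
#' ```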
#'
#' @template return-model
#'
#' @seealso [predict.Model()]
#' @family models
#'
#' @example inst/examples/deep_learning.R
#'
#' @export
deep_learning <- function(x, y,
                          learning_rate = 0.001,
                          epochs_number = 500,
                          batch_size = 32,
                          layers = list(
                            list(
                              neurons_number = 50,
                              neurons_proportion = NULL,
                              activation = "relu",
                              dropout = 0,
                              ridge_penalty = 0,
                              lasso_penalty = 0
                            )
                          ),
                          output_penalties = list(
                            ridge_penalty = 0,
                            lasso_penalty = 0
                          ),
                          tune_type = "Grid_search",
                          tune_cv_type = "K_fold",
                          tune_folds_number = 5,
                          tune_testing_proportion = 0.2,
                          tune_folds = NULL,
                          tune_grid_proportion = 1,
                          tune_bayes_samples_number = 10,
                          tune_bayes_iterations_number = 10,
                          optimizer = "adam",
                          loss_function = NULL,
                          with_platt_scaling = FALSE,
                          platt_proportion = 0.3,
                          shuffle = TRUE,
                          early_stop = FALSE,
                          early_stop_patience = 50,
                          validate_params = TRUE,
                          seed = NULL,
                          verbose = TRUE) {
  assert_logical(validate_params, len = 1, any.missing = FALSE)

  is_multivariate <- NCOL(y) > 1

  if (validate_params) {
    validate_deep_learning(
      x = x,
      y = y,
      is_multivariate = is_multivariate,
      learning_rate = learning_rate,
      epochs_number = epochs_number,
      batch_size = batch_size,
      layers = layers,
      output_penalties = output_penalties,
      tune_type = tune_type,
      tune_cv_type = tune_cv_type,
      tune_folds_number = tune_folds_number,
      tune_testing_proportion = tune_testing_proportion,
      tune_folds = tune_folds,
      tune_grid_proportion = tune_grid_proportion,
      tune_bayes_samples_number = tune_bayes_samples_number,
      tune_bayes_iterations_number = tune_bayes_iterations_number,
      optimizer = optimizer,
      loss_function = loss_function,
      with_platt_scaling = with_platt_scaling,
      platt_proportion = platt_proportion,
      shuffle = shuffle,
      early_stop = early_stop,
      early_stop_patience = early_stop_patience,
      seed = seed,
      verbose = verbose
    )
  }
  old_random_state <- NULL
  if (!is.null(seed)) {
    old_random_state <- get_rand_state()
    # set_random_seed also sets the R seed
    py_hush(set_random_seed(seed))

    warning(
      "When you use a seed, GPU parallelism is disabled since it can result ",
      "in non-deterministic execution patterns, so if you have a GPU in your ",
      "computer and you want to use it for parallelism, do not use a seed."
    )
  }
  on.exit(set_rand_state(old_random_state))
  start_time <- Sys.time()

  # Build the model object with the provided data and hyperparameters
  model <- DeepLearningModel$new(
    x = x,
    y = y,
    is_multivariate = is_multivariate,
    learning_rate = learning_rate,
    epochs_number = epochs_number,
    batch_size = batch_size,
    layers = layers,
    output_penalties = output_penalties,
    tune_type = tune_type,
    tune_cv_type = tune_cv_type,
    tune_folds_number = tune_folds_number,
    tune_testing_proportion = tune_testing_proportion,
    tune_folds = tune_folds,
    tune_grid_proportion = tune_grid_proportion,
    tune_bayes_samples_number = tune_bayes_samples_number,
    tune_bayes_iterations_number = tune_bayes_iterations_number,
    optimizer = optimizer,
    loss_function = loss_function,
    with_platt_scaling = with_platt_scaling,
    platt_proportion = platt_proportion,
    shuffle = shuffle,
    early_stop = early_stop,
    early_stop_patience = early_stop_patience
  )

  # Fit (and tune) the model; output is shown or hidden according to verbose
  wrapper_function <- get_verbose_function(verbose)
  wrapper_function(py_capture_output(py_suppress_warnings(model$fit())))

  # Record the total execution time
  end_time <- Sys.time()
  model$execution_time <- difftime(end_time, start_time)
  wrapper_function(print_model_time_execution(model$execution_time))

  return(model)
}