Nothing
# H20 AUTOML ----
#' General Interface for H2O AutoML Time Series Models
#'
#' `automl_reg()` is a way to generate a _specification_ of a AutoML model
#' before fitting and allows the model to be created using
#' different packages. Currently the only package is `h2o`.
#'
#' @param mode A single character string for the type of model.
#' The only possible value for this model is "regression".
#'
#' @return
#' An updated model specification with classes `automl_reg` and `model_spec`.
#'
#' @details
#'
#' Other options and arguments can be set using `set_engine()`.
#'
#' The model can be created using the fit() function using the following engines:
#'
#' - __H2O__ "h2o" (the default)
#'
#' @section Engine: h2o
#'
#' The engine uses `h2o.automl()`.
#'
#' @section Fit Details:
#'
#' The following features are REQUIRED to be available in the incoming data for the
#' fitting process.
#'
#' - __Fit:__ `fit(y ~ ., data)`: Includes a target feature that is a
#' function of a "date" feature.
#'
#' - __Predict:__ `predict(model, new_data)` where `new_data` contains
#' a column named "date".
#'
#' __Date and Date-Time Variable__
#'
#' It's a requirement to have a date or date-time variable as a predictor.
#' The `fit()` interface accepts date and date-time features and handles them internally.
#'
#'
#' @seealso [fit.model_spec()], [set_engine()]
#'
#' @examples
#' \dontrun{
#' library(tidymodels)
#' library(modeltime.h2o)
#' library(h2o)
#' library(tidyverse)
#' library(timetk)
#'
#' data_tbl <- walmart_sales_weekly %>%
#' select(id, Date, Weekly_Sales)
#'
#' splits <- time_series_split(
#' data_tbl,
#' assess = "3 month",
#' cumulative = TRUE
#' )
#'
#' recipe_spec <- recipe(Weekly_Sales ~ ., data = training(splits)) %>%
#' step_timeseries_signature(Date)
#'
#' train_tbl <- bake(prep(recipe_spec), training(splits))
#' test_tbl <- bake(prep(recipe_spec), testing(splits))
#'
#' # Initialize H2O
#'
#' h2o.init(
#' nthreads = -1,
#' ip = 'localhost',
#' port = 54321
#' )
#'
#'
#'
#' # ---- MODEL SPEC ----
#' model_spec <- automl_reg(mode = 'regression') %>%
#' set_engine(
#' engine = 'h2o',
#' max_runtime_secs = 30,
#' max_runtime_secs_per_model = 30,
#' project_name = 'project_01',
#' nfolds = 5,
#' max_models = 1000,
#' exclude_algos = c("DeepLearning"),
#' seed = 786
#' )
#'
#' model_spec
#'
#' # ---- TRAINING ----
#' # Important: Make sure the date is included as regressor.
#'
#' # This training process should take 30-40 seconds
#' model_fitted <- model_spec %>%
#' fit(Weekly_Sales ~ ., data = train_tbl)
#'
#' model_fitted
#'
#' # ---- PREDICT ----
#' # - IMPORTANT: New Data must have date feature
#'
#' predict(model_fitted, test_tbl)
#'
#' # Shutdown H2O when Finished.
#' # Make sure to save any work before.
#' h2o.shutdown(prompt = FALSE)
#' }
#'
#' @export
automl_reg <- function(mode = "regression") {
args <- list()
parsnip::new_model_spec(
"automl_reg",
args = args,
eng_args = NULL,
mode = mode,
method = NULL,
engine = "h2o"
)
}
#' @export
print.automl_reg <- function(x, ...) {
cat("H2O AutoML Model Specification (", x$mode, ")\n\n", sep = "")
parsnip::model_printer(x, ...)
if(!is.null(x$method$fit$args)) {
cat("Model fit template:\n")
print(parsnip::show_call(x))
}
invisible(x)
}
#' @export
#' @importFrom parsnip translate
translate.automl_reg <- function(x, engine = x$engine, ...) {
if (is.null(engine)) {
message("Used `engine = 'h2o'` for translation.")
engine <- "h2o"
}
x <- parsnip::translate.default(x, engine, ...)
x
}
# FIT -----
#' H2O AutoML Modeling Function (Bridge)
#'
#' @param x A dataframe of xreg (exogenous regressors)
#' @param y A numeric vector of values to fit
#' @param ... Additional arguments passed to `h2o.automl()`.
#'
#' @return
#' A fitted model with class `automl_fit_impl` and `modeltime_bridge`.
#'
#' @export
automl_fit_impl <- function(x, y, ...) {
others <- list(...)
# y <- all.vars(formula)[1]
# x <- attr(stats::terms(formula, data = data), "term.labels")
# X & Y NAMES
y_nm <- "target"
x_nms <- names(x)
# CONSTRUCT TRAINING FRAME
if (!inherits(data, "H2OFrame")) {
message("Converting to H2OFrame...")
training_frame <- tibble::tibble(x) %>%
dplyr::mutate(target = y) %>%
dplyr::mutate_if(is.ordered, ~{factor(.x, ordered = F)}) %>%
h2o::as.h2o()
} else {
training_frame <- x
training_frame["target"] <- y
}
# INDEX & PERIOD
# Determine Period, Index Col, and Index
index_tbl <- tibble::as_tibble(x) %>% modeltime::parse_index_from_data()
# period <- modeltime::parse_period_from_index(index_tbl, 'auto')
idx_col <- names(index_tbl)
idx <- timetk::tk_index(index_tbl)
# TRAIN H2O AUTOML
message("\nTraining H2O AutoML...")
args <- list(
x = x_nms,
y = y_nm,
training_frame = training_frame
)
res <- make_h2o_call("h2o.automl", args, others)
# LEADERBOARD
leaderboard_tbl <- tibble::as_tibble(res@leaderboard)
model_id <- leaderboard_tbl %>%
dplyr::slice(1) %>%
dplyr::pull(model_id)
model <- h2o::h2o.getModel(model_id)
# MODELING FUNCTION
.f <- purrr::compose(
purrr::partial(h2o::h2o.predict, newdata = training_frame),
tibble::as_tibble,
purrr::as_vector,
base::as.numeric,
.dir = 'forward'
)
yhat <- .f(model)
# MESSAGING
message("\n\nLeaderboard: ")
print(res@leaderboard)
message(glue::glue("\n\nUsing top model: {model_id}"))
# RETURN
modeltime::new_modeltime_bridge(
class = "automl_fit_impl",
# Models
models = list(
model_1 = model
),
# Data - .actual, .fitted, and .residuals columns
data = tibble::tibble(
!!idx_col := idx,
.actual = base::as.numeric(y),
.fitted = yhat,
.residuals = base::as.numeric(y) - yhat
),
extras = list(
leaderboard = leaderboard_tbl,
x = x_nms,
y = y_nm
),
# Description - Convert arima model parameters to short description
desc = stringr::str_glue('H2O AutoML - {stringr::str_to_title(model@algorithm)}')
)
}
# MAPS TO THE LOCATION OF THE H2O MODEL SO THE CORRECT INFO IS SWAPPED
swap_h2o_model.automl_fit_impl <- function(object, h2o_model) {
object$models$model_1 <- h2o_model
object$desc <- stringr::str_glue('H2O AutoML - {stringr::str_to_title(h2o_model@algorithm)}')
return(object)
}
# CALL TO H2O
make_h2o_call <- function(.fn, args, others) {
# remove args with NULLs
args <- args[lengths(args) != 0]
# create unevaluated model call
model_call <- rlang::call2(.fn = .fn, !!!args, .ns = "h2o")
# add others if not NULL
if (length(others) > 0) {
model_call <- rlang::call_standardise(model_call)
model_call <- rlang::call_modify(model_call, !!!others)
}
rlang::eval_tidy(model_call)
}
#' @export
print.automl_fit_impl <- function(x, ...) {
cat("\n")
cat(x$desc)
cat("\n")
cat("--------")
cat("\nModel: ")
print(x$models$model_1)
invisible(x)
}
# PREDICT ----
#' @export
predict.automl_fit_impl <- function(object, new_data, ...) {
automl_predict_impl(object, new_data, ...)
}
#' Bridge prediction Function for H2O AutoML Models
#'
#' @inheritParams parsnip::predict.model_fit
#' @param ... Additional arguments passed to `h2o::h2o.predict()`
#'
#' @return
#' A vector of values (predictions) with class `numeric`.
#'
#' @export
automl_predict_impl <- function(object, new_data, ...) {
# PREPARE INPUTS
model <- object$models$model_1
if (!inherits(new_data, "H2OFrame")) {
message("Converting to H2OFrame...")
new_data <- new_data %>%
dplyr::mutate_if(is.ordered, ~{factor(.x, ordered = F)}) %>%
h2o::as.h2o()
}
preds <- h2o::h2o.predict(model, new_data, ...) %>%
tidyr::as_tibble() %>%
dplyr::pull(predict)
return(preds)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.