#' Add RPART forecast model
#'
#' \code{add_rpart_forecast_model} is a function to add a single RPART tree
#' forecast model to a (named) list of forecast models. The forecast model has
#' hyper-parameters that are automatically tuned.
#'
#' @param fc_models A named list of forecast models, with for each forecast
#' model a list with the model itself and a table with forecast values.
#' @param ts_object_train A time series object, which contains only the training
#' data.
#' @param ts_object_valid A time series object, which contains the validation
#' data. This is used for multivariate frameworks, thus it should have the
#' forecasted/actual values of the external regressors as well.
#' @param fc_name A character string specifying the name to be used for the new
#' model that is added to the list of existing forecast models.
#' @param model_type A character string indicating whether a univariate model
#' (without external regressors) or a multivariate model (with external
#' regressors) should be estimated.
#' @param periods_ahead A positive integer value indicating the number of
#' periods to forecast ahead.
#' @param periods_history A positive integer value indicating the number of
#' historic datapoints to use for training, which is only relevant for
#' specific forecast methods such as drift and mean.
#' @param verbose Boolean, indicating whether or not the function should print
#' messages while running.
#'
#' @return A named list of forecast models, with for each forecast model a list
#' with the model itself and a table with forecast values.
#'
#' @importFrom magrittr '%>%'
#' @importFrom timetk tk_augment_timeseries_signature
#' @importFrom purrr pmap_dbl
#' @import tibble
#' @import dplyr
#' @importFrom crayon make_style bold italic bgRed red green blue
#' @import rpart
#' @importFrom tstools date_to_period period_delta period_to_last_day
#' transform_data_to_ts_object
#'
#' @examples
#' ts_object_train <- tstools::initialize_ts_forecast_data(
#' data = dummy_gasprice,
#' date_col = "year_month",
#' col_of_interest = "gasprice",
#' group_cols = c("state", "oil_company"),
#' xreg_cols = c("spotprice", "gemprice")
#' ) %>%
#' dplyr::filter(grouping == "state = New York & oil_company = CompanyA") %>%
#' dplyr::slice(1:189) %>%
#' tstools::transform_data_to_ts_object()
#' ts_object_valid <- tstools::initialize_ts_forecast_data(
#' data = dummy_gasprice,
#' date_col = "year_month",
#' col_of_interest = "gasprice",
#' group_cols = c("state", "oil_company"),
#' xreg_cols = c("spotprice", "gemprice")
#' ) %>%
#' dplyr::filter(grouping == "state = New York & oil_company = CompanyA") %>%
#' dplyr::slice(190:191) %>%
#' tstools::transform_data_to_ts_object()
#' add_rpart_forecast_model(
#' fc_models = list(),
#' ts_object_train = ts_object_train,
#' ts_object_valid = ts_object_valid,
#' fc_name = "Example_RPART",
#' model_type = "multivariate",
#' periods_ahead = 2
#' )
add_rpart_forecast_model <- function(fc_models, ts_object_train, ts_object_valid = NULL,
fc_name, model_type = c("univariate", "multivariate"),
periods_ahead = 1, periods_history = Inf, keep_fc_model_objects = FALSE,
verbose = FALSE, log_message = "") {
# Check to make sure fc_models is a list
if (!is.list(fc_models)) stop(paste0("Object 'fc_models' is of class ",paste0(class(fc_models), collapse = "/")," ... \n\n Put in a list!"))
# Check to make sure ts_object_train is a times series object
if (!is.ts(ts_object_train)) stop(paste0("Object 'ts_object_train' is of class ",paste0(class(ts_object_train), collapse = "/")," ... \n\n Put in a time series object!"))
# Check to make sure fc_name is a string
if (!is.character(fc_name)) stop(paste0("Parameter 'fc_name' is of class ",paste0(class(fc_name), collapse = "/")," ... \n\n Put in a character string!"))
# Check to make sure ts_valid_data has enough observations if multivariate framework
model_type <- match.arg(model_type)
if (model_type == "multivariate") {
if (is.null(ts_object_valid)) {stop("The parameter 'ts_object_valid' is required for a multivariate model!")} else {
if (nrow(ts_object_valid) < periods_ahead) {
message <- paste0("Number of observations in 'ts_object_valid' (", nrow(ts_object_valid), ") is smaller than required periods_ahead (", periods_ahead, ")!")
stop(message)
}
}
}
# Check to make sure periods_ahead is a non-negative whole number
if (!(is.numeric(periods_ahead) & periods_ahead > 0 & periods_ahead == suppressWarnings(as.integer(periods_ahead)))) {
message <- paste0("The parameter 'periods_ahead' should be a positive integer value, instead of '",periods_ahead,"' ... ")
stop(message)
}
# Check to make sure periods_history is a non-negative whole number
if (periods_history != Inf) {
if (!(is.numeric(periods_history) & periods_history > 0 & periods_history == suppressWarnings(as.integer(periods_history)))) {
message <- paste0("The parameter 'periods_history' should be a positive integer value, instead of '",periods_history,"' ... ")
stop(message)
}
}
# Return fc_models as is if forecast is already available
if (fc_name %in% names(fc_models)) {
return(fc_models)
}
# Record start time
start_time <- Sys.time()
# Determine the months for which a forecast needs to be made
fc_periods <- get_fc_periods(ts_object_train, periods_ahead)
# Reduce length of the training set
ts_object_train <- trim_ts_object(
ts_object = ts_object_train,
max_length = periods_history,
from_left = F
)
# Run for univariate case
if (model_type == "univariate") {
# Decompose for ML
ML_train_data <- decompose_ts_object_for_ML(ts_object_train)
# Calculate first order difference of col_of_interest
ML_train_data <- ML_train_data %>%
dplyr::mutate(col_of_interest = col_of_interest - dplyr::lag(col_of_interest)) %>%
dplyr::filter(!is.na(col_of_interest))
# Create ML fc data
ML_fc_data <- tibble::tibble(
period = c(
ts_object_to_periods(ts_object_train),
fc_periods
) %>%
tstools::period_to_last_day()
) %>%
timetk::tk_augment_timeseries_signature() %>%
dplyr::filter(tstools::date_to_period(period) %in% fc_periods)
}
# Run for multivariate case
if (model_type == "multivariate") {
# Combine ts_objects into one
ts_object_full <- union_ts_objects(
ts_object_1 = ts_object_train,
ts_object_2 = ts_object_valid
)
# Decompose for ML
ML_full_data <- decompose_ts_object_for_ML(ts_object_full)
# Split of train data
ML_train_data <- ML_full_data %>%
dplyr::filter(!(period %in% tstools::period_to_last_day(fc_periods)))
# Calculate first order difference for col_of_interest
ML_train_data <- ML_train_data %>%
dplyr::mutate(col_of_interest = col_of_interest - dplyr::lag(col_of_interest)) %>%
dplyr::filter(!is.na(col_of_interest))
# Create ML fc data
ML_fc_data <- ML_full_data %>%
dplyr::select(-col_of_interest) %>%
dplyr::filter(period %in% tstools::period_to_last_day(fc_periods))
}
# Take out original_col_of_interest in ML datasets
if ("original_col_of_interest" %in% colnames(ML_train_data)) {
ML_train_data <- ML_train_data %>%
dplyr::select(-original_col_of_interest)
}
if ("original_col_of_interest" %in% colnames(ML_fc_data)) {
ML_fc_data <- ML_fc_data %>%
dplyr::select(-original_col_of_interest)
}
# Set seed to enable reproduction of results
set.seed(42)
# Run each parameter combination and get resulting MAPE via the rpart_fit function
create_runs_from_parameter_set <- function(parameters) {
suppressWarnings(
try(
expr = {
parameters %>%
dplyr::mutate(
mape = purrr::pmap_dbl(
.l = list(
"minsplit" = minsplit,
"maxdepth" = maxdepth,
"cp" = cp,
"xval" = xval
),
.f = rpart_fit,
ML_data = ML_train_data
)
)
},
silent = TRUE
)
)
}
# Create grid of "default" RPART tree parameters
parameters <- expand.grid(
minsplit = c(as.integer((nrow(ML_train_data)/2)/3), 2),
maxdepth = c(2, 10, 30),
cp = c(0.001, 0.0001, 0.00001),
xval = c(5, 10),
stringsAsFactors = FALSE
)
# Create initial runs from grid search
runs <- create_runs_from_parameter_set(parameters)
# Check for errors
if (grepl("Error",runs[1])) {
# Show error messages
if (verbose) {
log_message <- paste0(red(bold(format(Sys.time(), "%H:%M:%S"))), " - ", log_message, ":")
message <- paste(log_message, bgRed("ERROR"), "for calling forecast method:", bold("rpart (initial grid-search)"))
ParallelLogger::logError(message)
message <- paste(log_message, italic(red(runs)))
ParallelLogger::logError(message)
}
# Return without adding a model
return(fc_models)
}
# In case of multiple instances of min(mape), randomly select one
runs <- runs %>%
dplyr::filter(mape == min(mape)) %>%
dplyr::sample_n(1)
# Create grid of parameters around first set of parameters in "run"
parameters <- expand.grid(
minsplit = as.integer(runif(2, min = as.integer(0.5*runs$minsplit), max = as.integer(1.5*runs$minsplit))),
maxdepth = as.integer(runif(2, min = as.integer(0.5*runs$maxdepth), max = min(30, as.integer(1.5*runs$maxdepth)))),
cp = runif(2, min = 0.5*runs$cp, max = 1.5*runs$cp),
xval = as.integer(runif(2, min = 0.5*runs$xval, max = 1.5*runs$xval)),
stringsAsFactors = FALSE
)
# Add parameters from previous run
parameters <- runs %>%
dplyr::select(-mape) %>%
dplyr::bind_rows(parameters)
# Compare between the last two parameter combinations
runs <- create_runs_from_parameter_set(parameters)
# Check for errors
if (grepl("Error",runs[1])) {
# Show error messages
if (verbose) {
log_message <- paste0(red(bold(format(Sys.time(), "%H:%M:%S"))), " - ", log_message, ":")
message <- paste(log_message, bgRed("ERROR"), "for calling forecast method:", bold("rpart (zoomed-in grid-search)"))
ParallelLogger::logError(message)
message <- paste(log_message, italic(red(runs)))
ParallelLogger::logError(message)
}
# Return without adding a model
return(fc_models)
}
# In case of multiple instances of min(mape), randomly select one
runs <- runs %>%
dplyr::filter(mape == min(mape)) %>%
dplyr::sample_n(1)
# Run fine-tuned RPART tree regression
rpart_init <- suppressWarnings(
try(
expr = {
rpart::rpart(
formula = col_of_interest ~ .,
data = ML_train_data,
control = rpart::rpart.control(
minsplit = runs$minsplit,
maxdepth = runs$maxdepth,
cp = runs$cp,
xval = runs$xval
)
)
},
silent = TRUE
)
)
# Check for errors
if (grepl("Error",rpart_init[1])) {
# Show error messages
if (verbose) {
log_message <- paste0(red(bold(format(Sys.time(), "%H:%M:%S"))), " - ", log_message, ":")
message <- paste(log_message, bgRed("ERROR"), "for calling forecast method:", bold("rpart (final selected model)"))
ParallelLogger::logError(message)
message <- paste(log_message, italic(red(rpart_init)))
ParallelLogger::logError(message)
}
# Return without adding a model
return(fc_models)
}
# Run forecast
fc_rpart <- suppressWarnings(
try(
expr = {
predict(rpart_init, ML_fc_data)
},
silent = TRUE
)
)
# Check for errors
if (grepl("Error",fc_rpart[1])) {
# Show error messages
if (verbose) {
log_message <- paste0(red(bold(format(Sys.time(), "%H:%M:%S"))), " - ", log_message, ":")
message <- paste(log_message, bgRed("ERROR"), "for calling forecast method:", bold("rpart (forecast)"))
ParallelLogger::logError(message)
message <- paste(log_message, italic(red(fc_rpart)))
ParallelLogger::logError(message)
}
# Return without adding a model
return(fc_models)
}
# Create fc_date and periods
fc_date <- tstools::period_delta(min(fc_periods), -1)
# Calculate duraction in seconds
duration <- paste0(format.default(round(as.numeric(difftime(Sys.time(), start_time, units = 'sec')), 1), nsmall = 1), "s")
# Message
if (verbose) {
# Create style
ING_orange <- make_style("#FF6200")
# Create log_message
log_message <- paste0(ING_orange(bold(format(Sys.time(), "%H:%M:%S"))), " - ", log_message)
log_message <- paste0(log_message, "(in ", green(duration), "): ", blue("rpart"))
ParallelLogger::logInfo(log_message)
}
# Get forecast values
fc_values <- as.vector(tail(ts_object_train[,"col_of_interest"], 1)) + as.vector(cumsum(fc_rpart))
# Create new forecast model
if (keep_fc_model_objects) {
fc_model <- list(
fc_model = list(
model = rpart_init,
fc_data = tibble::tibble(
fc_date = fc_date,
period = fc_periods,
fc_value = as.numeric(fc_values)
),
ML_train_data = ML_train_data
)
)
} else {
fc_model <- list(
fc_model = list(
model = rpart_init$variable.importance,
fc_data = tibble::tibble(
fc_date = fc_date,
period = fc_periods,
fc_value = as.numeric(fc_values)
)
)
)
}
names(fc_model) <- fc_name
# Combine within list of existing forecast models and return
fc_models %>%
append(fc_model) %>%
return()
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.