package_overview.R
In forecastML: Time Series Forecasting with Machine Learning Methods

## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(fig.width = 7.15, fig.height = 4)
knitr::opts_knit$set(fig.width = 7.15, fig.height = 4)

## ---- eval = FALSE------------------------------------------------------------
#  install.packages("forecastML")

## ---- warning = FALSE, message = FALSE----------------------------------------
library(forecastML)
library(dplyr)
library(DT)
library(ggplot2)
library(glmnet)
library(randomForest)

data("data_seatbelts", package = "forecastML")
data <- data_seatbelts

date_frequency <- "1 month"  # Time step frequency.

# The date indices, which don't come with the stock dataset, should not be included in the modeling data.frame.
dates <- seq(as.Date("1969-01-01"), as.Date("1984-12-01"), by = date_frequency)

data$PetrolPrice <- round(data$PetrolPrice, 3)

data <- data[, c("DriversKilled", "kms", "PetrolPrice", "law")]
DT::datatable(head(data, 5))

## -----------------------------------------------------------------------------
data_train <- data[1:(nrow(data) - 12), ]
data_test <- data[(nrow(data) - 12 + 1):nrow(data), ]

## -----------------------------------------------------------------------------
p <- ggplot(data, aes(x = dates, y = DriversKilled))
p <- p + geom_line()
p <- p + geom_vline(xintercept = dates[nrow(data_train)], color = "red", size = 1.1)
p <- p + theme_bw() + xlab("Dataset index")
p

## -----------------------------------------------------------------------------
outcome_col <- 1  # The column index of our DriversKilled outcome.

horizons <- c(1, 3, 6, 12)  # 4 models that forecast 1, 1:3, 1:6, and 1:12 time steps ahead.

# A lookback across select time steps in the past. Feature lags 1 through 9, for instance, will be 
# silently dropped from the 12-step-ahead model.
lookback <- c(1:6, 9, 12, 15)

# A non-lagged feature that changes through time whose value we either know (e.g., month) or whose 
# value we would like to forecast.
dynamic_features <- "law"

data_list <- forecastML::create_lagged_df(data_train,
                                          outcome_col = outcome_col,
                                          type = "train",
                                          horizons = horizons,
                                          lookback = lookback,
                                          date = dates[1:nrow(data_train)],
                                          frequency = date_frequency,
                                          dynamic_features = dynamic_features
                                          )

## -----------------------------------------------------------------------------
DT::datatable(head(data_list$horizon_6, 10), options = list(scrollX = TRUE))

## -----------------------------------------------------------------------------
plot(data_list)

## ---- warnings = FALSE, message = FALSE---------------------------------------
windows <- forecastML::create_windows(lagged_df = data_list, window_length = 12, skip = 48,
                                      window_start = NULL, window_stop = NULL,
                                      include_partial_window = TRUE)
windows

## -----------------------------------------------------------------------------
plot(windows, data_list, show_labels = TRUE)

## -----------------------------------------------------------------------------
# Example 1 - LASSO
# Alternatively, we could define an outcome column identifier argument, say, 'outcome_col = 1' in 
# this function or just 'outcome_col' and then set the argument as 'outcome_col = 1' in train_model().
model_function <- function(data) {
  
  # The 'law' feature is constant during some of our outer-loop validation datasets so we'll 
  # simply drop it so that glmnet converges.
  constant_features <- which(unlist(lapply(data[, -1], function(x) {!(length(unique(x)) > 1)})))
  
  if (length(constant_features) > 1) {
    data <- data[, -c(constant_features + 1)]  # +1 because we're skipping over the outcome column.
  }

  x <- data[, -(1), drop = FALSE]
  y <- data[, 1, drop = FALSE]
  x <- as.matrix(x, ncol = ncol(x))
  y <- as.matrix(y, ncol = ncol(y))

  model <- glmnet::cv.glmnet(x, y, nfolds = 3)
  return(list("model" = model, "constant_features" = constant_features))
}

# Example 2 - Random Forest
# Alternatively, we could define an outcome column identifier argument, say, 'outcome_col = 1' in 
# this function or just 'outcome_col' and then set the argument as 'outcome_col = 1' in train_model().
model_function_2 <- function(data) {

  outcome_names <- names(data)[1]
  model_formula <- formula(paste0(outcome_names,  "~ ."))

  model <- randomForest::randomForest(formula = model_formula, data = data, ntree = 200)
  return(model)
}

## -----------------------------------------------------------------------------
#future::plan(future::multiprocess)

model_results <- forecastML::train_model(data_list, windows, model_name = "LASSO",
                                         model_function, use_future = FALSE)

model_results_2 <- forecastML::train_model(data_list, windows, model_name = "RF", 
                                           model_function_2, use_future = FALSE)

## -----------------------------------------------------------------------------
# Example 1 - LASSO.
prediction_function <- function(model, data_features) {
  
  if (length(model$constant_features) > 1) {  # 'model' was passed as a list.
    data_features <- data_features[, -c(model$constant_features )]
  }

  x <- as.matrix(data_features, ncol = ncol(data_features))

  data_pred <- data.frame("y_pred" = predict(model$model, x, s = "lambda.min"))
  return(data_pred)
}

# Example 2 - Random Forest.
prediction_function_2 <- function(model, data_features) {

  data_pred <- data.frame("y_pred" = predict(model, data_features))
  return(data_pred)
}

## ---- warning = FALSE, message = FALSE----------------------------------------
data_results <- predict(model_results, model_results_2,
                        prediction_function = list(prediction_function, prediction_function_2), 
                        data = data_list)

## -----------------------------------------------------------------------------
data_results$DriversKilled_pred <- round(data_results$DriversKilled_pred, 0)
DT::datatable(head(data_results, 30), options = list(scrollX = TRUE))

## -----------------------------------------------------------------------------
plot(data_results, type = "prediction", horizons = c(1, 6, 12))

## -----------------------------------------------------------------------------
plot(data_results, type = "residual", horizons = c(1, 6, 12))

## -----------------------------------------------------------------------------
plot(data_results, type = "forecast_stability", windows = 3)

## ---- warning = FALSE, message = FALSE----------------------------------------
data_error <- forecastML::return_error(data_results)

# Global error.
data_error$error_global[, -1] <- lapply(data_error$error_global[, -1], round, 1)

DT::datatable(data_error$error_global, options = list(scrollX = TRUE), )

## -----------------------------------------------------------------------------
plot(data_error, type = "window", facet = ~ horizon, horizons = c(1, 6, 12))

## -----------------------------------------------------------------------------
plot(data_error, type = "horizon", facet = ~ horizon, horizons = c(1, 6, 12))

## -----------------------------------------------------------------------------
plot(data_error, type = "global", facet = ~ horizon)

## -----------------------------------------------------------------------------
hyper_function <- function(model) {

  lambda_min <- model$model$lambda.min
  lambda_1se <- model$model$lambda.1se

  data_hyper <- data.frame("lambda_min" = lambda_min, "lambda_1se" = lambda_1se)
  return(data_hyper)
}

## ---- warning = FALSE, message = FALSE----------------------------------------
data_hyper <- forecastML::return_hyper(model_results, hyper_function)

plot(data_hyper, data_results, data_error, type = "stability", horizons = c(1, 6, 12))
plot(data_hyper, data_results, data_error, type = "error", c(1, 6, 12))

## -----------------------------------------------------------------------------
data_forecast_list <- forecastML::create_lagged_df(data_train,
                                                   outcome_col = outcome_col,
                                                   type = "forecast",
                                                   horizons = horizons,
                                                   lookback = lookback,
                                                   date = dates[1:nrow(data_train)],
                                                   frequency = date_frequency,
                                                   dynamic_features = dynamic_features
                                                   )

DT::datatable(head(data_forecast_list$horizon_6), options = list(scrollX = TRUE))

## -----------------------------------------------------------------------------
for (i in seq_along(data_forecast_list)) {
  data_forecast_list[[i]]$law <- 1
}

## ---- warning = FALSE, message = FALSE----------------------------------------
data_forecast <- predict(model_results, model_results_2,  # ... supports any number of ML models.
                         prediction_function = list(prediction_function, prediction_function_2), 
                         data = data_forecast_list)

data_forecast$DriversKilled_pred <- round(data_forecast$DriversKilled_pred, 0)

DT::datatable(head(data_forecast, 10), options = list(scrollX = TRUE))

## -----------------------------------------------------------------------------
plot(data_forecast,
     data_actual = data[-(1:150), ],  # Actuals from the training and test data sets.
     actual_indices = dates[-(1:150)], 
     horizons = c(1, 6, 12))

## -----------------------------------------------------------------------------
data_error <- forecastML::return_error(data_forecast,
                                       data_test = data_test,
                                       test_indices = dates[(nrow(data_train) + 1):length(dates)])

plot(data_error, facet = ~ horizon, type = "window")

plot(data_error, facet = ~ horizon, type = "horizon")

plot(data_error, facet = ~ horizon, type = "global")

## -----------------------------------------------------------------------------
data_list <- forecastML::create_lagged_df(data_train,
                                          outcome_col = outcome_col,
                                          type = "train",
                                          horizons = horizons,
                                          lookback = lookback,
                                          date = dates[1:nrow(data_train)],
                                          frequency = date_frequency,
                                          dynamic_features = dynamic_features
                                          )

## -----------------------------------------------------------------------------
windows <- forecastML::create_windows(data_list, window_length = 0)

plot(windows, data_list, show_labels = TRUE)

## -----------------------------------------------------------------------------
model_results <- forecastML::train_model(data_list, windows,  model_name = "LASSO", model_function)

data_results <- predict(model_results, prediction_function = list(prediction_function), data = data_list)

DT::datatable(head(data_results, 10), options = list(scrollX = TRUE))
plot(data_results, type = "prediction", horizons = c(1, 6, 12))

## ---- warning = FALSE, message = FALSE----------------------------------------
data_error <- forecastML::return_error(data_results)

data_error$error_global[, -1] <- lapply(data_error$error_global[, -1], round, 1)

DT::datatable(head(data_error$error_global), options = list(scrollX = TRUE))

## -----------------------------------------------------------------------------
data_forecast_list <- forecastML::create_lagged_df(data_train,
                                                   outcome_col = outcome_col,
                                                   type = "forecast",
                                                   horizons = horizons,
                                                   lookback = lookback,
                                                   date = dates[1:nrow(data_train)],
                                                   frequency = date_frequency,
                                                   dynamic_features = dynamic_features
                                                   )

for (i in seq_along(data_forecast_list)) {
  data_forecast_list[[i]]$law <- 1
}

data_forecast <- predict(model_results, prediction_function = list(prediction_function), data = data_forecast_list)

plot(data_forecast,
     data_actual = data[-(1:150), ],
     actual_indices = dates[-(1:150)],
     horizons = c(1, 6, 12))

## -----------------------------------------------------------------------------
data_error <- forecastML::return_error(data_forecast, data_test = data_test, 
                                       test_indices = dates[(nrow(data_train) + 1):nrow(data)])

plot(data_error, type = "horizon", facet = ~ horizon)

## ---- message = FALSE, warning = FALSE----------------------------------------
data_combined <- forecastML::combine_forecasts(data_forecast)

# Plot a background dataset of actuals using the most recent data.
data_actual <- data[dates >= as.Date("1980-01-01"), ]
actual_indices <- dates[dates >= as.Date("1980-01-01")]

# Plot all final forecasts plus historical data.
plot(data_combined, data_actual = data_actual, actual_indices = actual_indices)

# Error by forecast horizon.
DT::datatable(return_error(data_combined, data_actual, actual_indices)$error_by_horizon, 
              options = list(scrollX = TRUE))

# Error aggregated across forecast horizons.
DT::datatable(return_error(data_combined, data_actual, actual_indices)$error_global, 
              options = list(scrollX = TRUE))
Any scripts or data that you put into this service are public.
forecastML documentation built on July 8, 2020, 7:27 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
forecastML
Time Series Forecasting with Machine Learning Methods

inst/doc/package_overview.R
In forecastML: Time Series Forecasting with Machine Learning Methods

Try the forecastML package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

forecastML Time Series Forecasting with Machine Learning Methods

inst/doc/package_overview.R In forecastML: Time Series Forecasting with Machine Learning Methods

Try the forecastML package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

forecastML
Time Series Forecasting with Machine Learning Methods

inst/doc/package_overview.R
In forecastML: Time Series Forecasting with Machine Learning Methods