grouped_forecast.R
In forecastML: Time Series Forecasting with Machine Learning Methods

## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(fig.width = 7.15, fig.height = 4, warning = FALSE, message = FALSE)
knitr::opts_knit$set(fig.width = 7.15, fig.height = 4, warning = FALSE, message = FALSE)

## ---- message = FALSE, warning = FALSE----------------------------------------
library(forecastML)
library(dplyr)
library(DT)
library(ggplot2)
library(xgboost)

data("data_buoy_gaps", package = "forecastML")

DT::datatable(head(data_buoy_gaps), options = list(scrollX = TRUE))

## -----------------------------------------------------------------------------
data <- forecastML::fill_gaps(data_buoy_gaps, date_col = 1, frequency = '1 day', 
                              groups = 'buoy_id', static_features = c('lat', 'lon'))

print(list(paste0("The original dataset with gaps in data collection is ", nrow(data_buoy_gaps), " rows."), 
      paste0("The modified dataset with no gaps in data collection from fill_gaps() is ", nrow(data), " rows.")))

## -----------------------------------------------------------------------------
data$day <- lubridate::mday(data$date)
data$year <- lubridate::year(data$date)

## ---- message = FALSE, warning = FALSE----------------------------------------
p <- ggplot(data, aes(x = date, y = wind_spd, color = ordered(buoy_id), group = year))
p <- p + geom_line()
p <- p + facet_wrap(~ ordered(buoy_id), scales = "fixed")
p <- p + theme_bw() + theme(
  legend.position = "none"
) + xlab(NULL)
p

## -----------------------------------------------------------------------------
data$buoy_id <- as.numeric(factor(data$buoy_id))

## -----------------------------------------------------------------------------
outcome_col <- 1  # The column position of our 'wind_spd' outcome (after removing the 'date' column).

horizons <- c(1, 7, 30)  # Forecast 1, 1:7, and 1:30 days into the future.

lookback <- c(1:30, 360:370)  # Features from 1 to 30 days in the past and annually.

dates <- data$date  # Grouped time series forecasting requires dates.
data$date <- NULL  # Dates, however, don't need to be in the input data.

frequency <- "1 day"  # A string that works in base::seq(..., by = "frequency").

dynamic_features <- c("day", "year")  # Features that change through time but which will not be lagged.

groups <- "buoy_id"  # 1 forecast for each group or buoy.

static_features <- c("lat", "lon")  # Features that do not change through time.

## -----------------------------------------------------------------------------
type <- "train"  # Create a model-training dataset.

data_train <- forecastML::create_lagged_df(data, type = type, outcome_col = outcome_col,
                                           horizons = horizons, lookback = lookback,
                                           dates = dates, frequency = frequency,
                                           dynamic_features = dynamic_features,
                                           groups = groups, static_features = static_features, 
                                           use_future = FALSE)

DT::datatable(head(data_train$horizon_1), options = list(scrollX = TRUE))

## ---- message = FALSE, warning = FALSE----------------------------------------
p <- plot(data_train)  # plot.lagged_df() returns a ggplot object.
p <- p + geom_tile(NULL)  # Remove the gray border for a cleaner plot.
p

## ---- message = FALSE, warning = FALSE----------------------------------------
windows <- forecastML::create_windows(data_train, window_length = 365, skip = 730,
                                      include_partial_window = FALSE)

p <- plot(windows, data_train) + theme(legend.position = "none")
p

## ---- message = FALSE, warning = FALSE----------------------------------------
p <- plot(windows, data_train, group_filter = "buoy_id == 1") + 
  theme(legend.position = "none")
p

## -----------------------------------------------------------------------------
# The value of outcome_col can also be set in train_model() with train_model(outcome_col = 1).
model_function <- function(data, outcome_col = 1) {
  
  # xgboost cannot handle missing outcomes data.
  data <- data[!is.na(data[, outcome_col]), ]

  indices <- 1:nrow(data)
  
  set.seed(224)
  train_indices <- sample(1:nrow(data), ceiling(nrow(data) * .8), replace = FALSE)
  test_indices <- indices[!(indices %in% train_indices)]

  data_train <- xgboost::xgb.DMatrix(data = as.matrix(data[train_indices, 
                                                           -(outcome_col), drop = FALSE]),
                                     label = as.matrix(data[train_indices, 
                                                            outcome_col, drop = FALSE]))

  data_test <- xgboost::xgb.DMatrix(data = as.matrix(data[test_indices, 
                                                          -(outcome_col), drop = FALSE]),
                                    label = as.matrix(data[test_indices, 
                                                           outcome_col, drop = FALSE]))

  params <- list("objective" = "reg:linear")
  watchlist <- list(train = data_train, test = data_test)
  
  set.seed(224)
  model <- xgboost::xgb.train(data = data_train, params = params, 
                              max.depth = 8, nthread = 2, nrounds = 30,
                              metrics = "rmse", verbose = 0, 
                              early_stopping_rounds = 5, 
                              watchlist = watchlist)

  return(model)
}

## -----------------------------------------------------------------------------
#future::plan(future::multiprocess)  # Multi-core or multi-session parallel training.

model_results_cv <- forecastML::train_model(lagged_df = data_train,
                                            windows = windows,
                                            model_name = "xgboost",
                                            model_function = model_function, 
                                            use_future = FALSE)

## -----------------------------------------------------------------------------
summary(model_results_cv$horizon_1$window_1$model)

## -----------------------------------------------------------------------------
# If 'model' is passed as a named list, the prediction model would be accessed with model$model or model["model"].
prediction_function <- function(model, data_features) {
  x <- xgboost::xgb.DMatrix(data = as.matrix(data_features))
  data_pred <- data.frame("y_pred" = predict(model, x),
                          "y_pred_lower" = predict(model, x) - 2,  # Optional; in practice, forecast bounds are not hard coded.
                          "y_pred_upper" = predict(model, x) + 2)  # Optional; in practice, forecast bounds are not hard coded.
  return(data_pred)
}

## -----------------------------------------------------------------------------
data_pred_cv <- predict(model_results_cv, prediction_function = list(prediction_function), data = data_train)

## ---- message = FALSE, warning = FALSE----------------------------------------
plot(data_pred_cv) + theme(legend.position = "none")

## ---- message = FALSE, warning = FALSE----------------------------------------
plot(data_pred_cv, facet = group ~ model, group_filter = "buoy_id %in% c(1, 2, 3)", windows = 1) 

## ---- message = FALSE, warning = FALSE----------------------------------------
plot(data_pred_cv, facet = group ~ horizon, group_filter = "buoy_id %in% c(1, 2, 3)", windows = 1) 

## ---- message = FALSE, warning = FALSE----------------------------------------
data_error <- forecastML::return_error(data_pred_cv)

plot(data_error, type = "window", group_filter = "buoy_id %in% c(1, 2, 3)", metric = "mae")
plot(data_error, type = "horizon", group_filter = "buoy_id %in% c(1, 2, 3)", metric = "mae")
plot(data_error, type = "global", group_filter = "buoy_id %in% c(1, 2, 3)", metric = "mae")

## -----------------------------------------------------------------------------
type <- "forecast"  # Create a forecasting dataset for our predict() function.

data_forecast <- forecastML::create_lagged_df(data, type = type, outcome_col = outcome_col,
                                              horizons = horizons, lookback = lookback,
                                              dates = dates, frequency = frequency,
                                              dynamic_features = dynamic_features,
                                              groups = groups, static_features = static_features, 
                                              use_future = FALSE)

DT::datatable(head(data_forecast$horizon_1), options = list(scrollX = TRUE))

## -----------------------------------------------------------------------------
for (i in seq_along(data_forecast)) {
  data_forecast[[i]]$day <- lubridate::mday(data_forecast[[i]]$index)  # When dates are given, the 'index` is date-based.
  data_forecast[[i]]$year <- lubridate::year(data_forecast[[i]]$index)
}

## -----------------------------------------------------------------------------
data_forecasts <- predict(model_results_cv, prediction_function = list(prediction_function), data = data_forecast)

## ---- message = FALSE, warning = FALSE----------------------------------------
plot(data_forecasts)

## ---- message = FALSE, warning = FALSE----------------------------------------
plot(data_forecasts, facet = group ~ ., group_filter = "buoy_id %in% 1:3")

## ---- message = FALSE, warning = FALSE----------------------------------------
windows <- forecastML::create_windows(data_train, window_length = 0)

p <- plot(windows, data_train) + theme(legend.position = "none")
p

## -----------------------------------------------------------------------------
# Un-comment the code below and set 'use_future' to TRUE.
#future::plan(future::multiprocess)

model_results_no_cv <- forecastML::train_model(lagged_df = data_train, 
                                               windows = windows,
                                               model_name = "xgboost",
                                               model_function = model_function,
                                               use_future = FALSE)

## -----------------------------------------------------------------------------
data_forecasts <- predict(model_results_no_cv, prediction_function = list(prediction_function), data = data_forecast)

DT::datatable(head(data_forecasts), options = list(scrollX = TRUE))

## ---- message = FALSE, warning = FALSE----------------------------------------
data_combined <- forecastML::combine_forecasts(data_forecasts)

# Plot a background dataset of actuals using the most recent data.
data_actual <- data[dates >= as.Date("2018-11-01"), ]
actual_indices <- dates[dates >= as.Date("2018-11-01")]

# Plot all final forecasts plus historical data.
plot(data_combined, data_actual = data_actual, actual_indices = actual_indices)

## ---- message = FALSE, warning = FALSE----------------------------------------
plot(data_combined, data_actual = data_actual, actual_indices = actual_indices, 
     facet = group ~ ., group_filter = "buoy_id %in% c(1, 11, 12)")

## -----------------------------------------------------------------------------
# Plot final forecasts for a single buoy plus historical data.
plot(data_combined, data_actual = data_actual, actual_indices = actual_indices,
     group_filter = "buoy_id == 10")
Any scripts or data that you put into this service are public.
forecastML documentation built on July 8, 2020, 7:27 p.m.
rdrr.io home R language documentation Run R code online
CRAN packages Bioconductor packages R-Forge packages GitHub packages
Note that we can't provide technical support on individual packages. You should contact the package authors for that.
forecastML
Time Series Forecasting with Machine Learning Methods

inst/doc/grouped_forecast.R
In forecastML: Time Series Forecasting with Machine Learning Methods

Try the forecastML package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

forecastML Time Series Forecasting with Machine Learning Methods

inst/doc/grouped_forecast.R In forecastML: Time Series Forecasting with Machine Learning Methods

Try the forecastML package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

forecastML
Time Series Forecasting with Machine Learning Methods

inst/doc/grouped_forecast.R
In forecastML: Time Series Forecasting with Machine Learning Methods