# Set the default chunk option `echo = TRUE` for the whole document.
knitr::opts_chunk$set(echo = TRUE)

Load dependencies

library("glmnet")
library("forecast")
library("stlplus")
library("fastDummies")
library("imputeTS")
library("tidyverse")
library("plotly")

Source

source("R/get_seasonal_naive.R")
source("R/get_forecast.R")
source("R/get_ets.R")
source("R/get_arima.R")
source("R/get_glm.R")
source("R/get_glmnet.R")
source("R/get_croston.R")
source("R/get_neural_network.R")
source("R/get_tbats.R")
source("R/cleansing.R")
source("R/auxiliar.R")
source("R/autoforecast.R")
source("R/feature_engineering.R")
source("R/optim_ts.R")

Job

Hyperparameter

# Candidate hyperparameter grids: each one holds every combination of the
# sequences listed for its columns. Both grids are stored inside `parameter`.
grid_glmnet <- tidyr::expand_grid(
  time_weight = seq(0.9, 1, by = 0.02),
  trend_discount = seq(0.95, 1, by = 0.01),
  alpha = seq(0, 1, by = 0.10)
)
grid_glm <- tidyr::expand_grid(
  time_weight = seq(0.8, 1, by = 0.02),
  trend_discount = seq(0.8, 1, by = 0.02)
)

# Settings passed as `parameter` to the fitting, tuning and autoforecast
# calls, with one named entry per model. The `job` sub-lists hold extra
# options (presumably for the tuning step -- confirm against optim_ts).
parameter <- list(
  glmnet = list(
    time_weight = 0.94,
    trend_discount = 0.93,
    alpha = 0,
    lambda = 0.1,
    grid_glmnet = grid_glmnet,
    job = list(
      optim_lambda = TRUE,
      x_excluded = NULL,
      random_search_size = 0.05,
      n_best_model = 1
    )
  ),
  croston = list(alpha = 0.1),
  glm = list(
    time_weight = 0.99,
    trend_discount = 0.97,
    grid_glm = grid_glm,
    job = list(
      x_excluded = NULL,
      random_search_size = 0.1,
      n_best_model = 1
    )
  ),
  arima = list(p = 1, d = 1, q = 0, P = 1, D = 0, Q = 0),
  ets = list(ets = "ZZZ")
)

Data import

Sanofi

# Read the demo data set, then keep only the rows whose `date` is strictly
# before 2020-02-01.
data_init <- read_csv("demo_data.csv")
data_init <- dplyr::filter(data_init, date < "2020-02-01")

Base R


Prescribe

# Declare which columns hold the series key, the target, the date and the
# regressor name/value pairs. `freq = 12` presumably marks monthly data
# (confirm against prescribe_ts).
data_all <- prescribe_ts(
  data_init,
  key = "forecast_item",
  y_var = "volume",
  date_var = "date",
  freq = 12,
  reg_name = "reg_name",
  reg_value = "reg_value"
)
# NOTE(review): `ap0` is not created anywhere in the visible part of this
# script -- confirm where it comes from before running this call.
ap <- prescribe_ts(
  .data = ap0,
  key = "key",
  y_var = "value",
  date_var = "date",
  reg_name = "reg_name",
  reg_value = "reg_value",
  freq = 12
)

Single item / modularity

# Keep the rows of one key only, store them as `.data`, and print them.
.data <- filter(data_all, key == "FI: 34142")
.data

Feature engineering

# Run the automatic feature creation on the selected series and print it.
feature_engineering_ts(.data)

Cleansing

# Request four cleansing methods at once and overlay three of the cleaned
# series on the raw one: winsorize (red), kalman (blue), mean (purple) and
# the original `y_var` (black). The `y_var_<method>` columns are presumably
# added by clean_ts -- confirm against its implementation.
feature_engineering_ts(.data) %>%
  clean_ts(method = c("kalman", "winsorize", "median", "mean")) %>%
  ggplot() +
  geom_line(aes(x = date_var, y = y_var_winsorize), colour = "red") +
  geom_line(aes(x = date_var, y = y_var_kalman), colour = "blue") +
  geom_line(aes(x = date_var, y = y_var_mean), colour = "purple") +
  geom_line(aes(x = date_var, y = y_var), colour = "black")

Model fitting

# Create features, cleanse with the "winsorize" method, then fit an ETS model
# using the settings in `parameter`.
# NOTE(review): other calls in this script also use "kalman", "median" and
# "mean" as cleansing methods -- confirm the full list against clean_ts.
fit_ts(
  clean_ts(feature_engineering_ts(.data), method = "winsorize"),
  model = "ets",
  parameter = parameter
)

Forecasting

# Same preparation and ETS fit as the model-fitting step, followed by a
# forecast with `horizon = 36`.
clean_ts(feature_engineering_ts(.data), method = "winsorize") %>%
  fit_ts(model = "ets", parameter = parameter) %>%
  get_forecast(horizon = 36)

Hyperparameter tuning

# Create features, cleanse with the "kalman" method, then run optim_ts for the
# "glm" model with `test_size = 6` and `lag = 3`.
feature_engineering_ts(.data) %>%
  clean_ts(method = "kalman") %>%
  optim_ts(
    test_size = 6,
    lag = 3,
    parameter = parameter,
    model = "glm"
  )

Optimization

Profile

# Names of the models handed to autoforecast, and the single series (one key)
# it is run on.
model_list <- c(
  "glm", "glmnet", "neural_network", "arima",
  "ets", "seasonal_naive", "tbats", "croston"
)
.data <- filter(data_all, key == "FI: 515188")

Fast optimization

# Time an autoforecast run over every model in `model_list` with the "fast"
# optimisation profile and "winsorize" cleansing, then plot the result.
tictoc::tic()
fast_optim_forecast <- autoforecast(
  .data = .data,
  horizon = 100,
  model = model_list,
  parameter = parameter,
  optim_profile = "fast",
  method = "winsorize"
)
tictoc::toc()
# Spell out TRUE: `T` is an ordinary variable that can be reassigned, whereas
# `TRUE` is a reserved word.
fast_optim_forecast %>%
  plot_ts(interactive = TRUE)


opoyc/autoforecast documentation built on May 18, 2021, 1:29 a.m.