tests/testthat/test-recursive.R

context("RECURSIVE MODELS")



# SINGLE / RECIPE / PARSNIP ----

test_that("Recursive Tests ", {

  skip_on_cran()

  FORECAST_HORIZON <- 24

  m750_extended <- m750 %>%
    group_by(id) %>%
    future_frame(
      .length_out = FORECAST_HORIZON,
      .bind_data  = TRUE
    ) %>%
    ungroup()

  # recursive 1 - single / recipe / parsnip ----

  # Lag Recipe
  recipe_lag <- recipe(value ~ date, m750_extended) %>%
    step_lag(value, lag = 1:FORECAST_HORIZON)

  # Data Transformation
  m750_lagged <- recipe_lag %>% prep() %>% juice()

  train_data <- m750_lagged %>%
    drop_na()

  future_data <- m750_lagged %>%
    filter(is.na(value))


  # * Recursive Modeling ----
  model_fit_lm <- linear_reg() %>%
    set_engine("lm") %>%
    fit(value ~ date, data = train_data)

  model_fit_lm_recursive <- linear_reg() %>%
    set_engine("lm") %>%
    fit(value ~ ., data = train_data) %>%
    recursive(
      transform  = recipe_lag,
      train_tail = tail(train_data, FORECAST_HORIZON)
    )

  expect_s3_class(model_fit_lm_recursive, "recursive")

  # * Modeltime Forecast  ----
  forecast_tbl <- modeltime_table(
    model_fit_lm,
    model_fit_lm_recursive
  ) %>%
    modeltime_forecast(
      new_data    = future_data,
      actual_data = m750,
      keep_data   = TRUE
    )

  # Visualize
  # forecast_tbl %>% plot_modeltime_forecast()

  preds <- forecast_tbl %>% filter(.model_id == 2) %>% pull(.value)
  expect_equal(
    length(future_data$value),
    length(preds)
  )

  expect_lt(max(preds), 11500)
  expect_gt(min(preds), 9650)

  # * Modeltime Refit ----

  retrain_tbl <- m750_lagged %>% slice(1:200)
  future_tbl  <- m750_lagged %>% slice(201:224)

  refit_tbl <- modeltime_table(
    model_fit_lm_recursive
  ) %>%
    modeltime_refit(
      data = retrain_tbl
    )

  forecast_refit_tbl <- refit_tbl %>%
    modeltime_forecast(
      new_data    = future_tbl,
      actual_data = retrain_tbl
    )

  # forecast_refit_tbl %>% plot_modeltime_forecast()

  preds <- forecast_refit_tbl %>% filter(.model_id == 1) %>% pull(.value)
  expect_equal(
    length(future_tbl$value),
    length(preds)
  )

  expect_lt(max(preds), 10600)
  expect_gt(min(preds), 8800)



  # SINGLE / TRANSFORM FUNCTION / WORKFLOW ----


  # recursive 2 - single / transform func / workflow -----

  # Function run recursively that updates the forcasted dataset
  lag_transformer <- function(data){
    data %>%
      # Lags
      tk_augment_lags(value, .lags = 1:FORECAST_HORIZON)
  }

  # Data Preparation
  m750_lagged <- m750_extended %>%
    lag_transformer() %>%
    select(-id)

  train_data <- m750_lagged %>%
    drop_na()

  future_data <- m750_lagged %>%
    filter(is.na(value))

  # * Recursive Modeling ----
  wflw_fit_lm <- workflow() %>%
    add_recipe(recipe(value ~ date, train_data)) %>%
    add_model(linear_reg() %>% set_engine("lm")) %>%
    fit(train_data)

  wflw_fit_lm_recursive <- workflow() %>%
    add_recipe(recipe(value ~ ., train_data)) %>%
    add_model(linear_reg() %>% set_engine("lm")) %>%
    fit(train_data) %>%
    recursive(
      transform  = lag_transformer,
      train_tail = tail(train_data, FORECAST_HORIZON)
    )

  expect_s3_class(wflw_fit_lm_recursive, "recursive")

  # * Forecasting ----
  forecast_tbl <- modeltime_table(
    wflw_fit_lm,
    wflw_fit_lm_recursive
  ) %>%
    update_model_description(2, "LM - Lag Roll") %>%
    modeltime_forecast(
      new_data    = future_data,
      actual_data = m750
    )

  # forecast_tbl %>% plot_modeltime_forecast()

  preds <- forecast_tbl %>% filter(.model_id == 2) %>% pull(.value)
  expect_equal(
    length(future_data$value),
    length(preds)
  )

  expect_lt(max(preds), 11500)
  expect_gt(min(preds), 9650)

  # * Modeltime Refit ----

  retrain_tbl <- train_data %>% slice(1:200)
  future_tbl  <- train_data %>% slice(201:224)

  # wflw_fit_lm_recursive %>% mdl_time_refit(retrain_tbl)

  refit_tbl <- modeltime_table(
    wflw_fit_lm_recursive
  ) %>%
    modeltime_refit(
      data = retrain_tbl
    )

  forecast_refit_tbl <- refit_tbl %>%
    modeltime_forecast(
      new_data    = future_tbl,
      actual_data = retrain_tbl
    )

  # forecast_refit_tbl %>% plot_modeltime_forecast()

  preds <- forecast_refit_tbl %>% filter(.model_id == 1) %>% pull(.value)
  expect_equal(
    length(future_tbl$value),
    length(preds)
  )

  expect_lt(max(preds), 10600)
  expect_gt(min(preds), 8800)


  # PANEL / FUNCTION / PARSNIP & WORKFLOW ----

  # recursive 3 - panel / function / parsnip + workflow

  # Jumble the data to make sure it forecasts properly
  m4_monthly_updated <- m4_monthly %>%
    arrange(desc(id), date) %>%
    mutate(id = as_factor(as.character(id)))

  m4_extended <- m4_monthly_updated %>%
    group_by(id) %>%
    future_frame(
      .length_out = FORECAST_HORIZON,
      .bind_data  = TRUE
    ) %>%
    ungroup()

  # Transformation Function
  lag_transformer_grouped <- function(data){
    data %>%
      group_by(id) %>%
      # Lags
      tk_augment_lags(value, .lags = 1:FORECAST_HORIZON) %>%
      ungroup()
  }

  m4_lags <- m4_extended %>%
    lag_transformer_grouped()

  train_data <- m4_lags %>%
    drop_na()

  future_data <- m4_lags %>%
    filter(is.na(value))

  # * Recursive Modeling ----

  model_fit_lm_recursive <- linear_reg() %>%
    set_engine("lm") %>%
    fit(value ~ ., data = train_data) %>%
    recursive(
      id         = "id",
      transform  = lag_transformer_grouped,
      train_tail = panel_tail(train_data, id, FORECAST_HORIZON)
    )

  wflw_fit_lm_recursive <- workflow() %>%
    add_recipe(recipe(value ~ ., train_data)) %>%
    add_model(linear_reg() %>% set_engine("lm")) %>%
    fit(train_data) %>%
    recursive(
      id         = "id",
      transform  = lag_transformer_grouped,
      train_tail = panel_tail(train_data, id, FORECAST_HORIZON)
    )

  # wflw_fit_lm_recursive %>% class()
  expect_s3_class(model_fit_lm_recursive, "recursive_panel")
  expect_s3_class(wflw_fit_lm_recursive, "recursive_panel")

  # * Forecasting ----

  forecast_tbl <- modeltime_table(
    model_fit_lm_recursive,
    wflw_fit_lm_recursive
  ) %>%
    modeltime_forecast(
      new_data    = future_data,
      actual_data = m4_monthly,
      keep_data   = TRUE
    )

  # forecast_tbl %>% group_by(id) %>% plot_modeltime_forecast()
  preds_1 <- forecast_tbl %>% filter(.model_id == 1) %>% pull(.value)
  preds_2 <- forecast_tbl %>% filter(.model_id == 2) %>% pull(.value)
  expect_equal(
    length(future_data$value),
    length(preds_1),
    length(preds_2)
  )

  expect_equal(preds_1, preds_2)

  expect_type(preds_1, "double")

  # * Modeltime Refit ----

  retrain_tbl <- train_data %>% group_by(id) %>% dplyr::slice(1:200) %>% ungroup()
  future_tbl  <- train_data %>% group_by(id) %>% dplyr::slice(201:224) %>% ungroup()

  # wflw_fit_lm_recursive %>% mdl_time_refit(retrain_tbl)

  refit_tbl <- modeltime_table(
    model_fit_lm_recursive
    ,
    wflw_fit_lm_recursive
  ) %>%
    modeltime_refit(
      data = retrain_tbl
    )

  model_fit_lm_recursive_refit <- refit_tbl$.model[[1]]
  # model_fit_lm_recursive_refit %>% class()
  expect_s3_class(model_fit_lm_recursive_refit, "recursive_panel")

  wflw_fit_lm_recursive_refit <- refit_tbl$.model[[2]]
  # wflw_fit_lm_recursive_refit %>% class()
  expect_s3_class(wflw_fit_lm_recursive_refit, "recursive_panel")

  forecast_refit_tbl <- refit_tbl %>%
    # dplyr::slice(1) %>%
    modeltime_forecast(
      new_data    = future_tbl,
      actual_data = retrain_tbl,
      keep_data   = TRUE
    )

  # forecast_refit_tbl %>% group_by(id) %>% plot_modeltime_forecast()

  preds_1 <- forecast_refit_tbl %>% filter(.model_id == 1) %>% pull(.value)
  preds_2 <- forecast_refit_tbl %>% filter(.model_id == 2) %>% pull(.value)

  expect_equal(
    length(future_tbl$value),
    length(preds_1),
    length(preds_2)
  )

  expect_equal(preds_1, preds_2)

  expect_type(preds_1, "double")

})

Try the modeltime package in your browser

Any scripts or data that you put into this service are public.

modeltime documentation built on Sept. 2, 2023, 5:06 p.m.