tests/testthat/test-algo-window_reg.R

context("TEST window_reg() and naive_reg()")

# Data - Single Time Series
m750 <- timetk::m4_monthly %>% dplyr::filter(id == "M750")

splits <- rsample::initial_time_split(m750, prop = 0.8)

# Data - Multiple Time Series (Panel)
full_data_tbl <- timetk::m4_monthly %>%
    dplyr::group_by(id) %>%
    timetk::future_frame(date, .length_out = 60, .bind_data = TRUE) %>%
    dplyr::ungroup()

future_tbl <- full_data_tbl %>% dplyr::filter(is.na(value))

data_prepared_tbl <- full_data_tbl %>% dplyr::filter(!is.na(value))

# 1.0 NAIVE ----

# * SINGLE TIME SERIES -----

test_that("NAIVE - Single Time Series (No ID)", {

    skip_on_cran()

    model_fit <- naive_reg() %>%
        parsnip::set_engine("naive") %>%
        fit(value ~ date, data = rsample::training(splits))

    calibration_tbl <- modeltime_table(
        model_fit
    ) %>%
        modeltime_calibrate(rsample::testing(splits))

    forecast_tbl <- calibration_tbl %>%
        modeltime_forecast(
            new_data    = rsample::testing(splits),
            actual_data = m750
        )

    forecast_vec <- forecast_tbl %>%
        dplyr::filter(.model_id == 1) %>%
        dplyr::pull(.value)

    future_forecast_tbl <- calibration_tbl %>%
        modeltime_refit(m750) %>%
        modeltime_forecast(
            h = nrow(rsample::testing(splits)),
            actual_data = m750
        )

    future_forecast_vec <- future_forecast_tbl %>%
        dplyr::filter(.model_id == 1) %>%
        dplyr::pull(.value)

    expect_equal(nrow(forecast_tbl), 368)
    expect_equal(forecast_vec, rep_len(10810, 62))

    expect_equal(nrow(future_forecast_tbl), 368)
    expect_equal(future_forecast_vec, rep_len(11000, 62))

})


# * PANEL DATA ----

test_that("NAIVE - Multiple Time Series (Panel uses ID)", {

    skip_on_cran()

    model_fit_panel <- naive_reg(id = "id") %>%
        parsnip::set_engine("naive") %>%
        fit(value ~ date + id, data = data_prepared_tbl)


    future_forecast_panel_tbl <- modeltime_table(
        model_fit_panel
    ) %>%
        modeltime_forecast(
            new_data    = future_tbl,
            actual_data = data_prepared_tbl,
            keep_data   = TRUE
        )

    future_vec <- future_forecast_panel_tbl %>%
        dplyr::filter(!is.na(.model_id)) %>%
        dplyr::filter(id == "M1") %>%
        dplyr::pull(.value)


    expect_equal(nrow(future_forecast_panel_tbl), 1814)
    expect_equal(future_vec, rep_len(6890, 60))

})

# * UNSEEN PANEL DATA ----

test_that("NAIVE - Check New Factors", {

    skip_on_cran()

    wflw_fit_panel <- workflows::workflow() %>%
        workflows::add_model(naive_reg(id = "id") %>% parsnip::set_engine("naive")) %>%
        workflows::add_recipe(recipes::recipe(value ~ date + id, data = data_prepared_tbl)) %>%
        fit(data_prepared_tbl)

    # FOR SOME REASON PARSNIP MODELS FAIL
    # Error: Problem occurred during prediction. Error in model.frame.default(mod_terms, new_data, na.action = na.action, : factor id has new levels UNSEEN
    # model_fit_panel <- naive_reg(id = "id") %>%
    #     parsnip::set_engine("naive") %>%
    #     fit(value ~ date + id, data = data_prepared_tbl)

    expect_warning({
        future_forecast_panel_tbl <- modeltime_table(
            # model_fit_panel,
            wflw_fit_panel
        ) %>%
            modeltime_forecast(
                new_data    = dplyr::bind_rows(
                    future_tbl,
                    future_tbl %>%
                        dplyr::filter(id == "M1") %>%
                        dplyr::mutate(id = forcats::fct_recode(id, UNSEEN = "M1"))
                ),
                actual_data = dplyr::bind_rows(
                    data_prepared_tbl,
                    data_prepared_tbl %>%
                        dplyr::filter(id == "M1") %>%
                        dplyr::mutate(id = forcats::fct_recode(id, UNSEEN = "M1"))
                ),
                keep_data   = TRUE
            )
    })



    future_forecast_vec <- future_forecast_panel_tbl %>%
        dplyr::filter(!is.na(.model_id)) %>%
        dplyr::filter(id == "UNSEEN") %>%
        dplyr::pull(.value)


    expect_equal(future_forecast_vec, rep_len(NA_real_, 60))

})

# 2.0 SNAIVE -----

# * SINGLE TIME SERIES -----

test_that("SNAIVE - Single Time Series (No ID)", {

    skip_on_cran()

    model_fit <- naive_reg() %>%
        parsnip::set_engine("snaive") %>%
        fit(value ~ date, data = rsample::training(splits))

    calibration_tbl <- modeltime_table(
        model_fit
    ) %>%
        modeltime_calibrate(rsample::testing(splits))

    forecast_tbl <- calibration_tbl %>%
        modeltime_forecast(
            new_data    = rsample::testing(splits),
            actual_data = m750
        )

    forecast_vec <- forecast_tbl %>%
        dplyr::filter(.model_id == 1) %>%
        dplyr::pull(.value)

    last_series <- rsample::training(splits) %>%
        dplyr::slice_tail(n = 12) %>%
        dplyr::pull(value)

    future_forecast_tbl <- calibration_tbl %>%
        modeltime_refit(m750) %>%
        modeltime_forecast(
            h = nrow(rsample::testing(splits)),
            actual_data = m750
        )

    future_forecast_vec <- future_forecast_tbl %>%
        dplyr::filter(.model_id == 1) %>%
        dplyr::pull(.value)

    future_last_series <- m750 %>%
        dplyr::slice_tail(n = 12) %>%
        dplyr::pull(value)


    expect_equal(model_fit$fit$extras$period, 12)
    expect_equal(nrow(forecast_tbl), 368)
    expect_equal(forecast_vec, rep_len(last_series, 62))
    expect_equal(future_forecast_vec, rep_len(future_last_series, 62))

})


# * PANEL DATA ----

test_that("SNAIVE - Multiple Time Series (Panel ID)", {

    skip_on_cran()

    model_fit_panel <- naive_reg(id = "id") %>%
        parsnip::set_engine("snaive") %>%
        fit(value ~ date + id, data = data_prepared_tbl)

    future_forecast_panel_tbl <- modeltime_table(
        model_fit_panel
    ) %>%
        modeltime_forecast(
            new_data    = future_tbl,
            actual_data = data_prepared_tbl,
            keep_data   = TRUE
        )

    future_vec <- future_forecast_panel_tbl %>%
        dplyr::filter(id == "M1", !is.na(.model_id)) %>%
        dplyr::pull(.value)

    last_series_vec <- future_forecast_panel_tbl %>%
        dplyr::filter(id == "M1", is.na(.model_id)) %>%
        dplyr::slice_tail(n = 12) %>%
        dplyr::pull(.value)


    expect_equal(nrow(future_forecast_panel_tbl), 1814)
    expect_equal(future_vec, rep_len(last_series_vec, 60))

})


# * UNSEEN PANEL DATA ----

test_that("SNAIVE - Check New Factors", {

    skip_on_cran()

    wflw_fit_panel <- workflows::workflow() %>%
        workflows::add_model(naive_reg(id = "id") %>% parsnip::set_engine("snaive")) %>%
        workflows::add_recipe(recipes::recipe(value ~ date + id, data = data_prepared_tbl)) %>%
        fit(data_prepared_tbl)

    # FOR SOME REASON PARSNIP MODELS FAIL
    # Error: Problem occurred during prediction. Error in model.frame.default(mod_terms, new_data, na.action = na.action, : factor id has new levels UNSEEN
    # model_fit_panel <- naive_reg(id = "id") %>%
    #     parsnip::set_engine("snaive") %>%
    #     fit(value ~ date + id, data = data_prepared_tbl)

    expect_warning({
        future_forecast_panel_tbl <- modeltime_table(
            # model_fit_panel,
            wflw_fit_panel
        ) %>%
            modeltime_forecast(
                new_data    = dplyr::bind_rows(
                    future_tbl,
                    future_tbl %>%
                        dplyr::filter(id == "M1") %>%
                        dplyr::mutate(id = forcats::fct_recode(id, UNSEEN = "M1"))
                ),
                actual_data = dplyr::bind_rows(
                    data_prepared_tbl,
                    data_prepared_tbl %>%
                        dplyr::filter(id == "M1") %>%
                        dplyr::mutate(id = forcats::fct_recode(id, UNSEEN = "M1"))
                ),
                keep_data   = TRUE
            )
    })


    future_forecast_vec <- future_forecast_panel_tbl %>%
        dplyr::filter(id == "UNSEEN", !is.na(.model_id)) %>%
        dplyr::pull(.value)


    expect_equal(future_forecast_vec, rep_len(NA_real_, 60))

})


# 3.0 WINDOW -----

# * SINGLE TIME SERIES -----

test_that("WINDOW - Single Time Series (No ID)", {

    skip_on_cran()

    model_fit_1 <- window_reg(
        window_size     = 24
    ) %>%
        parsnip::set_engine("window_function", window_function = ~ mean(.x, na.rm = TRUE),) %>%
        fit(value ~ date, data = rsample::training(splits))

    model_fit_2 <- window_reg(
        window_size     = 36
    ) %>%
        parsnip::set_engine("window_function", window_function = median, na.rm = TRUE) %>%
        fit(value ~ date, data = rsample::training(splits))

    model_fit_3 <- window_reg() %>%
        parsnip::set_engine("window_function",
                   window_function = ~ tail(.x, 12),
                   na.rm = TRUE) %>%
        fit(value ~ date, data = rsample::training(splits))

    calibration_tbl <- modeltime_table(
        model_fit_1,
        model_fit_2,
        model_fit_3
    ) %>%
        modeltime_calibrate(rsample::testing(splits))

    forecast_tbl <- calibration_tbl %>%
        modeltime_forecast(
            new_data    = rsample::testing(splits),
            actual_data = m750
        )

    forecast_vec <- forecast_tbl %>%
        dplyr::filter(.model_id == 1) %>%
        dplyr::pull(.value)

    last_series <- rsample::training(splits) %>%
        dplyr::slice_tail(n = 12) %>%
        dplyr::pull(value)

    future_forecast_tbl <- calibration_tbl %>%
        modeltime_refit(m750) %>%
        modeltime_forecast(
            h = nrow(rsample::testing(splits)),
            actual_data = m750,
            keep_data = TRUE
        )

    future_forecast_vec <- future_forecast_tbl %>%
        dplyr::filter(.model_id == 1) %>%
        dplyr::pull(.value)

    future_last_series <- m750 %>%
        dplyr::slice_tail(n = 12) %>%
        dplyr::pull(value)


    expect_equal(model_fit_1$fit$extras$period, 24)
    expect_equal(nrow(forecast_tbl), 492)
    expect_true(all(forecast_vec < 10174))
    expect_true(all(forecast_vec > 10173))

})


# * PANEL DATA ----

test_that("WINDOW - Multiple Time Series (Panel ID)", {

    skip_on_cran()


    model_fit_panel <- window_reg(
            id = "id",
            window_size = 12
        ) %>%
        parsnip::set_engine("window_function", window_function = mean) %>%
        fit(value ~ date + id, data = data_prepared_tbl)

    future_forecast_panel_tbl <- modeltime_table(
        model_fit_panel
    ) %>%
        modeltime_forecast(
            new_data    = future_tbl,
            actual_data = data_prepared_tbl,
            keep_data   = TRUE
        )

    future_vec <- future_forecast_panel_tbl %>%
        dplyr::filter(id == "M1", !is.na(.model_id)) %>%
        dplyr::pull(.value)

    last_series_vec <- future_forecast_panel_tbl %>%
        dplyr::filter(id == "M1", is.na(.model_id)) %>%
        dplyr::slice_tail(n = 12) %>%
        dplyr::pull(.value)


    expect_equal(nrow(future_forecast_panel_tbl), 1814)
    expect_equal(future_vec, rep_len(mean(last_series_vec), 60))

})


# * UNSEEN PANEL DATA ----

test_that("SNAIVE - Check New Factors", {

    skip_on_cran()


    wflw_fit_panel <- workflows::workflow() %>%
        workflows::add_model(window_reg(id = "id") %>% parsnip::set_engine("window_function")) %>%
        workflows::add_recipe(recipes::recipe(value ~ date + id, data = data_prepared_tbl)) %>%
        fit(data_prepared_tbl)

    # FOR SOME REASON PARSNIP MODELS FAIL
    # Error: Problem occurred during prediction. Error in model.frame.default(mod_terms, new_data, na.action = na.action, : factor id has new levels UNSEEN
    # model_fit_panel <- naive_reg(id = "id") %>%
    #     parsnip::set_engine("snaive") %>%
    #     fit(value ~ date + id, data = data_prepared_tbl)

    expect_warning({
        future_forecast_panel_tbl <- modeltime_table(
            # model_fit_panel,
            wflw_fit_panel
        ) %>%
            modeltime_forecast(
                new_data    = dplyr::bind_rows(
                    future_tbl,
                    future_tbl %>%
                        dplyr::filter(id == "M1") %>%
                        mutate(id = forcats::fct_recode(id, UNSEEN = "M1"))
                ),
                actual_data = dplyr::bind_rows(
                    data_prepared_tbl,
                    data_prepared_tbl %>%
                        dplyr::filter(id == "M1") %>%
                        dplyr::mutate(id = forcats::fct_recode(id, UNSEEN = "M1"))
                ),
                keep_data   = TRUE
            )
    })


    future_forecast_vec <- future_forecast_panel_tbl %>%
        dplyr::filter(id == "UNSEEN", !is.na(.model_id)) %>%
        dplyr::pull(.value)


    expect_equal(future_forecast_vec, rep_len(NA_real_, 60))

})

Try the modeltime package in your browser

Any scripts or data that you put into this service are public.

modeltime documentation built on Oct. 23, 2024, 1:07 a.m.