# Vignette-wide chunk defaults: merge each chunk's source and printed output
# into one block (collapse) and prefix output lines with "#>" (comment).
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

Adding lags for time series

suppressMessages(library(dplyr))
library(looplyr)
library(parsnip)
library(rsample)
library(ggplot2)

One of the simplest ways to apply regression algorithms to time series forecasting is to use lagged variables.

# Take the DAX series from `EuStockMarkets` (shipped with R's `datasets`
# package) and keep it as a one-column tibble whose column is named `value`.
dax_stock <- as_tibble(EuStockMarkets) %>%
  select(value = DAX)

This is where looplyr comes into play. We use the loop_mutate function as a shortcut for combining a for loop with dplyr::mutate.

# Build the modelling data set from `dax_stock` and split it for evaluation.
#
# Steps:
#   1. loop_mutate() adds lagged copies of `value` for the iteration values
#      1:7. NOTE(review): presumably the expression is evaluated once per
#      element with `.x` bound to it, giving columns "value.1" ... "value.7"
#      -- confirm against the looplyr documentation.
#      `lag` here is expected to be dplyr::lag (dplyr is attached above).
#   2. na.omit() drops every row containing an NA (the lag columns have no
#      value for the earliest observations).
#   3. initial_time_split(prop = 0.9) produces the split object that is later
#      read with training() and testing(); presumably the first 90% of rows
#      go to training and the rest to testing, preserving order -- confirm
#      against the rsample documentation.
dax_stock_dataset <- 
  dax_stock %>% 
    loop_mutate(
      1:7, paste0("value.", .x) := lag(value, .x)
    ) %>% 
    na.omit() %>% 
    initial_time_split(prop = 0.9)

Defining model

# Specify a linear regression (parsnip spec, "lm" engine, regression mode) and
# fit it on the training portion of the split, regressing `value` on every
# other column of that data.
model_linear <- linear_reg() %>%
  set_engine("lm") %>%
  set_mode("regression") %>%
  fit(
    formula = value ~ .,
    data = training(dax_stock_dataset)
  )

# Predict `value` for the held-out (testing) portion of the split; the
# predictions are read from the `.pred` column further down.
fcast <- predict(model_linear, new_data = testing(dax_stock_dataset))

# Long-format comparison table used for plotting: one row per
# (test observation, series) pair. `n` is the position of the observation
# within the test set, `name` is "true" or "fcast", and `value` holds the
# corresponding observed or predicted level.
true_vs_fcast <- tibble(
  true  = testing(dax_stock_dataset)$value,
  # The right-hand `fcast` is the prediction table created earlier; the
  # column of the same name does not exist yet at this point.
  fcast = fcast$.pred
) %>% 
  # row_number() rather than 1:n(): 1:n() evaluates to c(1, 0) on a zero-row
  # table and makes mutate() fail, while row_number() yields integer(0).
  mutate(n = row_number()) %>% 
  # Output column names are spelled out because the plot maps `name` and
  # `value` explicitly (these are also pivot_longer()'s defaults).
  tidyr::pivot_longer(
    c(true, fcast),
    names_to = "name",
    values_to = "value"
  )

# Line chart of observed vs predicted values over the test set, one coloured
# line per series (`name`), with the observation index `n` on the x axis.
ggplot(data = true_vs_fcast) +
  geom_line(mapping = aes(x = n, y = value, colour = name)) +
  theme_minimal() +
  ggtitle("DAX stock - true values vs forecast")


krzjoa/looplyr documentation built on Oct. 30, 2020, 2:22 a.m.