knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
library(torch) #library(torchts) library(rsample) library(dplyr, warn.conflicts = FALSE) library(ggplot2) library(parsnip) library(timetk) # write.csv(weather_pl, file = "../weather_pl.csv") devtools::load_all()
parsnip API provides quick and convenient way to train time series models based on torch library.
First, let's prepare input data set using excelent timetk library. weather_pl dataset from torchts package contains 19-year history of weather data from two Polish "poles of extreme temperature", i.e. Suwałki and Tarnów. In this excercise, we'll use a slice of data registered in the "pole of warmth", i.e in the city of Tarnów.
tarnow_temp <- weather_pl %>% filter(station == "TRN") %>% select(date, tmax_daily, tmin_daily, press_mean_daily) tk_summary_diagnostics(tarnow_temp, date) tarnow_temp <- tarnow_temp %>% mutate(year_day = as.numeric(lubridate::yday(date)))
As we can see, this is a time series between 1th of Janury 2001 till the end of 2020 - twenty years.
In the next step, we'll split our data into two training and testing datasets using a handy time_series_split function.
EPOCHS <- 10 HORIZON <- 7 TIMESTEPS <- 28 data_split <- time_series_split( tarnow_temp, date, initial = "18 years", assess = "2 years", lag = TIMESTEPS ) mlp_model <- torchts_mlp( tmax_daily ~ date + year_day, data = training(data_split), timesteps = TIMESTEPS, horizon = HORIZON, jump = 1, epochs = EPOCHS, learn_rate = 0.001, hidden_units = c(50, 30), batch_size = 32, scale = TRUE, device = 'cuda' )
We want to generate forecast for a full year. To do so, we'll remove the outcome column values
we'd like to forecast. Bear in mind that input data differs from the inputs for classical tabular-data algorithms.
In this specific case, we have to keep some "history" (in this case: r TIMESTEPS steps, i.e: r TIMESTEPS days).
# Clear outcome variable # Ostrzeżenie, jeśli wyczyszczono za dużo danych cleared_new_data <- testing(data_split) %>% clear_outcome(date, c(tmax_daily), TIMESTEPS) fcast <- mlp_model %>% predict(new_data = testing(data_split)) fcast <- mlp_model %>% predict(new_data = cleared_new_data) fcast <- tibble(.pred = fcast) # plot(ts(fcast)) plot_forecast( testing(data_split), fcast, tmax_daily ) # fcast_vs_true <- # bind_cols( # n = 1:nrow(testing(data_split)), # actual = testing(data_split)$tmax_daily, # fcast # ) %>% # tidyr::pivot_longer(c(actual, .pred)) # # ggplot(fcast_vs_true) + # geom_line(aes(n, value, col = name)) + # theme_minimal() + # ggtitle("Forecast vs actual values") # test_input <- c( # -0.9743, -1.0750, -1.3369, -1.2966, -1.2866, -0.9440, -1.2866, -1.3168, # -0.8634, -0.9138, -1.0548, -0.9440, -0.8231, -1.2160, -1.1757, -0.8634, # -0.6519, -0.5612, -0.8131, -1.2765, -1.1153, -0.6922, -0.3799, -0.5914, # -0.9843 # ) # # inp <- torch_tensor(array(test_input, dim = c(1, 28)))$cuda() # # # mlp_model$net(x_num = inp)
In the next next, we're adding new predictor: press_mean_daily. It means that we extend the autoregressive model we obtained in the previous exercise by providing a variable that could be recognised as external regressor (what is a term commonly used in the context of methods like ARIMA etc.).
EPOCHS <- 10 HORIZON <- 7 TIMESTEPS <- 28 data_split <- time_series_split( tarnow_temp, date, initial = "18 years", assess = "2 years", lag = TIMESTEPS ) rnn_model <- torchts_rnn( tmax_daily ~ date + tmax_daily + year_day, data = training(data_split), timesteps = TIMESTEPS, horizon = HORIZON, jump = 1, epochs = EPOCHS, learn_rate = 0.001, hidden_units = 30, batch_size = 32, scale = TRUE, device = 'cuda' )
# Clear outcome variable cleared_new_data <- testing(data_split) %>% clear_outcome(date, tmax_daily, 20) # Forecast fcast <- rnn_model %>% predict(new_data = testing(data_split)) plot_forecast( head(testing(data_split), nrow(fcast)), fcast, tmax_daily )
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.