knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  warning = FALSE,
  message = FALSE
)
rm(list=ls())
library(deepvars)

Reduced-form VAR

Recall the functional from of the reduced-form VAR($p$) with $K$ variables and $p$ lags (with $m \in [1,p]$)

$$ \begin{aligned} && y_t&=A_1 y_{t-1} + A_2 y_{t-2} + ... + A_p y_{t-p} + u_t \ \end{aligned} $$

where $y_{t-m}$ are $(K \times 1)$ vectors , $A_m$ are $(K \times K)$ matrices of coefficients and $u_t$ is a $(K \times 1)$ vector of residuals.

Deep VAR

The deep VAR ...

A simple benchmark

dt = data.table::data.table(deepvars::canada)
var_cols = colnames(dt)[2:ncol(dt)]

The underlying time series in levels look non-stationary

dt[,(var_cols) := lapply(.SD, function(i) c(0,diff(i))), .SDcols=var_cols]
chart_data = dt
chart_data = data.table::melt(chart_data, id.vars = "date")

ggplot2::ggplot(chart_data) +
  ggplot2::geom_line(ggplot2::aes(y=value, x=date)) +
  ggplot2::facet_wrap(variable ~ ., scales = "free_y") +
  ggplot2::theme_bw() +
  ggplot2::labs(
    x="Date",
    y="Value"
  )

Splitting data into a training and test sample ...

train_test_split <- split_sample(dt)
train_data <- train_test_split$train_data
test_data <- train_test_split$test_data

Select lag length ...

lags <- lag_order(train_data[,.SD,.SDcols=var_cols], max_lag = 12)$p

Fit the two models ...

# VAR
var_model <- vareg(train_data, lags = lags)
# Deep VAR
deepvar_model <- deepvareg(train_data, lags=lags, num_units = 100, epochs=500)

1-step ahead predictions

In-sample

y_true <- var_model$y_train
pred_var <- predict(var_model)
plot(pred_var, y_true = y_true)
pred_deepvar <- predict(deepvar_model)
plot(pred_deepvar, y_true = y_true)

Out-of-sample

X_test <- prepare_test_data(train_test_split, lags=lags)$X_test
y_test <- prepare_test_data(train_test_split, lags=lags)$y_test
pred_var <- predict(var_model, X=X_test)
plot(pred_var, y_true = y_test)
pred_dvar <- predict(deepvar_model, X=X_test)
plot(pred_dvar, y_true = y_test)

Cumulative loss

cum_loss_var <- cum_loss(var_model)$cum_loss[,type:="var"]
cum_loss_dvar <- cum_loss(deepvar_model)$cum_loss[,type:="deepvar"]
dt_plot <- rbind(cum_loss_dvar, cum_loss_var)
# dt_plot[,date:=train_data$date[date]]
dt_plot[,type:=factor(type)]
levels(dt_plot$type) <- c("Deep VAR", "VAR")
ggplot2::ggplot(data=dt_plot, ggplot2::aes(x=date, y=value, colour=type)) +
  ggplot2::geom_line() +
  ggplot2::facet_wrap(~variable, scales = "free_y") +
  ggplot2::scale_color_discrete(name="Model:") +
  ggplot2::labs(
      x="Date",
      y="Squared error"
    )
cum_loss_var <- cum_loss(var_model, X=X_test, y=y_test)$cum_loss[,type:="var"]
cum_loss_dvar <- cum_loss(deepvar_model, X=X_test, y=y_test)$cum_loss[,type:="deepvar"]
dt_plot <- rbind(cum_loss_dvar, cum_loss_var)
# dt_plot[,date:=test_data$date,by=.(variable, type)]
dt_plot[,type:=factor(type)]
ggplot2::ggplot(data=dt_plot, ggplot2::aes(x=date, y=value, colour=type)) +
  ggplot2::geom_line() +
  ggplot2::facet_wrap(~variable, scales = "free_y") +
  ggplot2::scale_color_discrete(name="Model:") +
  ggplot2::labs(
      x="Date",
      y="Squared error"
    )

RMSE

rmse_var <- rbind(
  rmse(var_model)[,model:="var"][,sample:="train"],
  rmse(var_model, X=X_test, y=y_test)[,model:="var"][,sample:="test"]
)
rmse_dvar <- rbind(
  rmse(deepvar_model)[,model:="deepvar"][,sample:="train"],
  rmse(deepvar_model, X=X_test, y=y_test)[,model:="deepvar"][,sample:="test"]
)
tab_rmse <- rbind(rmse_var, rmse_dvar)
tab_rmse <- data.table::dcast(tab_rmse, sample + variable ~ model, value.var = "value")
knitr::kable(
  tab_rmse, 
  col.names = c("Sample", "Variable", "DVAR", "VAR"),
  digits = 5
) 

h-step ahead forecasts

Charts

n_ahead <- nrow(y_test)
fcst_var <- forecast(var_model, n.ahead = n_ahead)
plot(fcst_var, y_true = y_test)
fcst_dvar <- forecast(deepvar_model, n.ahead = n_ahead)
plot(fcst_dvar, y_true = y_test)

RMSFE

In terms of the Root Mean Squared Forecasting Error the Deep VAR is clearly dominating:

tab_rmsfe <- rbind(
  rmsfe(fcst_var, y_true=y_test)[,model:="VAR"],
  rmsfe(fcst_dvar, y_true=y_test)[,model:="DVAR"]
)
tab_rmsfe <- data.table::dcast(tab_rmsfe, variable ~ model, value.var = "value")
knitr::kable(
  tab_rmsfe, 
  col.names = c("Variable", "DVAR", "VAR"),
  digits = 5
) 

Correlations

With respect to correlations between forecasts and actual outcomes there is no clear winner. The VAR appear to perform better for unemployment U, while the Deep VAR clearly dominates for production prod.

tab_cor_fcst <- rbind(
  cor_fcst(fcst_var, y_true=y_test)[,model:="VAR"],
  cor_fcst(fcst_dvar, y_true=y_test)[,model:="DVAR"]
)
tab_cor_fcst <- data.table::dcast(tab_cor_fcst, variable ~ model, value.var = "value")
knitr::kable(
  tab_cor_fcst, 
  col.names = c("Variable", "DVAR", "VAR"),
  digits = 5
) 


pat-alt/SVAA documentation built on Jan. 19, 2024, 7:45 p.m.