title: "Deep VARs" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Deep VARs} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8}
rm(list=ls())
library(deepvars)
Recall the functional from of the reduced-form VAR($p$) with $K$ variables and $p$ lags (with $m \in [1,p]$)
$$ \begin{aligned} && y_t&=A_1 y_{t-1} + A_2 y_{t-2} + ... + A_p y_{t-p} + u_t \ \end{aligned} $$
where $y_{t-m}$ are $(K \times 1)$ vectors , $A_m$ are $(K \times K)$ matrices of coefficients and $u_t$ is a $(K \times 1)$ vector of residuals.
The deep VAR ...
dt = data.table::data.table(deepvars::canada)
var_cols = colnames(dt)[2:ncol(dt)]
The underlying time series in levels look non-stationary
dt[,(var_cols) := lapply(.SD, function(i) c(0,diff(i))), .SDcols=var_cols]
chart_data = dt
chart_data = data.table::melt(chart_data, id.vars = "date")
ggplot2::ggplot(chart_data) +
ggplot2::geom_line(ggplot2::aes(y=value, x=date)) +
ggplot2::facet_wrap(variable ~ ., scales = "free_y") +
ggplot2::theme_bw() +
ggplot2::labs(
x="Date",
y="Value"
)
Splitting data into a training and test sample ...
train_test_split <- split_sample(dt)
train_data <- train_test_split$train_data
test_data <- train_test_split$test_data
Select lag length ...
lags <- lag_order(train_data[,.SD,.SDcols=var_cols], max_lag = 12)$p
Fit the two models ...
# VAR
var_model <- vareg(train_data, lags = lags)
# Deep VAR
deepvar_model <- deepvareg(train_data, lags=lags, num_units = 100, epochs=500)
y_true <- var_model$y_train
pred_var <- predict(var_model)
plot(pred_var, y_true = y_true)
pred_deepvar <- predict(deepvar_model)
plot(pred_deepvar, y_true = y_true)
X_test <- prepare_test_data(train_test_split, lags=lags)$X_test
y_test <- prepare_test_data(train_test_split, lags=lags)$y_test
pred_var <- predict(var_model, X=X_test)
plot(pred_var, y_true = y_test)
pred_dvar <- predict(deepvar_model, X=X_test)
plot(pred_dvar, y_true = y_test)
cum_loss_var <- cum_loss(var_model)$cum_loss[,type:="var"]
cum_loss_dvar <- cum_loss(deepvar_model)$cum_loss[,type:="deepvar"]
dt_plot <- rbind(cum_loss_dvar, cum_loss_var)
# dt_plot[,date:=train_data$date[date]]
dt_plot[,type:=factor(type)]
levels(dt_plot$type) <- c("Deep VAR", "VAR")
ggplot2::ggplot(data=dt_plot, ggplot2::aes(x=date, y=value, colour=type)) +
ggplot2::geom_line() +
ggplot2::facet_wrap(~variable, scales = "free_y") +
ggplot2::scale_color_discrete(name="Model:") +
ggplot2::labs(
x="Date",
y="Squared error"
)
cum_loss_var <- cum_loss(var_model, X=X_test, y=y_test)$cum_loss[,type:="var"]
cum_loss_dvar <- cum_loss(deepvar_model, X=X_test, y=y_test)$cum_loss[,type:="deepvar"]
dt_plot <- rbind(cum_loss_dvar, cum_loss_var)
# dt_plot[,date:=test_data$date,by=.(variable, type)]
dt_plot[,type:=factor(type)]
ggplot2::ggplot(data=dt_plot, ggplot2::aes(x=date, y=value, colour=type)) +
ggplot2::geom_line() +
ggplot2::facet_wrap(~variable, scales = "free_y") +
ggplot2::scale_color_discrete(name="Model:") +
ggplot2::labs(
x="Date",
y="Squared error"
)
rmse_var <- rbind(
rmse(var_model)[,model:="var"][,sample:="train"],
rmse(var_model, X=X_test, y=y_test)[,model:="var"][,sample:="test"]
)
rmse_dvar <- rbind(
rmse(deepvar_model)[,model:="deepvar"][,sample:="train"],
rmse(deepvar_model, X=X_test, y=y_test)[,model:="deepvar"][,sample:="test"]
)
tab_rmse <- rbind(rmse_var, rmse_dvar)
tab_rmse <- data.table::dcast(tab_rmse, sample + variable ~ model, value.var = "value")
knitr::kable(
tab_rmse,
col.names = c("Sample", "Variable", "DVAR", "VAR"),
digits = 5
)
|Sample |Variable | DVAR| VAR| |:------|:--------|-------:|-------:| |test |e | 0.35709| 1.52839| |test |prod | 0.46324| 2.51180| |test |rw | 1.16701| 2.10415| |test |U | 0.24542| 1.16071| |train |e | 0.02455| 0.13340| |train |prod | 0.07314| 0.23076| |train |rw | 0.06273| 0.25987| |train |U | 0.02797| 0.10419|
n_ahead <- nrow(y_test)
fcst_var <- forecast(var_model, n.ahead = n_ahead)
plot(fcst_var, y_true = y_test)
fcst_dvar <- forecast(deepvar_model, n.ahead = n_ahead)
plot(fcst_dvar, y_true = y_test)
In terms of the Root Mean Squared Forecasting Error the Deep VAR is clearly dominating:
tab_rmsfe <- rbind(
rmsfe(fcst_var, y_true=y_test)[,model:="VAR"],
rmsfe(fcst_dvar, y_true=y_test)[,model:="DVAR"]
)
tab_rmsfe <- data.table::dcast(tab_rmsfe, variable ~ model, value.var = "value")
knitr::kable(
tab_rmsfe,
col.names = c("Variable", "DVAR", "VAR"),
digits = 5
)
|Variable | DVAR| VAR| |:--------|-------:|-------:| |U | 0.24303| 1.07060| |e | 0.36467| 1.61887| |prod | 0.95375| 2.05283| |rw | 1.20869| 2.76801|
With respect to correlations between forecasts and actual outcomes there is no clear winner. The VAR appear to perform better for unemployment U
, while the Deep VAR clearly dominates for production prod
.
tab_cor_fcst <- rbind(
cor_fcst(fcst_var, y_true=y_test)[,model:="VAR"],
cor_fcst(fcst_dvar, y_true=y_test)[,model:="DVAR"]
)
tab_cor_fcst <- data.table::dcast(tab_cor_fcst, variable ~ model, value.var = "value")
knitr::kable(
tab_cor_fcst,
col.names = c("Variable", "DVAR", "VAR"),
digits = 5
)
|Variable | DVAR| VAR| |:--------|--------:|--------:| |U | -0.30654| 0.38934| |e | -0.21578| 0.01346| |prod | 0.50568| 0.14389| |rw | -0.11030| -0.25109|
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.