knitr::opts_chunk$set( collapse = TRUE, comment = "#>", warning = FALSE, message = FALSE )
rm(list=ls()) library(deepvars)
Recall the functional from of the reduced-form VAR($p$) with $K$ variables and $p$ lags (with $m \in [1,p]$)
$$ \begin{aligned} && y_t&=A_1 y_{t-1} + A_2 y_{t-2} + ... + A_p y_{t-p} + u_t \ \end{aligned} $$
where $y_{t-m}$ are $(K \times 1)$ vectors , $A_m$ are $(K \times K)$ matrices of coefficients and $u_t$ is a $(K \times 1)$ vector of residuals.
The deep VAR ...
dt = data.table::data.table(deepvars::canada) var_cols = colnames(dt)[2:ncol(dt)]
The underlying time series in levels look non-stationary
dt[,(var_cols) := lapply(.SD, function(i) c(0,diff(i))), .SDcols=var_cols] chart_data = dt chart_data = data.table::melt(chart_data, id.vars = "date") ggplot2::ggplot(chart_data) + ggplot2::geom_line(ggplot2::aes(y=value, x=date)) + ggplot2::facet_wrap(variable ~ ., scales = "free_y") + ggplot2::theme_bw() + ggplot2::labs( x="Date", y="Value" )
Splitting data into a training and test sample ...
train_test_split <- split_sample(dt) train_data <- train_test_split$train_data test_data <- train_test_split$test_data
Select lag length ...
lags <- lag_order(train_data[,.SD,.SDcols=var_cols], max_lag = 12)$p
Fit the two models ...
# VAR var_model <- vareg(train_data, lags = lags) # Deep VAR deepvar_model <- deepvareg(train_data, lags=lags, num_units = 100, epochs=500)
y_true <- var_model$y_train
pred_var <- predict(var_model) plot(pred_var, y_true = y_true)
pred_deepvar <- predict(deepvar_model) plot(pred_deepvar, y_true = y_true)
X_test <- prepare_test_data(train_test_split, lags=lags)$X_test y_test <- prepare_test_data(train_test_split, lags=lags)$y_test
pred_var <- predict(var_model, X=X_test) plot(pred_var, y_true = y_test)
pred_dvar <- predict(deepvar_model, X=X_test) plot(pred_dvar, y_true = y_test)
cum_loss_var <- cum_loss(var_model)$cum_loss[,type:="var"] cum_loss_dvar <- cum_loss(deepvar_model)$cum_loss[,type:="deepvar"] dt_plot <- rbind(cum_loss_dvar, cum_loss_var) # dt_plot[,date:=train_data$date[date]] dt_plot[,type:=factor(type)] levels(dt_plot$type) <- c("Deep VAR", "VAR") ggplot2::ggplot(data=dt_plot, ggplot2::aes(x=date, y=value, colour=type)) + ggplot2::geom_line() + ggplot2::facet_wrap(~variable, scales = "free_y") + ggplot2::scale_color_discrete(name="Model:") + ggplot2::labs( x="Date", y="Squared error" )
cum_loss_var <- cum_loss(var_model, X=X_test, y=y_test)$cum_loss[,type:="var"] cum_loss_dvar <- cum_loss(deepvar_model, X=X_test, y=y_test)$cum_loss[,type:="deepvar"] dt_plot <- rbind(cum_loss_dvar, cum_loss_var) # dt_plot[,date:=test_data$date,by=.(variable, type)] dt_plot[,type:=factor(type)] ggplot2::ggplot(data=dt_plot, ggplot2::aes(x=date, y=value, colour=type)) + ggplot2::geom_line() + ggplot2::facet_wrap(~variable, scales = "free_y") + ggplot2::scale_color_discrete(name="Model:") + ggplot2::labs( x="Date", y="Squared error" )
rmse_var <- rbind( rmse(var_model)[,model:="var"][,sample:="train"], rmse(var_model, X=X_test, y=y_test)[,model:="var"][,sample:="test"] ) rmse_dvar <- rbind( rmse(deepvar_model)[,model:="deepvar"][,sample:="train"], rmse(deepvar_model, X=X_test, y=y_test)[,model:="deepvar"][,sample:="test"] ) tab_rmse <- rbind(rmse_var, rmse_dvar) tab_rmse <- data.table::dcast(tab_rmse, sample + variable ~ model, value.var = "value") knitr::kable( tab_rmse, col.names = c("Sample", "Variable", "DVAR", "VAR"), digits = 5 )
n_ahead <- nrow(y_test) fcst_var <- forecast(var_model, n.ahead = n_ahead) plot(fcst_var, y_true = y_test)
fcst_dvar <- forecast(deepvar_model, n.ahead = n_ahead) plot(fcst_dvar, y_true = y_test)
In terms of the Root Mean Squared Forecasting Error the Deep VAR is clearly dominating:
tab_rmsfe <- rbind( rmsfe(fcst_var, y_true=y_test)[,model:="VAR"], rmsfe(fcst_dvar, y_true=y_test)[,model:="DVAR"] ) tab_rmsfe <- data.table::dcast(tab_rmsfe, variable ~ model, value.var = "value") knitr::kable( tab_rmsfe, col.names = c("Variable", "DVAR", "VAR"), digits = 5 )
With respect to correlations between forecasts and actual outcomes there is no clear winner. The VAR appear to perform better for unemployment U
, while the Deep VAR clearly dominates for production prod
.
tab_cor_fcst <- rbind( cor_fcst(fcst_var, y_true=y_test)[,model:="VAR"], cor_fcst(fcst_dvar, y_true=y_test)[,model:="DVAR"] ) tab_cor_fcst <- data.table::dcast(tab_cor_fcst, variable ~ model, value.var = "value") knitr::kable( tab_cor_fcst, col.names = c("Variable", "DVAR", "VAR"), digits = 5 )
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.