###
# Takes a list of vintages r_list, generated with roll and a monthly target variable dat
# to evaluate a forecast.
# Google Data should be log-transformed
# currently it uses a RIDGE-Regression but other models such as OLS, LASSO or
# PCA are available (change the relevant lines of code)
#' Monthly forecast evaluation of vintages
#'
#' @description
#'
#' `r lifecycle::badge('experimental')`
#'
#' [forecast_m()] uses monthly vintages generated by [roll()]
#' to predict the value of another time series for the next month.
#' Currently, this function couldn't be tested yet.
#'
#' @param r_list A Google Trends time series in form of a tibble (or tsibble)
#' which is used to predict the next month in data.
#' @param data A tibble with some data you
#' want to predict for the next month
#' that is somewhat correlated with the data in r_list.
#' @param fd A logical value to indicate if you want to use the first differences
#' in r_list (true) or normal values (false).
#'
#' @section Methods:
#' Google Data should be log-transformed
#' currently it uses a RIDGE-Regression
#' but other models such as OLS, LASSO or
#' PCA are available.
#'
#' @return Returns a list with the the predicted data and the used model
#' results.
#'
#' @importFrom dplyr across
#' @importFrom dplyr first
#' @importFrom dplyr filter
#' @importFrom dplyr group_by
#' @importFrom dplyr last
#' @importFrom dplyr mutate
#' @importFrom dplyr rename
#' @importFrom dplyr select
#' @importFrom dplyr summarise
#' @importFrom lubridate floor_date
#' @importFrom magrittr %>%
#' @importFrom glmnet cv.glmnet
#' @importFrom glmnet glmnet
#' @importFrom stats predict
#' @importFrom tibble as_tibble
#' @importFrom tibble tibble
#' @importFrom tidyselect everything
#' @export
forecast_m <- function(r_list, data, fd = T) {
# get the number of lags from the suffix
number_of_lags <- sub(".*_", "", colnames(r_list[[1]])[length(colnames(r_list[[1]]))])
# Compute lags
r_raw <- lapply(r_list, function(x) {
y <- as_tibble(x)
if (number_of_lags >= 1) y <- mutate(y, lag_1 = lag(lag_0, 1))
if (number_of_lags >= 2) y <- mutate(y, lag_2 = lag(lag_0, 2))
if (number_of_lags >= 3) y <- mutate(y, lag_3 = lag(lag_0, 3))
if (number_of_lags == 4) y <- mutate(y, lag_4 = lag(lag_0, 4))
y <- ungroup(y)
return(y)
})
# Compute first differences if fd=T
if (fd) {
r_raw <- lapply(r_raw, function(x) {
mutate(
x,
across(
.cols = -c(1, 2), function(y) c(0, diff(y, 1))
),
.keep = "all"
)
})
}
# Join with datastream data.
# Remove months, where Google did some changes to the Google Trends data.
# Remove NAs.
r_raw <- lapply(r_raw, function(x) {
left_join(x, data[1:nrow(x), ], by = "time") %>%
select(time, id, data = value, everything()) %>%
filter(time != as.Date("2011-01-01")) %>% # omit structural breaks
filter(time != as.Date("2016-01-01")) %>%
mutate(across(everything(), function(y) replace(y, y == -Inf, NA_real_))) %>%
mutate(across(everything(), function(y) replace(y, y == Inf, NA_real_)))
})
# use PCA
# r_factors <- lapply(r_raw, function(x){
# pc <- as_tibble(prcomp(x[-c(1,2)])$x)
# bind_cols(x[c(1,2)], pc[,1:min(20, length(r_raw[[1]])-2)]) %>% #number of PCs
# drop_na()
# })
# set r <- r_factors to use PCA-Model
r <- r_raw
# Impute NAs with 0
r <- lapply(r, function(x) {
x[is.na(x)] <- 0
return(x)
})
# Function to estimate the model via RIDGE regression
build_model <- function(series) {
y <- as.matrix(series[1])
x <- as.matrix(series[-c(1)])
# Cross validation
# cv <- cv.glmnet(x, y, alpha = 0)
# Alternative arguments for glmnet():
# OLS: lambda= 0
# LASSO: alpha = 1
model <- glmnet(x = x, y = y, alpha = 0, lambda = 0) # cv$lambda.min)
return(model)
}
# Trends Data to forecast with previous estimated model
covariats <- lapply(r, function(x) data.matrix(x[-(1:3)]))
# estimate model, deselect everything besides variables (columns 1, 2)
models <- lapply(data.matrix(r), function(x) build_model(x[-(1:2)]))
# forecast
pred_values <- mapply(predict, object = models, newx = covariats)
# select last value in each vintage as forecast
# for relevant quarter
last_values <- sapply(pred_values, last)
forec <- tibble(
time = seq.Date(max(first(r)$time), max(last(r)$time), by = "month"),
value = last_values
) %>%
mutate(time = floor_date(time, "month")) %>%
rename(index = value)
last_model <- last(models)
return(list(
# returns forcasted values and
forec = forec,
# model estimated with contemporary data
last_model = last_model
))
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.