#' Random-Window Block Bootstrap for prediction intervals for PMPP model
#'
#' @author Michal Oleszak
#'
#' @description Produces prediction intervals for Posterior Mean Panel Predictor
#' model by means of resampling with replacement from model's residuals.
#' Block Bootstrap method takes into account heteroskedasticity of the
#' error terms, both across units and over time. Block window is chosen randomly.
#'
#' @references Oleszak, M. (2018). "Forecasting sales with micro-panels:
#' Empirical Bayes approach. Evidence from consumer goods sector.",
#' Erasmus University Thesis Repository
#'
#' @param model PMPP model, as returned by \code{pmpp()}
#' @param boot_reps integer; number of bootstrap replications
#' @param block_size integer; width of the re-sampled block of residuals
#' @param confidence numeric in (0,1); confidence level of the interval
#' @param iter iterating constant, to be used in a loop when extraction
#' from call is needed
#' @param fframe \code{data.frame} with the same columns as input data
#' to \code{model}, but with empty rows added to each
#' cross-sectional unit, as created by \code{create_fframe()}
#'
#' @return A \code{data.frame} with panel indices, lower and upper bounds and midpoint.
#'
#' @importFrom utils txtProgressBar setTxtProgressBar
#' @importFrom dplyr filter select bind_cols
#' @importFrom magrittr "%>%"
#' @export
#'
#' @examples
#' \dontrun{data(EmplUK, package = "plm")
#' EmplUK <- dplyr::filter(EmplUK, year %in% c(1978, 1979, 1980, 1981, 1982))
#' pmpp_model <- pmpp(dep_var = "emp", data = EmplUK)
#' my_fframe <- create_fframe(EmplUK, 1983:1985)
#' intervals <- pmpp_predinterval(pmpp_model, my_fframe, boot_reps = 10)
#' }
pmpp_predinterval <- function(model, fframe, boot_reps = 1000, block_size = NULL,
confidence = 0.95, iter = NULL) {
start_global <- Sys.time()
if (!is.null(iter)) {
data_orig <- model$data[[iter]]
} else {
data_orig <- model$data
}
unit_id <- model$args$panel_ind[1]
time_id <- model$args$panel_ind[2]
old_time <- unique(data_orig[[time_id]])
n.ahead <- sum(!(as.character(unique(fframe[[time_id]])) %in% old_time))
N <- model$sample_size[1]
T <- model$sample_size[2] - 1
Y_all <- matrix(data_orig[[model$args$dep_var]], nrow = T + 1, ncol = N)
if (length(model$common_coeff[-1]) > 0) {
Z_all <- vector("list", length(model$common_coeff[-1]))
for (z in seq(model$common_coeff[-1])) {
Z_all[[z]] <- matrix(data_orig[[model$args$exp_var[z]]], nrow = T + 1, ncol = N)
}
}
y_T <- Y_all[T + 1, ]
# Specify default block size ------------------------------------------------
if (is.null(block_size)) {
block_size <- as.numeric(round(T ^ (1 / 3)))
}
# Extract data from model ---------------------------------------------------
# (model's residuals u_hat and parameter estimates rho_hat, lambda_hat, alphas_hat)
u_hat <- t(model$residuals)
rho_hat <- model$common_coeff[1]
lambda_hat <- model$intercept
alphas_hat <- model$common_coeff[-1]
# Initialise the bootstrap --------------------------------------------------
bootstraps <- list()
pb <- txtProgressBar(min = 0, max = boot_reps, style = 3, char = "~")
for (R in 1:boot_reps) {
# Create random blocks & stack them up ------------------------------------
B <- matrix(nrow = N)
while (ncol(B) - 1 < T) {
j <- sample(1:(T - block_size + 1), 1, replace = TRUE)
B_j <- u_hat[, j:(j + block_size - 1)]
B <- cbind(B, B_j)
}
B <- B[, -1]
if (ncol(B) > T) {
B <- B[, 1:T]
}
# Generate bootstrap replicates y_boot ------------------------------------
y_boot <- matrix(nrow = N, ncol = T)
if (length(alphas_hat) > 0) {
for (z in seq_along(Z_all)) {
Z_all[[z]] <- alphas_hat[z] * Z_all[[z]]
}
Z_impact <- Reduce("+", Z_all)
}
for (t in 1:T) {
if (length(alphas_hat) == 0) {
y_boot[, t] <- lambda_hat + rho_hat * Y_all[t, ] + B[, t]
} else {
y_boot[, t] <- lambda_hat + rho_hat * Y_all[t, ] + Z_impact[t, ] + B[, t]
}
}
# Re-fit the model to obtain bootstrap coefficients -----------------------
# (lambda_boot, rho_boot, alpha_boot)
y_boot <- pmpp_data(y_boot)
if (!is.null(iter)) {
indata <- model$data[[iter]]
} else {
indata <- model$data
}
new_index <- subset(indata[, 1:2], indata[2] != as.character(indata[1, 2]))
newdata <- cbind(new_index, as.data.frame(y_boot)[3])
newdata[, 1] <- as.character(newdata[, 1])
newdata[, 3] <- as.numeric(newdata[, 3])
if (length(alphas_hat) > 0) {
exp_vars <- dplyr::filter(indata, get(time_id) %in% unique(newdata[[time_id]])) %>%
dplyr::select(model$args$exp_var)
newdata <- bind_cols(newdata, exp_vars)
}
bootmodel <- pmpp(
dep_var = colnames(as.data.frame(y_boot))[3],
panel_ind = colnames(new_index),
exp_var = model$args$exp_var,
csi_var = model$args$csi_var,
data = newdata,
post_mean_method = model$args$post_mean_method,
common_par_method = model$args$common_par_method,
optim_method = model$args$optim_method,
dens_grid = model$args$dens_grid,
gmm_model = model$args$gmm_model,
gmm_inst = model$args$gmm_inst
)
rho_boot <- bootmodel$common_coeff[1]
lambda_boot <- bootmodel$intercept
alphas_boot <- bootmodel$common_coeff[-1]
# Create random blocks for prediction & stack them up ---------------------
B <- matrix(nrow = N)
while (ncol(B) - 1 < n.ahead) {
j <- sample(1:(T - block_size + 1), 1, replace = TRUE)
B_j <- u_hat[, j:(j + block_size - 1)]
B <- cbind(B, B_j)
}
B <- as.matrix(B[, -1])
if (ncol(B) > n.ahead) {
B <- B[, 1:n.ahead]
}
# Compute forecasts -------------------------------------------------------
y_pred <- matrix(nrow = N, ncol = n.ahead)
rho_vec <- c()
for (i in 0:(n.ahead - 1)) {
rho_vec[i + 1] <- rho_boot ^ i
}
# AR(1) model, no external variables
if (length(alphas_boot) == 0) {
for (h in 1:n.ahead) {
y_pred[, h] <- lambda_boot * sum(rho_vec[1:h]) + rho_boot ^ h * y_T + B[, h]
}
} else {
# ARX(1) model with predicion horizon = T+1
if (n.ahead == 1) {
Z_boot_impact <- vector("list", length(alphas_boot))
names(Z_boot_impact) <- names(alphas_boot)
data_last <- dplyr::filter(data_orig, get(time_id) == max(get(time_id)))
for (a in names(alphas_boot)) {
Z_boot_impact[[a]] <- alphas_boot[a] * data_last[[a]]
}
Z_boot_impact_all_exp_vars <- Reduce("+", Z_boot_impact)
y_pred[, 1] <- lambda_boot + (rho_boot * y_T) + Z_boot_impact_all_exp_vars + B[, 1]
# ARX(1) model with horizon > T+1
} else {
laststamp <- as.character(rev(unique(fframe[[time_id]]))[1])
if (any(is.na(
dplyr::filter(fframe, !get(time_id) %in% c(old_time, laststamp)) %>%
select(model$args$exp_var)
))) {
stop("Forecasting allowed only one step ahead further than the latest exp_vars. Adjust fframe.")
}
newdata1 <- dplyr::filter(fframe, get(time_id) != laststamp)
new_time <- setdiff(unique(newdata1[[time_id]]), old_time)
newdata2 <- dplyr::filter(newdata1, get(time_id) %in%
c(new_time, max(data_orig[[time_id]])))
T_and_later <- as.character(unique(newdata2[[time_id]]))
Z_boot_impact <- vector("list", length(alphas_boot))
names(Z_boot_impact) <- names(alphas_boot)
Z_boot_impact <- lapply(Z_boot_impact, function(x) {
x <- matrix(NA, ncol = n.ahead, nrow = length(lambda_boot))
})
for (a in names(alphas_boot)) {
for (h in seq(n.ahead)) {
Z_list <- vector("list", h)
for (g in 1:h) {
Z_list[[g]] <- dplyr::filter(newdata2, get(time_id) == T_and_later[g]) %>%
dplyr::select(a)
}
for (z in seq(Z_list)) {
Z_list[[z]] <- Z_list[[z]] * rev(rho_vec[1:h])[z]
}
sum_h <- Reduce("+", Z_list)
Z_boot_impact[[a]][, h] <- as.matrix(alphas_boot[a] * sum_h)
}
}
Z_boot_impact_all_exp_vars <- Reduce("+", Z_boot_impact)
for (h in 1:n.ahead) {
y_pred[, h] <- lambda_boot * sum(rho_vec[1:h]) + rho_boot ^ h * y_T +
Z_boot_impact_all_exp_vars[, h] + B[, h]
}
}
}
# Save replication's outcome ----------------------------------------------
bootstraps[[R]] <- y_pred
setTxtProgressBar(pb, R)
}
close(pb)
# Obtain quantiles ----------------------------------------------------------
bootstraps <- lapply(bootstraps, pmpp_data)
out <- matrix(nrow = N * n.ahead, ncol = 5)
colnames(out) <- c(
unit_id, time_id,
paste0("lower_", 100 * confidence),
paste0("upper_", 100 * confidence), "midpoint"
)
out <- as.data.frame(out)
out[, 1:2] <- bootstraps[[1]][, 1:2]
cutoff <- ceiling((boot_reps * (1 - confidence)) / 2)
for (i in 1:(N * n.ahead)) {
temp <- c()
for (R in 1:boot_reps) {
temp <- c(temp, bootstraps[[R]][i, 3])
}
temp <- c(sort(temp))
temp <- temp[-c((length(temp):(length(temp) - cutoff + 1)), 1:cutoff)]
out[[paste0("lower_", 100 * confidence)]][i] <- min(temp)
out[[paste0("upper_", 100 * confidence)]][i] <- max(temp)
out[["midpoint"]][i] <- mean(temp)
}
out[[unit_id]] <- factor(out[[unit_id]],
levels = unique(model$data[[unit_id]])
)
levels(out[[time_id]]) <- setdiff(unique(fframe[[time_id]]), old_time)
# Return the output ---------------------------------------------------------
message(paste0(
"Bootstrapping done! It took ",
substring(capture.output(Sys.time() - start_global), 20), "."
))
return(out)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.