Nothing
#' Simulate panel VAR data with interactive fixed effects
#'
#' Generates synthetic panel data from the DGP of Tugan (2021), Section S10,
#' with a single common factor following an AR(1) process and factor loadings
#' drawn from \eqn{N(1, 1)}.
#'
#' @details
#' The data generating process is:
#' \deqn{y_{i,t} = c + \Theta_1 y_{i,t-1} + f_t \lambda_i + A_0 \varepsilon_{i,t},}
#' where
#' \deqn{\Theta_1 = \begin{pmatrix} 0.65 & 0.30 \\ 0.20 & 0.60 \end{pmatrix},
#' \quad \Sigma_e = \begin{pmatrix} 1 & 0.5 \\ 0.5 & 1 \end{pmatrix},}
#' \deqn{f_t = 0.5 f_{t-1} + \eta_t, \; \eta_t \sim N(0, 0.5),}
#' \deqn{\lambda_{k,i} \sim N(1, 1) \;\text{(independently)},}
#' and \eqn{A_0 = \mathrm{chol}(\Sigma_e)^\top} (Cholesky identification). A
#' burn-in of 1000 periods is discarded.
#'
#' For \code{n_vars = 2} and \code{n_lags = 1}, the parameters match the MATLAB
#' \code{GeneratingSynteticDataSets.m} (Short-Run identification variant).
#'
#' @param n_units Positive integer. Number of cross-sectional units \eqn{I}
#' (default 50).
#' @param n_time Positive integer. Number of time periods \eqn{T} after
#' burn-in (default 30).
#' @param n_vars Positive integer. Number of VAR variables \eqn{K} (default 2).
#' @param n_lags Positive integer. VAR lag order (default 1; higher lags use
#' zero coefficient matrices).
#' @param n_factors Positive integer. Number of common factors (default 1;
#' each factor has its own AR(1) process and independent loadings).
#' @param identification Character. \code{"short_run"} (default) uses
#' \eqn{A_0 = \mathrm{chol}(\Sigma_e)^\top}; \code{"long_run"} uses the
#' Blanchard-Quah impact matrix \eqn{A_0 = (I - \Theta_1)\,\mathrm{chol}(D)^\top}
#' where \eqn{D = (I-\Theta_1)^{-1}\Sigma_e(I-\Theta_1)^{-\top}}. The
#' long-run multiplier \eqn{C(1) = (I-\Theta_1)^{-1}A_0 = \mathrm{chol}(D)^\top}
#' is lower-triangular. Matches the MATLAB
#' \code{GeneratingSynteticDataSets.m} with
#' \code{Identification='WithLongRunRestrictions'}.
#' @param seed Optional integer seed for reproducibility (default 42).
#'
#' @return A list with:
#' \describe{
#' \item{y}{Array of dimension \eqn{I \times T \times K} (unit \eqn{\times}
#' time \eqn{\times} variable). No missing values.}
#' \item{beta_true}{True coefficient vector, same format as \code{fit$beta}.}
#' \item{theta_true}{True VAR coefficient array \eqn{K \times K \times n\_lags}.}
#' \item{sigma_true}{True reduced-form covariance matrix \eqn{K \times K}.}
#' \item{a0_true}{True structural impact matrix \eqn{K \times K}.}
#' \item{factors_true}{\eqn{T \times n\_factors} matrix of true factors.}
#' \item{loadings_true}{\eqn{K \times n\_units} matrix of true loadings.}
#' \item{identification}{The identification scheme used (\code{"short_run"}
#' or \code{"long_run"}).}
#' \item{diff_vars_suggested}{Integer vector: variables that should be
#' cumulated in \code{compute_irf()}. \code{integer(0)} for short-run;
#' \code{1L} for long-run (to display cumulative responses, matching the
#' MATLAB MC code's \code{DifferencedVariables = [1]}).}
#' }
#'
#' @examples
#' sim <- sim_pvarife(n_units = 30, n_time = 20, n_vars = 2,
#' n_lags = 1, n_factors = 1, seed = 1)
#' dim(sim$y) # 30 x 20 x 2
#' sim$beta_true
#'
#' @export
sim_pvarife <- function(n_units = 50L, n_time = 30L, n_vars = 2L,
n_lags = 1L, n_factors = 1L,
identification = c("short_run", "long_run"),
seed = 42L) {
identification <- match.arg(identification)
n_units <- as.integer(n_units)
n_time <- as.integer(n_time)
n_vars <- as.integer(n_vars)
n_lags <- as.integer(n_lags)
n_factors <- as.integer(n_factors)
if (!is.null(seed)) set.seed(seed)
burn_in <- 1000L
n_total <- n_time + burn_in
# --- True parameters (matching MATLAB GeneratingSynteticDataSets.m) ---
if (n_vars == 2L && n_lags == 1L) {
theta_arr <- array(0.0, dim = c(n_vars, n_vars, n_lags))
theta_arr[, , 1L] <- matrix(c(0.65, 0.20, 0.30, 0.60), nrow = 2L)
sigma_true <- matrix(c(1.0, 0.5, 0.5, 1.0), nrow = 2L)
} else {
# Generic stable VAR: diagonal AR with coefficient 0.5, unit variance
theta_arr <- array(0.0, dim = c(n_vars, n_vars, n_lags))
theta_arr[, , 1L] <- diag(0.5, n_vars)
sigma_true <- diag(n_vars)
}
# Impact matrix: depends on identification scheme
if (identification == "short_run") {
# Short-run: Cholesky of Sigma — impact matrix itself is lower-triangular
# MATLAB: A_0 = chol(Sigma_e)' (WithShortRunRestrictions)
a0 <- t(chol(sigma_true))
} else {
# Long-run: Blanchard-Quah type — long-run multiplier is lower-triangular
# MATLAB: D = inv(I-Theta_1)*Sigma_e*inv(I-Theta_1)'; A_0 = (I-Theta_1)*chol(D)'
theta1 <- theta_arr[, , 1L]
lr_inv <- solve(diag(n_vars) - theta1)
d_mat <- lr_inv %*% sigma_true %*% t(lr_inv)
a0 <- (diag(n_vars) - theta1) %*% t(chol(d_mat))
}
# Stationary mean: (I - Theta_1)^{-1} * c_vec; use c_vec = rep(1, K)
c_vec <- rep(1.0, n_vars)
if (n_lags == 1L) {
y_mean <- solve(diag(n_vars) - theta_arr[, , 1L]) %*% c_vec
} else {
y_mean <- rep(0.0, n_vars)
}
# --- Common factors: n_factors independent AR(1) processes ---
# f[t] = 0.5 * f[t-1] + eta[t], eta ~ N(0, 0.5)
f_all <- matrix(0.0, nrow = n_total + 1L, ncol = n_factors)
for (rr in seq_len(n_factors)) {
eta <- stats::rnorm(n_total, mean = 0.0, sd = sqrt(0.5))
for (tt in seq(2L, n_total + 1L)) {
f_all[tt, rr] <- 0.5 * f_all[tt - 1L, rr] + eta[tt - 1L]
}
}
f_sim <- f_all[2L:(n_total + 1L), , drop = FALSE] # n_total x n_factors
# --- Loadings: lambda[k, i, r] ~ N(1, 1), one per variable-unit-factor ---
lam_true <- array(stats::rnorm(n_vars * n_units * n_factors, mean = 1.0, sd = 1.0),
dim = c(n_vars, n_units, n_factors))
# --- Simulate VAR ---
# y[k, t, i] in MATLAB notation; we produce y_big[k, n_total, i]
y_big <- array(0.0, dim = c(n_vars, n_total, n_units))
for (ii in seq_len(n_units)) {
y_big[, 1L, ii] <- y_mean
}
for (tt in seq(2L, n_total)) {
# Common component: K x 1, same for all units except loading
f_t <- matrix(f_sim[tt, ], ncol = 1L) # n_factors x 1
for (ii in seq_len(n_units)) {
# loading for unit ii: K x n_factors
lam_i <- lam_true[, ii, , drop = FALSE]
lam_i <- matrix(lam_i, nrow = n_vars, ncol = n_factors)
# VAR contribution
y_lag <- matrix(0.0, nrow = n_vars, ncol = 1L)
for (ll in seq_len(n_lags)) {
if (tt - ll >= 1L)
y_lag <- y_lag + theta_arr[, , ll] %*% y_big[, tt - ll, ii]
}
eps_i <- matrix(stats::rnorm(n_vars), ncol = 1L)
y_big[, tt, ii] <- c_vec + y_lag + lam_i %*% f_t + a0 %*% eps_i
}
}
# Discard burn-in; permute to [i, t, k] = [n_units, n_time, n_vars]
y_stable <- y_big[, (burn_in + 1L):n_total, , drop = FALSE]
# y_stable is [k, t, i]; permute to [i, t, k]
y_out <- aperm(y_stable, c(3L, 2L, 1L)) # I x T x K
# True factors (post burn-in): T x n_factors
f_true <- f_sim[(burn_in + 1L):n_total, , drop = FALSE]
# True loadings summary: K x I (sum over factors for display)
lam_summary <- apply(lam_true, c(1L, 2L), sum) # K x I
# Build beta_true in the same format as fit$beta
# beta = [c_vec; vec(Theta_1')] (intercepts first, then lag coefficients)
theta_vec <- as.numeric(t(theta_arr[, , 1L])) # K^2 values (row-major of Theta_1)
# seq(2, n_lags) is dangerous when n_lags=1 (returns c(2,1) not integer(0))
if (n_lags >= 2L) {
for (ll in seq(2L, n_lags)) {
theta_vec <- c(theta_vec, as.numeric(t(theta_arr[, , ll])))
}
}
beta_true <- c(c_vec, theta_vec)
diff_vars_suggested <- if (identification == "long_run") 1L else integer(0L)
list(
y = y_out,
beta_true = beta_true,
theta_true = theta_arr,
sigma_true = sigma_true,
a0_true = a0,
factors_true = f_true,
loadings_true = lam_summary,
identification = identification,
diff_vars_suggested = diff_vars_suggested
)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.