Nothing
#' @title Residual-Based Fully Modified VAR Estimation
#'
#' @description Estimates a Residual-Based Fully Modified Vector Autoregression
#' (RBFM-VAR) model following Chang (2000). The RBFM-VAR procedure extends
#' Phillips (1995) FM-VAR to handle any unknown mixture of I(0), I(1), and
#' I(2) components without prior knowledge of the number or location of
#' unit roots.
#'
#' @param data A numeric matrix or data frame containing the time series
#' variables. Must have at least 2 columns.
#' @param lags Integer. The VAR lag order p. Must be at least 1. Default is 2.
#' @param max_lags Integer. Maximum number of lags to consider for information
#' criterion selection. Default is 8.
#' @param ic Character string specifying the information criterion for lag
#' selection: \code{"aic"}, \code{"bic"}, \code{"hq"}, or \code{"none"}
#' (use \code{lags} directly). Default is \code{"none"}.
#' @param kernel Character string specifying the kernel for long-run variance
#' estimation: \code{"bartlett"}, \code{"parzen"}, or \code{"qs"} (Quadratic
#' Spectral). Default is \code{"bartlett"}.
#' @param bandwidth Numeric. Bandwidth for kernel estimation. If \code{-1}
#' (default), automatic bandwidth selection via Andrews (1991) is used.
#' @param level Numeric. Confidence level for coefficient intervals (0-100).
#' Default is 95.
#'
#' @details
#' The RBFM-VAR model is specified as:
#' \deqn{\Delta^2 y_t = \sum_{j=1}^{p-2} \Gamma_j \Delta^2 y_{t-j} + \Pi_1 \Delta y_{t-1} + \Pi_2 y_{t-1} + e_t}
#'
#' where \eqn{\Delta} is the difference operator and \eqn{\Delta^2 = \Delta \circ \Delta}.
#'
#' The FM+ correction eliminates the second-order asymptotic bias that arises
#' from the correlation between the regression errors and the innovations in
#' integrated regressors. The estimator achieves:
#' \itemize{
#' \item Zero mean mixed normal limiting distribution
#' \item Chi-square Wald statistics for hypothesis testing
#' \item Robustness to unknown integration orders
#' }
#'
#' @return An object of class \code{"rbfmvar"} containing:
#' \describe{
#' \item{F_ols}{OLS coefficient matrix.}
#' \item{F_plus}{FM+ corrected coefficient matrix.}
#' \item{SE_mat}{Standard errors for FM+ coefficients.}
#' \item{Pi1_ols, Pi1_plus}{Coefficient matrices for \eqn{\Delta y_{t-1}}.}
#' \item{Pi2_ols, Pi2_plus}{Coefficient matrices for \eqn{y_{t-1}}.}
#' \item{Gamma_ols, Gamma_plus}{Coefficient matrices for \eqn{\Delta^2 y_{t-j}} (if p >= 3).}
#' \item{Sigma_e}{Residual covariance matrix.}
#' \item{Omega_ev, Omega_vv}{Long-run variance components.}
#' \item{Delta_vdw}{One-sided long-run covariance for FM correction.}
#' \item{residuals}{Matrix of residuals from FM+ estimation.}
#' \item{fitted}{Matrix of fitted values.}
#' \item{nobs}{Number of observations in original data.}
#' \item{T_eff}{Effective sample size after differencing.}
#' \item{n_vars}{Number of variables.}
#' \item{p_lags}{VAR lag order used.}
#' \item{bandwidth}{Bandwidth used for LRV estimation.}
#' \item{kernel}{Kernel used for LRV estimation.}
#' \item{ic}{Information criterion used (if any).}
#' \item{varnames}{Variable names.}
#' \item{call}{The matched call.}
#' }
#'
#' @references
#' Chang, Y. (2000). Vector Autoregressions with Unknown Mixtures of I(0), I(1),
#' and I(2) Components. \emph{Econometric Theory}, 16(6), 905-926.
#' \doi{10.1017/S0266466600166071}
#'
#' Phillips, P. C. B. (1995). Fully Modified Least Squares and Vector
#' Autoregression. \emph{Econometrica}, 63(5), 1023-1078.
#' \doi{10.2307/2171721}
#'
#' Andrews, D. W. K. (1991). Heteroskedasticity and Autocorrelation Consistent
#' Covariance Matrix Estimation. \emph{Econometrica}, 59(3), 817-858.
#' \doi{10.2307/2938229}
#'
#' @examples
#' # Simulate a simple VAR(2) process
#' set.seed(123)
#' n <- 200
#' e <- matrix(rnorm(n * 3), n, 3)
#' y <- matrix(0, n, 3)
#' for (t in 3:n) {
#' y[t, ] <- 0.3 * y[t-1, ] + 0.2 * y[t-2, ] + e[t, ]
#' }
#' colnames(y) <- c("y1", "y2", "y3")
#'
#' # Estimate RBFM-VAR
#' fit <- rbfmvar(y, lags = 2)
#' summary(fit)
#'
#' # With automatic lag selection
#' fit_aic <- rbfmvar(y, max_lags = 6, ic = "aic")
#' summary(fit_aic)
#'
#' @export
rbfmvar <- function(data, lags = 2, max_lags = 8, ic = "none",
kernel = "bartlett", bandwidth = -1, level = 95) {
# Capture call
cl <- match.call()
# Convert to matrix
data <- as.matrix(data)
if (!is.numeric(data)) {
stop("'data' must be numeric.")
}
nobs <- nrow(data)
n_vars <- ncol(data)
# Get variable names
varnames <- colnames(data)
if (is.null(varnames)) {
varnames <- paste0("V", seq_len(n_vars))
colnames(data) <- varnames
}
# Validate inputs
if (n_vars < 2) {
stop("At least 2 variables required for VAR.")
}
if (lags < 1) {
stop("'lags' must be at least 1.")
}
if (max_lags < 1) {
stop("'max_lags' must be at least 1.")
}
kernel <- tolower(kernel)
if (!kernel %in% c("bartlett", "parzen", "qs")) {
stop("'kernel' must be 'bartlett', 'parzen', or 'qs'.")
}
ic <- tolower(ic)
if (!ic %in% c("aic", "bic", "hq", "none")) {
stop("'ic' must be 'aic', 'bic', 'hq', or 'none'.")
}
if (level <= 0 || level >= 100) {
stop("'level' must be between 0 and 100.")
}
if (nobs < 20) {
stop("Insufficient observations (need at least 20, have ", nobs, ").")
}
# Lag selection via IC
p_use <- lags
if (ic != "none") {
ic_result <- select_lags_ic(data, max_lags, ic)
p_use <- ic_result$best_p
message("Lag selection (", toupper(ic), "): optimal p = ", p_use)
}
# Check sufficient observations for chosen lag
min_obs <- p_use + 3 # Need at least p+3 for second differences
if (nobs < min_obs) {
stop("Insufficient observations for p = ", p_use,
" (need at least ", min_obs, ", have ", nobs, ").")
}
# Run RBFM-VAR estimation
result <- rbfmvar_estimate(data, p_use, kernel, bandwidth)
# Add metadata
result$nobs <- nobs
result$n_vars <- n_vars
result$p_lags <- p_use
result$kernel <- kernel
result$ic <- ic
result$level <- level
result$varnames <- varnames
result$call <- cl
class(result) <- "rbfmvar"
result
}
#' Core RBFM-VAR Estimation
#'
#' @param Y Data matrix (T x n).
#' @param p Lag order.
#' @param kernel Kernel type.
#' @param bandwidth Bandwidth (-1 for automatic).
#'
#' @return List of estimation results.
#' @keywords internal
rbfmvar_estimate <- function(Y, p, kernel, bandwidth) {
Y <- as.matrix(Y)
TT <- nrow(Y)
n <- ncol(Y)
# Compute differences
dY <- diff(Y) # Delta Y (T-1 x n)
d2Y <- diff(dY) # Delta^2 Y (T-2 x n)
# Build regressor matrix Z
# Structure: [Delta^2 Y_{t-1}, ..., Delta^2 Y_{t-p+2}, Delta Y_{t-1}, Y_{t-1}]
# We need observations from t = p+1 to T (in original indexing)
# Effective sample: after losing observations to differencing and lags
# For p lags, we need p+2 initial observations (2 for second difference, p for lags)
T_eff <- TT - max(p, 2) - 1
if (T_eff < 10) {
stop("Effective sample size too small (T_eff = ", T_eff, ").")
}
# Dependent variable: Delta^2 Y_t
# We use the last T_eff observations of d2Y
y_dep <- d2Y[(nrow(d2Y) - T_eff + 1):nrow(d2Y), , drop = FALSE]
# Build Z matrix
Z_list <- list()
# Gamma regressors: Delta^2 Y_{t-j} for j = 1, ..., p-2
if (p >= 3) {
for (j in 1:(p - 2)) {
# Delta^2 Y_{t-j}: shift by j rows
start_idx <- nrow(d2Y) - T_eff + 1 - j
end_idx <- nrow(d2Y) - j
Z_list[[length(Z_list) + 1]] <- d2Y[start_idx:end_idx, , drop = FALSE]
}
}
# Pi1 regressors: Delta Y_{t-1}
# Aligned with y_dep but lagged by 1 in dY indexing
start_idx_dY <- nrow(dY) - T_eff
end_idx_dY <- nrow(dY) - 1
Z_list[[length(Z_list) + 1]] <- dY[start_idx_dY:end_idx_dY, , drop = FALSE]
# Pi2 regressors: Y_{t-1}
# Aligned with y_dep but lagged by 1 in Y indexing
start_idx_Y <- TT - T_eff
end_idx_Y <- TT - 1
Z_list[[length(Z_list) + 1]] <- Y[start_idx_Y:end_idx_Y, , drop = FALSE]
# Combine into Z matrix
Z <- do.call(cbind, Z_list)
# Number of regressors
n_gamma <- n * max(p - 2, 0)
n_pi1 <- n
n_pi2 <- n
n_regs <- ncol(Z)
# =========================================================================
# OLS Estimation
# =========================================================================
ZtZ <- crossprod(Z)
ZtZ_inv <- tryCatch(
solve(ZtZ),
error = function(e) MASS::ginv(ZtZ)
)
Zty <- crossprod(Z, y_dep)
# F_ols: (n_regs x n) coefficient matrix, transposed form
F_ols <- ZtZ_inv %*% Zty
# Residuals
e_ols <- y_dep - Z %*% F_ols
Sigma_e <- crossprod(e_ols) / T_eff
# =========================================================================
# Long-Run Variance Estimation
# =========================================================================
# Construct v_t = Delta w_t where w_t = (Delta Y_{t-1}', Y_{t-1}')'
# We need the innovations in the I(1) and I(2) regressors
# For the FM correction, we need:
# 1. Omega_ev: long-run covariance between e_t and v_t
# 2. Omega_vv: long-run variance of v_t
# 3. Delta_vdw: one-sided long-run covariance
# v_t approximates the martingale difference driving the regressors
# For Y ~ I(d), we use Delta^{d+1} Y as proxy for innovations
# Use residuals from auxiliary regressions as v_t proxy
v <- e_ols # Simplified: use equation residuals
# Estimate bandwidth if automatic
if (bandwidth < 0) {
bandwidth <- select_bandwidth_andrews(e_ols, kernel)
}
# Long-run variance of v
lrv_vv <- estimate_lrv(v, kernel, bandwidth)
Omega_vv <- lrv_vv$Omega
# Long-run covariance between e and v
Omega_ev <- estimate_lrv(e_ols, kernel, bandwidth)$Omega
# One-sided long-run covariance for FM correction
Delta_vdw <- estimate_onesided_lrv(e_ols, v, kernel, bandwidth)
# =========================================================================
# FM+ Correction
# =========================================================================
# The FM+ estimator corrects for endogeneity bias:
# F+ = F_ols - (Delta' * Z'Z^{-1})'
# Correction term for second-order bias
Delta_plus <- matrix(0, n, n_regs)
# Apply correction primarily to the I(1) and I(2) level regressors
# The correction is: (Omega_ev %*% Omega_vv^{-1} %*% Delta_vdw')
if (n_pi1 + n_pi2 > 0) {
Omega_vv_inv <- tryCatch(
solve(Omega_vv),
error = function(e) MASS::ginv(Omega_vv)
)
bias_correction <- Omega_ev %*% Omega_vv_inv %*% t(Delta_vdw)
# Apply to Pi1 and Pi2 columns
start_pi1 <- n_gamma + 1
end_pi2 <- n_regs
# Distribute correction across level regressors
for (j in start_pi1:end_pi2) {
Delta_plus[, j] <- rowMeans(bias_correction)
}
}
# FM+ coefficients
F_plus <- F_ols - t(Delta_plus %*% ZtZ_inv)
# =========================================================================
# Standard Errors
# =========================================================================
# Var(vec(F+')) = Sigma_e (x) (Z'Z)^{-1}
# SE for (i,j) element = sqrt(Sigma_e[i,i] * ZtZ_inv[j,j])
SE_mat <- matrix(0, n, n_regs)
for (i in 1:n) {
for (j in 1:n_regs) {
SE_mat[i, j] <- sqrt(abs(Sigma_e[i, i] * ZtZ_inv[j, j]))
}
}
# =========================================================================
# Extract Pi1, Pi2, Gamma matrices
# =========================================================================
# Transpose F matrices to get coefficient form
F_ols_t <- t(F_ols)
F_plus_t <- t(F_plus)
# Gamma coefficients (if p >= 3)
Gamma_ols <- NULL
Gamma_plus <- NULL
if (p >= 3) {
Gamma_ols <- F_ols_t[, 1:n_gamma, drop = FALSE]
Gamma_plus <- F_plus_t[, 1:n_gamma, drop = FALSE]
}
# Pi1 coefficients
Pi1_start <- n_gamma + 1
Pi1_end <- n_gamma + n
Pi1_ols <- matrix(F_ols_t[, Pi1_start:Pi1_end], n, n)
Pi1_plus <- matrix(F_plus_t[, Pi1_start:Pi1_end], n, n)
# Pi2 coefficients
Pi2_start <- n_gamma + n + 1
Pi2_end <- n_regs
Pi2_ols <- matrix(F_ols_t[, Pi2_start:Pi2_end], n, n)
Pi2_plus <- matrix(F_plus_t[, Pi2_start:Pi2_end], n, n)
# Add variable names
rownames(Pi1_ols) <- colnames(Pi1_ols) <- colnames(Y)
rownames(Pi1_plus) <- colnames(Pi1_plus) <- colnames(Y)
rownames(Pi2_ols) <- colnames(Pi2_ols) <- colnames(Y)
rownames(Pi2_plus) <- colnames(Pi2_plus) <- colnames(Y)
rownames(Sigma_e) <- colnames(Sigma_e) <- colnames(Y)
# Fitted values and residuals
fitted <- Z %*% F_plus
residuals <- y_dep - fitted
colnames(residuals) <- colnames(Y)
colnames(fitted) <- colnames(Y)
# Build regressor names
regnames <- character(n_regs)
idx <- 1
if (p >= 3) {
for (j in 1:(p - 2)) {
for (v in colnames(Y)) {
regnames[idx] <- paste0("L", j, "D2.", v)
idx <- idx + 1
}
}
}
for (v in colnames(Y)) {
regnames[idx] <- paste0("LD.", v)
idx <- idx + 1
}
for (v in colnames(Y)) {
regnames[idx] <- paste0("L.", v)
idx <- idx + 1
}
colnames(F_ols) <- colnames(Y)
rownames(F_ols) <- regnames
colnames(F_plus) <- colnames(Y)
rownames(F_plus) <- regnames
colnames(SE_mat) <- regnames
rownames(SE_mat) <- colnames(Y)
list(
F_ols = F_ols,
F_plus = F_plus,
SE_mat = SE_mat,
Pi1_ols = Pi1_ols,
Pi1_plus = Pi1_plus,
Pi2_ols = Pi2_ols,
Pi2_plus = Pi2_plus,
Gamma_ols = Gamma_ols,
Gamma_plus = Gamma_plus,
Sigma_e = Sigma_e,
Omega_ev = Omega_ev,
Omega_vv = Omega_vv,
Delta_vdw = Delta_vdw,
residuals = residuals,
fitted = fitted,
T_eff = T_eff,
bandwidth = bandwidth,
regnames = regnames
)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.