R/pid_dta.R
In pvars: VAR Modeling for Heterogeneous Panels

Documented in pid.cvm pid.dc pid.iv

#' @title Independence-based identification of panel SVAR models via Cramer-von Mises (CVM) distance
#' @description Given an estimated panel of VAR models, this function applies independence-based identification for 
#'   the structural impact matrix \eqn{B_i} of the corresponding SVAR model
#'   \deqn{y_{it} = c_{it} + A_{i1} y_{i,t-1} + ... + A_{i,p_i} y_{i,t-p_i} + u_{it}}
#'   \deqn{       = c_{it} + A_{i1} y_{i,t-1} + ... + A_{i,p_i} y_{i,t-p_i} + B_i \epsilon_{it}.}
#'   Matrix \eqn{B_i} corresponds to the unique decomposition of the least squares covariance matrix \eqn{\Sigma_{u,i} = B_i B_i'} 
#'   if the vector of structural shocks \eqn{\epsilon_{it}} contains at most one Gaussian shock (Comon, 1994).
#'   A nonparametric dependence measure, the Cramer-von Mises distance (Genest and Remillard, 2004), 
#'   determines least dependent structural shocks. The minimum is obtained by a two step optimization algorithm 
#'   similar to the technique described in Herwartz and Ploedt (2016).
#'
#' @param x An object of class '\code{pvarx}' or a list of VAR objects 
#'   that will be \link[=as.varx]{coerced} to '\code{varx}'. 
#'   Estimated panel of VAR objects.
#' @param combine Character. The combination of the individual reduced-form residuals 
#'   via '\code{group}' for the group ICA by Calhoun et al. (2001) using common structural shocks,
#'   via '\code{pool}' for the pooled shocks by Herwartz and Wang (2024) using a common rotation matrix, or
#'   via '\code{indiv}' for individual-specific \eqn{B_i \forall i} using strictly separated identification runs.
#' @param n.factors Integer. Number of common structural shocks across all individuals if the group ICA is selected.
#' @param dd Object of class '\code{indepTestDist}' generated by '\code{indepTest}' from package '\code{copula}'. 
#'   Simulated independent sample(s) of the same size as the data. 
#'   If the sample sizes \eqn{T_i - p_i} differ across '\eqn{i}', the strictly separated identification
#'   requires a list of \eqn{N} individual '\code{indepTestDist}'-objects with respective sample sizes.
#'   If \code{NULL} (the default), a suitable object will be calculated during the call of \code{\link{pid.cvm}}.
#' @param itermax Integer. Maximum number of iterations for DEoptim.
#' @param steptol Numeric. Tolerance for steps without improvement for DEoptim.
#' @param iter2 Integer. Number of iterations for the second optimization.
#' 
#' @return List of class '\code{pid}' with elements:
#' \item{A}{Matrix. The lined-up coefficient matrices \eqn{A_j, j=1,\ldots,p} 
#'    for the lagged variables in the panel VAR.}
#' \item{B}{Matrix. Mean group of the estimated structural impact matrices \eqn{B_i}, 
#'    i.e. the unique decomposition of the covariance matrices of reduced-form errors.}
#' \item{L.varx}{List of '\code{varx}' objects for the individual estimation results 
#'   to which the structural impact matrices \eqn{B_i} have been added.}
#' \item{eps0}{Matrix. The combined whitened residuals \eqn{\epsilon_{0}} 
#'   to which the ICA procedure has been applied subsequently. 
#'   These are still the unrotated baseline shocks! 
#'   \code{NULL} if '\code{indiv}' identifications have been used.}
#' \item{ICA}{List of objects resulting from the underlying ICA procedure. 
#'   \code{NULL} if '\code{indiv}' identifications have been used.}
#' \item{rotation_angles}{Numeric vector. The rotation angles 
#'   suggested by the combined identification procedure. 
#'   \code{NULL} if '\code{indiv}' identifications have been used.}
#' \item{args_pid}{List of characters and integers indicating the identification methods and specifications that have been used.}
#' \item{args_pvarx}{List of characters and integers indicating the estimator and specifications that have been used.}
#' 
#' @references Comon, P. (1994): 
#'   "Independent Component Analysis: A new Concept?", 
#'   \emph{Signal Processing}, 36, pp. 287-314.
#' @references Genest, C., and Remillard, B. (2004): 
#'   "Tests of Independence and Randomness based on the Empirical Copula Process", 
#'   Test, 13, pp. 335-370.
#' @references Herwartz, H., and Wang, S. (2024): 
#'   "Statistical Identification in Panel Structural Vector Autoregressive 
#'   Models based on Independence Criteria", 
#'   \emph{Journal of Applied Econometrics}, 39 (4), pp. 620-639.
#' @references Herwartz, H. (2018): 
#'   "Hodges Lehmann detection of structural shocks - An Analysis of macroeconomic Dynamics in the Euro Area", 
#'   \emph{Oxford Bulletin of Economics and Statistics}, 80, pp. 736-754.
#' @references Herwartz, H., and Ploedt, M. (2016): 
#'   "The Macroeconomic Effects of Oil Price Shocks: Evidence from a Statistical Identification Approach", 
#'   \emph{Journal of International Money and Finance}, 61, pp. 30-44.
#'   
#' @seealso \ldots the individual \code{\link[svars]{id.cvm}} by Lange et al. (2021) in \strong{svars}. 
#'   Note that \code{\link{pid.cvm}} relies on a modification of their procedure and 
#'   thus performs ICA on the pre-whitened shocks '\code{eps0}' directly.
#' 
#' @examples
#' \donttest{
#' # select minimal or full example #
#' is_min = TRUE
#' idx_i  = ifelse(is_min, 1, 1:14)
#' 
#' # load and prepare data #
#' data("EURO")
#' data("EU_w")
#' names_i = names(EURO[idx_i+1])  # country names (#1 is EA-wide aggregated data)
#' idx_k   = 1:4   # endogenous variables in individual data matrices
#' idx_t   = 1:76  # periods from 2001Q1 to 2019Q4
#' trend2  = idx_t^2
#' 
#' # individual VARX models with common lag-order p=2 #
#' L.data = lapply(EURO[idx_i+1], FUN=function(x) x[idx_t, idx_k])
#' L.exog = lapply(EURO[idx_i+1], FUN=function(x) cbind(trend2, x[idx_t, 5:10]))
#' L.vars = sapply(names_i, FUN=function(i) 
#'   vars::VAR(L.data[[i]], p=2, type="both", exogen=L.exog[[i]]), 
#'   simplify=FALSE)
#' 
#' # identify under common orthogonal matrix (with pooled sample size (T-p)*N) #
#' S.pind = copula::indepTestSim(n=(76-2)*length(names_i), p=length(idx_k), N=100)
#' R.pcvm = pid.cvm(L.vars, dd=S.pind, combine="pool")
#' R.irf  = irf(R.pcvm, n.ahead=50, w=EU_w)
#' plot(R.irf, selection=list(1:2, 3:4))
#' 
#' # identify individually (with same sample size T-p for all 'i') #
#' S.pind = copula::indepTestSim(n=(76-2), p=length(idx_k), N=100)
#' R.pcvm = pid.cvm(L.vars, dd=S.pind, combine="indiv")
#' R.irf  = irf(R.pcvm, n.ahead=50, w=EU_w)
#' plot(R.irf, selection=list(1:2, 3:4))
#' }
#' 
#' @family panel identification functions
#' @importFrom copula indepTestSim
#' @importFrom pbapply pblapply
#' @importFrom DEoptim DEoptim.control DEoptim
#' @export
#' 
pid.cvm <- function(x, combine=c("group", "pool", "indiv"), n.factors=NULL, dd=NULL, itermax=500, steptol=100, iter2=75){
  # define and check
  x = as.pvarx(x)
  L.resid = lapply(x$L.varx, FUN=function(x_i) x_i$resid)
  dim_N   = length(x$L.varx)
  dim_K   = nrow(x$A)
  
  # names for variables and for shocks
  names_k = rownames(x$A)
  if( is.null(names_k) ){ names_k = paste0("y", 1:dim_K) }
  if( is.null(n.factors) | combine!="group" ){ n.factors = dim_K }
  idx_s   = 1:n.factors
  names_s = paste0("epsilon[ ", idx_s, " ]")
  
  # combine reduced-form residuals
  args_id = list(method="Cramer-von Mises distance", combine=combine, itermax=itermax, steptol=steptol, iter2=iter2)
  if(combine=="group"){
    # pre-step PCAs for group ICA, from Risk et al. 2014:227 / Calhoun et al. 2001:151
    R.grp = aux_UGroup(L.resid, n.factors=n.factors)
    R.eps = R.grp$eps    # grouped whitened residuals
    dim_T = nrow(R.eps)  # number of time periods
    
    if(n.factors==1){
      # identification from a single common factor
      message("Note that group ICA has been skipped since n.factors is 1.")
      R.ica = list(S=R.eps, W=diag(n.factors))
      R.rot = NULL
    }else{
      # ICA by minimizing Cramer-von Mises distance global test statistic
      if(is.null(dd)){ dd = copula::indepTestSim(n=nrow(R.eps), p=ncol(R.eps), N=100) }  # simulated distribution of test statistics under independence
      R.ica = aux_cvmICA(R.eps, dd=dd, itermax=itermax, steptol=steptol, iter2=iter2)
      R.rot = R.ica$theta  # optimal rotation angles
      R.ica$S = R.eps %*% R.ica$W  # independent components from whitened principal components
    }
    
    # recover unique structural impact matrices by multivariate OLS, from Risk et al. 2014:227
    args_id$n.factors = n.factors
    for(i in 1:dim_N){
      U = tail(L.resid[[i]], n=c(dim_K, dim_T))  # (K x T) matrix of individual reduced-form residuals
      B = U %*% R.ica$S / (dim_T-1)  # Note the normalization SSinv = I_Q/(T-1).
      
      # add to "varx"-object
      dimnames(B) = list(names_k, names_s)
      x$L.varx[[i]]$B = B
      x$L.varx[[i]]$args_id = args_id
      class(x$L.varx[[i]]) = c("id", "varx")
    }
    
  }else if(combine=="pool"){
    # unit-wise decomposition of covariance matrices to pool the residuals, from Herwartz 2017:12
    L.Ucov = lapply(x$L.varx, FUN=function(x_i) x_i$SIGMA)  # OLS covariance matrix of residuals, from Herwartz, Wang 2024:make_pool.R
    R.pool = aux_UPool(L.resid, L.Ucov)
    R.eps  = R.pool$eps     # pooled whitened residuals
    L.Ustd = R.pool$L.Ustd  # individual matrices with standard deviations on the main diagonal
    L.Uchl = R.pool$L.Uchl  # lower triangular, Cholesky-decomposed correlation matrices
    
    # ICA by minimizing Cramer-von Mises distance global test statistic
    if(is.null(dd)){ dd = copula::indepTestSim(n=nrow(R.eps), p=ncol(R.eps), N=100) }  # simulated distribution of test statistics under independence
    R.ica = aux_cvmICA(R.eps, dd=dd, itermax=itermax, steptol=steptol, iter2=iter2)
    R.rot = R.ica$theta  # optimal rotation angles
    
    # recover unique structural impact matrices, by Herwartz 2017:12, Eq.23
    A.coef = array(NA, dim=c(dim_K, dim_K, dim_N), dimnames=list(names_k, NULL, NULL))
    for(i in 1:dim_N){ A.coef[ , , i] = L.Ustd[[i]] %*% L.Uchl[[i]] %*% R.ica$W }
    
    # column order which maximizes the sum of absolute diagonal elements
    R.mgB  = apply(A.coef, MARGIN=1:2, FUN=function(x) mean(x))  # ... of group mean
    R.sico = aux_sico(R.mgB, names_s=names_s)  # Herwartz 2017:25, Ch.4.3.3, applies this to each individual instead.
    for(i in 1:dim_N){ 
      x$L.varx[[i]]$B = A.coef[ , , i] %*% R.sico$mat_perm
      x$L.varx[[i]]$args_id = args_id
      class(x$L.varx[[i]]) = c("id", "varx")
    }
    
  }else if(combine=="indiv"){
    # strictly individual-specific identification
    idf <- function(x){
      # pre-whiten the residuals
      P_chol = t(chol(x$SIGMA))  # baseline decomposition
      R.eps  = t(solve(P_chol) %*% x$resid)  # whitened residuals
      
      # simulated distribution of test statistics under independence
      if(is.null(dd)){ 
        dd_i = copula::indepTestSim(n=nrow(R.eps), p=ncol(R.eps), N=100)
      }else if(inherits(dd, "indepTestDist")){ 
        dd_i = dd
      }else if(is.list(dd)){ 
        dd_i = dd[[i]]
      }
      
      # ICA by minimizing Cramer-von Mises distance global test statistic
      R.ica = aux_cvmICA(R.eps, dd=dd_i, itermax=itermax, steptol=steptol, iter2=iter2)
      R.rot = R.ica$theta  # optimal rotation angles
      
      # recover unique structural impact matrices
      B = P_chol %*% R.ica$W
      B = aux_sico(B)$B
      
      # add to "varx"-object
      dimnames(B) = list(names_k, names_s)
      x$B = B
      x$ICA = R.ica
      x$rotation_angles = R.rot
      x$args_id = args_id
      x$args_id$dd = dd_i
      class(x) = c("id", "varx")
      return(x)
    }
    
    # calculate structural impact matrices
    x$L.varx = lapply(x$L.varx, function(x_i) idf(x_i))
    dd = lapply(x$L.varx, function(x_i) x_i$args_id$dd)
    R.rot = R.eps = R.ica = NULL  # void panel results
    
  }else{
    stop("Incorrect specification of the combination approach!")
  }
  
  # estimate panel mean-group
  x$MG_B = aux_MG(x$L.varx, w=x$args_pvarx$w, idx_par="B")
  x$B    = x$MG_B$mean
  
  # return result
  args_id$dd = dd
  x$eps0 = R.eps
  x$ICA  = R.ica
  x$rotation_angles = R.rot
  x$args_pid = args_id
  class(x) = c("pid", "pvarx")
  return(x)
}


#' @title Independence-based identification of panel SVAR models using distance covariance (DC) statistic
#' @description Given an estimated panel of VAR models, this function applies independence-based identification for 
#'   the structural impact matrix \eqn{B_i} of the corresponding SVAR model
#'   \deqn{y_{it} = c_{it} + A_{i1} y_{i,t-1} + ... + A_{i,p_i} y_{i,t-p_i} + u_{it}}
#'   \deqn{       = c_{it} + A_{i1} y_{i,t-1} + ... + A_{i,p_i} y_{i,t-p_i} + B_i \epsilon_{it}.}
#'   Matrix \eqn{B_i} corresponds to the unique decomposition of the least squares covariance matrix \eqn{\Sigma_{u,i} = B_i B_i'} 
#'   if the vector of structural shocks \eqn{\epsilon_{it}} contains at most one Gaussian shock (Comon, 1994).
#'   A nonparametric dependence measure, the distance covariance (Szekely et al., 2007), determines 
#'   least dependent structural shocks. The algorithm described in Matteson and Tsay (2013) is applied 
#'   to calculate the matrix \eqn{B_i}.
#' 
#' @param x An object of class '\code{pvarx}' or a list of VAR objects 
#'   that will be \link[=as.varx]{coerced} to '\code{varx}'. 
#'   Estimated panel of VAR objects.
#' @param PIT Logical. If PIT='TRUE', the distribution and density of the 
#'   independent components are estimated using Gaussian kernel density estimates.
#' @param combine Character. The combination of the individual reduced-form residuals 
#'   via '\code{group}' for the group ICA by Calhoun et al. (2001) using common structural shocks,
#'   via '\code{pool}' for the pooled shocks by Herwartz and Wang (2024) using a common rotation matrix, or
#'   via '\code{indiv}' for individual-specific \eqn{B_i \forall i} using strictly separated identification runs.
#' @param n.factors Integer. Number of common structural shocks across all individuals if the group ICA is selected.
#' @param n.iterations Integer. The maximum number of iterations in the 'steadyICA' algorithm. The default in 'steadyICA' is 100.
#' 
#' @return List of class '\code{pid}' with elements:
#' \item{A}{Matrix. The lined-up coefficient matrices \eqn{A_j, j=1,\ldots,p} 
#'    for the lagged variables in the panel VAR.}
#' \item{B}{Matrix. Mean group of the estimated structural impact matrices \eqn{B_i}, 
#'    i.e. the unique decomposition of the covariance matrices of reduced-form errors.}
#' \item{L.varx}{List of '\code{varx}' objects for the individual estimation results
#'   to which the structural impact matrices \eqn{B_i} have been added.}
#' \item{eps0}{Matrix. The combined whitened residuals \eqn{\epsilon_{0}} 
#'   to which the ICA procedure has been applied subsequently. 
#'   These are still the unrotated baseline shocks! 
#'   \code{NULL} if '\code{indiv}' identifications have been used.}
#' \item{ICA}{List of objects resulting from the underlying ICA procedure. 
#'   \code{NULL} if '\code{indiv}' identifications have been used.}
#' \item{rotation_angles}{Numeric vector. The rotation angles 
#'   suggested by the combined identification procedure. 
#'   \code{NULL} if '\code{indiv}' identifications have been used.}
#' \item{args_pid}{List of characters and integers indicating the identification methods and specifications that have been used.}
#' \item{args_pvarx}{List of characters and integers indicating the estimator and specifications that have been used.}
#' 
#' @references Calhoun, V. D., Adali, T., Pearlson, G. D., and Pekar, J. J. (2001): 
#'   "A Method for Making Group Inferences from Functional MRI Data using Independent Component Analysis",
#'   \emph{Human Brain Mapping}, 16, pp. 673-690.
#' @references Comon, P. (1994): 
#'   "Independent Component Analysis: A new Concept?", 
#'   \emph{Signal Processing}, 36, pp. 287-314.
#' @references Herwartz, H., and Wang, S. (2024): 
#'   "Statistical Identification in Panel Structural Vector Autoregressive 
#'   Models based on Independence Criteria", 
#'   \emph{Journal of Applied Econometrics}, 39 (4), pp. 620-639.
#' @references Matteson, D. S., and Tsay, R. S. (2017): 
#'   "Independent Component Analysis via Distance Covariance", 
#'   \emph{Journal of the American Statistical Association}, 112, pp. 623-637.
#' @references Risk, B., Matteson, D. S., Ruppert, D., Eloyan, A., and Caffo, B. S. (2014): 
#'   "An Evaluation of Independent Component Analyses with an Application to Resting-State fMRI", 
#'   \emph{Biometrics}, 70, pp. 224-236.
#' @references Szekely, G. J., Rizzo, M. L., and Bakirov, N. K. (2007): 
#'   "Measuring and Testing Dependence by Correlation of Distances", 
#'   \emph{Annals of Statistics}, 35, pp. 2769-2794.
#' 
#' @section Notes on the Reproduction in "Examples":
#'   The reproduction of Herwartz and Wang (HW, 2024:630) serves as an 
#'   exemplary application and unit-test of the implementation by \strong{pvars}. 
#'   While \strong{vars}' \code{\link[vars]{VAR}} employs equation-wise \code{\link[stats]{lm}} 
#'   with the QR-decomposition of the regressor matrix \eqn{X}, HW2024 and accordingly 
#'   the reproduction by \code{\link{pvarx.VAR}} both calculate \eqn{X'(XX')^{-1}} 
#'   for the multivariate least-squares estimation of \eqn{A_i}. Moreover, 
#'   both use \code{\link[steadyICA]{steadyICA}} for identification such that the 
#'   reproduction result for the pooled rotation matrix \code{Q} is close to HW2024, 
#'   the mean absolute difference between both 4x4 matrices is less than 0.0032. 
#'   Note that the single EA-Model is estimated and identified the same way, 
#'   which can be extracted as a separate '\code{varx}' object from the trivial 
#'   panel object by \code{$L.varx[[1]]} and even bootstrapped by \code{\link{sboot.mb}}.
#'   
#'   Some differences remain such that the example does not exactly 
#'   reproduce the results in HW2024. To account for the \eqn{n} exogenous and 
#'   deterministic regressors in slot \code{$D}, \code{\link{pvarx.VAR}} calculates 
#'   \eqn{\Sigma_{u,i}} with the degrees of freedom \eqn{T-Kp_i-n} instead of 
#'   HW2024's \eqn{T-Kp_i-1}. Moreover, the confidence bands for the IRF are 
#'   based on \strong{pvars}' \link[=sboot.pmb]{panel moving-block-} instead of 
#'   HW2024's wild bootstrap. The responses of real GDP and of inflation are not 
#'   scaled by 0.01, unlike in HW2024. Note that both bootstrap procedures keep 
#'   \code{D} fixed over their iterations. 
#' @example inst/examples/pid_dc.R
#' @family panel identification functions
#' @importFrom steadyICA steadyICA W2theta
#' @export
#' 
pid.dc <- function(x, combine=c("group", "pool", "indiv"), n.factors=NULL, n.iterations=100, PIT=FALSE){
  # define and check
  x = as.pvarx(x)
  L.resid = lapply(x$L.varx, FUN=function(x_i) x_i$resid)
  dim_N   = length(x$L.varx)
  dim_K   = nrow(x$A)
  
  # names for variables and for shocks
  names_k = rownames(x$A)
  if( is.null(names_k) ){ names_k = paste0("y", 1:dim_K) }
  if( is.null(n.factors) | combine!="group" ){ n.factors = dim_K }
  idx_s   = 1:n.factors
  names_s = paste0("epsilon[ ", idx_s, " ]")
  
  # combine reduced-form residuals
  args_id = list(method="Distance covariances", combine=combine, n.iterations=n.iterations, PIT=PIT)
  if(combine=="group"){
    # pre-step PCAs for group ICA, from Risk et al. 2014:227 / Calhoun et al. 2001:151
    R.grp = aux_UGroup(L.resid, n.factors=n.factors)
    R.eps = R.grp$eps    # grouped whitened residuals
    dim_T = nrow(R.eps)  # number of time periods
    
    if(n.factors==1){
      # identification from a single common factor
      message("Note that group ICA has been skipped since n.factors is 1.")
      R.ica = list(S=R.eps, W=diag(n.factors))
      R.rot = NULL
    }else{
      # ICA by minimizing distance covariance (measure is also the test statistic)
      R.ica = suppressMessages(steadyICA::steadyICA(X=R.eps, symmetric=TRUE, PIT=PIT, maxit=n.iterations))
      R.rot = steadyICA::W2theta(R.ica$W)  # optimal rotation angles
    }
    
    # recover unique structural impact matrices by multivariate OLS, from Risk et al. 2014:227
    args_id$n.factors = n.factors
    for(i in 1:dim_N){
      U = tail(L.resid[[i]], n=c(dim_K, dim_T))  # (K x T) matrix of individual reduced-form residuals
      B = U %*% R.ica$S / (dim_T-1)  # Note the normalization SSinv = I_Q/(T-1).
      
      # add to "varx"-object
      dimnames(B) = list(names_k, names_s)
      x$L.varx[[i]]$B = B
      x$L.varx[[i]]$args_id = args_id
      class(x$L.varx[[i]]) = c("id", "varx")
    }
    
  }else if(combine=="pool"){
    # unit-wise decomposition of covariance matrices to pool the residuals, from Herwartz 2017:12
    L.Ucov = lapply(x$L.varx, FUN=function(x_i) x_i$SIGMA)  # OLS covariance matrix of residuals, from Herwartz, Wang 2024:make_pool.R
    R.pool = aux_UPool(L.resid, L.Ucov)
    R.eps  = R.pool$eps     # pooled whitened residuals
    L.Ustd = R.pool$L.Ustd  # individual matrices with standard deviations on the main diagonal
    L.Uchl = R.pool$L.Uchl  # lower triangular, Cholesky-decomposed correlation matrices
    
    # ICA by minimizing distance covariance (measure is also the test statistic)
    R.ica = suppressMessages(steadyICA::steadyICA(X=R.eps, symmetric=TRUE, PIT=PIT, maxit=n.iterations))
    R.rot = steadyICA::W2theta(R.ica$W)  # optimal rotation angles
    
    # recover unique structural impact matrices, by Herwartz 2017:12, Eq.23
    A.coef = array(NA, dim=c(dim_K, dim_K, dim_N), dimnames=list(names_k, NULL, NULL))
    for(i in 1:dim_N){ A.coef[ , , i] = L.Ustd[[i]] %*% L.Uchl[[i]] %*% R.ica$W }
    
    # column order which maximizes the sum of absolute diagonal elements
    R.mgB  = apply(A.coef, MARGIN=1:2, FUN=function(x) mean(x))  # ... of group mean
    R.sico = aux_sico(R.mgB, names_s=names_s)  # Herwartz 2017:25, Ch.4.3.3, applies this to each individual instead.
    for(i in 1:dim_N){ 
      x$L.varx[[i]]$B = A.coef[ , , i] %*% R.sico$mat_perm
      x$L.varx[[i]]$args_id = args_id
      class(x$L.varx[[i]]) = c("id", "varx")
    }
    
  }else if(combine=="indiv"){
    # strictly individual-specific identification
    idf <- function(x){
      # pre-whiten the residuals
      P_chol = t(chol(x$SIGMA))  # baseline decomposition
      R.eps  = t(solve(P_chol) %*% x$resid)  # whitened residuals
      
      # ICA by minimizing distance covariance (measure is also the test statistic)
      R.ica = suppressMessages(steadyICA::steadyICA(X=R.eps, symmetric=TRUE, PIT=PIT, maxit=n.iterations))
      R.rot = steadyICA::W2theta(R.ica$W)  # optimal rotation angles
      
      # recover unique structural impact matrices
      B = P_chol %*% R.ica$W
      B = aux_sico(B)$B
      
      # add to "varx"-object
      dimnames(B) = list(names_k, names_s)
      x$B = B
      x$ICA = R.ica
      x$rotation_angles = R.rot
      x$args_id = args_id
      class(x) = c("id", "varx")
      return(x)
    }
    
    # calculate structural impact matrices
    x$L.varx = lapply(x$L.varx, function(x_i) idf(x_i))
    R.rot = R.eps = R.ica = NULL  # void panel results
    
  }else{
    stop("Incorrect specification of the combination approach!")
  }
  
  # estimate panel mean-group
  x$MG_B = aux_MG(x$L.varx, w=x$args_pvarx$w, idx_par="B")
  x$B    = x$MG_B$mean
  
  # return result
  x$eps0 = R.eps
  x$ICA  = R.ica
  x$rotation_angles = R.rot
  x$args_pid = args_id
  class(x) = c("pid", "pvarx")
  return(x)
}


#' @title Identification of panel SVAR models by means of proxy variables
#' @description Given an estimated panel of VAR models, this function 
#'   uses proxy variables to partially identify
#'   the structural impact matrix \eqn{B_i} of the corresponding SVAR model
#'   \deqn{y_{it} = c_{it} + A_{i1} y_{i,t-1} + ... + A_{i,p_i} y_{i,t-p_i} + u_{it}}
#'   \deqn{       = c_{it} + A_{i1} y_{i,t-1} + ... + A_{i,p_i} y_{i,t-p_i} + B_i \epsilon_{it}.}
#'   In general, identification procedures determine \eqn{B_i} up to column ordering, scale, and sign. 
#'   For a unique solution, \code{pid.iv} follows the literature on proxy SVAR. 
#'   The \eqn{S} columns in \eqn{B_i = [B_{i,1} : B_{i,2}]} of the identified shocks 
#'   \eqn{\epsilon_{its}, s=1,\ldots,S,} are ordered first, and the variance 
#'   \eqn{\sigma^2_{\epsilon,is} = 1} is normalized to unity (see e.g. Lunsford 
#'   2015:6, Eq. 9). Further, the sign is fixed to a positive correlation 
#'   between proxy and shock series. A normalization of the impulsed shock 
#'   that may fix the size of the impact response in the IRF can be imposed 
#'   subsequently via '\code{normf}' in \code{\link{irf.pvarx}} and \code{\link{sboot.pmb}}.
#' 
#' @param x An object of class '\code{pvarx}' or a list of VAR objects 
#'   that will be \link[=as.varx]{coerced} to '\code{varx}'. 
#'   Estimated panel of VAR objects.
#' @param iv List. A single '\code{data.frame}' of the \eqn{L} common proxy time 
#'   series \eqn{m_t} or a list of \eqn{N} '\code{data.frame}' objects of the \eqn{L} 
#'   individual proxy time series \eqn{m_{it}}. The proxies must have the same 
#'   succession \eqn{l = 1,\ldots,L} in each individual '\code{data.frame}'.
#' @param combine Character. The combination of the individual reduced-form residuals 
#'   via '\code{pool}' for the pooled shocks by Empting et al. (2025) using a common orthogonal matrix or
#'   via '\code{indiv}' for individual-specific \eqn{B_i \forall i} using strictly separated identification runs.
#' @param S2 Character. Identification within multiple proxies \eqn{m_{it}} 
#'   via '\code{MR}' for lower-triangular \eqn{[I_S : -B_{i,11} B_{i,12}^{-1} ] B_{i,1}} by Mertens, Ravn (2013), 
#'   via '\code{JL}' for chol\eqn{(\Sigma_{mu,i} \Sigma_{u,i}^{-1} \Sigma_{um,i})} by Jentsch, Lunsford (2021), or
#'   via '\code{NQ}' for the nearest orthogonal matrix from \code{\link[base]{svd}} decomposition by Empting et al. (2025). 
#'   In case of \eqn{S=L=1} proxy, all three choices provide identical results on \eqn{B_{i,1}}.
#'   In case of \code{combine='pool'}, the argument is automatically fixed to '\code{NQ}'.
#' @param cov_u Character. Selection of the estimated residual covariance matrices \eqn{\hat{\Sigma}_{u,i}} 
#'   to be used in the identification procedure. 
#'   Either \code{'OMEGA'} (the default) for \eqn{\hat{U}_i \hat{U}_i'/T_i} as used in Mertens, Ravn (2013) and Jentsch, Lunsford (2021)
#'   or \code{'SIGMA'} for \eqn{\hat{U}_i \hat{U}_i'/(T_i - n_{zi})}, which corrects for the number of regressors \eqn{n_{zi}}. 
#'   Both character options refer to the name of the respective estimate in the '\code{varx}' objects.
#' @param R0 Matrix. A \eqn{(L \times S)} selection matrix for '\code{NQ}' that 
#'   governs the attribution of the \eqn{L} proxies to their specific \eqn{S} 
#'   structural shock series. If \code{NULL} (the default), \code{R0} 
#'   \eqn{= I_S} will be used such that the \eqn{S=L} columns of \eqn{B_{i,1}} are 
#'   one-by-one estimated from the \eqn{S=L} proxy series '\code{iv}' available. 
#' 
#' @return List of class '\code{pid}' with elements:
#' \item{A}{Matrix. The lined-up coefficient matrices \eqn{A_j, j=1,\ldots,p} 
#'    for the lagged variables in the panel VAR.}
#' \item{B}{Matrix. Mean group of the estimated structural impact matrices \eqn{B_i}, 
#'    i.e. the unique decomposition of the covariance matrices of reduced-form errors.}
#' \item{L.varx}{List of '\code{varx}' objects for the individual estimation results
#'   to which the structural impact matrices \eqn{B_i} have been added.}
#' \item{eps0}{Matrix. The combined whitened residuals \eqn{\epsilon_{0}} 
#'   to which the pooled identification procedure has been applied subsequently.
#'   These are still the unrotated baseline shocks! 
#'   \code{NULL} if '\code{indiv}' identifications have been used.}
#' \item{Q}{Matrix. The orthogonal matrix suggested by the pooled identification 
#'   procedure. \code{NULL} if '\code{indiv}' identifications have been used.}
#' \item{args_pid}{List of characters and integers indicating the 
#'   identification methods and specifications that have been used.}
#' \item{args_pvarx}{List of characters and integers indicating the 
#'   estimator and specifications that have been used.}
#' 
#' @references Mertens, K., and Ravn, M. O. (2013): 
#'   "The Dynamic Effects of Personal and Corporate Income Tax Changes in the 
#'   United States", \emph{American Economic Review}, 103, pp. 1212-1247.
#' @references Jentsch, C., and Lunsford, K. G. (2019): 
#'   "The Dynamic Effects of Personal and Corporate Income Tax Changes in the 
#'   United States: Comment", \emph{American Economic Review}, 109, pp. 2655-2678.
#' @references Jentsch, C., and Lunsford, K. G. (2021):
#'   "Asymptotically Valid Bootstrap Inference for Proxy SVARs",
#'   \emph{Journal of Business and Economic Statistics}, 40, pp. 1876-1891.
#' @references Empting, L. F. T., Maxand, S., Oeztuerk, S., and Wagner, K. (2025): 
#'   "Inference in Panel SVARs with Cross-Sectional Dependence of Unknown Form".
#' 
#' @examples
#' data("PCIT")
#' names_k = c("APITR", "ACITR", "PITB", "CITB", "GOV", "RGDP", "DEBT")
#' names_l = c("m_PI", "m_CI")  # proxy names
#' names_s = paste0("epsilon[ ", c("PI", "CI"), " ]")  # shock names
#' dim_p   = 4  # lag-order
#' 
#' # estimate and identify panel SVAR #
#' L.vars = list(USA = vars::VAR(PCIT[ , names_k], p=dim_p, type="const"))
#' L.iv   = list(USA = PCIT[-(1:dim_p), names_l])
#' R.pid  = pid.iv(L.vars, iv=L.iv, S2="NQ", cov_u="SIGMA", combine="pool")
#' colnames(R.pid$B)[1:2] = names_s  # labeling
#' 
#' @family panel identification functions
#' @importFrom steadyICA steadyICA W2theta
#' @export
#' 
pid.iv <- function(x, iv, S2=c("MR", "JL", "NQ"), cov_u="OMEGA", R0=NULL, combine=c("pool", "indiv")){
  # define and check
  x = as.pvarx(x)
  dim_N  = length(x$L.varx)
  names_i = names(x$L.varx)
  L.iv = aux_check(iv, "iv", dim_N, names_i)
  if( is.null(names_i) ){ names_i = 1:dim_N }
  if( combine=="pool" & S2!="NQ" ){ warning("Resorting to 'NQ' for pooling."); S2 = "NQ" }
  
  # combine reduced-form residuals
  args_id = list(method="Proxy", S2=S2, cov_u=cov_u, R0=R0, combine=combine)
  if(combine=="pool"){
    # define
    names_k = if( !is.null(rownames(x$A)) ){ rownames(x$A) }else{ names_k = paste0("y", 1:nrow(x$A)) }
    L.resid = lapply(x$L.varx, FUN=function(x_i) x_i$resid)
    L.Ucov  = lapply(x$L.varx, FUN=function(x_i) x_i[[cov_u]])
    
    # unit-wise decomposition of covariance matrices to pool the residuals, from Herwartz 2017:12
    R.pool = aux_UPool(L.resid, L.Ucov)
    R.eps  = R.pool$eps     # pooled whitened residuals
    L.Ustd = R.pool$L.Ustd  # individual matrices with standard deviations on the main diagonal
    L.Uchl = R.pool$L.Uchl  # lower triangular, Cholesky-decomposed correlation matrices
    
    # identify via pooled proxy variables (L x NT)
    L.iv = sapply(names_i, FUN=function(i) tail(L.iv[[i]], n=c(NA, ncol(L.resid[[i]]))), simplify=FALSE)
    R.id = aux_idNQ(eps=R.eps, m=do.call("cbind", L.iv), R0=R0)
    
    # recover unique structural impact matrices, by Herwartz 2017:12, Eq.23
    for(i in 1:dim_N){ 
      x$L.varx[[i]]$B = L.Ustd[[i]] %*% L.Uchl[[i]] %*% R.id$Q
      x$L.varx[[i]]$args_id = args_id
      x$L.varx[[i]]$args_id$iv = L.iv[[i]]
      rownames(x$L.varx[[i]]$B) = names_k
      class(x$L.varx[[i]]) = c("id", "varx")
    }
    
  }else if(combine=="indiv"){
    # strictly individual-specific identification
    idf <- function(x, iv){
      # identify via proxy variables
      R.id = aux_idIV(u=x$resid, m=iv, S2=S2, SIGMA_uu=x[[cov_u]], R0=R0)
      
      # add to "varx"-object
      x$B = R.id$B
      x$Q = R.id$Q
      x$udv = R.id$udv
      x$F_stats = R.id$F_stats
      x$args_id = args_id
      x$args_id$iv = R.id$m
      class(x) = c("id", "varx")
      return(x)
    }
    
    # calculate structural impact matrices
    x$L.varx = sapply(names_i, function(i) idf(x$L.varx[[i]], iv=L.iv[[i]]), simplify=FALSE)
    L.iv = lapply(x$L.varx, function(i) i$args_id$iv)  # proxy time series of accordant dim_T
    R.id = list(); R.eps = NULL  # void panel results
    
  }else{
    stop("Incorrect specification of the combination approach!")
  }
  
  # estimate panel mean-group
  x$MG_B = aux_MG(x$L.varx, w=x$args_pvarx$w, idx_par="B")
  x$B    = x$MG_B$mean
  
  # return result
  x$eps0 = R.eps
  x$udv  = R.id$udv
  x$Q    = R.id$Q
  x$args_pid = args_id
  x$args_pid$L.iv = L.iv
  class(x) = c("pid", "pvarx")
  return(x)
}