R/tenAR.R

Defines functions predRolling predict.tenAR tenAR mplot.acf mplot tenAR.bic tenAR.SE.MLE tenAR.SE.LSE MAR1.CC MAR1.RR tenAR.MLE tenAR.LS tenAR.PROJ tenAR.A tenAR.VAR MAR.SE MAR1.MLE MAR1.LS MAR2.PROJ MAR1.PROJ MAR1.RRCC.SE MAR1.RRLS.SE matAR.RR.se matAR.RR.est tenAR.est taxi.sim.FM taxi.sim.AR tenAR.sim

Documented in matAR.RR.est matAR.RR.se mplot mplot.acf predict.tenAR taxi.sim.AR taxi.sim.FM tenAR.est tenAR.sim

###Functions of Autoregressive Models

#' Generate TenAR(p) tensor time series
#'
#' Simulate from the TenAR(p) model.
#'@name tenAR.sim
#'@rdname tenAR.sim
#'@aliases tenAR.sim
#'@usage tenAR.sim(t, dim, R, P, rho, cov, A = NULL, Sig = NULL)
#'@export
#'@import tensor rTensor
#'@importFrom MASS ginv
#'@importFrom stats rnorm
#'@importFrom pracma randortho
#'@importFrom Matrix rankMatrix
#'@param t length of output series, a strictly positive integer.
#'@param dim dimension of the tensor at each time.
#'@param R Kronecker rank for each lag.
#'@param P autoregressive order.
#'@param rho spectral radius of coefficient matrix \eqn{\Phi}.
#'@param cov covariance matrix of the error term: diagonal ("iid"), separable ("mle"), random ("svd").
#'@param A coefficient matrices. If not provided, they are randomly generated according to given \code{dim}, \code{R}, \code{P} and \code{rho}.
#' It is a multi-layer list, the first layer for the lag \eqn{1 \le i \le P}, the second the term \eqn{1 \le r \le R}, and the third the mode \eqn{1 \le k \le K}.
#' See "Details" of \code{\link{tenAR.est}}.
#'@param Sig only if \code{cov=mle}, a list of initial values of \eqn{\Sigma_1,\ldots,\Sigma_K}. The default are identity matrices.
#'@return A tensor-valued time series generated by the TenAR(p) model.
#'@seealso \code{\link{tenFM.sim}}
#'@examples
#' set.seed(123)
#' dim <- c(3,3,3)
#' xx <- tenAR.sim(t=500, dim, R=2, P=1, rho=0.5, cov='iid')
tenAR.sim <- function(t, dim, R, P, rho, cov, A=NULL, Sig=NULL){
  if (missing(A) || is.null(A)){A <- tenAR.A(dim, R, P, rho)}
  K <- length(A[[1]][[1]])
  P = length(A)
  R = c()
  for (l in c(1:P)){
    R[l] = length(A[[l]])
  }
  dim <- c()
  for (i in c(1:K)){
    dim[i] <- nrow(A[[1]][[1]][[i]])
  }
  phi = list()
  for (l in c(1:P)){
    if (R[l] == 0){
      phi[[l]] = 0
    } else {
      phi[[l]] <-  Reduce("+", lapply(1:R[l], function(j) {rTensor::kronecker_list(rev(A[[l]][[j]]))}))
    }
  }
  
  x <- array(0, c(t+500,prod(dim)))
  for (l in c(1:P)){
    x[l,] <- rnorm(prod(dim))
  }

  if (cov == "mle"){
    if (missing(Sig) || is.null(Sig)){
      Sig.true <- lapply(1:K, function(i){
        Q <- pracma::randortho(dim[i])
        D <- abs(diag(rnorm(dim[i])))
        Q %*% D %*% t(Q)
      })
      Sig.true <- fro.rescale(list(Sig.true))[[1]]
      Sig.true.sqrt <- lapply(1:K, function(i){
        pracma::sqrtm(Sig.true[[i]])$B
      })
    } else {
      Sig.true = Sig
      Sig.true.sqrt <- lapply(1:K, function(i){
        pracma::sqrtm(Sig.true[[i]])$B
      })
    }
    E = rTensor::kronecker_list(rev(Sig.true.sqrt))
  }

  if (cov == "svd"){
    if (missing(Sig) || is.null(Sig)){
      Q <- pracma::randortho(prod(dim))
      D <- sqrt(abs(diag(rnorm(prod(dim)))))
      E <- Q %*% D
    } else {
      E <- Sig
    }
  }

  for (i in c((P+1):(500 + t))){ # burning number = 500
    if (cov == "iid"){
      e <- rnorm(prod(dim), mean=0, sd=1)
    } else if (cov == "mle"){
      e <- E %*% rnorm(prod(dim))
    } else if (cov == "svd"){
      e <- E %*% rnorm(prod(dim))
    } else {
      stop("Please specify cov")
    }
    temp = 0
    for (l in c(1:P)){
      temp = temp + phi[[l]] %*% x[i-l, ]
    }
    x[i,] <-  temp + e
  }
  return(array(x[501:(500+t),], c(t, dim)))
}


#' Simulate taxi data by tenAR models
#'
#' Simulate tensor time series by autoregressive models, using estimated coefficients by taxi data.
#'@name taxi.sim.AR
#'@rdname taxi.sim.AR
#'@aliases taxi.sim.AR
#'@usage taxi.sim.AR(t, print.tar.coef=FALSE, print.sig=FALSE, seed=123)
#'@export
#'@param t length of output series.
#'@param print.tar.coef print coefficients, default FALSE.
#'@param print.sig print covariance matrices, default FALSE.
#'@param seed random seed.
#'@return A tensor-valued time series of dimension (5,5,7).
#'@seealso \code{\link{taxi.sim.FM}}
#'@examples
#' xx = taxi.sim.AR(t=753)
taxi.sim.AR <- function(t, print.tar.coef=FALSE, print.sig=FALSE, seed=123){
  K <- 3
  dim <- c(5,5,7)
  x <- array(0, c(t,prod(dim)))
  Tar.coef <- list(list(list(),list()))
  Tar.sig <- list()
  Tar.coef[[1]][[1]][[1]] = 
    array(c(0.0532,0.0220,-0.0090,-0.1021,0.0176,
            0.1635,0.1503, 0.1232, 0.0951,0.1740,
            0.0361,0.0707, 0.0657, 0.0765,0.1113,
            0.5076,0.4123, 0.3168, 0.0612,0.4606,
            0.1972,0.1400, 0.1434, 0.1353,0.1092), c(5,5))
  Tar.coef[[1]][[1]][[2]] = 
    array(c(-0.1095,-0.0493,-0.0445, 0.0073,-0.0727,
            0.2774, 0.1965, 0.1307, 0.1345, 0.1493,
            0.1715, 0.0872, 0.0826, 0.0698, 0.0963,
            -0.6000,-0.3752,-0.2758,-0.1281,-0.3728,
            0.0552, 0.0257, 0.0204, 0.0994,-0.0321), c(5,5))
  Tar.coef[[1]][[1]][[3]] = 
    array(c(0.2556, 0.1556, 0.3113,0.3996,0.5329,0.4577,0.5227,
            0.2699, 0.2802, 0.2222,0.3630,0.5680,0.7809,0.5239,
            0.7632, 0.4705, 0.4032,0.4098,0.3075,0.3417,0.3929,
            0.4776, 0.4051, 0.4617,0.4953,0.4097,0.2892,0.4677,
            -0.0580,-0.0485,-0.0499,0.2356,0.1852,0.2084,0.1204,
            0.1758, 0.4057, 0.4102,0.3457,0.5127,0.5856,0.5199,
            0.1707, 0.3203, 0.2702,0.1234,0.2110,0.2197,0.4511), c(7,7))
  Tar.coef[[1]][[2]][[1]] = 
    array(c(0.4287,-0.0358,0.0367,-0.0044, 0.0043,
            -0.0723, 0.3955,0.0167,-0.0178,-0.0271,
            -0.0556, 0.0213,0.2698,-0.1532, 0.0222,
            0.1399, 0.1557,0.0894, 0.6041, 0.12063,
            -0.0425,-0.0380,0.0340, 0.1001, 0.3234), c(5,5))
  Tar.coef[[1]][[2]][[2]] = 
    array(c(-0.3992,-0.0254, 0.0234, 0.0077, 0.0580,
            -0.1088,-0.3678,-0.1012, 0.0192,-0.0521,
            -0.1378,-0.1245,-0.4167,-0.0181,-0.1964,
            -0.1222,-0.0307,-0.0721,-0.3552,-0.1618,
            0.0074,-0.0325,-0.0861,-0.0201,-0.4961), c(5,5))
  Tar.coef[[1]][[2]][[3]] = 
    array(c(-1.0060,-0.1431,-0.0633,-0.2255,-0.0677,-0.0829, 0.0056,
            -0.1196,-0.7815,-0.1571,-0.0798,-0.0760,-0.0424,-0.0654,
            0.0331,-0.0809,-0.6270,-0.0962,-0.0987,-0.0991,-0.0213,
            -0.0626,-0.0728,-0.1158,-0.4132,-0.0885, 0.0068,-0.0132,
            -0.0897,-0.1278,-0.0521,-0.1041,-0.4861,-0.0448,-0.1392,
            0.0070,-0.0662,-0.1004,-0.0846,-0.1224,-0.4761,-0.1168,
            -0.0455,-0.2491,-0.3330,-0.3430,-0.3031,-0.3690,-0.6041), c(7,7))
  Tar.sig[[1]] = 
    array(c(0.7754,0.1420,0.1163,0.0663,0.1333,
            0.1420,0.5846,0.0773,0.0522,0.0844,
            0.1163,0.0773,0.4227,0.0455,0.0829,
            0.0663,0.0522,0.0455,0.7122,0.0454,
            0.1333,0.0844,0.0829,0.0454,0.5275), c(5,5))
  Tar.sig[[2]] = 
    array(c(0.8059,0.1585,0.1290,0.0492,0.1474,
            0.1585,0.4694,0.0801,0.0240,0.0840,
            0.1290,0.0801,0.4033,0.0299,0.0935,
            0.0492,0.0240,0.0299,0.3690,0.0320,
            0.1474,0.0840,0.0935,0.0320,0.5459), c(5,5))
  Tar.sig[[3]] = 
    array(c(156.7240,38.4326, 26.3676, 23.1721, 18.9742, 16.0869, 16.2405,
            38.4326, 142.4746,32.9095, 29.2238, 24.4317, 21.5026, 17.6571,
            26.3676, 32.9095, 139.7549,37.2252, 31.3168, 26.6982, 20.1618,
            23.1721, 29.2238, 37.2252, 150.7263,44.4215, 38.4494, 25.9977,
            18.9742, 24.4317, 31.3168, 44.4215, 156.0611,49.1344, 30.9949,
            16.0869, 21.5026, 26.6982, 38.4494, 49.1344, 170.4497,36.1325,
            16.2405, 17.6571, 20.1618, 25.9977, 30.9949, 36.1325, 157.0083), c(7,7))
  set.seed(seed)
  x = tenAR.sim(t, dim, R=2, P=1, rho=0.8, cov="mle", A=Tar.coef, Sig=Tar.sig)
  
  if(print.tar.coef==TRUE){
    print('Coefficient matrices:')
    print(Tar.coef)
  }
  if(print.sig==TRUE){
    print('Covariance matrices:')
    print(Tar.sig)
  }
  
  return(x)
}


#' Simulate taxi data by factor models
#'
#' Simulate tensor time series by factor models, using estimated coefficients by taxi data.
#'@name taxi.sim.FM
#'@rdname taxi.sim.FM
#'@aliases taxi.sim.FM
#'@usage taxi.sim.FM(t, print.tar.coef=FALSE, print.loading=FALSE, seed=216)
#'@export
#'@import tensor
#'@importFrom stats rnorm
#'@param t length of output series.
#'@param print.tar.coef print coefficients used for simulation, default FALSE.
#'@param print.loading print loading matrices used for simulation, default FALSE.
#'@param seed random seed.
#'@return A tensor-valued time series of dimension (4,4,3).
#'@seealso \code{\link{taxi.sim.AR}}
#'@examples
#' xx = taxi.sim.FM(t=252)
taxi.sim.FM <- function(t,print.tar.coef=FALSE,print.loading=FALSE,seed=216){
  dims <- c(4,4,3)
  Tar.coef <- list(list(list()))
  Tar.coef[[1]][[1]][[1]] = array(c(-0.4163,0.0603,-0.0199,0.0598,
                                    -0.1268,-0.6219,-0.0551,-0.0251,
                                    -0.0127,-0.0001,-0.4572,-0.0376,
                                    0.0609,0.0252,0.0629,-0.4402),c(4,4))
  Tar.coef[[1]][[1]][[2]] = array(c(-0.5453,-0.0369,0.0001,-0.1130,
                                    -0.0373,-0.3590,-0.0214,0.0495,
                                    0.0143,0.0460,-0.5629,0.1233,
                                    -0.0004,-0.0562,-0.0165,-0.4665),c(4,4))
  Tar.coef[[1]][[1]][[3]] = array(c(2.4676,-0.1332,-0.8460,
                                    0.0790,2.7971,1.0344,
                                    0.0161,0.1530,2.2103),c(3,3))
  
  set.seed(seed)
  Ft.sim = tenAR.sim(t,c(4,4,3),1,1,0.75,cov='iid',A=Tar.coef)
  TenFM.loading <- list()
  TenFM.loading[[1]] <- array(c(-0.0174,0.5156,0.7721,-0.0091,
                                0.0144,0.0642,-0.0669,0.2077,
                                0.1589,-0.1657,0.1534,0.0974,
                                -0.0244,-0.2971,0.1886,0.4857,
                                0.5956,0.4564,0.0048,0.0893,
                                0.0954,0.1663,-0.1619,-0.0754,
                                0.0425,0.1996,-0.1284,0.0394,
                                0.1303,-0.075,0.9188,0.0558,
                                0.2527,-0.0502,0.0412,-0.0475
                                ,0.0488,0.1473,-0.0298,0.0373,
                                -0.0908,-0.0362,0.0222,0.217,
                                -0.0499,0.8853,0.2375,0.2719),c(12,4))
  TenFM.loading[[2]] <- array(c(0.0702,0.1259,0.0871,0.0326,
                                -0.1502,-0.0305,-0.0944,0.1303,
                                -0.0689,0.8668,0.326,0.2426,
                                0.0149,-0.1486,-0.003,0.3198,
                                0.6435,0.5675,0.2212,0.0831,
                                0.0294,0.2313,-0.1049,-0.135,
                                0.1907,0.3001,-0.4953,0.0643,
                                0.1615,-0.4072,0.6467,-0.0243,
                                0.0842,0.0563,0.04,0.0406,
                                -0.0172,0.4402,0.679,0.0091,
                                0.0411,0.0321,0.2927,0.2469,
                                0.3524,-0.1841,0.0998,0.1658),c(12,4))
  TenFM.loading[[3]] <- array(c(0.0154,-0.0069,-0.0202,-0.0274,0.012,0.1211,
                                0.5732,0.6597,0.2467,0.0892,0.0782,-0.0387,
                                -0.1062,-0.1301,-0.1278,-0.0974,-0.0549,-0.0906,
                                -0.1478,1e-04,0.0444,0.146,0.1595,0.0884,
                                -0.0185,-9e-04,0.0081,0.0133,0.0073,0.0038,
                                -0.0264,0.0503,0.4405,0.5292,0.3513,0.3057,
                                0.3002,0.2485,0.1912,0.1482,0.0859,0.1151,
                                0.1946,0.0649,-0.0482,-0.1162,-0.1059,-0.0711,
                                0.1099,0.0588,0.0311,0.0172,0.012,0.0164,
                                0.033,0.0503,-0.1235,-0.1558,-0.0354,0.0292,
                                0.0678,0.1212,0.1926,0.2267,0.2584,0.3059,
                                0.2854,0.3422,0.3934,0.391,0.3234,0.2422),c(24,3))
  if(print.tar.coef==TRUE){
    print('Tensor Autoregressive Coefficient matrices used to simulate the tensor factor data:')
    print(Tar.coef)
  }
  if(print.loading==TRUE){
    print('Tensor Factor loading matrices used to simulate the tensor data:')
    print(TenFM.loading)
  }
  Xt.sim = tensor(tensor(tensor(Ft.sim,TenFM.loading[[1]],2,2),
                         TenFM.loading[[2]],2,2),
                  TenFM.loading[[3]],2,2)
  set.seed(seed)
  y.midtown = Xt.sim*10 + array(rnorm(prod(dim(Xt.sim))),dim(Xt.sim))
  return(y.midtown)
}


#' Estimation for Autoregressive Model of Tensor-Valued Time Series
#'
#' Estimation function for tensor autoregressive models. Methods include
#' projection (PROJ), Least Squares (LSE), maximum likelihood estimation (MLE)
#' and vector autoregressive model (VAR), as determined by the value of \code{method}.
#'@details
#' Tensor autoregressive model (of autoregressive order one) has the form:
#' \deqn{X_t = \sum_{r=1}^R X_{t-1} \times_{1}  A_1^{(r)} \times_{2}  \cdots \times_{K} A_K^{(r)} + E_t,}
#' where \eqn{A_k^{(r)}} are \eqn{d_k \times d_k} coefficient matrices, \eqn{k=1,\cdots,K}, and \eqn{E_t} is a tensor white noise. \eqn{R} is the Kronecker rank.
#' The model of autoregressive order \eqn{P} takes the form
#' \deqn{X_t = \sum_{i=1}^{P} \sum_{r=1}^{R_i} X_{t-i} \times_{1} A_{1}^{(ir)} \times_{2}  \cdots \times_{K} A_{K}^{(ir)} + E_t.}
#' For the "MLE" method, we also assume,
#'  \deqn{\mathrm{Cov}(\mathrm{vec}(E_t))= \Sigma_K \otimes \Sigma_{K-1} \otimes \cdots \otimes \Sigma_1,}
#'@name tenAR.est
#'@rdname tenAR.est
#'@aliases tenAR.est
#'@usage tenAR.est(xx,R=1,P=1,method="LSE",init.A=NULL,init.sig=NULL,niter=150,tol=1e-6)
#'@export
#'@import tensor rTensor
#'@importFrom methods new
#'@param xx \eqn{T \times d_1 \times \cdots \times d_K} tensor-valued time series, \eqn{T} is the length of the series.
#'@param method character string, specifying the type of the estimation method to be used. \describe{
#'  \item{\code{"PROJ",}}{Projection method.}
#'  \item{\code{"LSE",}}{Least squares.}
#'  \item{\code{"MLE",}}{MLE under a separable cov(vec(\eqn{E_t})).}
#'  \item{\code{"VAR",}}{VAR(\eqn{P}) model for the \eqn{\mathrm{vec}(E_t)}.}
#'}
#'@param R Kronecker rank for each lag, a vector for \eqn{P>1}, a positive integer, it assumes same number of terms in each lag.
#'@param P Autoregressive order, a positive integer.
#'@param init.A initial values of coefficient matrices \eqn{A_k^{(ir)}} in estimation algorithms, which is a multi-layer list such that
#' the first layer for the lag \eqn{1 \le i \le P}, the second the term \eqn{1 \le r \le R}, and the third the mode \eqn{1 \le k \le K}.
#' See "Details". By default, we use PROJ estimators as initial values.
#'@param init.sig only if \code{method=MLE}, a list of initial values of \eqn{\Sigma_1,\ldots,\Sigma_K}. The default are identity matrices.
#'@param niter maximum number of iterations if error stays above \code{tol}.
#'@param tol error tolerance in terms of the Frobenius norm.
#'@return return a list containing the following:\describe{
#'\item{\code{A}}{a list of estimated coefficient matrices \eqn{A_k^{(ir)}}. It is a multi-layer list,
#' the first layer for the lag \eqn{1 \le i \le P}, the second the term \eqn{1 \le r \le R}, and the third the mode \eqn{1 \le k \le K}. See "Details".}
#'\item{\code{SIGMA}}{only if \code{method=MLE}, a list of estimated \eqn{\Sigma_1,\ldots,\Sigma_K}.}
#'\item{\code{res}}{residuals}
#'\item{\code{Sig}}{sample covariance matrix of the residuals vec(\eqn{\hat E_t}).}
#'\item{\code{cov}}{grand covariance matrix of all estimated entries of \eqn{A_k^{(ir)}}}
#'\item{\code{sd}}{standard errors of the coefficient matrices \eqn{A_k^{(ir)}}, returned as a list aligned with \code{A}.}
#'\item{\code{niter}}{number of iterations.}
#'\item{\code{BIC}}{value of extended Bayesian information criterion.}
#'}
#'@references
#'Rong Chen, Han Xiao, and Dan Yang. "Autoregressive models for matrix-valued time series". Journal of Econometrics, 2020.
#'
#'Zebang Li, Han Xiao. "Multi-linear tensor autoregressive models". arxiv preprint arxiv:2110.00928 (2021).
#'
#'@examples
#' set.seed(333)
#'
#' # case 1: tensor-valued time series
#'
#' dim <- c(2,2,2)
#' xx <- tenAR.sim(t=100, dim, R=2, P=1, rho=0.5, cov='iid')
#' est <- tenAR.est(xx, R=2, P=1, method="LSE") # two-term tenAR(1) model
#' A <- est$A # A is a multi-layer list
#'
#' length(A) == 1 # TRUE, since the order P = 1
#' length(A[[1]]) == 2 # TRUE, since the number of terms R = 2
#' length(A[[1]][[1]]) == 3 # TRUE, since the mode K = 3
#'
#' # est <- tenAR.est(xx, R=c(1,2), P=2, method="LSE") # tenAR(2) model with R1=1, R2=2
#'
#' # case 2: matrix-valued time series
#'
#' dim <- c(2,2)
#' xx <- tenAR.sim(t=100, dim, R=2, P=1, rho=0.5, cov='iid')
#' est <- tenAR.est(xx, R=2, P=1, method="LSE") # two-term MAR(1) model 
#' A <- est$A # A is a multi-layer list
#'
#' length(A) == 1 # TRUE, since the order P = 1
#' length(A[[1]]) == 2 # TRUE, since the number of terms R = 2
#' length(A[[1]][[1]]) == 2 # TRUE, since the mode K = 2
tenAR.est <- function(xx, R=1, P=1, method="LSE", init.A=NULL, init.sig=NULL, niter=150, tol=1e-6){
  if (identical("PROJ", method)) {
    model <- tenAR.PROJ(xx, R, P)
  } else if (identical("LSE", method)) {
    model <- tenAR.LS(xx, R, P, init.A, niter, tol, print.true=FALSE)
  } else if (identical("MLE", method)) {
    model <- tenAR.MLE(xx, R, P, init.A, init.sig, niter, tol, print.true=FALSE)
  } else if (identical("VAR", method)) {
    model <- tenAR.VAR(xx, P)
  } else {
    stop("Please specify the type you want to use. See manuals or run ?tenAR for details.")
  }
  # Create an S3 object of the tenAR class
  tenAR(model)
}


#' Estimation for Reduced Rank MAR(1) Model
#'
#' Estimation of the reduced rank MAR(1) model, using least squares (RRLSE) or MLE (RRMLE), as determined by the value of \code{method}.
#'@details
#' The reduced rank MAR(1) model takes the form:
#' \deqn{X_t =  A_1 X_{t-1} A_2^{^\top} + E_t,}
#' where \eqn{A_i} are \eqn{d_i \times d_i} coefficient matrices of ranks \eqn{\mathrm{rank}(A_i) = k_i \le d_i}, \eqn{i=1,2}. For the MLE method we also assume
#'  \deqn{\mathrm{Cov}(\mathrm{vec}(E_t))=\Sigma_2 \otimes \Sigma_1}
#'
#'@name matAR.RR.est
#'@rdname matAR.RR.est
#'@aliases matAR.RR.est
#'@usage matAR.RR.est(xx, method, A1.init=NULL, A2.init=NULL,Sig1.init=NULL,Sig2.init=NULL,
#'k1=NULL, k2=NULL, niter=200,tol=1e-4)
#'@export
#'@import tensor rTensor expm
#'@importFrom MASS ginv
#'@param xx \eqn{T \times d_1 \times d_2} matrix-valued time series, \eqn{T} is the length of the series.
#'@param method character string, specifying the method of the estimation to be used. \describe{
#'  \item{\code{"RRLSE",}}{Least squares.}
#'  \item{\code{"RRMLE",}}{MLE under a separable cov(vec(\eqn{E_t})).}
#'}
#'@param A1.init initial value of \eqn{A_1}. The default is the identity matrix.
#'@param A2.init  initial value of \eqn{A_2}. The default is the identity matrix.
#'@param Sig1.init only if \code{method=RRMLE}, initial value of \eqn{\Sigma_1}. The default is the identity matrix.
#'@param Sig2.init only if \code{method=RRMLE}, initial value of \eqn{\Sigma_2}. The default is the identity matrix.
#'@param k1  rank of \eqn{A_1}, a positive integer.
#'@param k2  rank of \eqn{A_2}, a positive integer.
#'@param niter maximum number of iterations if error stays above \code{tol}.
#'@param tol tolerance in terms of the Frobenius norm.
#'@param niter maximum number of iterations if error stays above \code{tol}.
#'@param tol relative Frobenius norm error tolerance.
#'@return return a list containing the following:\describe{
#'\item{\code{A1}}{estimator of \eqn{A_1}, a \eqn{d_1} by \eqn{d_1} matrix.}
#'\item{\code{A2}}{estimator of \eqn{A_2}, a \eqn{d_2} by \eqn{d_2} matrix.}
#'\item{\code{loading}}{a list of estimated \eqn{U_i}, \eqn{V_i}, 
#' where we write \eqn{A_i=U_iD_iV_i} as the singular value decomposition (SVD) of \eqn{A_i}, \eqn{i = 1,2}.}
#'\item{\code{Sig1}}{only if \code{method=MLE}, when \eqn{\mathrm{Cov}(\mathrm{vec}(E_t))=\Sigma_2 \otimes \Sigma_1}.}
#'\item{\code{Sig2}}{only if \code{method=MLE}, when \eqn{\mathrm{Cov}(\mathrm{vec}(E_t))=\Sigma_2 \otimes \Sigma_1}.}
#'\item{\code{res}}{residuals.}
#'\item{\code{Sig}}{sample covariance matrix of the residuals vec(\eqn{\hat E_t}).}
#'\item{\code{cov}}{a list containing \describe{
#'  \item{\code{Sigma}}{asymptotic covariance matrix of (vec( \eqn{\hat A_1}),vec(\eqn{\hat A_2^{\top}})).}
#'  \item{\code{Theta1.u}, \code{Theta1.v}}{asymptotic covariance matrix of vec(\eqn{\hat U_1}), vec(\eqn{\hat V_1}).}
#'  \item{\code{Theta2.u}, \code{Theta2.v}}{asymptotic covariance matrix of vec(\eqn{\hat U_2}), vec(\eqn{\hat V_2}).}
#'}}
#'\item{\code{sd.A1}}{element-wise standard errors of \eqn{\hat A_1}, aligned with \code{A1}.}
#'\item{\code{sd.A2}}{element-wise standard errors of \eqn{\hat A_2}, aligned with \code{A2}.}
#'\item{\code{niter}}{number of iterations.}
#'\item{\code{BIC}}{value of the extended Bayesian information criterion.}
#'}
#'@references
#'Reduced Rank Autoregressive Models for Matrix Time Series, by Han Xiao, Yuefeng Han, Rong Chen and Chengcheng Liu.
#'@examples
#' set.seed(333)
#' dim <- c(3,3)
#' xx <- tenAR.sim(t=500, dim, R=2, P=1, rho=0.5, cov='iid')
#' est <- matAR.RR.est(xx, method="RRLSE", k1=1, k2=1)
matAR.RR.est <- function(xx, method, A1.init=NULL, A2.init=NULL, Sig1.init=NULL, Sig2.init=NULL,k1=NULL, k2=NULL, niter=200,tol=1e-4){
  if (identical("PROJ", method)) {
    model <- MAR1.PROJ(xx) # just keep it there
  } else if (identical("LSE", method)) {
    model <- MAR1.LS(xx, niter=niter, tol=tol) # just keep it there
  } else if (identical("MLE", method)) {
    model <- MAR1.MLE(xx, Sigl.init=Sig1.init,Sigr.init=Sig2.init, niter=niter, tol=tol) # just keep it there
  } else if (identical("VAR", method)) {
    model <- tenAR.VAR(xx, P=1)
  } else if (identical("RRLSE", method)) {
    model <- MAR1.RR(xx=xx, k1=k1, k2=k2, A1.init=A1.init, A2.init=A2.init, niter=niter, tol=tol)
  } else if (identical("RRMLE", method)) {
    model <- MAR1.CC(xx=xx, k1=k1, k2=k2, A1.init=A1.init, A2.init=A2.init, Sigl.init=Sig1.init, Sigr.init=Sig2.init, niter=niter, tol=tol)
  } else {
    stop("Please specify the method in MAR.")
  }
  # Create an S3 object of the tenAR class
  tenAR(model)
}


#' Asymptotic Covariance Matrix of One-Term Reduced rank MAR(1) Model
#'
#' Asymptotic covariance matrix of the reduced rank MAR(1) model. If \code{Sigma1} and \code{Sigma2} is provided in input,
#' we assume a separable covariance matrix, Cov(vec(\eqn{E_t})) = \eqn{\Sigma_2 \otimes \Sigma_1}.
#'@name matAR.RR.se
#'@rdname matAR.RR.se
#'@aliases matAR.RR.se
#'@usage matAR.RR.se(A1,A2,k1,k2,method,Sigma.e=NULL,Sigma1=NULL,Sigma2=NULL,RU1=diag(k1),
#'RV1=diag(k1),RU2=diag(k2),RV2=diag(k2),mpower=100)
#'@import tensor rTensor expm
#'@importFrom MASS ginv
#'@export
#'@param A1 left coefficient matrix.
#'@param A2 right coefficient matrix.
#'@param k1 rank of \eqn{A_1}.
#'@param k2 rank of \eqn{A_2}.
#'@param method character string, specifying the method of the estimation to be used. \describe{
#'  \item{\code{"RRLSE",}}{Least squares.}
#'  \item{\code{"RRMLE",}}{MLE under a separable cov(vec(\eqn{E_t})).}
#'}
#'@param Sigma.e only if \code{method} = "RRLSE". Cov(vec(\eqn{E_t})) = Sigma.e: covariance matrix of dimension \eqn{(d_1 d_2) \times (d_1 d_2)}
#'@param Sigma1,Sigma2 only if \code{method} = "RRMLE". Cov(vec(\eqn{E_t})) = \eqn{\Sigma_2 \otimes \Sigma_1}. \eqn{\Sigma_i} is \eqn{d_i \times d_i}, \eqn{i=1,2}.
#'@param RU1,RV1,RU2,RV2 orthogonal rotations of \eqn{U_1,V_1,U_2,V_2}, e.g., new_U1=U1 \code{RU1}.
#'@param mpower truncate the VMA(\eqn{\infty}) representation of vec(\eqn{X_t}) at \code{mpower} for the purpose of calculating the autocovariances. The default is 100.
#'@return a list containing the following:\describe{
#'\item{\code{Sigma}}{asymptotic covariance matrix of (vec(\eqn{\hat A_1}),vec(\eqn{\hat A_2^T})).}
#'\item{\code{Theta1.u}}{asymptotic covariance matrix of vec(\eqn{\hat U_1}).}
#'\item{\code{Theta1.v}}{asymptotic covariance matrix of vec(\eqn{\hat V_1}).}
#'\item{\code{Theta2.u}}{asymptotic covariance matrix of vec(\eqn{\hat U_2}).}
#'\item{\code{Theta2.v}}{asymptotic covariance matrix of vec(\eqn{\hat V_2}).}
#'}
#'@references
#'Han Xiao, Yuefeng Han, Rong Chen and Chengcheng Liu, Reduced Rank Autoregressive Models for Matrix Time Series.
matAR.RR.se <- function(A1,A2,k1,k2,method,Sigma.e=NULL,Sigma1=NULL,Sigma2=NULL,RU1=diag(k1),RV1=diag(k1),RU2=diag(k2),RV2=diag(k2),mpower=100){
  if (method == "RRLSE"){
    return(MAR1.RRLS.SE(A1,A2,k1,k2,Sigma.e,RU1=diag(k1),RV1=diag(k1),RU2=diag(k2),RV2=diag(k2),mpower=100))
  } else if (method == "RRMLE") {
    return(MAR1.RRCC.SE(A1,A2,k1,k2,Sigma1,Sigma2,RU1=diag(k1),RV1=diag(k1),RU2=diag(k2),RV2=diag(k2),mpower=100))
  } else {
    stop("please specify method.")
  }
}


MAR1.RRLS.SE <- function(A1,A2,k1,k2,Sigma.e,RU1=diag(k1),RV1=diag(k1),RU2=diag(k2),RV2=diag(k2),mpower=100){
  # iterative least square
  # X_t = A1 X_{t-1} A2^T + E_t
  # RU1,RV1,RU2,RV2: rotation of U1,V1,U2,V2, e.g., new_U1=U1 RU1
  # Cov(vec(E_t)) = Sigma.e : of dimension d1d2\times d1d2
  # truncate vec(X_t) to mpower term, i.e. VMA(mpower)
  # return Sigma.LS: asymptotic covariance matrix of (vec(\hat A1),vec(\hat A2^T))
  #  Theta1.LS.u, Theta1.LS.v: asymptotic covariance matrix of vec(\hat U1), vec(\hat V1)
  #  Theta2.LS.u, Theta2.LS.v: asymptotic covariance matrix of vec(\hat U2), vec(\hat V2)
  d1 <- dim(A1)[1]
  d2 <- dim(A2)[1]
  Sigma.ee <- array(Sigma.e,c(d1,d2,d1,d2))
  B <- kronecker(A2,A1)
  #Sigma.x.vec, Sigma.xx: covariance matrix of vec(X_t)
  Sigma.x.vec <- Sigma.e
  for(i in 1:mpower){
    Sigma.x.vec <- Sigma.x.vec+(B%^%i)%*%Sigma.e%*%t(B%^%i)
  }
  Sigma.xx <- array(Sigma.x.vec,c(d1,d2,d1,d2))
  Gamma1 <- tensor(Sigma.xx,t(A1)%*%A1,c(1,3),c(1,2))
  Gamma2 <- tensor(Sigma.xx,t(A2)%*%A2,c(2,4),c(1,2))
  Gamma3 <- kronecker(diag(1,d1),A1)%*%matrix(aperm(Sigma.xx,c(3,1,4,2)),d1^2,d2^2)%*% kronecker(t(A2),diag(1,d2))
  H <- matrix(NA_real_,d1^2+d2^2,d1^2+d2^2)
  H[1:d1^2,1:d1^2] <- as.vector(A1)%*%t(as.vector(A1)) + kronecker(Gamma2,diag(1,d1))
  H[1:d1^2,(d1^2+1):(d1^2+d2^2)] <- Gamma3
  H[(d1^2+1):(d1^2+d2^2),1:d1^2] <- t(Gamma3)
  H[(d1^2+1):(d1^2+d2^2),(d1^2+1):(d1^2+d2^2)] <- kronecker(diag(1,d2),Gamma1)
  H.inv <- solve(H)
  D1 <- Gamma2%*%t(A1)%*% ginv(A1%*%Gamma2%*%t(A1)) %*%A1
  D2 <- Gamma1%*%t(A2)%*% ginv(A2%*%Gamma1%*%t(A2)) %*%A2
  P1 <- A1%*%ginv(t(A1)%*%A1)%*%t(A1)
  P2 <- A2%*%ginv(t(A2)%*%A2)%*%t(A2)
  Sigma.Q <- matrix(NA_real_,d1^2+d2^2,d1^2+d2^2)
  M1 <- kronecker(t(A2),P1)%*%Sigma.e%*%kronecker(A2,P1)
  M2 <- kronecker(t(A2),diag(d1)-P1)%*%Sigma.e%*%kronecker(A2,P1)
  M3 <- kronecker(t(A2),P1)%*%Sigma.e%*%kronecker(A2,diag(d1)-P1)
  M4 <- kronecker(t(A2),diag(d1)-P1)%*%Sigma.e%*%kronecker(A2,diag(d1)-P1)
  Sigma.Q1 <- Sigma.Q2 <- Sigma.Q3 <- Sigma.Q4 <- matrix(0,d1^2,d1^2)
  for(i in 1:d1){
    for(j in 1:d1){
      for(k in 1:d1){
        for(l in 1:d1){
          Sigma.Q1[(i-1)*d1+j,(k-1)*d1+l] <- sum(Sigma.xx[i,,k,] * M1[(1:d2-1)*d1+j,(1:d2-1)*d1+l])
          Sigma.Q2[(i-1)*d1+j,(k-1)*d1+l] <- sum(Sigma.xx[i,,k,] * M2[(1:d2-1)*d1+j,(1:d2-1)*d1+l])
          Sigma.Q3[(i-1)*d1+j,(k-1)*d1+l] <- sum(Sigma.xx[i,,k,] * M3[(1:d2-1)*d1+j,(1:d2-1)*d1+l])
          Sigma.Q4[(i-1)*d1+j,(k-1)*d1+l] <- sum(Sigma.xx[i,,k,] * M4[(1:d2-1)*d1+j,(1:d2-1)*d1+l])
        }
      }
    }
  }
  Sigma.Q[1:d1^2,1:d1^2] <- Sigma.Q1+kronecker(D1,diag(d1))%*%Sigma.Q2+Sigma.Q3%*%kronecker(t(D1),diag(d1))+
    kronecker(D1,diag(d1))%*%Sigma.Q4%*%kronecker(t(D1),diag(d1))
  M1 <- kronecker(P2,t(A1))%*%Sigma.e%*%kronecker(P2,A1)
  M2 <- kronecker(P2,t(A1))%*%Sigma.e%*%kronecker(diag(d2)-P2,A1)
  M3 <- kronecker(diag(d2)-P2,t(A1))%*%Sigma.e%*%kronecker(P2,A1)
  M4 <- kronecker(diag(d2)-P2,t(A1))%*%Sigma.e%*%kronecker(diag(d2)-P2,A1)
  Sigma.Q1 <- Sigma.Q2 <- Sigma.Q3 <- Sigma.Q4 <- matrix(0,d2^2,d2^2)
  for(i in 1:d2){
    for(j in 1:d2){
      for(k in 1:d2){
        for(l in 1:d2){
          Sigma.Q1[(i-1)*d2+j,(k-1)*d2+l] <- sum(Sigma.xx[,j,,l] * M1[(i-1)*d1+(1:d1),(k-1)*d1+(1:d1)])
          Sigma.Q2[(i-1)*d2+j,(k-1)*d2+l] <- sum(Sigma.xx[,j,,l] * M2[(i-1)*d1+(1:d1),(k-1)*d1+(1:d1)])
          Sigma.Q3[(i-1)*d2+j,(k-1)*d2+l] <- sum(Sigma.xx[,j,,l] * M3[(i-1)*d1+(1:d1),(k-1)*d1+(1:d1)])
          Sigma.Q4[(i-1)*d2+j,(k-1)*d2+l] <- sum(Sigma.xx[,j,,l] * M4[(i-1)*d1+(1:d1),(k-1)*d1+(1:d1)])
        }
      }
    }
  }
  Sigma.Q[(d1^2+1):(d1^2+d2^2),(d1^2+1):(d1^2+d2^2)] <- Sigma.Q1+Sigma.Q2%*%kronecker(diag(d2),t(D2))+
    kronecker(diag(d2),D2)%*%Sigma.Q3+kronecker(diag(d2),D2)%*%Sigma.Q4%*%kronecker(diag(d2),t(D2))
  M1 <- kronecker(t(A2),P1)%*%Sigma.e%*%kronecker(P2,A1)
  M2 <- kronecker(t(A2),P1)%*%Sigma.e%*%kronecker(diag(d2)-P2,A1)
  M3 <- kronecker(t(A2),diag(d1)-P1)%*%Sigma.e%*%kronecker(P2,A1)
  M4 <- kronecker(t(A2),diag(d1)-P1)%*%Sigma.e%*%kronecker(diag(d2)-P2,A1)
  Sigma.Q1 <- Sigma.Q2 <- Sigma.Q3 <- Sigma.Q4 <- matrix(0,d1^2,d2^2)
  for(i in 1:d1){
    for(j in 1:d1){
      for(k in 1:d2){
        for(l in 1:d2){
          Sigma.Q1[(i-1)*d1+j,(k-1)*d2+l] <- sum(Sigma.xx[i,,,l] * M1[(1:d2-1)*d1+j,(k-1)*d1+(1:d1)])
          Sigma.Q2[(i-1)*d1+j,(k-1)*d2+l] <- sum(Sigma.xx[i,,,l] * M2[(1:d2-1)*d1+j,(k-1)*d1+(1:d1)])
          Sigma.Q3[(i-1)*d1+j,(k-1)*d2+l] <- sum(Sigma.xx[i,,,l] * M3[(1:d2-1)*d1+j,(k-1)*d1+(1:d1)])
          Sigma.Q4[(i-1)*d1+j,(k-1)*d2+l] <- sum(Sigma.xx[i,,,l] * M4[(1:d2-1)*d1+j,(k-1)*d1+(1:d1)])
        }
      }
    }
  }
  Sigma.Q[1:d1^2,(d1^2+1):(d1^2+d2^2)] <- Sigma.Q1+Sigma.Q2%*%kronecker(diag(d2),t(D2))+
    kronecker(D1,diag(d1))%*%Sigma.Q3+kronecker(D1,diag(d1))%*%Sigma.Q4%*%kronecker(diag(d2),t(D2))
  Sigma.Q[(d1^2+1):(d1^2+d2^2),1:d1^2] <- t(Sigma.Q[1:d1^2,(d1^2+1):(d1^2+d2^2)])
  Sigma.LS <- H.inv%*%Sigma.Q%*%H.inv
  Sigma.LS11 <- Sigma.LS[1:d1^2,1:d1^2]
  Sigma.LS11.t <- array(Sigma.LS11,c(d1,d1,d1,d1))
  Sigma.LS11.t <- aperm(Sigma.LS11.t,c(2,1,4,3))
  Sigma.LS11.t <- matrix(Sigma.LS11.t, d1^2, d1^2)
  Sigma.LS22.t <- Sigma.LS[(d1^2+1):(d1^2+d2^2),(d1^2+1):(d1^2+d2^2)]
  Sigma.LS22 <- array(Sigma.LS22.t,c(d2,d2,d2,d2))
  Sigma.LS22 <- aperm(Sigma.LS22,c(2,1,4,3))
  Sigma.LS22 <- matrix(Sigma.LS22, d2^2, d2^2)
  svd.A1 <- svd(A1)
  #k1 <- length(svd.A1$d[svd.A1$d>1e-10])
  D1 <- diag(c(svd.A1$d[1:k1],1))[1:k1,1:k1]
  U1 <- svd.A1$u[,1:k1]
  V1 <- svd.A1$v[,1:k1]
  if(k1<d1){
    U1c <- svd.A1$u[,(k1+1):d1]
    V1c <- svd.A1$v[,(k1+1):d1]
  }else if(k1==d1){
    U1c <- 0
    V1c <- 0
  }
  #U1c <- svd.A1$u[,(k1+1):d1]  # original version : didn't consider if k1=d1
  #V1c <- svd.A1$v[,(k1+1):d1]
  e1 <- diag(d1)
  J1 <- matrix(0,d1^2,d1^2)
  for(i in 1:d1){
    J1[,((i-1)*d1+1):(i*d1)] <- kronecker(diag(d1),e1[,i])
  }
  C1 <- kronecker(A1,diag(d1)) + kronecker(diag(d1),A1)%*%J1
  Sigma.LS.1u <- C1%*%Sigma.LS11%*%t(C1)
  C1.t <- kronecker(t(A1),diag(d1)) + kronecker(diag(d1),t(A1))%*%J1
  Sigma.LS.1v <- C1.t%*%Sigma.LS11.t%*%t(C1.t)
  e1 <- diag(k1)
  L1 <- matrix(0,k1^2,k1)
  for(i in 1:k1){
    L1[,i] <- kronecker(e1[,i],e1[,i])
  }

  if(k1<d1){
    R1u <- cbind(kronecker(diag(k1),U1), kronecker(diag(k1),U1c)) %*% rbind(solve(kronecker(D1^2,diag(k1)) -
                                                                                    kronecker(diag(k1),D1^2)+L1%*%t(L1)) %*% (diag(k1^2)-L1%*%t(L1)) %*% kronecker(t(U1),t(U1)),
                                                                            kronecker(diag(1/c(svd.A1$d[1:k1],1))[1:k1,1:k1]^2%*%t(U1), t(U1c))  )
    R1v <- cbind(kronecker(diag(k1),V1), kronecker(diag(k1),V1c)) %*% rbind(solve(kronecker(D1^2,diag(k1)) -
                                                                                    kronecker(diag(k1),D1^2)+L1%*%t(L1)) %*% (diag(k1^2)-L1%*%t(L1)) %*% kronecker(t(V1),t(V1)),
                                                                            kronecker(diag(1/c(svd.A1$d[1:k1],1))[1:k1,1:k1]^2%*%t(V1), t(V1c))  )
  }else if(k1==d1){
    R1u <- kronecker(diag(k1),U1) %*% solve(kronecker(D1^2,diag(k1)) -
                                              kronecker(diag(k1),D1^2)+L1%*%t(L1)) %*% (diag(k1^2)-L1%*%t(L1)) %*% kronecker(t(U1),t(U1))
    R1v <- kronecker(diag(k1),V1) %*% solve(kronecker(D1^2,diag(k1)) -
                                              kronecker(diag(k1),D1^2)+L1%*%t(L1)) %*% (diag(k1^2)-L1%*%t(L1)) %*% kronecker(t(V1),t(V1))

  }

  # R1u <- cbind(kronecker(diag(k1),U1), kronecker(diag(k1),U1c)) %*% rbind(solve(kronecker(D1^2,diag(k1)) -
  #                                                                                 kronecker(diag(k1),D1^2)+L1%*%t(L1)) %*% (diag(k1^2)-L1%*%t(L1)) %*% kronecker(t(U1),t(U1)),
  #                                                                         kronecker(diag(1/c(svd.A1$d[1:k1],1))[1:k1,1:k1]^2%*%t(U1), t(U1c))  )
  # R1v <- cbind(kronecker(diag(k1),V1), kronecker(diag(k1),V1c)) %*% rbind(solve(kronecker(D1^2,diag(k1)) -
  #                                                                                 kronecker(diag(k1),D1^2)+L1%*%t(L1)) %*% (diag(k1^2)-L1%*%t(L1)) %*% kronecker(t(V1),t(V1)),
  #                                                                         kronecker(diag(1/c(svd.A1$d[1:k1],1))[1:k1,1:k1]^2%*%t(V1), t(V1c))  )
  Theta1.LS.u <- R1u%*% Sigma.LS.1u %*%t(R1u)
  Theta1.LS.v <- R1v%*% Sigma.LS.1v %*%t(R1v)
  Theta1.LS.u <- kronecker(t(RU1),diag(d1))%*%Theta1.LS.u
  Theta1.LS.v <- kronecker(t(RV1),diag(d1))%*%Theta1.LS.v
  svd.A2 <- svd(A2)
  #k2 <- length(svd.A2$d[svd.A2$d>1e-10])
  D2 <- diag(c(svd.A2$d[1:k2],1))[1:k2,1:k2]
  U2 <- svd.A2$u[,1:k2]
  V2 <- svd.A2$v[,1:k2]
  if(k2<d2){
    U2c <- svd.A2$u[,(k2+1):d2]
    V2c <- svd.A2$v[,(k2+1):d2]
  }else if(k2==d2){
    U2c <- 0
    V2c <- 0
  }
  #U2c <- svd.A2$u[,(k2+1):d2]
  #V2c <- svd.A2$v[,(k2+1):d2]
  e2 <- diag(d2)
  J2 <- matrix(0,d2^2,d2^2)
  for(i in 1:d2){
    J2[,((i-1)*d2+1):(i*d2)] <- kronecker(diag(d2),e2[,i])
  }
  C2 <- kronecker(A2,diag(d2)) + kronecker(diag(d2),A2)%*%J2
  Sigma.LS.2u <- C2%*%Sigma.LS22%*%t(C2)
  C2.t <- kronecker(t(A2),diag(d2)) + kronecker(diag(d2),t(A2))%*%J2
  Sigma.LS.2v <- C2.t%*%Sigma.LS22.t%*%t(C2.t)
  e2 <- diag(k2)
  L2 <- matrix(0,k2^2,k2)
  for(i in 1:k2){
    L2[,i] <- kronecker(e2[,i],e2[,i])
  }


  if(k2<d2){
    R2u <- cbind(kronecker(diag(k2),U2), kronecker(diag(k2),U2c)) %*% rbind(solve(kronecker(D2^2,diag(k2)) -
                                                                                    kronecker(diag(k2),D2^2)+L2%*%t(L2)) %*% (diag(k2^2)-L2%*%t(L2)) %*% kronecker(t(U2),t(U2)),
                                                                            kronecker(diag(1/c(svd.A2$d[1:k2],1))[1:k2,1:k2]^2%*%t(U2), t(U2c))  )
    R2v <- cbind(kronecker(diag(k2),V2), kronecker(diag(k2),V2c)) %*% rbind(solve(kronecker(D2^2,diag(k2)) -
                                                                                    kronecker(diag(k2),D2^2)+L2%*%t(L2)) %*% (diag(k2^2)-L2%*%t(L2)) %*% kronecker(t(V2),t(V2)),
                                                                            kronecker(diag(1/c(svd.A2$d[1:k2],1))[1:k2,1:k2]^2%*%t(V2), t(V2c))  )
  }else if(k2==d2){
    R2u <- kronecker(diag(k2),U2) %*% solve(kronecker(D2^2,diag(k2)) -
                                              kronecker(diag(k2),D2^2)+L2%*%t(L2)) %*% (diag(k2^2)-L2%*%t(L2)) %*% kronecker(t(U2),t(U2))
    R2v <- kronecker(diag(k2),V2) %*% solve(kronecker(D2^2,diag(k2)) -
                                              kronecker(diag(k2),D2^2)+L2%*%t(L2)) %*% (diag(k2^2)-L2%*%t(L2)) %*% kronecker(t(V2),t(V2))

  }


  # R2u <- cbind(kronecker(diag(k2),U2), kronecker(diag(k2),U2c)) %*% rbind(solve(kronecker(D2^2,diag(k2)) -
  #                                                                                 kronecker(diag(k2),D2^2)+L2%*%t(L2)) %*% (diag(k2^2)-L2%*%t(L2)) %*% kronecker(t(U2),t(U2)),
  #                                                                         kronecker(diag(1/c(svd.A2$d[1:k2],1))[1:k2,1:k2]^2%*%t(U2), t(U2c))  )
  # R2v <- cbind(kronecker(diag(k2),V2), kronecker(diag(k2),V2c)) %*% rbind(solve(kronecker(D2^2,diag(k2)) -
  #                                                                                 kronecker(diag(k2),D2^2)+L2%*%t(L2)) %*% (diag(k2^2)-L2%*%t(L2)) %*% kronecker(t(V2),t(V2)),
  #                                                                         kronecker(diag(1/c(svd.A2$d[1:k2],1))[1:k2,1:k2]^2%*%t(V2), t(V2c))  )
  Theta2.LS.u <- R2u%*% Sigma.LS.2u %*%t(R2u)
  Theta2.LS.v <- R2v%*% Sigma.LS.2v %*%t(R2v)
  Theta2.LS.u <- kronecker(t(RU2),diag(d2))%*%Theta2.LS.u
  Theta2.LS.v <- kronecker(t(RV2),diag(d2))%*%Theta2.LS.v
  return(list("Sigma"=Sigma.LS,"Theta1.u"=Theta1.LS.u,"Theta1.v"=Theta1.LS.v,
              "Theta2.u"=Theta2.LS.u,"Theta2.v"=Theta2.LS.v))
}


MAR1.RRCC.SE <- function(A1,A2,k1,k2,Sigma1,Sigma2,RU1=diag(k1),RV1=diag(k1),RU2=diag(k2),RV2=diag(k2),mpower=100){
  # canonical correlation analysis
  # X_t = A1 X_{t-1} A2^T + E_t
  # RU1,RV1,RU2,RV2: rotation of U1,V1,U2,V2, e.g., new_U1=U1 RU1
  # Cov(vec(E_t)) = Sigma.e=Sigma2 \otimes \Sigma1 : of dimension d1d2\times d1d2
  # truncate vec(X_t) to mpower term, i.e. VMA(mpower)
  # return Sigma.CC: asymptotic covariance matrix of (vec(\hat A1),vec(\hat A2^T))
  #  Theta1.CC.u, Theta1.CC.v: asymptotic covariance matrix of vec(\hat U1), vec(\hat V1)
  #  Theta2.CC.u, Theta2.CC.v: asymptotic covariance matrix of vec(\hat U2), vec(\hat V2)
  d1 <- dim(A1)[1]
  d2 <- dim(A2)[1]
  Sigma1.inv=solve(Sigma1)
  Sigma2.inv=solve(Sigma2)
  Sigma.e=kronecker(Sigma2,Sigma1)
  Sigma.e.inv=kronecker(Sigma2.inv,Sigma1.inv)
  Sigma.ee <- array(Sigma.e,c(d1,d2,d1,d2))
  B <- kronecker(A2,A1)
  #Sigma.x.vec, Sigma.xx: covariance matrix of vec(X_t)
  Sigma.x.vec <- Sigma.e
  for(i in 1:mpower){
    Sigma.x.vec <- Sigma.x.vec+(B%^%i)%*%Sigma.e%*%t(B%^%i)
  }
  Sigma.xx <- array(Sigma.x.vec,c(d1,d2,d1,d2))
  Gamma1 <- tensor(Sigma.xx,t(A1)%*%Sigma1.inv%*%A1,c(1,3),c(1,2))
  Gamma2 <- tensor(Sigma.xx,t(A2)%*%Sigma2.inv%*%A2,c(2,4),c(1,2))
  Gamma3 <- kronecker(diag(1,d1),Sigma1.inv%*%A1)%*%matrix(aperm(Sigma.xx,c(3,1,4,2)),d1^2,d2^2)%*%
    kronecker(t(A2)%*%Sigma2.inv,diag(1,d2))
  H <- matrix(NA_real_,d1^2+d2^2,d1^2+d2^2)
  H[1:d1^2,1:d1^2] <- as.vector(A1)%*%t(as.vector(A1)) + kronecker(Gamma2,Sigma1.inv)
  H[1:d1^2,(d1^2+1):(d1^2+d2^2)] <- Gamma3
  H[(d1^2+1):(d1^2+d2^2),1:d1^2] <- t(Gamma3)
  H[(d1^2+1):(d1^2+d2^2),(d1^2+1):(d1^2+d2^2)] <- kronecker(Sigma2.inv,Gamma1)
  H.inv <- solve(H)
  D1 <- Gamma2%*%t(A1)%*% ginv(A1%*%Gamma2%*%t(A1)) %*%A1
  D2 <- Gamma1%*%t(A2)%*% ginv(A2%*%Gamma1%*%t(A2)) %*%A2
  P1 <- Sigma1.inv%*%A1%*%ginv(t(A1)%*%Sigma1.inv%*%A1)%*%t(A1)
  P2 <- Sigma2.inv%*%A2%*%ginv(t(A2)%*%Sigma2.inv%*%A2)%*%t(A2)
  Sigma.Q <- matrix(NA_real_,d1^2+d2^2,d1^2+d2^2)
  M1 <- kronecker(t(A2),P1)%*%Sigma.e.inv%*%kronecker(A2,P1)
  M2 <- kronecker(t(A2),diag(d1)-P1)%*%Sigma.e.inv%*%kronecker(A2,P1)
  M3 <- kronecker(t(A2),P1)%*%Sigma.e.inv%*%kronecker(A2,diag(d1)-P1)
  M4 <- kronecker(t(A2),diag(d1)-P1)%*%Sigma.e.inv%*%kronecker(A2,diag(d1)-P1)
  Sigma.Q1 <- Sigma.Q2 <- Sigma.Q3 <- Sigma.Q4 <- matrix(0,d1^2,d1^2)
  for(i in 1:d1){
    for(j in 1:d1){
      for(k in 1:d1){
        for(l in 1:d1){
          Sigma.Q1[(i-1)*d1+j,(k-1)*d1+l] <- sum(Sigma.xx[i,,k,] * M1[(1:d2-1)*d1+j,(1:d2-1)*d1+l])
          Sigma.Q2[(i-1)*d1+j,(k-1)*d1+l] <- sum(Sigma.xx[i,,k,] * M2[(1:d2-1)*d1+j,(1:d2-1)*d1+l])
          Sigma.Q3[(i-1)*d1+j,(k-1)*d1+l] <- sum(Sigma.xx[i,,k,] * M3[(1:d2-1)*d1+j,(1:d2-1)*d1+l])
          Sigma.Q4[(i-1)*d1+j,(k-1)*d1+l] <- sum(Sigma.xx[i,,k,] * M4[(1:d2-1)*d1+j,(1:d2-1)*d1+l])
        }
      }
    }
  }
  Sigma.Q[1:d1^2,1:d1^2] <- Sigma.Q1+kronecker(D1,diag(d1))%*%Sigma.Q2+Sigma.Q3%*%kronecker(t(D1),diag(d1))+
    kronecker(D1,diag(d1))%*%Sigma.Q4%*%kronecker(t(D1),diag(d1))
  M1 <- kronecker(P2,t(A1))%*%Sigma.e.inv%*%kronecker(P2,A1)
  M2 <- kronecker(P2,t(A1))%*%Sigma.e.inv%*%kronecker(diag(d2)-P2,A1)
  M3 <- kronecker(diag(d2)-P2,t(A1))%*%Sigma.e.inv%*%kronecker(P2,A1)
  M4 <- kronecker(diag(d2)-P2,t(A1))%*%Sigma.e.inv%*%kronecker(diag(d2)-P2,A1)
  Sigma.Q1 <- Sigma.Q2 <- Sigma.Q3 <- Sigma.Q4 <- matrix(0,d2^2,d2^2)
  for(i in 1:d2){
    for(j in 1:d2){
      for(k in 1:d2){
        for(l in 1:d2){
          Sigma.Q1[(i-1)*d2+j,(k-1)*d2+l] <- sum(Sigma.xx[,j,,l] * M1[(i-1)*d1+(1:d1),(k-1)*d1+(1:d1)])
          Sigma.Q2[(i-1)*d2+j,(k-1)*d2+l] <- sum(Sigma.xx[,j,,l] * M2[(i-1)*d1+(1:d1),(k-1)*d1+(1:d1)])
          Sigma.Q3[(i-1)*d2+j,(k-1)*d2+l] <- sum(Sigma.xx[,j,,l] * M3[(i-1)*d1+(1:d1),(k-1)*d1+(1:d1)])
          Sigma.Q4[(i-1)*d2+j,(k-1)*d2+l] <- sum(Sigma.xx[,j,,l] * M4[(i-1)*d1+(1:d1),(k-1)*d1+(1:d1)])
        }
      }
    }
  }
  Sigma.Q[(d1^2+1):(d1^2+d2^2),(d1^2+1):(d1^2+d2^2)] <- Sigma.Q1+Sigma.Q2%*%kronecker(diag(d2),t(D2))+
    kronecker(diag(d2),D2)%*%Sigma.Q3+kronecker(diag(d2),D2)%*%Sigma.Q4%*%kronecker(diag(d2),t(D2))
  M1 <- kronecker(t(A2),P1)%*%Sigma.e.inv%*%kronecker(P2,A1)
  M2 <- kronecker(t(A2),P1)%*%Sigma.e.inv%*%kronecker(diag(d2)-P2,A1)
  M3 <- kronecker(t(A2),diag(d1)-P1)%*%Sigma.e.inv%*%kronecker(P2,A1)
  M4 <- kronecker(t(A2),diag(d1)-P1)%*%Sigma.e.inv%*%kronecker(diag(d2)-P2,A1)
  Sigma.Q1 <- Sigma.Q2 <- Sigma.Q3 <- Sigma.Q4 <- matrix(0,d1^2,d2^2)
  for(i in 1:d1){
    for(j in 1:d1){
      for(k in 1:d2){
        for(l in 1:d2){
          Sigma.Q1[(i-1)*d1+j,(k-1)*d2+l] <- sum(Sigma.xx[i,,,l] * M1[(1:d2-1)*d1+j,(k-1)*d1+(1:d1)])
          Sigma.Q2[(i-1)*d1+j,(k-1)*d2+l] <- sum(Sigma.xx[i,,,l] * M2[(1:d2-1)*d1+j,(k-1)*d1+(1:d1)])
          Sigma.Q3[(i-1)*d1+j,(k-1)*d2+l] <- sum(Sigma.xx[i,,,l] * M3[(1:d2-1)*d1+j,(k-1)*d1+(1:d1)])
          Sigma.Q4[(i-1)*d1+j,(k-1)*d2+l] <- sum(Sigma.xx[i,,,l] * M4[(1:d2-1)*d1+j,(k-1)*d1+(1:d1)])
        }
      }
    }
  }
  Sigma.Q[1:d1^2,(d1^2+1):(d1^2+d2^2)] <- Sigma.Q1+Sigma.Q2%*%kronecker(diag(d2),t(D2))+
    kronecker(D1,diag(d1))%*%Sigma.Q3+kronecker(D1,diag(d1))%*%Sigma.Q4%*%kronecker(diag(d2),t(D2))
  Sigma.Q[(d1^2+1):(d1^2+d2^2),1:d1^2] <- t(Sigma.Q[1:d1^2,(d1^2+1):(d1^2+d2^2)])
  Sigma.CC <- H.inv%*%Sigma.Q%*%H.inv
  Sigma.CC11 <- Sigma.CC[1:d1^2,1:d1^2]
  Sigma.CC11.t <- array(Sigma.CC11,c(d1,d1,d1,d1))
  Sigma.CC11.t <- aperm(Sigma.CC11.t,c(2,1,4,3))
  Sigma.CC11.t <- matrix(Sigma.CC11.t, d1^2, d1^2)
  Sigma.CC22.t <- Sigma.CC[(d1^2+1):(d1^2+d2^2),(d1^2+1):(d1^2+d2^2)]
  Sigma.CC22 <- array(Sigma.CC22.t,c(d2,d2,d2,d2))
  Sigma.CC22 <- aperm(Sigma.CC22,c(2,1,4,3))
  Sigma.CC22 <- matrix(Sigma.CC22, d2^2, d2^2)
  svd.A1 <- svd(A1)
  #k1 <- length(svd.A1$d[svd.A1$d>1e-10])
  D1 <- diag(c(svd.A1$d[1:k1],1))[1:k1,1:k1]
  U1 <- svd.A1$u[,1:k1]
  V1 <- svd.A1$v[,1:k1]
  if(k1<d1){
    U1c <- svd.A1$u[,(k1+1):d1]
    V1c <- svd.A1$v[,(k1+1):d1]
  }else if(k1==d1){
    U1c <- 0
    V1c <- 0
  }
  e1 <- diag(d1)
  J1 <- matrix(0,d1^2,d1^2)
  for(i in 1:d1){
    J1[,((i-1)*d1+1):(i*d1)] <- kronecker(diag(d1),e1[,i])
  }
  C1 <- kronecker(A1,diag(d1)) + kronecker(diag(d1),A1)%*%J1
  Sigma.CC.1u <- C1%*%Sigma.CC11%*%t(C1)
  C1.t <- kronecker(t(A1),diag(d1)) + kronecker(diag(d1),t(A1))%*%J1
  Sigma.CC.1v <- C1.t%*%Sigma.CC11.t%*%t(C1.t)
  e1 <- diag(k1)
  L1 <- matrix(0,k1^2,k1)
  for(i in 1:k1){
    L1[,i] <- kronecker(e1[,i],e1[,i])
  }

  if(k1<d1){
    R1u <- cbind(kronecker(diag(k1),U1), kronecker(diag(k1),U1c)) %*% rbind(solve(kronecker(D1^2,diag(k1)) -
                                                                                    kronecker(diag(k1),D1^2)+L1%*%t(L1)) %*% (diag(k1^2)-L1%*%t(L1)) %*% kronecker(t(U1),t(U1)),
                                                                            kronecker(diag(1/c(svd.A1$d[1:k1],1))[1:k1,1:k1]^2%*%t(U1), t(U1c))  )
    R1v <- cbind(kronecker(diag(k1),V1), kronecker(diag(k1),V1c)) %*% rbind(solve(kronecker(D1^2,diag(k1)) -
                                                                                    kronecker(diag(k1),D1^2)+L1%*%t(L1)) %*% (diag(k1^2)-L1%*%t(L1)) %*% kronecker(t(V1),t(V1)),
                                                                            kronecker(diag(1/c(svd.A1$d[1:k1],1))[1:k1,1:k1]^2%*%t(V1), t(V1c))  )
  }else if(k1==d1){
    R1u <- kronecker(diag(k1),U1) %*% solve(kronecker(D1^2,diag(k1)) -
                                              kronecker(diag(k1),D1^2)+L1%*%t(L1)) %*% (diag(k1^2)-L1%*%t(L1)) %*% kronecker(t(U1),t(U1))
    R1v <- kronecker(diag(k1),V1) %*% solve(kronecker(D1^2,diag(k1)) -
                                              kronecker(diag(k1),D1^2)+L1%*%t(L1)) %*% (diag(k1^2)-L1%*%t(L1)) %*% kronecker(t(V1),t(V1))

  }


  Theta1.CC.u <- R1u%*% Sigma.CC.1u %*%t(R1u)
  Theta1.CC.v <- R1v%*% Sigma.CC.1v %*%t(R1v)
  Theta1.CC.u <- kronecker(t(RU1),diag(d1))%*%Theta1.CC.u
  Theta1.CC.v <- kronecker(t(RV1),diag(d1))%*%Theta1.CC.v
  svd.A2 <- svd(A2)
  #k2 <- length(svd.A2$d[svd.A2$d>1e-10])
  D2 <- diag(c(svd.A2$d[1:k2],1))[1:k2,1:k2]
  U2 <- svd.A2$u[,1:k2]
  V2 <- svd.A2$v[,1:k2]
  if(k1<d1){
    U2c <- svd.A2$u[,(k2+1):d2]
    V2c <- svd.A2$v[,(k2+1):d2]
  }else if(k2==d2){
    U2c <- 0
    V2c <- 0
  }
  e2 <- diag(d2)
  J2 <- matrix(0,d2^2,d2^2)
  for(i in 1:d2){
    J2[,((i-1)*d2+1):(i*d2)] <- kronecker(diag(d2),e2[,i])
  }
  C2 <- kronecker(A2,diag(d2)) + kronecker(diag(d2),A2)%*%J2
  Sigma.CC.2u <- C2%*%Sigma.CC22%*%t(C2)
  C2.t <- kronecker(t(A2),diag(d2)) + kronecker(diag(d2),t(A2))%*%J2
  Sigma.CC.2v <- C2.t%*%Sigma.CC22.t%*%t(C2.t)
  e2 <- diag(k2)
  L2 <- matrix(0,k2^2,k2)
  for(i in 1:k2){
    L2[,i] <- kronecker(e2[,i],e2[,i])
  }
  if(k2<d2){
    R2u <- cbind(kronecker(diag(k2),U2), kronecker(diag(k2),U2c)) %*% rbind(solve(kronecker(D2^2,diag(k2)) -
                                                                                    kronecker(diag(k2),D2^2)+L2%*%t(L2)) %*% (diag(k2^2)-L2%*%t(L2)) %*% kronecker(t(U2),t(U2)),
                                                                            kronecker(diag(1/c(svd.A2$d[1:k2],1))[1:k2,1:k2]^2%*%t(U2), t(U2c))  )
    R2v <- cbind(kronecker(diag(k2),V2), kronecker(diag(k2),V2c)) %*% rbind(solve(kronecker(D2^2,diag(k2)) -
                                                                                    kronecker(diag(k2),D2^2)+L2%*%t(L2)) %*% (diag(k2^2)-L2%*%t(L2)) %*% kronecker(t(V2),t(V2)),
                                                                            kronecker(diag(1/c(svd.A2$d[1:k2],1))[1:k2,1:k2]^2%*%t(V2), t(V2c))  )
  }else if(k2==d2){
    R2u <- kronecker(diag(k2),U2) %*% solve(kronecker(D2^2,diag(k2)) -
                                              kronecker(diag(k2),D2^2)+L2%*%t(L2)) %*% (diag(k2^2)-L2%*%t(L2)) %*% kronecker(t(U2),t(U2))
    R2v <- kronecker(diag(k2),V2) %*% solve(kronecker(D2^2,diag(k2)) -
                                              kronecker(diag(k2),D2^2)+L2%*%t(L2)) %*% (diag(k2^2)-L2%*%t(L2)) %*% kronecker(t(V2),t(V2))
  }
  Theta2.CC.u <- R2u%*% Sigma.CC.2u %*%t(R2u)
  Theta2.CC.v <- R2v%*% Sigma.CC.2v %*%t(R2v)
  Theta2.CC.u <- kronecker(t(RU2),diag(d2))%*%Theta2.CC.u
  Theta2.CC.v <- kronecker(t(RV2),diag(d2))%*%Theta2.CC.v
  return(list("Sigma"=Sigma.CC,"Theta1.u"=Theta1.CC.u,"Theta1.v"=Theta1.CC.v,
              "Theta2.u"=Theta2.CC.u,"Theta2.v"=Theta2.CC.v))
}


MAR1.PROJ <- function(xx){
  # xx: T * p * q
  # X_t = LL X_{t-1} RR + E_t
  # Sig = cov(vec(E_t))
  # one-step projection estimation
  # Return LL, RR, and estimate of Sig
  dd <- dim(xx)
  T <- dd[1]
  p <- dd[2]
  q <- dd[3]
  xx.mat <- matrix(xx,T,p*q)
  # kroneck <- t(xx.mat[2:T,]) %*% xx.mat[1:(T-1),] %*% solve(t(xx.mat[1:(T-1),]) %*% xx.mat[1:(T-1),])
  kroneck <- t(xx.mat[2:T,]) %*% xx.mat[1:(T-1),] %*% chol2inv(chol(t(xx.mat[1:(T-1),]) %*% xx.mat[1:(T-1),]))
  ans.projection <- projection(kroneck,r=1,p,q,p,q)
  a <- svd(ans.projection[[1]][[1]],nu=0,nv=0)$d[1]
  LL <- ans.projection[[1]][[1]] / a
  RR <- t(ans.projection[[1]][[2]]) * a
  res = xx[2:T,,,drop=FALSE] - aperm(tensor(tensor(xx[1:(T-1),,,drop=FALSE],RR,3,1),LL,2,2),c(1,3,2))
  Sig <- matrix(tensor(res,res,1,1),p*q)/(T-1)
  # sd <- MAR.SE(xx, t(RR), LL, Sig)
  return(list(A1=LL,A2=RR,Sig=Sig))
}


MAR2.PROJ <- function(xx, R=1, P=1){
  # xx: T * p * q
  # X_t = A1 X_{t-1} t(A2) + E_t
  # Sig = cov(vec(E_t))
  # one-step projection estimation
  # Return LL, RR, and estimate of Sig
  dd <- dim(xx)
  T <- dd[1]
  d1 <- dd[2]
  d2 <- dd[3]
  # xx.mat <- matrix(xx,T,d1*d2)
  # kroneck <- t(xx.mat[2:T,]) %*% xx.mat[1:(T-1),] %*% solve(t(xx.mat[1:(T-1),]) %*% xx.mat[1:(T-1),])
  A = list()
  kroneck <- tenAR.VAR(xx, P)$coef
  for (i in c(1:P)){
    ans.projection <- projection(kroneck[[i]],R[i],d2,d1,d2,d1)
    for (j in c(1:R[i])){
      a = svd(ans.projection[[j]][[1]],nu=0,nv=0)$d[1]
      ans.projection[[j]][[1]] <- ans.projection[[j]][[1]] / a
      ans.projection[[j]][[2]] <- ans.projection[[j]][[2]] * a
    }
    A[[i]] <- ans.projection
  }
  return(list(A=A))
}


MAR1.LS <- function(xx,niter=50,tol=1e-6,print.true = FALSE){
  # xx: T * p * q
  # X_t = LL X_{t-1} RR + E_t
  # Sig = cov(vec(E_t))
  # LS criterion
  # iterative algorithm between LL <--> RR
  # Return LL, RR, and estimate of Sig
  dd=dim(xx)
  T <- dd[1]
  p <- dd[2]
  q <- dd[3]
  LL.old <- diag(p)
  RR.old <- diag(q)
  dis <- 1
  iiter <- 1

  while(iiter <= niter & dis >= tol){
    # estimate RR0
    temp <- tensor(xx[1:(T-1),,,drop=FALSE],LL.old,2,2)  # (T-1) * q * p
    AA <- tensor(temp,temp,c(1,3),c(1,3))
    BB <- tensor(temp,xx[2:T,,,drop=FALSE],c(1,3),c(1,2))
    RR <- solve(AA,BB)
    # estimate LL0
    temp <- tensor(xx[1:(T-1),,,drop=FALSE],RR,3,1)  # (T-1) * p * q
    AA <- tensor(temp,temp,c(1,3),c(1,3))
    BB <- t(tensor(temp,xx[2:T,,,drop=FALSE],c(1,3),c(1,3)))
    LL <- t(solve(t(AA),t(BB)))
    a <- svd(LL,nu=0,nv=0)$d[1]
    LL <- LL / a
    RR <- RR * a
    # update for the next iteration
    dis <- sqrt(sum((kronecker(t(RR),LL)-kronecker(t(RR.old),LL.old))^2))
    LL.old <- LL
    RR.old <- RR
    iiter <- iiter + 1
    if(print.true==TRUE){
      print(LL)
      print(RR)
    }
  }
  res=xx[2:T,,,drop=FALSE] - aperm(tensor(tensor(xx[1:(T-1),,,drop=FALSE],RR,3,1),LL,2,2),c(1,3,2))
  Sig <- matrix(tensor(res,res,1,1),p*q)/(T-1)
  sd <- MAR.SE(xx, t(RR), LL, Sig)
  return(list(A1=LL,A2=t(RR),res=res,Sig=Sig,niter=iiter,sd=sd,data=xx))
}


MAR1.MLE <- function(xx,LL.init=NULL,Sigl.init=NULL,Sigr.init=NULL,niter=50,tol=1e-6,print.true = FALSE){
  # xx: T * p * q
  # X_t = LL X_{t-1} RR + E_t
  # Sig = cov(vec(E_t)) = Sigr otimes Sigl
  # optimization criterion is likelihood
  # iterative algorithm between LL <--> RR <--> Sig_r <--> Sig_l
  # Return LL, RR, Sigl, Sigr
  dd=dim(xx)
  T <- dd[1]
  p <- dd[2]
  q <- dd[3]
  if(is.null(LL.init)){
    LL.old <- diag(p)
  } else{
    LL.old <- LL.init
  }
  RR.old <- diag(q)
  if(is.null(Sigl.init)){
    Sigl.old <- diag(p)
  } else{
    Sigl.old <- Sigl.init
  }
  if(is.null(Sigr.init)){
    Sigr.old <- diag(q)
  } else{
    Sigr.old <- Sigr.init
  }
  dis <- 1
  iiter <- 1

  while(iiter <= niter & dis >= tol){
    Sigl.inv.old <- ginv(Sigl.old)
    Sigr.inv.old <- ginv(Sigr.old)
    # estimate RR0
    temp1 <- tensor(xx[1:(T-1),,,drop=FALSE],LL.old,2,2)  # (T-1) * q * p
    temp2 <- tensor(temp1,Sigl.inv.old,3,2)  # (T-1) * q * p
    AA <- tensor(temp1,temp2,c(1,3),c(1,3))
    BB <- tensor(temp2,xx[2:T,,,drop=FALSE],c(1,3),c(1,2))
    RR <- solve(AA,BB)
    # estimate LL0
    temp1 <- tensor(xx[1:(T-1),,,drop=FALSE],RR,3,1)  # (T-1) * p * q
    temp2 <- tensor(temp1,Sigr.inv.old,3,1)  # (T-1) * p * q
    AA <- tensor(temp1,temp2,c(1,3),c(1,3))
    BB <- t(tensor(temp2,xx[2:T,,,drop=FALSE],c(1,3),c(1,3)))
    LL <- t(solve(t(AA),t(BB)))
    res=xx[2:T,,,drop=FALSE] - aperm(tensor(tensor(xx[1:(T-1),,,drop=FALSE],RR,3,1),LL,2,2),c(1,3,2))
    temp <- tensor(res,Sigl.inv.old,2,1) # (T-1) * q * p
    Sigr <- tensor(temp,res,c(1,3),c(1,2))/T/p
    temp <- tensor(res,ginv(Sigr),3,1) # (T-1) * p * q
    Sigl <- tensor(temp,res,c(1,3),c(1,3))/T/q
    a <- svd(LL,nu=0,nv=0)$d[1]
    LL <- LL / a
    RR <- RR * a
    a <- eigen(Sigl)$values[1]
    Sigl <- Sigl / a
    Sigr <- Sigr * a

    # update for the next iteration
    dis1 <- sqrt(sum((kronecker(t(RR),LL)-kronecker(t(RR.old),LL.old))^2))
    dis2 <- sqrt(sum((kronecker(Sigr,Sigl)-kronecker(Sigr.old,Sigl.old))^2))
    dis <- max(dis1,dis2)
    LL.old <- LL
    RR.old <- RR
    Sigr.old <- Sigr
    Sigl.old <- Sigl
    iiter <- iiter + 1
    if(print.true==TRUE){
      print(LL)
      print(RR)
    }
  }
  Sig <- kronecker(Sigr,Sigl)
  sd <- MAR.SE(xx, t(RR), LL, Sig)
  return(list(A1=LL,A2=t(RR),res=res,Sig1=Sigl,Sig2=Sigr,Sig=Sig,niter=iiter,sd=sd,data=xx))
}


MAR.SE <- function(xx, B, A, Sigma){
  dd <- dim(xx)
  T <- dd[1]
  p <- dd[2]
  q <- dd[3]
  BX <- tensor(xx,B,3,2) # Tpq
  AX <- tensor(xx,A,2,2) # Tqp
  BXI <- apply(BX,1,function(x){kronecker(t(x),diag(p))})
  AXI <- apply(AX,1,function(x){kronecker(diag(q),t(x))})
  BXI.array <- array(BXI,c(p*q,p^2,T)) #pq*p^2*T
  AXI.array <- array(AXI,c(p*q,q^2,T)) #pq*q^2*T
  # W transpose
  Wt <- abind(BXI.array,AXI.array,along=2) #pq*(p^2+q^2)*T
  EWWt <- tensor(Wt,Wt,c(1,3),c(1,3))/T #(p^2+q^2)*(p^2+q^2)
  alpha <- as.vector(A)
  beta <- as.vector(t(B))
  gamma <- c(alpha,rep(0,q^2))
  H <- EWWt + gamma %*% t(gamma) #(p^2+q^2)*(p^2+q^2)
  WSigma <- tensor(Wt,Sigma,1,1) #(p^2+q^2)*T*pq
  EWSigmaWt <- tensor(WSigma,Wt,c(3,2),c(1,3))/T
  Hinv <- solve(H)
  Xi <- Hinv %*% EWSigmaWt %*% Hinv
  Xi <- Xi/T
}


tenAR.VAR <- function(xx, P){
  dd=dim(xx)
  t <- dd[1]
  n <- prod(dd[-1])
  if (prod(dd[-1]) > t){stop("sample size T too small")}
  yy=apply(xx, MARGIN=1, as.vector)

  x = 0
  for (l in c(1:P)){
    if (l == 1){x = yy[,(P):(t-1)]} else {x = rbind(x, yy[,(P+1-l):(t-l)])}
  }
  y = yy[,(P+1):t]
  coef = t(solve(x %*% t(x)) %*% x %*% t(y))
  res= y - coef %*% x
  phi = list()
  for (l in c(1:P)){
    phi[[l]] = coef[,(n*(l-1)+1):(n*l)]
  }
  return(list(coef=phi, res=res, data=xx))
}


tenAR.A <- function(dim,R,P,rho){
  K <- length(dim)
  A <- lapply(1:P, function(p) {lapply(1:max(1,R[p]), function(j) {lapply(1:K, function(i) {diag(dim[i])})})})
  for (p in c(1:P)){
    if (R[p] == 0) next
    for (j in c(1:R[p])){
      for (i in c(1:K)){
        A[[p]][[j]][[i]] <- matrix(rnorm(dim[i]^2), c(dim[i],dim[i]))
      }
    }
  }
  eigen = M.eigen(A, R, P, dim)
  for (l in c(1:P)){
    if (R[l] == 0) next
    for (j in c(1:R[l])){
      A[[l]][[j]][[K]] <- rho * A[[l]][[j]][[K]]/ eigen
    }
    A[[l]] <- fro.order(fro.rescale(A[[l]]))
  }
  eigen = M.eigen(A, R, P, dim)
  return(A)
}


tenAR.PROJ <- function(xx,R,P){
  dim <- dim(xx)[-1]
  if (length(dim) == 2){
    A = MAR2.PROJ(xx,R,P)
    return(list(A=A))
  }
  if (length(dim) != 3){
    stop("temporarily 'tenAR.PROJ' only support K=2,3.")
  }
  mm <- tenAR.VAR(xx, P)$coef
  A = list()
  for (p in c(1:P)){
    if (is.na(R[p])) stop("p != length(R)")
    if (R[p] == 0) next
    tt <- aperm(trearrange(mm[[p]], rev(dim)))
    A[[p]] <- fro.order(fro.rescale(ten.proj(tt, dim, R[p])))
  }
  # A1 = A
  # phi1 = kronecker(kronecker(A1[[1]][[1]][[3]], A1[[1]][[1]][[2]]), A1[[1]][[1]][[1]]) + kronecker(kronecker(A1[[1]][[2]][[3]], A1[[1]][[2]][[2]]), A1[[1]][[2]][[1]])
  # sqrt(sum((mm[[1]] - phi1)**2))
  return(list(A=A))
}


tenAR.LS <- function(xx, R, P, init.A=NULL, niter=150, tol=1e-5, print.true=FALSE){
  if (!(mode(xx) == "S4")) {xx <- as.tensor(xx)}
  if (length(P) == 2){P = P[1]; rolling=TRUE} else {rolling = FALSE} # len(P) > 1 implies rolling in func 'tenAR.predict'
  dim <- dim(xx)[-1]  # dimension of data
  K <- length(dim)  # number of tensor modes
  t <- dim(xx)[1]  # time length
  if (length(R) == 1 & P > 1){  # user can only give R1 to represent a vector R if all each term number same
    R = rep(R, P)
  }
  if (K==2|K>3){  # for initialization
    if (is.null(init.A)) {
      A.old = list()
      for (p in c(1:P)){
        if (is.na(R[p])) stop("p != length(R)")
        if (R[p] == 0) next
        A.old[[p]] <- lapply(1:R[p], function(j) {lapply(1:K, function(i) {0.5*diag(dim[i])})})
      }
    } else {A.old <- init.A}
  } else if (K==3) {  # the current code only support 3 mode tensor projection
    if (is.null(init.A)) {A.old <- tenAR.PROJ(xx@data,R,P)$A} else {A.old <- init.A}
  } else {stop("dimension K of time series must at least be 2")}
  
  A.new <- A.old
  Tol <- tol*sqrt(sum(dim^2))*sum(R)
  dis <- 1
  iiter <- 1
  AX = A.new
  ttlMode = c(1:K) + 1  # these are tensor slicing parameters for
  ttlMode2 = c(1:(K+1))
  subxx = list()
  for (l in c(0:P)){  # tensor data for each lag
    subxx[[l+1]] = abind::asub(xx, (1+P-l):(t-l), 1, drop=FALSE)
  }
 
  while(iiter <= niter & dis >= tol){
    for (p in c(1:P)){
      if (R[p] == 0) next
      for (r in c(1:R[p])){
        for (k in c(K:1)){ # update last matrix first
          AX[[p]][[r]][[k]] = rTensor::ttl(subxx[[p+1]], A.new[[p]][[r]][-k], c(2:(K+1))[-k])
          temp = AX[[p]][[r]][[k]]
          L1 <- 0
          for (l in c(1:P)){
            if (R[l] == 0 | (l == p & R[l] == 1)) next
            if(l == p){rMode = c(1:R[l])[-r]} else {rMode = c(1:R[l])}
            L1 <- L1 + Reduce("+",lapply(rMode, function(n) {rTensor::ttl(subxx[[l+1]], A.new[[l]][[n]], ttlMode)}))
          }
          temp2 <- subxx[[1]] - L1
          RR <- tensor(temp@data,temp@data,ttlMode2[-(k+1)],ttlMode2[-(k+1)])
          LL <- tensor(temp2@data,temp@data,ttlMode2[-(k+1)],ttlMode2[-(k+1)])
          A.new[[p]][[r]][[k]] <- LL %*% ginv(RR)
        }
      }
    }
    dis = ten.dis.A(A.new, A.old, R, K)
    for (p in c(1:P)){
      if (R[p] == 0) next
      A.new[[p]] <- svd.rescale(A.new[[p]])
    }
    A.old <- A.new
    iiter <- iiter + 1
    if (print.true == TRUE){
      print(dis)
      print(paste('iiter num=',iiter))
    }
  }
  for (p in c(1:P)){
    if (R[p] == 0) next
    A.new[[p]] <- fro.order(fro.rescale(A.new[[p]]))
  }
  if (rolling){ # rolling forecast, no need calculate other parameters
    return(list(A=A.new))
  }
  res <- ten.res(subxx,A.new,P,R,K,ttlMode)@data
  Sig <- matrix(tensor(res,res,1,1),prod(dim))/(t-1)
  cov = tenAR.SE.LSE(dim, R, P, K, t, AX, A.new, Sig)
  sd <- covtosd(cov, dim, R)
  bic <- IC(xx, res, R, t, dim)
  return(list(A=A.new,niter=iiter,Sig=Sig,res=res,cov=cov,sd=sd,BIC=bic,data=xx))
}


tenAR.MLE <- function(xx, R, P, init.A=NULL, init.sig=NULL, niter=150, tol=1e-5, print.true = FALSE){
  if (!(mode(xx) == "S4")) {xx <- as.tensor(xx)}
  if (length(P) == 2){P = P[1]; rolling=TRUE} else {rolling = FALSE} # len(P) > 1 implies rolling in func 'tenAR.predict'
  dim <- dim(xx)[-1]
  K <- length(dim)
  t <- dim(xx)[1]
  if (length(R) == 1 & P > 1){
    R = rep(R, P)
  }
  if (K==2|K>3){
    if (is.null(init.A)) {
      A.old = list()
      for (p in c(1:P)){
        if (is.na(R[p])) stop("p != length(R)")
        if (R[p] == 0) next
        A.old[[p]] <- lapply(1:R[p], function(j) {lapply(1:K, function(i) {0.5*diag(dim[i])})})
      }
    } else {A.old <- init.A}
  } else if (K==3) {
    if (is.null(init.A)) {A.old <- tenAR.PROJ(xx@data,R,P)$A} else {A.old <- init.A}
  } else {stop("dimension K of time series must at least be 2")}
  if (is.null(init.sig)) {Sig.old <- lapply(1:K, function(i) {diag(dim[i])})} else {Sig.old <- init.sig}
  Sig.new <- Sig.old
  Sig.new.inv <- lapply(1:K, function (k) {solve(Sig.new[[k]])})
  A.new <- A.old
  Tol <- tol*sqrt(sum(dim^2))*sum(R)
  dis <- 1
  iiter <- 1
  AX = A.new
  ttlMode = c(1:K) + 1
  ttlMode2 = c(1:(K+1))
  subxx = list()
  for (l in c(0:P)){  # tensor data for each lag
    subxx[[l+1]] = abind::asub(xx, (1+P-l):(t-l), 1, drop=FALSE)
  }
  while(iiter <= niter & dis >= Tol){
    for (p in c(1:P)){
      if (R[p] == 0) next
      for (r in c(1:R[p])){
        res.old <- ten.res(subxx,A.new,P,R,K,ttlMode)
        for (k in c(K:1)){
          rs <- rTensor::ttl(res.old, Sig.new.inv[-k], ttlMode[-k])
          Sig.new[[k]] <- tensor(res.old@data, rs@data, ttlMode2[-(k+1)],ttlMode2[-(k+1)])/(t-1)/prod(dim[-k])
          Sig.new.inv <- lapply(1:K, function (k) {ginv(Sig.new[[k]])})
        }
        for (k in c(K:1)){
          sphi <-  lapply(1:K, function (k) {Sig.new.inv[[k]] %*% (A.new[[p]][[r]][[k]])})
          AX[[p]][[r]][[k]] = rTensor::ttl(subxx[[p+1]], A.new[[p]][[r]][-k], ttlMode[-k])
          temp1 <- rTensor::ttl(subxx[[p+1]], sphi[-k], ttlMode[-k])
          L1 <- 0
          for (l in c(1:P)){
            if (R[l] == 0 | (l == p & R[l] == 1)) next
            if(l == p){rMode = c(1:R[l])[-r]} else {rMode = c(1:R[l])}
            L1 <- L1 + Reduce("+",lapply(rMode, function(n) {rTensor::ttl(subxx[[l+1]], A.new[[l]][[n]], ttlMode)}))
          }
          temp2 <- subxx[[1]] - L1
          RR <- tensor(AX[[p]][[r]][[k]]@data,temp1@data,ttlMode2[-(k+1)],ttlMode2[-(k+1)])
          LL <- tensor(temp2@data,temp1@data,ttlMode2[-(k+1)],ttlMode2[-(k+1)])
          A.new[[p]][[r]][[k]] <- LL %*% ginv(RR)
        }
      }
      A.new[[p]] <- svd.rescale(A.new[[p]])
      Sig.new <- eigen.rescale(list(Sig.new))[[1]]
    }
    dis = ten.dis.A(A.new, A.old, R, K)
    Sig.old <- Sig.new
    A.old <- A.new
    iiter <- iiter + 1
    if (print.true == TRUE){
      print(dis)
      print(paste('iiter num=',iiter))
    }
  }
  for (p in c(1:P)){
    if (R[p] == 0) next
    A.new[[p]] <- fro.order(fro.rescale(A.new[[p]]))
  }
  if (rolling){ # for rolling forecast, no need calculate other parameters
    return(list(A=A.new))
  }
  res <- ten.res(subxx,A.new,P,R,K,ttlMode)@data
  Sig <- matrix(tensor(res,res,1,1), prod(dim))/(t-1)
  SIG = rTensor::kronecker_list(rev(Sig.new))
  cov = tenAR.SE.MLE(dim, R, P, K, t, AX, A.new, SIG)
  sd <- covtosd(cov, dim, R)
  bic <- IC(xx, res, R, t, dim)
  return(list(A=A.new, SIGMA=Sig.new, niter=iiter, Sig=Sig, res=res, cov=cov, sd=sd, BIC=bic, data=xx))
}


MAR1.RR <- function(xx, k1, k2, A1.init=NULL, A2.init=NULL, niter=200, tol=1e-4,  print.true=FALSE){
  # xx: T * p * q
  # X_t = LL X_{t-1} RR' + E_t ### NOTE! This function is written with RR'.
  # Sig = cov(vec(E_t)) = Sigr \otimes Sigl
  # optimization criterion is likelihood
  # iterative algorithm between LL <--> Sigma_l <--> RR <--> Sig_r
  # Return LL, RR, Sigl, Sigr
  dd=dim(xx)
  T <- dd[1]
  p <- dd[2]
  q <- dd[3]
  if(is.null(A1.init) | is.null(A2.init)){
    init = initializer(xx, k1, k2)
  }
  if(is.null(A1.init)){
    LL.old <- init$A1
  } else{
    LL.old <- A1.init
  }
  if(is.null(A2.init)){
    RR.old <- init$A2
  } else{
    RR.old <- A2.init
  }
  Tol=tol*sqrt(p^2+q^2)
  dis <- 1
  iiter <- 1

  while(iiter <= niter & dis >= Tol){
    ## Save old
    LL.oold=LL.old
    RR.oold=RR.old
    # estimate LL0
    temp1 <- tensor(xx[1:(T-1),,,drop=FALSE],RR.old,3,2)  # (T-1) * p * q
    AA <- tensor(temp1,temp1,c(1,3),c(1,3))
    BB <- tensor(xx[2:T,,,drop=FALSE],temp1,c(1,3),c(1,3))
    LL <- BB%*%ginv(AA)
    U <- svd(LL%*%t(BB))$u[,1:k1]
    LL <- U%*%t(U)%*%LL
    # update for next iteration
    a=svd(LL,nu=0,nv=0)$d[1]
    LL=LL/a
    dis3=sum((LL-LL.old)^2)
    LL.old <- LL

    # estimate RR0
    temp1 <- tensor(xx[1:(T-1),,,drop=FALSE],LL.old,2,2)  # (T-1) * q * p
    AA <- tensor(temp1,temp1,c(1,3),c(1,3))
    BB <- tensor(xx[2:T,,,drop=FALSE],temp1,c(1,2),c(1,3))
    RR <- BB%*%ginv(AA)
    U <- svd(RR%*%t(BB))$u[,1:k2]
    RR <- U%*%t(U)%*%RR
    # update for next iteration
    dis3=dis3+sum((RR-RR.old)^2)
    RR.old <- RR

    # update for the next iteration
    dis1 <- sqrt(sum((kronecker(t(RR),LL)-kronecker(t(RR.oold),LL.oold))^2))
    dis3 = sqrt(dis3)
    dis <- dis3
    iiter <- iiter + 1

    #print(max(abs(eigen(LL)$values)))
    #print(max(abs(eigen(RR)$values)))

    if(print.true==TRUE){
      print(dis)
      print(paste('iiter num=',iiter))
    }
  }
  a <- sqrt(sum(LL^2))
  LL <- LL / a
  RR <- RR * a
  res=xx[2:T,,,drop=FALSE] - aperm(tensor(tensor(xx[1:(T-1),,,drop=FALSE],RR,3,2),LL,2,2),c(1,3,2))
  Sig <- matrix(tensor(res,res,1,1),p*q)/(T-1)
  bic <- T*p*q*log(sum(res^2/(T*p*q))) ##+log(T*p*q)*(bic.penalty(p,k1)+bic.penalty(q,k2))
  cov <- matAR.RR.se(LL,RR,k1,k2,method="RRLSE",Sigma.e=Sig,RU1=diag(k1),RV1=diag(k1),RU2=diag(k2),RV2=diag(k2),mpower=100)
  sd.A1 <- sqrt(array(diag(cov$Sigma)[1:p^2], c(p,p))/T)
  sd.A2 <- sqrt(array(diag(cov$Sigma)[(p^2+1):(p^2+q^2)], c(q,q))/T)
  #sd <- list(sqrt(array(diag(cov$Sigma)[1:p^2], c(p,p))/T), sqrt(array(diag(cov$Sigma)[(p^2+1):(p^2+q^2)], c(q,q))/T))
  loading = list(U1=svd(LL)$u,V1=svd(LL)$v,U2=svd(RR)$u,V2=svd(RR)$v)
  return(list(A1=LL,A2=RR,loading=loading,res=res,Sig=Sig,BIC=bic,niter=iiter-1,cov=cov,sd.A1=sd.A1,sd.A2=sd.A2,data=xx))
}


MAR1.CC <- function(xx,k1,k2,A1.init=NULL,A2.init=NULL,Sigl.init=NULL,Sigr.init=NULL,niter=200,tol=1e-4,print.true = FALSE){
  # xx: T * p * q
  # X_t = LL X_{t-1} RR' + E_t ### NOTE! This function is written with RR'.
  # Sig = cov(vec(E_t)) = Sigr \otimes Sigl
  # optimization criterion is likelihood
  # iterative algorithm between LL <--> Sigma_l <--> RR <--> Sig_r
  # Return LL, RR, Sigl, Sigr
  dd=dim(xx)
  T <- dd[1]
  p <- dd[2]
  q <- dd[3]
  # init = MAR1.LS(xx, k1, k2)
  if(is.null(A1.init) | is.null(A2.init)){
    init = initializer(xx, k1, k2)
  }
  if(is.null(A1.init)){
    LL.old <- init$A1
  } else{
    LL.old <- A1.init
  }
  if(is.null(A2.init)){
    RR.old <- init$A2
  } else{
    RR.old <- A2.init
  }
  # init.sig <- initializer.sig(xx)
  if(is.null(Sigl.init)){
    Sigl.old <- diag(p)
  } else{
    Sigl.old <- Sigl.init
  }
  if(is.null(Sigr.init)){
    Sigr.old <- diag(q)
  } else{
    Sigr.old <- Sigr.init
  }
  Tol=tol*sqrt(p^2+q^2)
  dis <- 1
  iiter <- 1
  while(iiter <= niter & dis >= Tol){

    ## Save old
    LL.oold=LL.old
    RR.oold=RR.old
    Sigl.oold=Sigl.old
    Sigr.oold=Sigr.old

    # estimate LL0 and Sigl
    Sigr.inv.old <- ginv(Sigr.old)
    temp1 <- tensor(xx[1:(T-1),,,drop=FALSE],RR.old,3,2)  # (T-1) * p * q
    temp2 <- tensor(temp1,Sigr.inv.old,3,1)  # (T-1) * p * q
    AA <- tensor(temp1,temp2,c(1,3),c(1,3))
    BB <- tensor(xx[2:T,,,drop=FALSE],temp2,c(1,3),c(1,3))
    LL <- BB%*%ginv(AA)
    res <- xx[2:T,,,drop=FALSE] - aperm(tensor(tensor(xx[1:(T-1),,,drop=FALSE],RR.old,3,2),LL,2,2),c(1,3,2)) # (T-1) * p * q
    temp <- tensor(res,Sigr.inv.old,3,1) # (T-1) * p * q
    Sigl <- tensor(temp,res,c(1,3),c(1,3))/T/q
    Sigl.spec <- eigen(Sigl)
    Sigl.root <- Sigl.spec$vectors%*%diag(sqrt(Sigl.spec$values))%*%t(Sigl.spec$vectors)
    Sigl.root.inv <- Sigl.spec$vectors%*%diag(1/sqrt(Sigl.spec$values))%*%t(Sigl.spec$vectors)
    U <- svd(Sigl.root.inv%*%LL%*%t(BB)%*%Sigl.root.inv)$u[,1:k1]
    LL <- Sigl.root%*%U%*%t(U)%*%Sigl.root.inv%*%LL
    res <- xx[2:T,,,drop=FALSE] - aperm(tensor(tensor(xx[1:(T-1),,,drop=FALSE],RR.old,3,2),LL,2,2),c(1,3,2)) # (T-1) * p * q
    temp <- tensor(res,Sigr.inv.old,3,1) # (T-1) * p * q
    Sigl <- tensor(temp,res,c(1,3),c(1,3))/T/q
    # update for next iteration
    a=svd(LL,nu=0,nv=0)$d[1]
    LL=LL/a
    dis3=sum((LL-LL.old)^2)
    LL.old <- LL
    Sigl.old <- Sigl


    # estimate RR0 and Sigr
    Sigl.inv.old <- ginv(Sigl.old)
    temp1 <- tensor(xx[1:(T-1),,,drop=FALSE],LL.old,2,2)  # (T-1) * q * p
    temp2 <- tensor(temp1,Sigl.inv.old,3,1)  # (T-1) * q * p
    AA <- tensor(temp1,temp2,c(1,3),c(1,3))
    BB <- tensor(xx[2:T,,,drop=FALSE],temp2,c(1,2),c(1,3))
    RR <- BB%*%ginv(AA)
    res <- xx[2:T,,,drop=FALSE] - aperm(tensor(tensor(xx[1:(T-1),,,drop=FALSE],RR,3,2),LL.old,2,2),c(1,3,2)) # (T-1) * p * q
    temp <- tensor(res,Sigl.inv.old,2,1) # (T-1) * q * p
    Sigr <- tensor(temp,res,c(1,3),c(1,2))/T/p
    Sigr.spec <- eigen(Sigr)
    Sigr.root <- Sigr.spec$vectors%*%diag(sqrt(Sigr.spec$values))%*%t(Sigr.spec$vectors)
    Sigr.root.inv <- Sigr.spec$vectors%*%diag(1/sqrt(Sigr.spec$values))%*%t(Sigr.spec$vectors)
    U <- svd(Sigr.root.inv%*%RR%*%t(BB)%*%Sigr.root.inv)$u[,1:k2]
    RR <- Sigr.root%*%U%*%t(U)%*%Sigr.root.inv%*%RR
    res <- xx[2:T,,,drop=FALSE] - aperm(tensor(tensor(xx[1:(T-1),,,drop=FALSE],RR,3,2),LL.old,2,2),c(1,3,2)) # (T-1) * p * q
    temp <- tensor(res,Sigl.inv.old,2,1) # (T-1) * q * p
    Sigr <- tensor(temp,res,c(1,3),c(1,2))/T/p
    # update for next iteration
    dis3=dis3+sum((RR-RR.old)^2)
    RR.old <- RR
    Sigr.old <- Sigr


    a <- eigen(Sigl)$values[1]
    Sigl <- Sigl / a
    Sigr <- Sigr * a
    ### cat(eigen(Sigl)$values[1]," ",eigen(Sigr)$values[1], " ")

    # update for the next iteration
    dis1 <- sqrt(sum((kronecker(t(RR),LL)-kronecker(t(RR.oold),LL.oold))^2))
    ### cat(dis1," ")
    dis2 <- sqrt(sum((kronecker(Sigr,Sigl)-kronecker(Sigr.oold,Sigl.oold))^2))
    ### cat(dis2," ",dis3,"\n")
    dis3 = sqrt(dis3)
    ### dis <- max(dis1,dis2)
    dis <- dis3
    Sigr.old <- Sigr
    Sigl.old <- Sigl
    iiter <- iiter + 1
    if(print.true==TRUE){
      print(LL)
      print(RR)
    }
  }
  a <- sqrt(sum(LL^2))
  LL <- LL / a
  RR <- RR * a
  Sig <- matrix(tensor(res,res,1,1),p*q)/(T-1)
  #Sig <- kronecker(Sigr,Sigl)
  #cov <- matAR.RR.se(LL,RR,k1,k2,method="RRMLE",Sigma1=Sigl,Sigma2=Sigr,RU1=diag(k1),RV1=diag(k1),RU2=diag(k2),RV2=diag(k2),mpower=100)
  cov = MAR1.RRCC.SE(LL,RR,k1,k2,Sigl,Sigr,RU1=diag(k1),RV1=diag(k1),RU2=diag(k2),RV2=diag(k2),mpower=100)
  sd.A1 = sqrt(array(diag(cov$Sigma)[1:p^2], c(p,p))/T)
  sd.A2 = sqrt(array(diag(cov$Sigma)[(p^2+1):(p^2+q^2)], c(q,q))/T)
  #sd <- list(sqrt(array(diag(cov$Sigma)[1:p^2], c(p,p))/T), sqrt(array(diag(cov$Sigma)[(p^2+1):(p^2+q^2)], c(q,q))/T))
  loading = list(U1=svd(LL)$u,V1=svd(LL)$v,U2=svd(RR)$u,V2=svd(RR)$v)
  return(list(A1=LL,A2=RR,loading=loading,res=res,Sig1=Sigl,Sig2=Sigr,Sig=Sig,niter=iiter-1, cov=cov, sd.A1=sd.A1, sd.A2=sd.A2, data=xx))
}


tenAR.SE.LSE <- function(dim, r, p, K, t, AX, A, Sigma){
  # dim = dim; r=R; p=P; K=K; t=t; AX = AX; A=A.new; Sigma=Sig;
  pdim = prod(dim) # d1d2d3
  fdim = dim**2
  ndim <- sum(fdim) # d1^2+d2^2+d^3
  Gamma <- matrix(0,sum(r)*ndim,sum(r)*ndim)
  n = 0
  for (i in c(1:p)){
    for (j in c(1:r[i])){
      for (k in c(1:(K-1))){
        r1 <- matrix(0, sum(r)*ndim, 1)
        a = as.vector(A[[i]][[j]][[k]])
        r1[( n*ndim + sum(fdim[0:(k-1)]) + 1): (n*ndim + sum(fdim[0:k])),] = a
        Gamma = Gamma + r1 %*% t(r1)
        if (K==2){
          for (l in c(1:r[i])){
            if (l != j){
              r1 <- matrix(0, sum(r)*ndim, 1)
              a = as.vector(A[[i]][[l]][[k]])
              r1[( n*ndim + sum(fdim[0:(k-1)]) + 1): (n*ndim + sum(fdim[0:k])),] = a
              Gamma = Gamma + r1 %*% t(r1)
            }
          }
        }
      }
      n = n + 1
    }
  }
  WT = c(); Q = list(); perm = list(); size = list()
  for (i in c(1:p)){
    for (j in c(1:r[i])){
      for (k in c(1:K)){
        if (i == 1 & j == 1){  # only initialize Q once
          size[[k]] = c(dim[k], prod(dim[-k]), t-p)
          perm[[k]] = c(k+1, (1:K)[-k] + 1, 1)
          s = if (k == K) 1 else prod(dim[(k+1):K])
          if (k == 1){
            Q[[1]] = diag(prod(dim))
          } else if (k == K){
            Q[[K]] = pm(dim[K], prod(dim[1:(K-1)]))
          } else {
            Q[[k]] = kronecker(diag(s), pm(dim[k], prod(dim[1:(k-1)])))
          }
        }
        # AXX = abind::asub(AX[[i]][[j]][[k]], (1+p-i):(t-i), 1, drop=FALSE)
        AXX = AX[[i]][[j]][[k]]
        AXfold = array(aperm(AXX@data, perm[[k]]), size[[k]])
        AXI = apply(AXfold,3,function(x){kronecker(x, diag(dim[k])) %*% Q[[k]]})
        AXI.array <- array(AXI,c(dim[k]^2,pdim,t))
        WT <- abind(WT, AXI.array,along=1)
      }
    }
  }
  WT = aperm(WT, c(3,1,2))
  WSigma <- tensor(WT,Sigma,3,1) #t*(d1^2+d2^2+d^3)*(d1d2d3)
  EWSigmaWt <- tensor(WSigma,WT,c(3,1),c(3,1))/t
  H <- tensor(WT,WT,c(3,1),c(3,1))/t + Gamma #r(d1^2+d2^2+d^2)*r(d1^2+d2^2+d^3)
  Hinv <- ginv(H)
  Xi <- Hinv %*% EWSigmaWt %*% Hinv
  return(Xi/t)
}


tenAR.SE.MLE <- function(dim, r, p, K, t, AX, A, Sigma){
  # dim = dim; r=R; p=P; K=K; t=t; AX = AX; A=A.new; Sigma=Sig;
  pdim = prod(dim) # d1d2d3
  fdim = dim**2
  ndim <- sum(fdim) # d1^2+d2^2+d^3
  Gamma <- matrix(0,sum(r)*ndim,sum(r)*ndim)
  r1 <- matrix(0, sum(r)*ndim, 1)
  n = 0
  for (i in c(1:p)){
    for (j in c(1:r[i])){
      for (k in c(1:(K-1))){
        r1 <- matrix(0, sum(r)*ndim, 1)
        a = as.vector(A[[i]][[j]][[k]])
        r1[( n*ndim + sum(fdim[0:(k-1)]) + 1): (n*ndim + sum(fdim[0:k])),] = a
        Gamma = Gamma + r1 %*% t(r1)
        if (K==2){
          for (l in c(1:r[i])){
            if (l != j){
              r1 <- matrix(0, sum(r)*ndim, 1)
              a = as.vector(A[[i]][[l]][[k]])
              r1[( n*ndim + sum(fdim[0:(k-1)]) + 1): (n*ndim + sum(fdim[0:k])),] = a
              Gamma = Gamma + r1 %*% t(r1)
            }
          }
        }
      }
      n = n + 1
    }
  }
  WT = c(); Q = list(); perm = list(); size = list()
  for (i in c(1:p)){
    for (j in c(1:r[i])){
      for (k in c(1:K)){
        if (i == 1 & j == 1){  # only initialize Q once
          size[[k]] = c(dim[k], prod(dim[-k]), t-p)
          perm[[k]] = c(k+1, (1:K)[-k] + 1, 1)
          s = if (k == K) 1 else prod(dim[(k+1):K])
          if (k == 1){
            Q[[1]] = diag(prod(dim))
          } else if (k == K){
            Q[[K]] = pm(dim[K], prod(dim[1:(K-1)]))
          } else {
            Q[[k]] = kronecker(diag(s), pm(dim[k], prod(dim[1:(k-1)])))
          }
        }
        # AXX = abind::asub(AX[[i]][[j]][[k]], (1+p-i):(t-i), 1, drop=FALSE)
        AXX = AX[[i]][[j]][[k]]
        AXfold = array(aperm(AXX@data, perm[[k]]), size[[k]])
        AXI = apply(AXfold,3,function(x){kronecker(x, diag(dim[k])) %*% Q[[k]]})
        AXI.array <- array(AXI,c(dim[k]^2,pdim,t))
        WT <- abind(WT, AXI.array,along=1)
      }
    }
  }
  WT = aperm(WT, c(3,1,2))
  WSigma <- tensor(WT,solve(Sigma),3,1) 
  EWSigmaWt <- tensor(WSigma,WT,c(3,1),c(3,1))/t
  H <- EWSigmaWt + Gamma
  Hinv <- ginv(H)
  Xi <- Hinv %*% EWSigmaWt %*% Hinv
  return(Xi/t)
}


tenAR.bic <- function(xx, rmax=5){
  if (mode(xx) != "S4") {xx <- rTensor::as.tensor(xx)}
  dim <- xx@modes[-1]
  t <- xx@modes[[1]]
  ans <- c()
  for (r in c(1:rmax)){
    est <- tenAR.LS(xx, R=r, P=1)
    res <- est$res
    ans[r] <- IC(xx,res,r,t, dim)
  }
  which.min(ans)
}

#' Plot Matrix-Valued Time Series
#'
#' Plot matrix-valued time series, can be also used to plot a given slice of a tensor-valued time series.
#'@name mplot
#'@rdname mplot
#'@aliases mplot
#'@export
#'@importFrom graphics par
#'@param xx  \eqn{T \times d_1 \times d_2} matrix-valued time series. Note that the number of mode is 3, where the first mode is time.
#'@return a figure.
#'@examples
#' dim <- c(3,3,3)
#' xx <- tenAR.sim(t=50, dim, R=2, P=1, rho=0.5, cov='iid')
#' mplot(xx[1:30,,,1])
mplot <- function(xx){
  if (mode(xx) == "S4"){xx = xx@data}
  dim = dim(xx)
  time = array(c(1:dim[1]),dim[1])
  opar <- par(mfrow=c(dim[2],dim[3]),mai=0.05*c(1,1,1,1),oma=c(2,2,0,0))
  on.exit(par(opar))
  for(i in 1:dim[2]){
    for(j in 1:dim[3]){
      if(i!=dim[2] & j!=1){
        plot(time,xx[,i,j],type='l',xaxt='n',yaxt='n',ylim=range(xx[,i,]))
      }
      if(i!=dim[2] & j==1){
        plot(time,xx[,i,j],type='l',xaxt='n',ylim=range(xx[,i,]))
      }
      if(i==dim[2] & j!=1){
        plot(time,xx[,i,j],type='l',yaxt='n',ylim=range(xx[,i,]))
      }
      if(i==dim[2] & j==1){
        plot(time,xx[,i,j],type='l',ylim=range(xx[,i,]))
      }
    }
  }

}


#' Plot ACF of Matrix-Valued Time Series
#'
#' Plot ACF of matrix-valued time series, can be also used to plot ACF of a given slice of a tensor-valued time series.
#'@name mplot.acf
#'@rdname mplot.acf
#'@aliases mplot.acf
#'@export
#'@importFrom graphics par
#'@importFrom graphics plot
#'@importFrom stats acf
#'@param xx  \eqn{T \times d_1 \times d_2} matrix-valued time series. Note that the number of mode is 3, where the first mode is time.
#'@return a figure.
#'@examples
#' dim <- c(3,3,3)
#' xx <- tenAR.sim(t=50, dim, R=2, P=1, rho=0.5, cov='iid')
#' mplot.acf(xx[1:30,,,1])
mplot.acf <- function(xx){
  if (mode(xx) == "S4"){xx = xx@data}
  dim = dim(xx)
  opar <- par(mfrow=c(dim[2],dim[3]),mai=0.05*c(1,1,1,1),oma=c(2,2,0,0))
  on.exit(par(opar))
  for(i in 1:dim[2]){
    for(j in 1:dim[3]){
      if(i!=dim[2] & j!=1){
        acf(xx[,i,j])
      }
      if(i!=dim[2] & j==1){
        acf(xx[,i,j])
      }
      if(i==dim[2] & j!=1){
        acf(xx[,i,j])
      }
      if(i==dim[2] & j==1){
        acf(xx[,i,j])
      }
    }
  }
}


# Define S3 class for the base model
tenAR <- function(model) {
  structure(model, class = "tenAR")
}


#' Predict funcions for Tensor Autoregressive Models
#'
#' S3 method for the 'tenAR' class using the generic predict function. Prediction based on the tensor autoregressive model or reduced rank MAR(1) model. If \code{rolling = TRUE}, returns the rolling forecasts.
#'@name predict.tenAR
#'@rdname predict.tenAR
#'@aliases predict.tenAR
#'@export
#'@importFrom abind abind
#'@param object a model object returned by \code{tenAR.est()}.
#'@param n.ahead prediction horizon.
#'@param xx \eqn{T^{\prime} \times d_1 \times \cdots \times d_K} new tensor time series to be used for prediction. Must have at least \code{n.ahead} length.
#'@param rolling TRUE or FALSE, rolling forecast, is FALSE by default.
#'@param n0 only if \code{rolling = TRUE}, the starting point of rolling forecast.
#'@param ... Additional arguments passed to the method.
#'@return
#'a tensor time series of length \code{n.ahead} if \code{rolling = FALSE};
#'
#'a tensor time series of length \eqn{T^{\prime} - n_0 - n.ahead + 1} if \code{rolling = TRUE}.
#'@seealso 'predict.ar' or 'predict.arima'
#'@examples
#' set.seed(333)
#' dim <- c(2,2,2)
#' t = 20
#' xx <- tenAR.sim(t, dim, R=2, P=1, rho=0.5, cov='iid')
#' est <- tenAR.est(xx, R=1, P=1, method="LSE")
#' pred <- predict(est, n.ahead = 1)
#' # rolling forcast
#' n0 = t - min(50,t/2)
#' pred.rolling <- predict(est, n.ahead = 5, xx = xx, rolling=TRUE, n0)
#'
#' # prediction for reduced rank MAR(1) model
#' dim <- c(2,2)
#' t = 20
#' xx <- tenAR.sim(t, dim, R=1, P=1, rho=0.5, cov='iid')
#' est <- matAR.RR.est(xx, method="RRLSE", k1=1, k2=1)
#' pred <- predict(est, n.ahead = 1)
#' # rolling forcast
#' n0 = t - min(50,t/2)
#' pred.rolling <- predict(est, n.ahead = 5, rolling=TRUE, n0=n0)
predict.tenAR <- function(object, n.ahead=1, xx=NULL, rolling=FALSE, n0=NULL, ...) {
  if (is.null(xx)) {xx <- object$data}
  if (is.null(object$SIGMA)){method = "LSE"} else {method = "MLE"}
  if (!is.null(object$A1)){
    A <- list(list(list(object$A1, object$A2)))
    if (is.null(object$Sig1)){
      method = "RRLSE"
    } else {
      method = "RRMLE"
    }
  } else if (!is.null(object$coef)){
    A <- object$coef
    method = "VAR"
  } else {
    A <- object$A
  }
  if (mode(xx) != "S4") {xx <- rTensor::as.tensor(xx)}
  if (rolling == TRUE){
    return(predRolling(A, xx@data, n.ahead, method, n0))
  }
  P <- length(A)
  R <- sapply(c(1:P), function(l){length(A[[l]])})
  K <- xx@num_modes - 1
  dim <- xx@modes
  ttt <- (dim[1]+1):(dim[1]+n.ahead)
  for(tt in ttt){
    L1 = 0
    for (l in c(1:P)){
      if (R[l] == 0) next
      L1 <- L1 + Reduce("+",lapply(c(1:R[l]), function(n) {rTensor::ttl(abind::asub(xx, tt-l, 1, drop=FALSE), A[[l]][[n]], (c(1:K) + 1))}))
    }
    xx <- as.tensor(abind(xx@data, L1@data, along=1))
  }
  return(abind::asub(xx@data, ttt, 1, drop=FALSE))
}


predRolling <- function(A, xx, n.ahead, method, n0){
  if (method == "RRLSE" | method == "RRMLE"){
    k1 <- rankMatrix(A[[1]][[1]][[1]])
    k2 <- rankMatrix(A[[1]][[1]][[2]])
  }
  P <- length(A)
  dim <- dim(xx)[-1]
  t <- dim(xx)[1]
  if (method == "VAR"){
    yy <- apply(xx, MARGIN=1, as.vector)
  } else {
    R <- sapply(c(1:P), function(l){length(A[[l]])})
    K <- length(dim)
  }
  if(is.null(n0)){n0 = t - min(50,t/2)}
  ttt <- (n0):(t - n.ahead)
  for(tt in ttt){
    tti <- tt - ttt[1] + 1
    print(paste("rolling forcast t =", tti))
    if (method == "RRLSE"){
      model <- MAR1.RR(abind::asub(xx, 1:tt, 1, drop=FALSE), k1, k2)
      A <- list(list(list(model$A1, model$A2)))
    } else if (method == "RRMLE"){
      model <- MAR1.CC(abind::asub(xx, 1:tt, 1, drop=FALSE), k1, k2)
      A <- list(list(list(model$A1, model$A2)))
    } else if (method == "VAR"){
      model = tenAR.est(abind::asub(xx, 1:tt, 1, drop=FALSE), P=P, method="VAR")
      A <- model$coef
    } else {
      model = tenAR.est(abind::asub(xx, 1:tt, 1, drop=FALSE), R, c(P,0), method)
      A <- model$A
    }
    
    L1 = 0
    for (l in c(1:P)){
      if (method == "VAR"){
        L1 <- L1 + array(A[[l]] %*% yy[,tt-l+1], c(1,dim(xx)[-1])) 
      } else {
        if (R[l] == 0) next
        L1 <- L1 + Reduce("+",lapply(c(1:R[l]), function(n) {rTensor::ttl(abind::asub(rTensor::as.tensor(xx), tt-l+1, 1, drop=FALSE), A[[l]][[n]], (c(1:K) + 1))}))@data
      }
    }

    if (tti == 1){xx.pred = L1} else {xx.pred = abind(xx.pred, L1, along=1)}
  }
  return(xx.pred)
}

Try the tensorTS package in your browser

Any scripts or data that you put into this service are public.

tensorTS documentation built on May 31, 2023, 7 p.m.