R/tenAR.R
In tensorTS: Factor and Autoregressive Models for Tensor Time Series

Documented in matAR.RR.est matAR.RR.se mplot mplot.acf predict.tenAR taxi.sim.AR taxi.sim.FM tenAR.est tenAR.sim

###Functions of Autoregressive Models

#' Generate TenAR(p) tensor time series
#'
#' Simulate from the TenAR(p) model.
#'@name tenAR.sim
#'@rdname tenAR.sim
#'@aliases tenAR.sim
#'@usage tenAR.sim(t, dim, R, P, rho, cov, A = NULL, Sig = NULL)
#'@export
#'@import tensor rTensor
#'@importFrom MASS ginv
#'@importFrom stats rnorm
#'@importFrom pracma randortho
#'@importFrom Matrix rankMatrix
#'@param t length of output series, a strictly positive integer.
#'@param dim dimension of the tensor at each time.
#'@param R Kronecker rank for each lag.
#'@param P autoregressive order.
#'@param rho spectral radius of coefficient matrix \eqn{\Phi}.
#'@param cov covariance matrix of the error term: diagonal ("iid"), separable ("mle"), random ("svd").
#'@param A coefficient matrices. If not provided, they are randomly generated according to given \code{dim}, \code{R}, \code{P} and \code{rho}.
#' It is a multi-layer list, the first layer for the lag \eqn{1 \le i \le P}, the second the term \eqn{1 \le r \le R}, and the third the mode \eqn{1 \le k \le K}.
#' See "Details" of \code{\link{tenAR.est}}.
#'@param Sig only if \code{cov=mle}, a list of initial values of \eqn{\Sigma_1,\ldots,\Sigma_K}. The default are identity matrices.
#'@return A tensor-valued time series generated by the TenAR(p) model.
#'@seealso \code{\link{tenFM.sim}}
#'@examples
#' set.seed(123)
#' dim <- c(3,3,3)
#' xx <- tenAR.sim(t=500, dim, R=2, P=1, rho=0.5, cov='iid')
tenAR.sim <- function(t, dim, R, P, rho, cov, A=NULL, Sig=NULL){
  if (missing(A) || is.null(A)){A <- tenAR.A(dim, R, P, rho)}
  K <- length(A[[1]][[1]])
  P = length(A)
  R = c()
  for (l in c(1:P)){
    R[l] = length(A[[l]])
  }
  dim <- c()
  for (i in c(1:K)){
    dim[i] <- nrow(A[[1]][[1]][[i]])
  }
  phi = list()
  for (l in c(1:P)){
    if (R[l] == 0){
      phi[[l]] = 0
    } else {
      phi[[l]] <-  Reduce("+", lapply(1:R[l], function(j) {rTensor::kronecker_list(rev(A[[l]][[j]]))}))
    }
  }
  
  x <- array(0, c(t+500,prod(dim)))
  for (l in c(1:P)){
    x[l,] <- rnorm(prod(dim))
  }

  if (cov == "mle"){
    if (missing(Sig) || is.null(Sig)){
      Sig.true <- lapply(1:K, function(i){
        Q <- pracma::randortho(dim[i])
        D <- abs(diag(rnorm(dim[i])))
        Q %*% D %*% t(Q)
      })
      Sig.true <- fro.rescale(list(Sig.true))[[1]]
      Sig.true.sqrt <- lapply(1:K, function(i){
        pracma::sqrtm(Sig.true[[i]])$B
      })
    } else {
      Sig.true = Sig
      Sig.true.sqrt <- lapply(1:K, function(i){
        pracma::sqrtm(Sig.true[[i]])$B
      })
    }
    E = rTensor::kronecker_list(rev(Sig.true.sqrt))
  }

  if (cov == "svd"){
    if (missing(Sig) || is.null(Sig)){
      Q <- pracma::randortho(prod(dim))
      D <- sqrt(abs(diag(rnorm(prod(dim)))))
      E <- Q %*% D
    } else {
      E <- Sig
    }
  }

  for (i in c((P+1):(500 + t))){ # burning number = 500
    if (cov == "iid"){
      e <- rnorm(prod(dim), mean=0, sd=1)
    } else if (cov == "mle"){
      e <- E %*% rnorm(prod(dim))
    } else if (cov == "svd"){
      e <- E %*% rnorm(prod(dim))
    } else {
      stop("Please specify cov")
    }
    temp = 0
    for (l in c(1:P)){
      temp = temp + phi[[l]] %*% x[i-l, ]
    }
    x[i,] <-  temp + e
  }
  return(array(x[501:(500+t),], c(t, dim)))
}


#' Simulate taxi data by tenAR models
#'
#' Simulate tensor time series by autoregressive models, using estimated coefficients by taxi data.
#'@name taxi.sim.AR
#'@rdname taxi.sim.AR
#'@aliases taxi.sim.AR
#'@usage taxi.sim.AR(t, print.tar.coef=FALSE, print.sig=FALSE, seed=123)
#'@export
#'@param t length of output series.
#'@param print.tar.coef print coefficients, default FALSE.
#'@param print.sig print covariance matrices, default FALSE.
#'@param seed random seed.
#'@return A tensor-valued time series of dimension (5,5,7).
#'@seealso \code{\link{taxi.sim.FM}}
#'@examples
#' xx = taxi.sim.AR(t=753)
taxi.sim.AR <- function(t, print.tar.coef=FALSE, print.sig=FALSE, seed=123){
  K <- 3
  dim <- c(5,5,7)
  x <- array(0, c(t,prod(dim)))
  Tar.coef <- list(list(list(),list()))
  Tar.sig <- list()
  Tar.coef[[1]][[1]][[1]] = 
    array(c(0.0532,0.0220,-0.0090,-0.1021,0.0176,
            0.1635,0.1503, 0.1232, 0.0951,0.1740,
            0.0361,0.0707, 0.0657, 0.0765,0.1113,
            0.5076,0.4123, 0.3168, 0.0612,0.4606,
            0.1972,0.1400, 0.1434, 0.1353,0.1092), c(5,5))
  Tar.coef[[1]][[1]][[2]] = 
    array(c(-0.1095,-0.0493,-0.0445, 0.0073,-0.0727,
            0.2774, 0.1965, 0.1307, 0.1345, 0.1493,
            0.1715, 0.0872, 0.0826, 0.0698, 0.0963,
            -0.6000,-0.3752,-0.2758,-0.1281,-0.3728,
            0.0552, 0.0257, 0.0204, 0.0994,-0.0321), c(5,5))
  Tar.coef[[1]][[1]][[3]] = 
    array(c(0.2556, 0.1556, 0.3113,0.3996,0.5329,0.4577,0.5227,
            0.2699, 0.2802, 0.2222,0.3630,0.5680,0.7809,0.5239,
            0.7632, 0.4705, 0.4032,0.4098,0.3075,0.3417,0.3929,
            0.4776, 0.4051, 0.4617,0.4953,0.4097,0.2892,0.4677,
            -0.0580,-0.0485,-0.0499,0.2356,0.1852,0.2084,0.1204,
            0.1758, 0.4057, 0.4102,0.3457,0.5127,0.5856,0.5199,
            0.1707, 0.3203, 0.2702,0.1234,0.2110,0.2197,0.4511), c(7,7))
  Tar.coef[[1]][[2]][[1]] = 
    array(c(0.4287,-0.0358,0.0367,-0.0044, 0.0043,
            -0.0723, 0.3955,0.0167,-0.0178,-0.0271,
            -0.0556, 0.0213,0.2698,-0.1532, 0.0222,
            0.1399, 0.1557,0.0894, 0.6041, 0.12063,
            -0.0425,-0.0380,0.0340, 0.1001, 0.3234), c(5,5))
  Tar.coef[[1]][[2]][[2]] = 
    array(c(-0.3992,-0.0254, 0.0234, 0.0077, 0.0580,
            -0.1088,-0.3678,-0.1012, 0.0192,-0.0521,
            -0.1378,-0.1245,-0.4167,-0.0181,-0.1964,
            -0.1222,-0.0307,-0.0721,-0.3552,-0.1618,
            0.0074,-0.0325,-0.0861,-0.0201,-0.4961), c(5,5))
  Tar.coef[[1]][[2]][[3]] = 
    array(c(-1.0060,-0.1431,-0.0633,-0.2255,-0.0677,-0.0829, 0.0056,
            -0.1196,-0.7815,-0.1571,-0.0798,-0.0760,-0.0424,-0.0654,
            0.0331,-0.0809,-0.6270,-0.0962,-0.0987,-0.0991,-0.0213,
            -0.0626,-0.0728,-0.1158,-0.4132,-0.0885, 0.0068,-0.0132,
            -0.0897,-0.1278,-0.0521,-0.1041,-0.4861,-0.0448,-0.1392,
            0.0070,-0.0662,-0.1004,-0.0846,-0.1224,-0.4761,-0.1168,
            -0.0455,-0.2491,-0.3330,-0.3430,-0.3031,-0.3690,-0.6041), c(7,7))
  Tar.sig[[1]] = 
    array(c(0.7754,0.1420,0.1163,0.0663,0.1333,
            0.1420,0.5846,0.0773,0.0522,0.0844,
            0.1163,0.0773,0.4227,0.0455,0.0829,
            0.0663,0.0522,0.0455,0.7122,0.0454,
            0.1333,0.0844,0.0829,0.0454,0.5275), c(5,5))
  Tar.sig[[2]] = 
    array(c(0.8059,0.1585,0.1290,0.0492,0.1474,
            0.1585,0.4694,0.0801,0.0240,0.0840,
            0.1290,0.0801,0.4033,0.0299,0.0935,
            0.0492,0.0240,0.0299,0.3690,0.0320,
            0.1474,0.0840,0.0935,0.0320,0.5459), c(5,5))
  Tar.sig[[3]] = 
    array(c(156.7240,38.4326, 26.3676, 23.1721, 18.9742, 16.0869, 16.2405,
            38.4326, 142.4746,32.9095, 29.2238, 24.4317, 21.5026, 17.6571,
            26.3676, 32.9095, 139.7549,37.2252, 31.3168, 26.6982, 20.1618,
            23.1721, 29.2238, 37.2252, 150.7263,44.4215, 38.4494, 25.9977,
            18.9742, 24.4317, 31.3168, 44.4215, 156.0611,49.1344, 30.9949,
            16.0869, 21.5026, 26.6982, 38.4494, 49.1344, 170.4497,36.1325,
            16.2405, 17.6571, 20.1618, 25.9977, 30.9949, 36.1325, 157.0083), c(7,7))
  set.seed(seed)
  x = tenAR.sim(t, dim, R=2, P=1, rho=0.8, cov="mle", A=Tar.coef, Sig=Tar.sig)
  
  if(print.tar.coef==TRUE){
    print('Coefficient matrices:')
    print(Tar.coef)
  }
  if(print.sig==TRUE){
    print('Covariance matrices:')
    print(Tar.sig)
  }
  
  return(x)
}


#' Simulate taxi data by factor models
#'
#' Simulate tensor time series by factor models, using estimated coefficients by taxi data.
#'@name taxi.sim.FM
#'@rdname taxi.sim.FM
#'@aliases taxi.sim.FM
#'@usage taxi.sim.FM(t, print.tar.coef=FALSE, print.loading=FALSE, seed=216)
#'@export
#'@import tensor
#'@importFrom stats rnorm
#'@param t length of output series.
#'@param print.tar.coef print coefficients used for simulation, default FALSE.
#'@param print.loading print loading matrices used for simulation, default FALSE.
#'@param seed random seed.
#'@return A tensor-valued time series of dimension (4,4,3).
#'@seealso \code{\link{taxi.sim.AR}}
#'@examples
#' xx = taxi.sim.FM(t=252)
taxi.sim.FM <- function(t,print.tar.coef=FALSE,print.loading=FALSE,seed=216){
  dims <- c(4,4,3)
  Tar.coef <- list(list(list()))
  Tar.coef[[1]][[1]][[1]] = array(c(-0.4163,0.0603,-0.0199,0.0598,
                                    -0.1268,-0.6219,-0.0551,-0.0251,
                                    -0.0127,-0.0001,-0.4572,-0.0376,
                                    0.0609,0.0252,0.0629,-0.4402),c(4,4))
  Tar.coef[[1]][[1]][[2]] = array(c(-0.5453,-0.0369,0.0001,-0.1130,
                                    -0.0373,-0.3590,-0.0214,0.0495,
                                    0.0143,0.0460,-0.5629,0.1233,
                                    -0.0004,-0.0562,-0.0165,-0.4665),c(4,4))
  Tar.coef[[1]][[1]][[3]] = array(c(2.4676,-0.1332,-0.8460,
                                    0.0790,2.7971,1.0344,
                                    0.0161,0.1530,2.2103),c(3,3))
  
  set.seed(seed)
  Ft.sim = tenAR.sim(t,c(4,4,3),1,1,0.75,cov='iid',A=Tar.coef)
  TenFM.loading <- list()
  TenFM.loading[[1]] <- array(c(-0.0174,0.5156,0.7721,-0.0091,
                                0.0144,0.0642,-0.0669,0.2077,
                                0.1589,-0.1657,0.1534,0.0974,
                                -0.0244,-0.2971,0.1886,0.4857,
                                0.5956,0.4564,0.0048,0.0893,
                                0.0954,0.1663,-0.1619,-0.0754,
                                0.0425,0.1996,-0.1284,0.0394,
                                0.1303,-0.075,0.9188,0.0558,
                                0.2527,-0.0502,0.0412,-0.0475
                                ,0.0488,0.1473,-0.0298,0.0373,
                                -0.0908,-0.0362,0.0222,0.217,
                                -0.0499,0.8853,0.2375,0.2719),c(12,4))
  TenFM.loading[[2]] <- array(c(0.0702,0.1259,0.0871,0.0326,
                                -0.1502,-0.0305,-0.0944,0.1303,
                                -0.0689,0.8668,0.326,0.2426,
                                0.0149,-0.1486,-0.003,0.3198,
                                0.6435,0.5675,0.2212,0.0831,
                                0.0294,0.2313,-0.1049,-0.135,
                                0.1907,0.3001,-0.4953,0.0643,
                                0.1615,-0.4072,0.6467,-0.0243,
                                0.0842,0.0563,0.04,0.0406,
                                -0.0172,0.4402,0.679,0.0091,
                                0.0411,0.0321,0.2927,0.2469,
                                0.3524,-0.1841,0.0998,0.1658),c(12,4))
  TenFM.loading[[3]] <- array(c(0.0154,-0.0069,-0.0202,-0.0274,0.012,0.1211,
                                0.5732,0.6597,0.2467,0.0892,0.0782,-0.0387,
                                -0.1062,-0.1301,-0.1278,-0.0974,-0.0549,-0.0906,
                                -0.1478,1e-04,0.0444,0.146,0.1595,0.0884,
                                -0.0185,-9e-04,0.0081,0.0133,0.0073,0.0038,
                                -0.0264,0.0503,0.4405,0.5292,0.3513,0.3057,
                                0.3002,0.2485,0.1912,0.1482,0.0859,0.1151,
                                0.1946,0.0649,-0.0482,-0.1162,-0.1059,-0.0711,
                                0.1099,0.0588,0.0311,0.0172,0.012,0.0164,
                                0.033,0.0503,-0.1235,-0.1558,-0.0354,0.0292,
                                0.0678,0.1212,0.1926,0.2267,0.2584,0.3059,
                                0.2854,0.3422,0.3934,0.391,0.3234,0.2422),c(24,3))
  if(print.tar.coef==TRUE){
    print('Tensor Autoregressive Coefficient matrices used to simulate the tensor factor data:')
    print(Tar.coef)
  }
  if(print.loading==TRUE){
    print('Tensor Factor loading matrices used to simulate the tensor data:')
    print(TenFM.loading)
  }
  Xt.sim = tensor(tensor(tensor(Ft.sim,TenFM.loading[[1]],2,2),
                         TenFM.loading[[2]],2,2),
                  TenFM.loading[[3]],2,2)
  set.seed(seed)
  y.midtown = Xt.sim*10 + array(rnorm(prod(dim(Xt.sim))),dim(Xt.sim))
  return(y.midtown)
}


#' Estimation for Autoregressive Model of Tensor-Valued Time Series
#'
#' Estimation function for tensor autoregressive models. Methods include
#' projection (PROJ), Least Squares (LSE), maximum likelihood estimation (MLE)
#' and vector autoregressive model (VAR), as determined by the value of \code{method}.
#'@details
#' Tensor autoregressive model (of autoregressive order one) has the form:
#' \deqn{X_t = \sum_{r=1}^R X_{t-1} \times_{1}  A_1^{(r)} \times_{2}  \cdots \times_{K} A_K^{(r)} + E_t,}
#' where \eqn{A_k^{(r)}} are \eqn{d_k \times d_k} coefficient matrices, \eqn{k=1,\cdots,K}, and \eqn{E_t} is a tensor white noise. \eqn{R} is the Kronecker rank.
#' The model of autoregressive order \eqn{P} takes the form
#' \deqn{X_t = \sum_{i=1}^{P} \sum_{r=1}^{R_i} X_{t-i} \times_{1} A_{1}^{(ir)} \times_{2}  \cdots \times_{K} A_{K}^{(ir)} + E_t.}
#' For the "MLE" method, we also assume,
#'  \deqn{\mathrm{Cov}(\mathrm{vec}(E_t))= \Sigma_K \otimes \Sigma_{K-1} \otimes \cdots \otimes \Sigma_1,}
#'@name tenAR.est
#'@rdname tenAR.est
#'@aliases tenAR.est
#'@usage tenAR.est(xx,R=1,P=1,method="LSE",init.A=NULL,init.sig=NULL,niter=150,tol=1e-6)
#'@export
#'@import tensor rTensor
#'@importFrom methods new
#'@param xx \eqn{T \times d_1 \times \cdots \times d_K} tensor-valued time series, \eqn{T} is the length of the series.
#'@param method character string, specifying the type of the estimation method to be used. \describe{
#'  \item{\code{"PROJ",}}{Projection method.}
#'  \item{\code{"LSE",}}{Least squares.}
#'  \item{\code{"MLE",}}{MLE under a separable cov(vec(\eqn{E_t})).}
#'  \item{\code{"VAR",}}{VAR(\eqn{P}) model for the \eqn{\mathrm{vec}(E_t)}.}
#'}
#'@param R Kronecker rank for each lag, a vector for \eqn{P>1}, a positive integer, it assumes same number of terms in each lag.
#'@param P Autoregressive order, a positive integer.
#'@param init.A initial values of coefficient matrices \eqn{A_k^{(ir)}} in estimation algorithms, which is a multi-layer list such that
#' the first layer for the lag \eqn{1 \le i \le P}, the second the term \eqn{1 \le r \le R}, and the third the mode \eqn{1 \le k \le K}.
#' See "Details". By default, we use PROJ estimators as initial values.
#'@param init.sig only if \code{method=MLE}, a list of initial values of \eqn{\Sigma_1,\ldots,\Sigma_K}. The default are identity matrices.
#'@param niter maximum number of iterations if error stays above \code{tol}.
#'@param tol error tolerance in terms of the Frobenius norm.
#'@return return a list containing the following:\describe{
#'\item{\code{A}}{a list of estimated coefficient matrices \eqn{A_k^{(ir)}}. It is a multi-layer list,
#' the first layer for the lag \eqn{1 \le i \le P}, the second the term \eqn{1 \le r \le R}, and the third the mode \eqn{1 \le k \le K}. See "Details".}
#'\item{\code{SIGMA}}{only if \code{method=MLE}, a list of estimated \eqn{\Sigma_1,\ldots,\Sigma_K}.}
#'\item{\code{res}}{residuals}
#'\item{\code{Sig}}{sample covariance matrix of the residuals vec(\eqn{\hat E_t}).}
#'\item{\code{cov}}{grand covariance matrix of all estimated entries of \eqn{A_k^{(ir)}}}
#'\item{\code{sd}}{standard errors of the coefficient matrices \eqn{A_k^{(ir)}}, returned as a list aligned with \code{A}.}
#'\item{\code{niter}}{number of iterations.}
#'\item{\code{BIC}}{value of extended Bayesian information criterion.}
#'}
#'@references
#'Rong Chen, Han Xiao, and Dan Yang. "Autoregressive models for matrix-valued time series". Journal of Econometrics, 2020.
#'
#'Zebang Li, Han Xiao. "Multi-linear tensor autoregressive models". arxiv preprint arxiv:2110.00928 (2021).
#'
#'@examples
#' set.seed(333)
#'
#' # case 1: tensor-valued time series
#'
#' dim <- c(2,2,2)
#' xx <- tenAR.sim(t=100, dim, R=2, P=1, rho=0.5, cov='iid')
#' est <- tenAR.est(xx, R=2, P=1, method="LSE") # two-term tenAR(1) model
#' A <- est$A # A is a multi-layer list
#'
#' length(A) == 1 # TRUE, since the order P = 1
#' length(A[[1]]) == 2 # TRUE, since the number of terms R = 2
#' length(A[[1]][[1]]) == 3 # TRUE, since the mode K = 3
#'
#' # est <- tenAR.est(xx, R=c(1,2), P=2, method="LSE") # tenAR(2) model with R1=1, R2=2
#'
#' # case 2: matrix-valued time series
#'
#' dim <- c(2,2)
#' xx <- tenAR.sim(t=100, dim, R=2, P=1, rho=0.5, cov='iid')
#' est <- tenAR.est(xx, R=2, P=1, method="LSE") # two-term MAR(1) model 
#' A <- est$A # A is a multi-layer list
#'
#' length(A) == 1 # TRUE, since the order P = 1
#' length(A[[1]]) == 2 # TRUE, since the number of terms R = 2
#' length(A[[1]][[1]]) == 2 # TRUE, since the mode K = 2
tenAR.est <- function(xx, R=1, P=1, method="LSE", init.A=NULL, init.sig=NULL, niter=150, tol=1e-6){
  if (identical("PROJ", method)) {
    model <- tenAR.PROJ(xx, R, P)
  } else if (identical("LSE", method)) {
    model <- tenAR.LS(xx, R, P, init.A, niter, tol, print.true=FALSE)
  } else if (identical("MLE", method)) {
    model <- tenAR.MLE(xx, R, P, init.A, init.sig, niter, tol, print.true=FALSE)
  } else if (identical("VAR", method)) {
    model <- tenAR.VAR(xx, P)
  } else {
    stop("Please specify the type you want to use. See manuals or run ?tenAR for details.")
  }
  # Create an S3 object of the tenAR class
  tenAR(model)
}


#' Estimation for Reduced Rank MAR(1) Model
#'
#' Estimation of the reduced rank MAR(1) model, using least squares (RRLSE) or MLE (RRMLE), as determined by the value of \code{method}.
#'@details
#' The reduced rank MAR(1) model takes the form:
#' \deqn{X_t =  A_1 X_{t-1} A_2^{^\top} + E_t,}
#' where \eqn{A_i} are \eqn{d_i \times d_i} coefficient matrices of ranks \eqn{\mathrm{rank}(A_i) = k_i \le d_i}, \eqn{i=1,2}. For the MLE method we also assume
#'  \deqn{\mathrm{Cov}(\mathrm{vec}(E_t))=\Sigma_2 \otimes \Sigma_1}
#'
#'@name matAR.RR.est
#'@rdname matAR.RR.est
#'@aliases matAR.RR.est
#'@usage matAR.RR.est(xx, method, A1.init=NULL, A2.init=NULL,Sig1.init=NULL,Sig2.init=NULL,
#'k1=NULL, k2=NULL, niter=200,tol=1e-4)
#'@export
#'@import tensor rTensor expm
#'@importFrom MASS ginv
#'@param xx \eqn{T \times d_1 \times d_2} matrix-valued time series, \eqn{T} is the length of the series.
#'@param method character string, specifying the method of the estimation to be used. \describe{
#'  \item{\code{"RRLSE",}}{Least squares.}
#'  \item{\code{"RRMLE",}}{MLE under a separable cov(vec(\eqn{E_t})).}
#'}
#'@param A1.init initial value of \eqn{A_1}. The default is the identity matrix.
#'@param A2.init  initial value of \eqn{A_2}. The default is the identity matrix.
#'@param Sig1.init only if \code{method=RRMLE}, initial value of \eqn{\Sigma_1}. The default is the identity matrix.
#'@param Sig2.init only if \code{method=RRMLE}, initial value of \eqn{\Sigma_2}. The default is the identity matrix.
#'@param k1  rank of \eqn{A_1}, a positive integer.
#'@param k2  rank of \eqn{A_2}, a positive integer.
#'@param niter maximum number of iterations if error stays above \code{tol}.
#'@param tol tolerance in terms of the Frobenius norm.
#'@param niter maximum number of iterations if error stays above \code{tol}.
#'@param tol relative Frobenius norm error tolerance.
#'@return return a list containing the following:\describe{
#'\item{\code{A1}}{estimator of \eqn{A_1}, a \eqn{d_1} by \eqn{d_1} matrix.}
#'\item{\code{A2}}{estimator of \eqn{A_2}, a \eqn{d_2} by \eqn{d_2} matrix.}
#'\item{\code{loading}}{a list of estimated \eqn{U_i}, \eqn{V_i}, 
#' where we write \eqn{A_i=U_iD_iV_i} as the singular value decomposition (SVD) of \eqn{A_i}, \eqn{i = 1,2}.}
#'\item{\code{Sig1}}{only if \code{method=MLE}, when \eqn{\mathrm{Cov}(\mathrm{vec}(E_t))=\Sigma_2 \otimes \Sigma_1}.}
#'\item{\code{Sig2}}{only if \code{method=MLE}, when \eqn{\mathrm{Cov}(\mathrm{vec}(E_t))=\Sigma_2 \otimes \Sigma_1}.}
#'\item{\code{res}}{residuals.}
#'\item{\code{Sig}}{sample covariance matrix of the residuals vec(\eqn{\hat E_t}).}
#'\item{\code{cov}}{a list containing \describe{
#'  \item{\code{Sigma}}{asymptotic covariance matrix of (vec( \eqn{\hat A_1}),vec(\eqn{\hat A_2^{\top}})).}
#'  \item{\code{Theta1.u}, \code{Theta1.v}}{asymptotic covariance matrix of vec(\eqn{\hat U_1}), vec(\eqn{\hat V_1}).}
#'  \item{\code{Theta2.u}, \code{Theta2.v}}{asymptotic covariance matrix of vec(\eqn{\hat U_2}), vec(\eqn{\hat V_2}).}
#'}}
#'\item{\code{sd.A1}}{element-wise standard errors of \eqn{\hat A_1}, aligned with \code{A1}.}
#'\item{\code{sd.A2}}{element-wise standard errors of \eqn{\hat A_2}, aligned with \code{A2}.}
#'\item{\code{niter}}{number of iterations.}
#'\item{\code{BIC}}{value of the extended Bayesian information criterion.}
#'}
#'@references
#'Reduced Rank Autoregressive Models for Matrix Time Series, by Han Xiao, Yuefeng Han, Rong Chen and Chengcheng Liu.
#'@examples
#' set.seed(333)
#' dim <- c(3,3)
#' xx <- tenAR.sim(t=500, dim, R=2, P=1, rho=0.5, cov='iid')
#' est <- matAR.RR.est(xx, method="RRLSE", k1=1, k2=1)
matAR.RR.est <- function(xx, method, A1.init=NULL, A2.init=NULL, Sig1.init=NULL, Sig2.init=NULL,k1=NULL, k2=NULL, niter=200,tol=1e-4){
  if (identical("PROJ", method)) {
    model <- MAR1.PROJ(xx) # just keep it there
  } else if (identical("LSE", method)) {
    model <- MAR1.LS(xx, niter=niter, tol=tol) # just keep it there
  } else if (identical("MLE", method)) {
    model <- MAR1.MLE(xx, Sigl.init=Sig1.init,Sigr.init=Sig2.init, niter=niter, tol=tol) # just keep it there
  } else if (identical("VAR", method)) {
    model <- tenAR.VAR(xx, P=1)
  } else if (identical("RRLSE", method)) {
    model <- MAR1.RR(xx=xx, k1=k1, k2=k2, A1.init=A1.init, A2.init=A2.init, niter=niter, tol=tol)
  } else if (identical("RRMLE", method)) {
    model <- MAR1.CC(xx=xx, k1=k1, k2=k2, A1.init=A1.init, A2.init=A2.init, Sigl.init=Sig1.init, Sigr.init=Sig2.init, niter=niter, tol=tol)
  } else {
    stop("Please specify the method in MAR.")
  }
  # Create an S3 object of the tenAR class
  tenAR(model)
}


#' Asymptotic Covariance Matrix of One-Term Reduced rank MAR(1) Model
#'
#' Asymptotic covariance matrix of the reduced rank MAR(1) model. If \code{Sigma1} and \code{Sigma2} is provided in input,
#' we assume a separable covariance matrix, Cov(vec(\eqn{E_t})) = \eqn{\Sigma_2 \otimes \Sigma_1}.
#'@name matAR.RR.se
#'@rdname matAR.RR.se
#'@aliases matAR.RR.se
#'@usage matAR.RR.se(A1,A2,k1,k2,method,Sigma.e=NULL,Sigma1=NULL,Sigma2=NULL,RU1=diag(k1),
#'RV1=diag(k1),RU2=diag(k2),RV2=diag(k2),mpower=100)
#'@import tensor rTensor expm
#'@importFrom MASS ginv
#'@export
#'@param A1 left coefficient matrix.
#'@param A2 right coefficient matrix.
#'@param k1 rank of \eqn{A_1}.
#'@param k2 rank of \eqn{A_2}.
#'@param method character string, specifying the method of the estimation to be used. \describe{
#'  \item{\code{"RRLSE",}}{Least squares.}
#'  \item{\code{"RRMLE",}}{MLE under a separable cov(vec(\eqn{E_t})).}
#'}
#'@param Sigma.e only if \code{method} = "RRLSE". Cov(vec(\eqn{E_t})) = Sigma.e: covariance matrix of dimension \eqn{(d_1 d_2) \times (d_1 d_2)}
#'@param Sigma1,Sigma2 only if \code{method} = "RRMLE". Cov(vec(\eqn{E_t})) = \eqn{\Sigma_2 \otimes \Sigma_1}. \eqn{\Sigma_i} is \eqn{d_i \times d_i}, \eqn{i=1,2}.
#'@param RU1,RV1,RU2,RV2 orthogonal rotations of \eqn{U_1,V_1,U_2,V_2}, e.g., new_U1=U1 \code{RU1}.
#'@param mpower truncate the VMA(\eqn{\infty}) representation of vec(\eqn{X_t}) at \code{mpower} for the purpose of calculating the autocovariances. The default is 100.
#'@return a list containing the following:\describe{
#'\item{\code{Sigma}}{asymptotic covariance matrix of (vec(\eqn{\hat A_1}),vec(\eqn{\hat A_2^T})).}
#'\item{\code{Theta1.u}}{asymptotic covariance matrix of vec(\eqn{\hat U_1}).}
#'\item{\code{Theta1.v}}{asymptotic covariance matrix of vec(\eqn{\hat V_1}).}
#'\item{\code{Theta2.u}}{asymptotic covariance matrix of vec(\eqn{\hat U_2}).}
#'\item{\code{Theta2.v}}{asymptotic covariance matrix of vec(\eqn{\hat V_2}).}
#'}
#'@references
#'Han Xiao, Yuefeng Han, Rong Chen and Chengcheng Liu, Reduced Rank Autoregressive Models for Matrix Time Series.
matAR.RR.se <- function(A1,A2,k1,k2,method,Sigma.e=NULL,Sigma1=NULL,Sigma2=NULL,RU1=diag(k1),RV1=diag(k1),RU2=diag(k2),RV2=diag(k2),mpower=100){
  if (method == "RRLSE"){
    return(MAR1.RRLS.SE(A1,A2,k1,k2,Sigma.e,RU1=diag(k1),RV1=diag(k1),RU2=diag(k2),RV2=diag(k2),mpower=100))
  } else if (method == "RRMLE") {
    return(MAR1.RRCC.SE(A1,A2,k1,k2,Sigma1,Sigma2,RU1=diag(k1),RV1=diag(k1),RU2=diag(k2),RV2=diag(k2),mpower=100))
  } else {
    stop("please specify method.")
  }
}


MAR1.RRLS.SE <- function(A1,A2,k1,k2,Sigma.e,RU1=diag(k1),RV1=diag(k1),RU2=diag(k2),RV2=diag(k2),mpower=100){
  # iterative least square
  # X_t = A1 X_{t-1} A2^T + E_t
  # RU1,RV1,RU2,RV2: rotation of U1,V1,U2,V2, e.g., new_U1=U1 RU1
  # Cov(vec(E_t)) = Sigma.e : of dimension d1d2\times d1d2
  # truncate vec(X_t) to mpower term, i.e. VMA(mpower)
  # return Sigma.LS: asymptotic covariance matrix of (vec(\hat A1),vec(\hat A2^T))
  #  Theta1.LS.u, Theta1.LS.v: asymptotic covariance matrix of vec(\hat U1), vec(\hat V1)
  #  Theta2.LS.u, Theta2.LS.v: asymptotic covariance matrix of vec(\hat U2), vec(\hat V2)
  d1 <- dim(A1)[1]
  d2 <- dim(A2)[1]
  Sigma.ee <- array(Sigma.e,c(d1,d2,d1,d2))
  B <- kronecker(A2,A1)
  #Sigma.x.vec, Sigma.xx: covariance matrix of vec(X_t)
  Sigma.x.vec <- Sigma.e
  for(i in 1:mpower){
    Sigma.x.vec <- Sigma.x.vec+(B%^%i)%*%Sigma.e%*%t(B%^%i)
  }
  Sigma.xx <- array(Sigma.x.vec,c(d1,d2,d1,d2))
  Gamma1 <- tensor(Sigma.xx,t(A1)%*%A1,c(1,3),c(1,2))
  Gamma2 <- tensor(Sigma.xx,t(A2)%*%A2,c(2,4),c(1,2))
  Gamma3 <- kronecker(diag(1,d1),A1)%*%matrix(aperm(Sigma.xx,c(3,1,4,2)),d1^2,d2^2)%*% kronecker(t(A2),diag(1,d2))
  H <- matrix(NA_real_,d1^2+d2^2,d1^2+d2^2)
  H[1:d1^2,1:d1^2] <- as.vector(A1)%*%t(as.vector(A1)) + kronecker(Gamma2,diag(1,d1))
  H[1:d1^2,(d1^2+1):(d1^2+d2^2)] <- Gamma3
  H[(d1^2+1):(d1^2+d2^2),1:d1^2] <- t(Gamma3)
  H[(d1^2+1):(d1^2+d2^2),(d1^2+1):(d1^2+d2^2)] <- kronecker(diag(1,d2),Gamma1)
  H.inv <- solve(H)
  D1 <- Gamma2%*%t(A1)%*% ginv(A1%*%Gamma2%*%t(A1)) %*%A1
  D2 <- Gamma1%*%t(A2)%*% ginv(A2%*%Gamma1%*%t(A2)) %*%A2
  P1 <- A1%*%ginv(t(A1)%*%A1)%*%t(A1)
  P2 <- A2%*%ginv(t(A2)%*%A2)%*%t(A2)
  Sigma.Q <- matrix(NA_real_,d1^2+d2^2,d1^2+d2^2)
  M1 <- kronecker(t(A2),P1)%*%Sigma.e%*%kronecker(A2,P1)
  M2 <- kronecker(t(A2),diag(d1)-P1)%*%Sigma.e%*%kronecker(A2,P1)
  M3 <- kronecker(t(A2),P1)%*%Sigma.e%*%kronecker(A2,diag(d1)-P1)
  M4 <- kronecker(t(A2),diag(d1)-P1)%*%Sigma.e%*%kronecker(A2,diag(d1)-P1)
  Sigma.Q1 <- Sigma.Q2 <- Sigma.Q3 <- Sigma.Q4 <- matrix(0,d1^2,d1^2)
  for(i in 1:d1){
    for(j in 1:d1){
      for(k in 1:d1){
        for(l in 1:d1){
          Sigma.Q1[(i-1)*d1+j,(k-1)*d1+l] <- sum(Sigma.xx[i,,k,] * M1[(1:d2-1)*d1+j,(1:d2-1)*d1+l])
          Sigma.Q2[(i-1)*d1+j,(k-1)*d1+l] <- sum(Sigma.xx[i,,k,] * M2[(1:d2-1)*d1+j,(1:d2-1)*d1+l])
          Sigma.Q3[(i-1)*d1+j,(k-1)*d1+l] <- sum(Sigma.xx[i,,k,] * M3[(1:d2-1)*d1+j,(1:d2-1)*d1+l])
          Sigma.Q4[(i-1)*d1+j,(k-1)*d1+l] <- sum(Sigma.xx[i,,k,] * M4[(1:d2-1)*d1+j,(1:d2-1)*d1+l])
        }
      }
    }
  }
  Sigma.Q[1:d1^2,1:d1^2] <- Sigma.Q1+kronecker(D1,diag(d1))%*%Sigma.Q2+Sigma.Q3%*%kronecker(t(D1),diag(d1))+
    kronecker(D1,diag(d1))%*%Sigma.Q4%*%kronecker(t(D1),diag(d1))
  M1 <- kronecker(P2,t(A1))%*%Sigma.e%*%kronecker(P2,A1)
  M2 <- kronecker(P2,t(A1))%*%Sigma.e%*%kronecker(diag(d2)-P2,A1)
  M3 <- kronecker(diag(d2)-P2,t(A1))%*%Sigma.e%*%kronecker(P2,A1)
  M4 <- kronecker(diag(d2)-P2,t(A1))%*%Sigma.e%*%kronecker(diag(d2)-P2,A1)
  Sigma.Q1 <- Sigma.Q2 <- Sigma.Q3 <- Sigma.Q4 <- matrix(0,d2^2,d2^2)
  for(i in 1:d2){
    for(j in 1:d2){
      for(k in 1:d2){
        for(l in 1:d2){
          Sigma.Q1[(i-1)*d2+j,(k-1)*d2+l] <- sum(Sigma.xx[,j,,l] * M1[(i-1)*d1+(1:d1),(k-1)*d1+(1:d1)])
          Sigma.Q2[(i-1)*d2+j,(k-1)*d2+l] <- sum(Sigma.xx[,j,,l] * M2[(i-1)*d1+(1:d1),(k-1)*d1+(1:d1)])
          Sigma.Q3[(i-1)*d2+j,(k-1)*d2+l] <- sum(Sigma.xx[,j,,l] * M3[(i-1)*d1+(1:d1),(k-1)*d1+(1:d1)])
          Sigma.Q4[(i-1)*d2+j,(k-1)*d2+l] <- sum(Sigma.xx[,j,,l] * M4[(i-1)*d1+(1:d1),(k-1)*d1+(1:d1)])
        }
      }
    }
  }
  Sigma.Q[(d1^2+1):(d1^2+d2^2),(d1^2+1):(d1^2+d2^2)] <- Sigma.Q1+Sigma.Q2%*%kronecker(diag(d2),t(D2))+
    kronecker(diag(d2),D2)%*%Sigma.Q3+kronecker(diag(d2),D2)%*%Sigma.Q4%*%kronecker(diag(d2),t(D2))
  M1 <- kronecker(t(A2),P1)%*%Sigma.e%*%kronecker(P2,A1)
  M2 <- kronecker(t(A2),P1)%*%Sigma.e%*%kronecker(diag(d2)-P2,A1)
  M3 <- kronecker(t(A2),diag(d1)-P1)%*%Sigma.e%*%kronecker(P2,A1)
  M4 <- kronecker(t(A2),diag(d1)-P1)%*%Sigma.e%*%kronecker(diag(d2)-P2,A1)
  Sigma.Q1 <- Sigma.Q2 <- Sigma.Q3 <- Sigma.Q4 <- matrix(0,d1^2,d2^2)
  for(i in 1:d1){
    for(j in 1:d1){
      for(k in 1:d2){
        for(l in 1:d2){
          Sigma.Q1[(i-1)*d1+j,(k-1)*d2+l] <- sum(Sigma.xx[i,,,l] * M1[(1:d2-1)*d1+j,(k-1)*d1+(1:d1)])
          Sigma.Q2[(i-1)*d1+j,(k-1)*d2+l] <- sum(Sigma.xx[i,,,l] * M2[(1:d2-1)*d1+j,(k-1)*d1+(1:d1)])
          Sigma.Q3[(i-1)*d1+j,(k-1)*d2+l] <- sum(Sigma.xx[i,,,l] * M3[(1:d2-1)*d1+j,(k-1)*d1+(1:d1)])
          Sigma.Q4[(i-1)*d1+j,(k-1)*d2+l] <- sum(Sigma.xx[i,,,l] * M4[(1:d2-1)*d1+j,(k-1)*d1+(1:d1)])
        }
      }
    }
  }
  Sigma.Q[1:d1^2,(d1^2+1):(d1^2+d2^2)] <- Sigma.Q1+Sigma.Q2%*%kronecker(diag(d2),t(D2))+
    kronecker(D1,diag(d1))%*%Sigma.Q3+kronecker(D1,diag(d1))%*%Sigma.Q4%*%kronecker(diag(d2),t(D2))
  Sigma.Q[(d1^2+1):(d1^2+d2^2),1:d1^2] <- t(Sigma.Q[1:d1^2,(d1^2+1):(d1^2+d2^2)])
  Sigma.LS <- H.inv%*%Sigma.Q%*%H.inv
  Sigma.LS11 <- Sigma.LS[1:d1^2,1:d1^2]
  Sigma.LS11.t <- array(Sigma.LS11,c(d1,d1,d1,d1))
  Sigma.LS11.t <- aperm(Sigma.LS11.t,c(2,1,4,3))
  Sigma.LS11.t <- matrix(Sigma.LS11.t, d1^2, d1^2)
  Sigma.LS22.t <- Sigma.LS[(d1^2+1):(d1^2+d2^2),(d1^2+1):(d1^2+d2^2)]
  Sigma.LS22 <- array(Sigma.LS22.t,c(d2,d2,d2,d2))
  Sigma.LS22 <- aperm(Sigma.LS22,c(2,1,4,3))
  Sigma.LS22 <- matrix(Sigma.LS22, d2^2, d2^2)
  svd.A1 <- svd(A1)
  #k1 <- length(svd.A1$d[svd.A1$d>1e-10])
  D1 <- diag(c(svd.A1$d[1:k1],1))[1:k1,1:k1]
  U1 <- svd.A1$u[,1:k1]
  V1 <- svd.A1$v[,1:k1]
  if(k1<d1){
    U1c <- svd.A1$u[,(k1+1):d1]
    V1c <- svd.A1$v[,(k1+1):d1]
  }else if(k1==d1){
    U1c <- 0
    V1c <- 0
  }
  #U1c <- svd.A1$u[,(k1+1):d1]  # original version : didn't consider if k1=d1
  #V1c <- svd.A1$v[,(k1+1):d1]
  e1 <- diag(d1)
  J1 <- matrix(0,d1^2,d1^2)
  for(i in 1:d1){
    J1[,((i-1)*d1+1):(i*d1)] <- kronecker(diag(d1),e1[,i])
  }
  C1 <- kronecker(A1,diag(d1)) + kronecker(diag(d1),A1)%*%J1
  Sigma.LS.1u <- C1%*%Sigma.LS11%*%t(C1)
  C1.t <- kronecker(t(A1),diag(d1)) + kronecker(diag(d1),t(A1))%*%J1
  Sigma.LS.1v <- C1.t%*%Sigma.LS11.t%*%t(C1.t)
  e1 <- diag(k1)
  L1 <- matrix(0,k1^2,k1)
  for(i in 1:k1){
    L1[,i] <- kronecker(e1[,i],e1[,i])
  }

  if(k1<d1){
    R1u <- cbind(kronecker(diag(k1),U1), kronecker(diag(k1),U1c)) %*% rbind(solve(kronecker(D1^2,diag(k1)) -
                                                                                    kronecker(diag(k1),D1^2)+L1%*%t(L1)) %*% (diag(k1^2)-L1%*%t(L1)) %*% kronecker(t(U1),t(U1)),
                                                                            kronecker(diag(1/c(svd.A1$d[1:k1],1))[1:k1,1:k1]^2%*%t(U1), t(U1c))  )
    R1v <- cbind(kronecker(diag(k1),V1), kronecker(diag(k1),V1c)) %*% rbind(solve(kronecker(D1^2,diag(k1)) -
                                                                                    kronecker(diag(k1),D1^2)+L1%*%t(L1)) %*% (diag(k1^2)-L1%*%t(L1)) %*% kronecker(t(V1),t(V1)),
                                                                            kronecker(diag(1/c(svd.A1$d[1:k1],1))[1:k1,1:k1]^2%*%t(V1), t(V1c))  )
  }else if(k1==d1){
    R1u <- kronecker(diag(k1),U1) %*% solve(kronecker(D1^2,diag(k1)) -
                                              kronecker(diag(k1),D1^2)+L1%*%t(L1)) %*% (diag(k1^2)-L1%*%t(L1)) %*% kronecker(t(U1),t(U1))
    R1v <- kronecker(diag(k1),V1) %*% solve(kronecker(D1^2,diag(k1)) -
                                              kronecker(diag(k1),D1^2)+L1%*%t(L1)) %*% (diag(k1^2)-L1%*%t(L1)) %*% kronecker(t(V1),t(V1))

  }

  # R1u <- cbind(kronecker(diag(k1),U1), kronecker(diag(k1),U1c)) %*% rbind(solve(kronecker(D1^2,diag(k1)) -
  #                                                                                 kronecker(diag(k1),D1^2)+L1%*%t(L1)) %*% (diag(k1^2)-L1%*%t(L1)) %*% kronecker(t(U1),t(U1)),
  #                                                                         kronecker(diag(1/c(svd.A1$d[1:k1],1))[1:k1,1:k1]^2%*%t(U1), t(U1c))  )
  # R1v <- cbind(kronecker(diag(k1),V1), kronecker(diag(k1),V1c)) %*% rbind(solve(kronecker(D1^2,diag(k1)) -
  #                                                                                 kronecker(diag(k1),D1^2)+L1%*%t(L1)) %*% (diag(k1^2)-L1%*%t(L1)) %*% kronecker(t(V1),t(V1)),
  #                                                                         kronecker(diag(1/c(svd.A1$d[1:k1],1))[1:k1,1:k1]^2%*%t(V1), t(V1c))  )
  Theta1.LS.u <- R1u%*% Sigma.LS.1u %*%t(R1u)
  Theta1.LS.v <- R1v%*% Sigma.LS.1v %*%t(R1v)
  Theta1.LS.u <- kronecker(t(RU1),diag(d1))%*%Theta1.LS.u
  Theta1.LS.v <- kronecker(t(RV1),diag(d1))%*%Theta1.LS.v
  svd.A2 <- svd(A2)
  #k2 <- length(svd.A2$d[svd.A2$d>1e-10])
  D2 <- diag(c(svd.A2$d[1:k2],1))[1:k2,1:k2]
  U2 <- svd.A2$u[,1:k2]
  V2 <- svd.A2$v[,1:k2]
  if(k2<d2){
    U2c <- svd.A2$u[,(k2+1):d2]
    V2c <- svd.A2$v[,(k2+1):d2]
  }else if(k2==d2){
    U2c <- 0
    V2c <- 0
  }
  #U2c <- svd.A2$u[,(k2+1):d2]
  #V2c <- svd.A2$v[,(k2+1):d2]
  e2 <- diag(d2)
  J2 <- matrix(0,d2^2,d2^2)
  for(i in 1:d2){
    J2[,((i-1)*d2+1):(i*d2)] <- kronecker(diag(d2),e2[,i])
  }
  C2 <- kronecker(A2,diag(d2)) + kronecker(diag(d2),A2)%*%J2
  Sigma.LS.2u <- C2%*%Sigma.LS22%*%t(C2)
  C2.t <- kronecker(t(A2),diag(d2)) + kronecker(diag(d2),t(A2))%*%J2
  Sigma.LS.2v <- C2.t%*%Sigma.LS22.t%*%t(C2.t)
  e2 <- diag(k2)
  L2 <- matrix(0,k2^2,k2)
  for(i in 1:k2){
    L2[,i] <- kronecker(e2[,i],e2[,i])
  }


  if(k2<d2){
    R2u <- cbind(kronecker(diag(k2),U2), kronecker(diag(k2),U2c)) %*% rbind(solve(kronecker(D2^2,diag(k2)) -
                                                                                    kronecker(diag(k2),D2^2)+L2%*%t(L2)) %*% (diag(k2^2)-L2%*%t(L2)) %*% kronecker(t(U2),t(U2)),
                                                                            kronecker(diag(1/c(svd.A2$d[1:k2],1))[1:k2,1:k2]^2%*%t(U2), t(U2c))  )
    R2v <- cbind(kronecker(diag(k2),V2), kronecker(diag(k2),V2c)) %*% rbind(solve(kronecker(D2^2,diag(k2)) -
                                                                                    kronecker(diag(k2),D2^2)+L2%*%t(L2)) %*% (diag(k2^2)-L2%*%t(L2)) %*% kronecker(t(V2),t(V2)),
                                                                            kronecker(diag(1/c(svd.A2$d[1:k2],1))[1:k2,1:k2]^2%*%t(V2), t(V2c))  )
  }else if(k2==d2){
    R2u <- kronecker(diag(k2),U2) %*% solve(kronecker(D2^2,diag(k2)) -
                                              kronecker(diag(k2),D2^2)+L2%*%t(L2)) %*% (diag(k2^2)-L2%*%t(L2)) %*% kronecker(t(U2),t(U2))
    R2v <- kronecker(diag(k2),V2) %*% solve(kronecker(D2^2,diag(k2)) -
                                              kronecker(diag(k2),D2^2)+L2%*%t(L2)) %*% (diag(k2^2)-L2%*%t(L2)) %*% kronecker(t(V2),t(V2))

  }


  # R2u <- cbind(kronecker(diag(k2),U2), kronecker(diag(k2),U2c)) %*% rbind(solve(kronecker(D2^2,diag(k2)) -
  #                                                                                 kronecker(diag(k2),D2^2)+L2%*%t(L2)) %*% (diag(k2^2)-L2%*%t(L2)) %*% kronecker(t(U2),t(U2)),
  #                                                                         kronecker(diag(1/c(svd.A2$d[1:k2],1))[1:k2,1:k2]^2%*%t(U2), t(U2c))  )
  # R2v <- cbind(kronecker(diag(k2),V2), kronecker(diag(k2),V2c)) %*% rbind(solve(kronecker(D2^2,diag(k2)) -
  #                                                                                 kronecker(diag(k2),D2^2)+L2%*%t(L2)) %*% (diag(k2^2)-L2%*%t(L2)) %*% kronecker(t(V2),t(V2)),
  #                                                                         kronecker(diag(1/c(svd.A2$d[1:k2],1))[1:k2,1:k2]^2%*%t(V2), t(V2c))  )
  Theta2.LS.u <- R2u%*% Sigma.LS.2u %*%t(R2u)
  Theta2.LS.v <- R2v%*% Sigma.LS.2v %*%t(R2v)
  Theta2.LS.u <- kronecker(t(RU2),diag(d2))%*%Theta2.LS.u
  Theta2.LS.v <- kronecker(t(RV2),diag(d2))%*%Theta2.LS.v
  return(list("Sigma"=Sigma.LS,"Theta1.u"=Theta1.LS.u,"Theta1.v"=Theta1.LS.v,
              "Theta2.u"=Theta2.LS.u,"Theta2.v"=Theta2.LS.v))
}


MAR1.RRCC.SE <- function(A1,A2,k1,k2,Sigma1,Sigma2,RU1=diag(k1),RV1=diag(k1),RU2=diag(k2),RV2=diag(k2),mpower=100){
  # canonical correlation analysis
  # X_t = A1 X_{t-1} A2^T + E_t
  # RU1,RV1,RU2,RV2: rotation of U1,V1,U2,V2, e.g., new_U1=U1 RU1
  # Cov(vec(E_t)) = Sigma.e=Sigma2 \otimes \Sigma1 : of dimension d1d2\times d1d2
  # truncate vec(X_t) to mpower term, i.e. VMA(mpower)
  # return Sigma.CC: asymptotic covariance matrix of (vec(\hat A1),vec(\hat A2^T))
  #  Theta1.CC.u, Theta1.CC.v: asymptotic covariance matrix of vec(\hat U1), vec(\hat V1)
  #  Theta2.CC.u, Theta2.CC.v: asymptotic covariance matrix of vec(\hat U2), vec(\hat V2)
  d1 <- dim(A1)[1]
  d2 <- dim(A2)[1]
  Sigma1.inv=solve(Sigma1)
  Sigma2.inv=solve(Sigma2)
  Sigma.e=kronecker(Sigma2,Sigma1)
  Sigma.e.inv=kronecker(Sigma2.inv,Sigma1.inv)
  Sigma.ee <- array(Sigma.e,c(d1,d2,d1,d2))
  B <- kronecker(A2,A1)
  #Sigma.x.vec, Sigma.xx: covariance matrix of vec(X_t)
  Sigma.x.vec <- Sigma.e
  for(i in 1:mpower){
    Sigma.x.vec <- Sigma.x.vec+(B%^%i)%*%Sigma.e%*%t(B%^%i)
  }
  Sigma.xx <- array(Sigma.x.vec,c(d1,d2,d1,d2))
  Gamma1 <- tensor(Sigma.xx,t(A1)%*%Sigma1.inv%*%A1,c(1,3),c(1,2))
  Gamma2 <- tensor(Sigma.xx,t(A2)%*%Sigma2.inv%*%A2,c(2,4),c(1,2))
  Gamma3 <- kronecker(diag(1,d1),Sigma1.inv%*%A1)%*%matrix(aperm(Sigma.xx,c(3,1,4,2)),d1^2,d2^2)%*%
    kronecker(t(A2)%*%Sigma2.inv,diag(1,d2))
  H <- matrix(NA_real_,d1^2+d2^2,d1^2+d2^2)
  H[1:d1^2,1:d1^2] <- as.vector(A1)%*%t(as.vector(A1)) + kronecker(Gamma2,Sigma1.inv)
  H[1:d1^2,(d1^2+1):(d1^2+d2^2)] <- Gamma3
  H[(d1^2+1):(d1^2+d2^2),1:d1^2] <- t(Gamma3)
  H[(d1^2+1):(d1^2+d2^2),(d1^2+1):(d1^2+d2^2)] <- kronecker(Sigma2.inv,Gamma1)
  H.inv <- solve(H)
  D1 <- Gamma2%*%t(A1)%*% ginv(A1%*%Gamma2%*%t(A1)) %*%A1
  D2 <- Gamma1%*%t(A2)%*% ginv(A2%*%Gamma1%*%t(A2)) %*%A2
  P1 <- Sigma1.inv%*%A1%*%ginv(t(A1)%*%Sigma1.inv%*%A1)%*%t(A1)
  P2 <- Sigma2.inv%*%A2%*%ginv(t(A2)%*%Sigma2.inv%*%A2)%*%t(A2)
  Sigma.Q <- matrix(NA_real_,d1^2+d2^2,d1^2+d2^2)
  M1 <- kronecker(t(A2),P1)%*%Sigma.e.inv%*%kronecker(A2,P1)
  M2 <- kronecker(t(A2),diag(d1)-P1)%*%Sigma.e.inv%*%kronecker(A2,P1)
  M3 <- kronecker(t(A2),P1)%*%Sigma.e.inv%*%kronecker(A2,diag(d1)-P1)
  M4 <- kronecker(t(A2),diag(d1)-P1)%*%Sigma.e.inv%*%kronecker(A2,diag(d1)-P1)
  Sigma.Q1 <- Sigma.Q2 <- Sigma.Q3 <- Sigma.Q4 <- matrix(0,d1^2,d1^2)
  for(i in 1:d1){
    for(j in 1:d1){
      for(k in 1:d1){
        for(l in 1:d1){
          Sigma.Q1[(i-1)*d1+j,(k-1)*d1+l] <- sum(Sigma.xx[i,,k,] * M1[(1:d2-1)*d1+j,(1:d2-1)*d1+l])
          Sigma.Q2[(i-1)*d1+j,(k-1)*d1+l] <- sum(Sigma.xx[i,,k,] * M2[(1:d2-1)*d1+j,(1:d2-1)*d1+l])
          Sigma.Q3[(i-1)*d1+j,(k-1)*d1+l] <- sum(Sigma.xx[i,,k,] * M3[(1:d2-1)*d1+j,(1:d2-1)*d1+l])
          Sigma.Q4[(i-1)*d1+j,(k-1)*d1+l] <- sum(Sigma.xx[i,,k,] * M4[(1:d2-1)*d1+j,(1:d2-1)*d1+l])
        }
      }
    }
  }
  Sigma.Q[1:d1^2,1:d1^2] <- Sigma.Q1+kronecker(D1,diag(d1))%*%Sigma.Q2+Sigma.Q3%*%kronecker(t(D1),diag(d1))+
    kronecker(D1,diag(d1))%*%Sigma.Q4%*%kronecker(t(D1),diag(d1))
  M1 <- kronecker(P2,t(A1))%*%Sigma.e.inv%*%kronecker(P2,A1)
  M2 <- kronecker(P2,t(A1))%*%Sigma.e.inv%*%kronecker(diag(d2)-P2,A1)
  M3 <- kronecker(diag(d2)-P2,t(A1))%*%Sigma.e.inv%*%kronecker(P2,A1)
  M4 <- kronecker(diag(d2)-P2,t(A1))%*%Sigma.e.inv%*%kronecker(diag(d2)-P2,A1)
  Sigma.Q1 <- Sigma.Q2 <- Sigma.Q3 <- Sigma.Q4 <- matrix(0,d2^2,d2^2)
  for(i in 1:d2){
    for(j in 1:d2){
      for(k in 1:d2){
        for(l in 1:d2){
          Sigma.Q1[(i-1)*d2+j,(k-1)*d2+l] <- sum(Sigma.xx[,j,,l] * M1[(i-1)*d1+(1:d1),(k-1)*d1+(1:d1)])
          Sigma.Q2[(i-1)*d2+j,(k-1)*d2+l] <- sum(Sigma.xx[,j,,l] * M2[(i-1)*d1+(1:d1),(k-1)*d1+(1:d1)])
          Sigma.Q3[(i-1)*d2+j,(k-1)*d2+l] <- sum(Sigma.xx[,j,,l] * M3[(i-1)*d1+(1:d1),(k-1)*d1+(1:d1)])
          Sigma.Q4[(i-1)*d2+j,(k-1)*d2+l] <- sum(Sigma.xx[,j,,l] * M4[(i-1)*d1+(1:d1),(k-1)*d1+(1:d1)])
        }
      }
    }
  }
  Sigma.Q[(d1^2+1):(d1^2+d2^2),(d1^2+1):(d1^2+d2^2)] <- Sigma.Q1+Sigma.Q2%*%kronecker(diag(d2),t(D2))+
    kronecker(diag(d2),D2)%*%Sigma.Q3+kronecker(diag(d2),D2)%*%Sigma.Q4%*%kronecker(diag(d2),t(D2))
  M1 <- kronecker(t(A2),P1)%*%Sigma.e.inv%*%kronecker(P2,A1)
  M2 <- kronecker(t(A2),P1)%*%Sigma.e.inv%*%kronecker(diag(d2)-P2,A1)
  M3 <- kronecker(t(A2),diag(d1)-P1)%*%Sigma.e.inv%*%kronecker(P2,A1)
  M4 <- kronecker(t(A2),diag(d1)-P1)%*%Sigma.e.inv%*%kronecker(diag(d2)-P2,A1)
  Sigma.Q1 <- Sigma.Q2 <- Sigma.Q3 <- Sigma.Q4 <- matrix(0,d1^2,d2^2)
  for(i in 1:d1){
    for(j in 1:d1){
      for(k in 1:d2){
        for(l in 1:d2){
          Sigma.Q1[(i-1)*d1+j,(k-1)*d2+l] <- sum(Sigma.xx[i,,,l] * M1[(1:d2-1)*d1+j,(k-1)*d1+(1:d1)])
          Sigma.Q2[(i-1)*d1+j,(k-1)*d2+l] <- sum(Sigma.xx[i,,,l] * M2[(1:d2-1)*d1+j,(k-1)*d1+(1:d1)])
          Sigma.Q3[(i-1)*d1+j,(k-1)*d2+l] <- sum(Sigma.xx[i,,,l] * M3[(1:d2-1)*d1+j,(k-1)*d1+(1:d1)])
          Sigma.Q4[(i-1)*d1+j,(k-1)*d2+l] <- sum(Sigma.xx[i,,,l] * M4[(1:d2-1)*d1+j,(k-1)*d1+(1:d1)])
        }
      }
    }
  }
  Sigma.Q[1:d1^2,(d1^2+1):(d1^2+d2^2)] <- Sigma.Q1+Sigma.Q2%*%kronecker(diag(d2),t(D2))+
    kronecker(D1,diag(d1))%*%Sigma.Q3+kronecker(D1,diag(d1))%*%Sigma.Q4%*%kronecker(diag(d2),t(D2))
  Sigma.Q[(d1^2+1):(d1^2+d2^2),1:d1^2] <- t(Sigma.Q[1:d1^2,(d1^2+1):(d1^2+d2^2)])
  Sigma.CC <- H.inv%*%Sigma.Q%*%H.inv
  Sigma.CC11 <- Sigma.CC[1:d1^2,1:d1^2]
  Sigma.CC11.t <- array(Sigma.CC11,c(d1,d1,d1,d1))
  Sigma.CC11.t <- aperm(Sigma.CC11.t,c(2,1,4,3))
  Sigma.CC11.t <- matrix(Sigma.CC11.t, d1^2, d1^2)
  Sigma.CC22.t <- Sigma.CC[(d1^2+1):(d1^2+d2^2),(d1^2+1):(d1^2+d2^2)]
  Sigma.CC22 <- array(Sigma.CC22.t,c(d2,d2,d2,d2))
  Sigma.CC22 <- aperm(Sigma.CC22,c(2,1,4,3))
  Sigma.CC22 <- matrix(Sigma.CC22, d2^2, d2^2)
  svd.A1 <- svd(A1)
  #k1 <- length(svd.A1$d[svd.A1$d>1e-10])
  D1 <- diag(c(svd.A1$d[1:k1],1))[1:k1,1:k1]
  U1 <- svd.A1$u[,1:k1]
  V1 <- svd.A1$v[,1:k1]
  if(k1<d1){
    U1c <- svd.A1$u[,(k1+1):d1]
    V1c <- svd.A1$v[,(k1+1):d1]
  }else if(k1==d1){
    U1c <- 0
    V1c <- 0
  }
  e1 <- diag(d1)
  J1 <- matrix(0,d1^2,d1^2)
  for(i in 1:d1){
    J1[,((i-1)*d1+1):(i*d1)] <- kronecker(diag(d1),e1[,i])
  }
  C1 <- kronecker(A1,diag(d1)) + kronecker(diag(d1),A1)%*%J1
  Sigma.CC.1u <- C1%*%Sigma.CC11%*%t(C1)
  C1.t <- kronecker(t(A1),diag(d1)) + kronecker(diag(d1),t(A1))%*%J1
  Sigma.CC.1v <- C1.t%*%Sigma.CC11.t%*%t(C1.t)
  e1 <- diag(k1)
  L1 <- matrix(0,k1^2,k1)
  for(i in 1:k1){
    L1[,i] <- kronecker(e1[,i],e1[,i])
  }

  if(k1<d1){
    R1u <- cbind(kronecker(diag(k1),U1), kronecker(diag(k1),U1c)) %*% rbind(solve(kronecker(D1^2,diag(k1)) -
                                                                                    kronecker(diag(k1),D1^2)+L1%*%t(L1)) %*% (diag(k1^2)-L1%*%t(L1)) %*% kronecker(t(U1),t(U1)),
                                                                            kronecker(diag(1/c(svd.A1$d[1:k1],1))[1:k1,1:k1]^2%*%t(U1), t(U1c))  )
    R1v <- cbind(kronecker(diag(k1),V1), kronecker(diag(k1),V1c)) %*% rbind(solve(kronecker(D1^2,diag(k1)) -
                                                                                    kronecker(diag(k1),D1^2)+L1%*%t(L1)) %*% (diag(k1^2)-L1%*%t(L1)) %*% kronecker(t(V1),t(V1)),
                                                                            kronecker(diag(1/c(svd.A1$d[1:k1],1))[1:k1,1:k1]^2%*%t(V1), t(V1c))  )
  }else if(k1==d1){
    R1u <- kronecker(diag(k1),U1) %*% solve(kronecker(D1^2,diag(k1)) -
                                              kronecker(diag(k1),D1^2)+L1%*%t(L1)) %*% (diag(k1^2)-L1%*%t(L1)) %*% kronecker(t(U1),t(U1))
    R1v <- kronecker(diag(k1),V1) %*% solve(kronecker(D1^2,diag(k1)) -
                                              kronecker(diag(k1),D1^2)+L1%*%t(L1)) %*% (diag(k1^2)-L1%*%t(L1)) %*% kronecker(t(V1),t(V1))

  }


  Theta1.CC.u <- R1u%*% Sigma.CC.1u %*%t(R1u)
  Theta1.CC.v <- R1v%*% Sigma.CC.1v %*%t(R1v)
  Theta1.CC.u <- kronecker(t(RU1),diag(d1))%*%Theta1.CC.u
  Theta1.CC.v <- kronecker(t(RV1),diag(d1))%*%Theta1.CC.v
  svd.A2 <- svd(A2)
  #k2 <- length(svd.A2$d[svd.A2$d>1e-10])
  D2 <- diag(c(svd.A2$d[1:k2],1))[1:k2,1:k2]
  U2 <- svd.A2$u[,1:k2]
  V2 <- svd.A2$v[,1:k2]
  if(k1<d1){
    U2c <- svd.A2$u[,(k2+1):d2]
    V2c <- svd.A2$v[,(k2+1):d2]
  }else if(k2==d2){
    U2c <- 0
    V2c <- 0
  }
  e2 <- diag(d2)
  J2 <- matrix(0,d2^2,d2^2)
  for(i in 1:d2){
    J2[,((i-1)*d2+1):(i*d2)] <- kronecker(diag(d2),e2[,i])
  }
  C2 <- kronecker(A2,diag(d2)) + kronecker(diag(d2),A2)%*%J2
  Sigma.CC.2u <- C2%*%Sigma.CC22%*%t(C2)
  C2.t <- kronecker(t(A2),diag(d2)) + kronecker(diag(d2),t(A2))%*%J2
  Sigma.CC.2v <- C2.t%*%Sigma.CC22.t%*%t(C2.t)
  e2 <- diag(k2)
  L2 <- matrix(0,k2^2,k2)
  for(i in 1:k2){
    L2[,i] <- kronecker(e2[,i],e2[,i])
  }
  if(k2<d2){
    R2u <- cbind(kronecker(diag(k2),U2), kronecker(diag(k2),U2c)) %*% rbind(solve(kronecker(D2^2,diag(k2)) -
                                                                                    kronecker(diag(k2),D2^2)+L2%*%t(L2)) %*% (diag(k2^2)-L2%*%t(L2)) %*% kronecker(t(U2),t(U2)),
                                                                            kronecker(diag(1/c(svd.A2$d[1:k2],1))[1:k2,1:k2]^2%*%t(U2), t(U2c))  )
    R2v <- cbind(kronecker(diag(k2),V2), kronecker(diag(k2),V2c)) %*% rbind(solve(kronecker(D2^2,diag(k2)) -
                                                                                    kronecker(diag(k2),D2^2)+L2%*%t(L2)) %*% (diag(k2^2)-L2%*%t(L2)) %*% kronecker(t(V2),t(V2)),
                                                                            kronecker(diag(1/c(svd.A2$d[1:k2],1))[1:k2,1:k2]^2%*%t(V2), t(V2c))  )
  }else if(k2==d2){
    R2u <- kronecker(diag(k2),U2) %*% solve(kronecker(D2^2,diag(k2)) -
                                              kronecker(diag(k2),D2^2)+L2%*%t(L2)) %*% (diag(k2^2)-L2%*%t(L2)) %*% kronecker(t(U2),t(U2))
    R2v <- kronecker(diag(k2),V2) %*% solve(kronecker(D2^2,diag(k2)) -
                                              kronecker(diag(k2),D2^2)+L2%*%t(L2)) %*% (diag(k2^2)-L2%*%t(L2)) %*% kronecker(t(V2),t(V2))
  }
  Theta2.CC.u <- R2u%*% Sigma.CC.2u %*%t(R2u)
  Theta2.CC.v <- R2v%*% Sigma.CC.2v %*%t(R2v)
  Theta2.CC.u <- kronecker(t(RU2),diag(d2))%*%Theta2.CC.u
  Theta2.CC.v <- kronecker(t(RV2),diag(d2))%*%Theta2.CC.v
  return(list("Sigma"=Sigma.CC,"Theta1.u"=Theta1.CC.u,"Theta1.v"=Theta1.CC.v,
              "Theta2.u"=Theta2.CC.u,"Theta2.v"=Theta2.CC.v))
}


MAR1.PROJ <- function(xx){
  # xx: T * p * q
  # X_t = LL X_{t-1} RR + E_t
  # Sig = cov(vec(E_t))
  # one-step projection estimation
  # Return LL, RR, and estimate of Sig
  dd <- dim(xx)
  T <- dd[1]
  p <- dd[2]
  q <- dd[3]
  xx.mat <- matrix(xx,T,p*q)
  # kroneck <- t(xx.mat[2:T,]) %*% xx.mat[1:(T-1),] %*% solve(t(xx.mat[1:(T-1),]) %*% xx.mat[1:(T-1),])
  kroneck <- t(xx.mat[2:T,]) %*% xx.mat[1:(T-1),] %*% chol2inv(chol(t(xx.mat[1:(T-1),]) %*% xx.mat[1:(T-1),]))
  ans.projection <- projection(kroneck,r=1,p,q,p,q)
  a <- svd(ans.projection[[1]][[1]],nu=0,nv=0)$d[1]
  LL <- ans.projection[[1]][[1]] / a
  RR <- t(ans.projection[[1]][[2]]) * a
  res = xx[2:T,,,drop=FALSE] - aperm(tensor(tensor(xx[1:(T-1),,,drop=FALSE],RR,3,1),LL,2,2),c(1,3,2))
  Sig <- matrix(tensor(res,res,1,1),p*q)/(T-1)
  # sd <- MAR.SE(xx, t(RR), LL, Sig)
  return(list(A1=LL,A2=RR,Sig=Sig))
}


MAR2.PROJ <- function(xx, R=1, P=1){
  # xx: T * p * q
  # X_t = A1 X_{t-1} t(A2) + E_t
  # Sig = cov(vec(E_t))
  # one-step projection estimation
  # Return LL, RR, and estimate of Sig
  dd <- dim(xx)
  T <- dd[1]
  d1 <- dd[2]
  d2 <- dd[3]
  # xx.mat <- matrix(xx,T,d1*d2)
  # kroneck <- t(xx.mat[2:T,]) %*% xx.mat[1:(T-1),] %*% solve(t(xx.mat[1:(T-1),]) %*% xx.mat[1:(T-1),])
  A = list()
  kroneck <- tenAR.VAR(xx, P)$coef
  for (i in c(1:P)){
    ans.projection <- projection(kroneck[[i]],R[i],d2,d1,d2,d1)
    for (j in c(1:R[i])){
      a = svd(ans.projection[[j]][[1]],nu=0,nv=0)$d[1]
      ans.projection[[j]][[1]] <- ans.projection[[j]][[1]] / a
      ans.projection[[j]][[2]] <- ans.projection[[j]][[2]] * a
    }
    A[[i]] <- ans.projection
  }
  return(list(A=A))
}


MAR1.LS <- function(xx,niter=50,tol=1e-6,print.true = FALSE){
  # xx: T * p * q
  # X_t = LL X_{t-1} RR + E_t
  # Sig = cov(vec(E_t))
  # LS criterion
  # iterative algorithm between LL <--> RR
  # Return LL, RR, and estimate of Sig
  dd=dim(xx)
  T <- dd[1]
  p <- dd[2]
  q <- dd[3]
  LL.old <- diag(p)
  RR.old <- diag(q)
  dis <- 1
  iiter <- 1

  while(iiter <= niter & dis >= tol){
    # estimate RR0
    temp <- tensor(xx[1:(T-1),,,drop=FALSE],LL.old,2,2)  # (T-1) * q * p
    AA <- tensor(temp,temp,c(1,3),c(1,3))
    BB <- tensor(temp,xx[2:T,,,drop=FALSE],c(1,3),c(1,2))
    RR <- solve(AA,BB)
    # estimate LL0
    temp <- tensor(xx[1:(T-1),,,drop=FALSE],RR,3,1)  # (T-1) * p * q
    AA <- tensor(temp,temp,c(1,3),c(1,3))
    BB <- t(tensor(temp,xx[2:T,,,drop=FALSE],c(1,3),c(1,3)))
    LL <- t(solve(t(AA),t(BB)))
    a <- svd(LL,nu=0,nv=0)$d[1]
    LL <- LL / a
    RR <- RR * a
    # update for the next iteration
    dis <- sqrt(sum((kronecker(t(RR),LL)-kronecker(t(RR.old),LL.old))^2))
    LL.old <- LL
    RR.old <- RR
    iiter <- iiter + 1
    if(print.true==TRUE){
      print(LL)
      print(RR)
    }
  }
  res=xx[2:T,,,drop=FALSE] - aperm(tensor(tensor(xx[1:(T-1),,,drop=FALSE],RR,3,1),LL,2,2),c(1,3,2))
  Sig <- matrix(tensor(res,res,1,1),p*q)/(T-1)
  sd <- MAR.SE(xx, t(RR), LL, Sig)
  return(list(A1=LL,A2=t(RR),res=res,Sig=Sig,niter=iiter,sd=sd,data=xx))
}


MAR1.MLE <- function(xx,LL.init=NULL,Sigl.init=NULL,Sigr.init=NULL,niter=50,tol=1e-6,print.true = FALSE){
  # xx: T * p * q
  # X_t = LL X_{t-1} RR + E_t
  # Sig = cov(vec(E_t)) = Sigr otimes Sigl
  # optimization criterion is likelihood
  # iterative algorithm between LL <--> RR <--> Sig_r <--> Sig_l
  # Return LL, RR, Sigl, Sigr
  dd=dim(xx)
  T <- dd[1]
  p <- dd[2]
  q <- dd[3]
  if(is.null(LL.init)){
    LL.old <- diag(p)
  } else{
    LL.old <- LL.init
  }
  RR.old <- diag(q)
  if(is.null(Sigl.init)){
    Sigl.old <- diag(p)
  } else{
    Sigl.old <- Sigl.init
  }
  if(is.null(Sigr.init)){
    Sigr.old <- diag(q)
  } else{
    Sigr.old <- Sigr.init
  }
  dis <- 1
  iiter <- 1

  while(iiter <= niter & dis >= tol){
    Sigl.inv.old <- ginv(Sigl.old)
    Sigr.inv.old <- ginv(Sigr.old)
    # estimate RR0
    temp1 <- tensor(xx[1:(T-1),,,drop=FALSE],LL.old,2,2)  # (T-1) * q * p
    temp2 <- tensor(temp1,Sigl.inv.old,3,2)  # (T-1) * q * p
    AA <- tensor(temp1,temp2,c(1,3),c(1,3))
    BB <- tensor(temp2,xx[2:T,,,drop=FALSE],c(1,3),c(1,2))
    RR <- solve(AA,BB)
    # estimate LL0
    temp1 <- tensor(xx[1:(T-1),,,drop=FALSE],RR,3,1)  # (T-1) * p * q
    temp2 <- tensor(temp1,Sigr.inv.old,3,1)  # (T-1) * p * q
    AA <- tensor(temp1,temp2,c(1,3),c(1,3))
    BB <- t(tensor(temp2,xx[2:T,,,drop=FALSE],c(1,3),c(1,3)))
    LL <- t(solve(t(AA),t(BB)))
    res=xx[2:T,,,drop=FALSE] - aperm(tensor(tensor(xx[1:(T-1),,,drop=FALSE],RR,3,1),LL,2,2),c(1,3,2))
    temp <- tensor(res,Sigl.inv.old,2,1) # (T-1) * q * p
    Sigr <- tensor(temp,res,c(1,3),c(1,2))/T/p
    temp <- tensor(res,ginv(Sigr),3,1) # (T-1) * p * q
    Sigl <- tensor(temp,res,c(1,3),c(1,3))/T/q
    a <- svd(LL,nu=0,nv=0)$d[1]
    LL <- LL / a
    RR <- RR * a
    a <- eigen(Sigl)$values[1]
    Sigl <- Sigl / a
    Sigr <- Sigr * a

    # update for the next iteration
    dis1 <- sqrt(sum((kronecker(t(RR),LL)-kronecker(t(RR.old),LL.old))^2))
    dis2 <- sqrt(sum((kronecker(Sigr,Sigl)-kronecker(Sigr.old,Sigl.old))^2))
    dis <- max(dis1,dis2)
    LL.old <- LL
    RR.old <- RR
    Sigr.old <- Sigr
    Sigl.old <- Sigl
    iiter <- iiter + 1
    if(print.true==TRUE){
      print(LL)
      print(RR)
    }
  }
  Sig <- kronecker(Sigr,Sigl)
  sd <- MAR.SE(xx, t(RR), LL, Sig)
  return(list(A1=LL,A2=t(RR),res=res,Sig1=Sigl,Sig2=Sigr,Sig=Sig,niter=iiter,sd=sd,data=xx))
}


MAR.SE <- function(xx, B, A, Sigma){
  dd <- dim(xx)
  T <- dd[1]
  p <- dd[2]
  q <- dd[3]
  BX <- tensor(xx,B,3,2) # Tpq
  AX <- tensor(xx,A,2,2) # Tqp
  BXI <- apply(BX,1,function(x){kronecker(t(x),diag(p))})
  AXI <- apply(AX,1,function(x){kronecker(diag(q),t(x))})
  BXI.array <- array(BXI,c(p*q,p^2,T)) #pq*p^2*T
  AXI.array <- array(AXI,c(p*q,q^2,T)) #pq*q^2*T
  # W transpose
  Wt <- abind(BXI.array,AXI.array,along=2) #pq*(p^2+q^2)*T
  EWWt <- tensor(Wt,Wt,c(1,3),c(1,3))/T #(p^2+q^2)*(p^2+q^2)
  alpha <- as.vector(A)
  beta <- as.vector(t(B))
  gamma <- c(alpha,rep(0,q^2))
  H <- EWWt + gamma %*% t(gamma) #(p^2+q^2)*(p^2+q^2)
  WSigma <- tensor(Wt,Sigma,1,1) #(p^2+q^2)*T*pq
  EWSigmaWt <- tensor(WSigma,Wt,c(3,2),c(1,3))/T
  Hinv <- solve(H)
  Xi <- Hinv %*% EWSigmaWt %*% Hinv
  Xi <- Xi/T
}


tenAR.VAR <- function(xx, P){
  dd=dim(xx)
  t <- dd[1]
  n <- prod(dd[-1])
  if (prod(dd[-1]) > t){stop("sample size T too small")}
  yy=apply(xx, MARGIN=1, as.vector)

  x = 0
  for (l in c(1:P)){
    if (l == 1){x = yy[,(P):(t-1)]} else {x = rbind(x, yy[,(P+1-l):(t-l)])}
  }
  y = yy[,(P+1):t]
  coef = t(solve(x %*% t(x)) %*% x %*% t(y))
  res= y - coef %*% x
  phi = list()
  for (l in c(1:P)){
    phi[[l]] = coef[,(n*(l-1)+1):(n*l)]
  }
  return(list(coef=phi, res=res, data=xx))
}


tenAR.A <- function(dim,R,P,rho){
  K <- length(dim)
  A <- lapply(1:P, function(p) {lapply(1:max(1,R[p]), function(j) {lapply(1:K, function(i) {diag(dim[i])})})})
  for (p in c(1:P)){
    if (R[p] == 0) next
    for (j in c(1:R[p])){
      for (i in c(1:K)){
        A[[p]][[j]][[i]] <- matrix(rnorm(dim[i]^2), c(dim[i],dim[i]))
      }
    }
  }
  eigen = M.eigen(A, R, P, dim)
  for (l in c(1:P)){
    if (R[l] == 0) next
    for (j in c(1:R[l])){
      A[[l]][[j]][[K]] <- rho * A[[l]][[j]][[K]]/ eigen
    }
    A[[l]] <- fro.order(fro.rescale(A[[l]]))
  }
  eigen = M.eigen(A, R, P, dim)
  return(A)
}


tenAR.PROJ <- function(xx,R,P){
  dim <- dim(xx)[-1]
  if (length(dim) == 2){
    A = MAR2.PROJ(xx,R,P)
    return(list(A=A))
  }
  if (length(dim) != 3){
    stop("temporarily 'tenAR.PROJ' only support K=2,3.")
  }
  mm <- tenAR.VAR(xx, P)$coef
  A = list()
  for (p in c(1:P)){
    if (is.na(R[p])) stop("p != length(R)")
    if (R[p] == 0) next
    tt <- aperm(trearrange(mm[[p]], rev(dim)))
    A[[p]] <- fro.order(fro.rescale(ten.proj(tt, dim, R[p])))
  }
  # A1 = A
  # phi1 = kronecker(kronecker(A1[[1]][[1]][[3]], A1[[1]][[1]][[2]]), A1[[1]][[1]][[1]]) + kronecker(kronecker(A1[[1]][[2]][[3]], A1[[1]][[2]][[2]]), A1[[1]][[2]][[1]])
  # sqrt(sum((mm[[1]] - phi1)**2))
  return(list(A=A))
}


tenAR.LS <- function(xx, R, P, init.A=NULL, niter=150, tol=1e-5, print.true=FALSE){
  if (!(mode(xx) == "S4")) {xx <- as.tensor(xx)}
  if (length(P) == 2){P = P[1]; rolling=TRUE} else {rolling = FALSE} # len(P) > 1 implies rolling in func 'tenAR.predict'
  dim <- dim(xx)[-1]  # dimension of data
  K <- length(dim)  # number of tensor modes
  t <- dim(xx)[1]  # time length
  if (length(R) == 1 & P > 1){  # user can only give R1 to represent a vector R if all each term number same
    R = rep(R, P)
  }
  if (K==2|K>3){  # for initialization
    if (is.null(init.A)) {
      A.old = list()
      for (p in c(1:P)){
        if (is.na(R[p])) stop("p != length(R)")
        if (R[p] == 0) next
        A.old[[p]] <- lapply(1:R[p], function(j) {lapply(1:K, function(i) {0.5*diag(dim[i])})})
      }
    } else {A.old <- init.A}
  } else if (K==3) {  # the current code only support 3 mode tensor projection
    if (is.null(init.A)) {A.old <- tenAR.PROJ(xx@data,R,P)$A} else {A.old <- init.A}
  } else {stop("dimension K of time series must at least be 2")}
  
  A.new <- A.old
  Tol <- tol*sqrt(sum(dim^2))*sum(R)
  dis <- 1
  iiter <- 1
  AX = A.new
  ttlMode = c(1:K) + 1  # these are tensor slicing parameters for
  ttlMode2 = c(1:(K+1))
  subxx = list()
  for (l in c(0:P)){  # tensor data for each lag
    subxx[[l+1]] = abind::asub(xx, (1+P-l):(t-l), 1, drop=FALSE)
  }
 
  while(iiter <= niter & dis >= tol){
    for (p in c(1:P)){
      if (R[p] == 0) next
      for (r in c(1:R[p])){
        for (k in c(K:1)){ # update last matrix first
          AX[[p]][[r]][[k]] = rTensor::ttl(subxx[[p+1]], A.new[[p]][[r]][-k], c(2:(K+1))[-k])
          temp = AX[[p]][[r]][[k]]
          L1 <- 0
          for (l in c(1:P)){
            if (R[l] == 0 | (l == p & R[l] == 1)) next
            if(l == p){rMode = c(1:R[l])[-r]} else {rMode = c(1:R[l])}
            L1 <- L1 + Reduce("+",lapply(rMode, function(n) {rTensor::ttl(subxx[[l+1]], A.new[[l]][[n]], ttlMode)}))
          }
          temp2 <- subxx[[1]] - L1
          RR <- tensor(temp@data,temp@data,ttlMode2[-(k+1)],ttlMode2[-(k+1)])
          LL <- tensor(temp2@data,temp@data,ttlMode2[-(k+1)],ttlMode2[-(k+1)])
          A.new[[p]][[r]][[k]] <- LL %*% ginv(RR)
        }
      }
    }
    dis = ten.dis.A(A.new, A.old, R, K)
    for (p in c(1:P)){
      if (R[p] == 0) next
      A.new[[p]] <- svd.rescale(A.new[[p]])
    }
    A.old <- A.new
    iiter <- iiter + 1
    if (print.true == TRUE){
      print(dis)
      print(paste('iiter num=',iiter))
    }
  }
  for (p in c(1:P)){
    if (R[p] == 0) next
    A.new[[p]] <- fro.order(fro.rescale(A.new[[p]]))
  }
  if (rolling){ # rolling forecast, no need calculate other parameters
    return(list(A=A.new))
  }
  res <- ten.res(subxx,A.new,P,R,K,ttlMode)@data
  Sig <- matrix(tensor(res,res,1,1),prod(dim))/(t-1)
  cov = tenAR.SE.LSE(dim, R, P, K, t, AX, A.new, Sig)
  sd <- covtosd(cov, dim, R)
  bic <- IC(xx, res, R, t, dim)
  return(list(A=A.new,niter=iiter,Sig=Sig,res=res,cov=cov,sd=sd,BIC=bic,data=xx))
}


tenAR.MLE <- function(xx, R, P, init.A=NULL, init.sig=NULL, niter=150, tol=1e-5, print.true = FALSE){
  if (!(mode(xx) == "S4")) {xx <- as.tensor(xx)}
  if (length(P) == 2){P = P[1]; rolling=TRUE} else {rolling = FALSE} # len(P) > 1 implies rolling in func 'tenAR.predict'
  dim <- dim(xx)[-1]
  K <- length(dim)
  t <- dim(xx)[1]
  if (length(R) == 1 & P > 1){
    R = rep(R, P)
  }
  if (K==2|K>3){
    if (is.null(init.A)) {
      A.old = list()
      for (p in c(1:P)){
        if (is.na(R[p])) stop("p != length(R)")
        if (R[p] == 0) next
        A.old[[p]] <- lapply(1:R[p], function(j) {lapply(1:K, function(i) {0.5*diag(dim[i])})})
      }
    } else {A.old <- init.A}
  } else if (K==3) {
    if (is.null(init.A)) {A.old <- tenAR.PROJ(xx@data,R,P)$A} else {A.old <- init.A}
  } else {stop("dimension K of time series must at least be 2")}
  if (is.null(init.sig)) {Sig.old <- lapply(1:K, function(i) {diag(dim[i])})} else {Sig.old <- init.sig}
  Sig.new <- Sig.old
  Sig.new.inv <- lapply(1:K, function (k) {solve(Sig.new[[k]])})
  A.new <- A.old
  Tol <- tol*sqrt(sum(dim^2))*sum(R)
  dis <- 1
  iiter <- 1
  AX = A.new
  ttlMode = c(1:K) + 1
  ttlMode2 = c(1:(K+1))
  subxx = list()
  for (l in c(0:P)){  # tensor data for each lag
    subxx[[l+1]] = abind::asub(xx, (1+P-l):(t-l), 1, drop=FALSE)
  }
  while(iiter <= niter & dis >= Tol){
    for (p in c(1:P)){
      if (R[p] == 0) next
      for (r in c(1:R[p])){
        res.old <- ten.res(subxx,A.new,P,R,K,ttlMode)
        for (k in c(K:1)){
          rs <- rTensor::ttl(res.old, Sig.new.inv[-k], ttlMode[-k])
          Sig.new[[k]] <- tensor(res.old@data, rs@data, ttlMode2[-(k+1)],ttlMode2[-(k+1)])/(t-1)/prod(dim[-k])
          Sig.new.inv <- lapply(1:K, function (k) {ginv(Sig.new[[k]])})
        }
        for (k in c(K:1)){
          sphi <-  lapply(1:K, function (k) {Sig.new.inv[[k]] %*% (A.new[[p]][[r]][[k]])})
          AX[[p]][[r]][[k]] = rTensor::ttl(subxx[[p+1]], A.new[[p]][[r]][-k], ttlMode[-k])
          temp1 <- rTensor::ttl(subxx[[p+1]], sphi[-k], ttlMode[-k])
          L1 <- 0
          for (l in c(1:P)){
            if (R[l] == 0 | (l == p & R[l] == 1)) next
            if(l == p){rMode = c(1:R[l])[-r]} else {rMode = c(1:R[l])}
            L1 <- L1 + Reduce("+",lapply(rMode, function(n) {rTensor::ttl(subxx[[l+1]], A.new[[l]][[n]], ttlMode)}))
          }
          temp2 <- subxx[[1]] - L1
          RR <- tensor(AX[[p]][[r]][[k]]@data,temp1@data,ttlMode2[-(k+1)],ttlMode2[-(k+1)])
          LL <- tensor(temp2@data,temp1@data,ttlMode2[-(k+1)],ttlMode2[-(k+1)])
          A.new[[p]][[r]][[k]] <- LL %*% ginv(RR)
        }
      }
      A.new[[p]] <- svd.rescale(A.new[[p]])
      Sig.new <- eigen.rescale(list(Sig.new))[[1]]
    }
    dis = ten.dis.A(A.new, A.old, R, K)
    Sig.old <- Sig.new
    A.old <- A.new
    iiter <- iiter + 1
    if (print.true == TRUE){
      print(dis)
      print(paste('iiter num=',iiter))
    }
  }
  for (p in c(1:P)){
    if (R[p] == 0) next
    A.new[[p]] <- fro.order(fro.rescale(A.new[[p]]))
  }
  if (rolling){ # for rolling forecast, no need calculate other parameters
    return(list(A=A.new))
  }
  res <- ten.res(subxx,A.new,P,R,K,ttlMode)@data
  Sig <- matrix(tensor(res,res,1,1), prod(dim))/(t-1)
  SIG = rTensor::kronecker_list(rev(Sig.new))
  cov = tenAR.SE.MLE(dim, R, P, K, t, AX, A.new, SIG)
  sd <- covtosd(cov, dim, R)
  bic <- IC(xx, res, R, t, dim)
  return(list(A=A.new, SIGMA=Sig.new, niter=iiter, Sig=Sig, res=res, cov=cov, sd=sd, BIC=bic, data=xx))
}


MAR1.RR <- function(xx, k1, k2, A1.init=NULL, A2.init=NULL, niter=200, tol=1e-4,  print.true=FALSE){
  # xx: T * p * q
  # X_t = LL X_{t-1} RR' + E_t ### NOTE! This function is written with RR'.
  # Sig = cov(vec(E_t)) = Sigr \otimes Sigl
  # optimization criterion is likelihood
  # iterative algorithm between LL <--> Sigma_l <--> RR <--> Sig_r
  # Return LL, RR, Sigl, Sigr
  dd=dim(xx)
  T <- dd[1]
  p <- dd[2]
  q <- dd[3]
  if(is.null(A1.init) | is.null(A2.init)){
    init = initializer(xx, k1, k2)
  }
  if(is.null(A1.init)){
    LL.old <- init$A1
  } else{
    LL.old <- A1.init
  }
  if(is.null(A2.init)){
    RR.old <- init$A2
  } else{
    RR.old <- A2.init
  }
  Tol=tol*sqrt(p^2+q^2)
  dis <- 1
  iiter <- 1

  while(iiter <= niter & dis >= Tol){
    ## Save old
    LL.oold=LL.old
    RR.oold=RR.old
    # estimate LL0
    temp1 <- tensor(xx[1:(T-1),,,drop=FALSE],RR.old,3,2)  # (T-1) * p * q
    AA <- tensor(temp1,temp1,c(1,3),c(1,3))
    BB <- tensor(xx[2:T,,,drop=FALSE],temp1,c(1,3),c(1,3))
    LL <- BB%*%ginv(AA)
    U <- svd(LL%*%t(BB))$u[,1:k1]
    LL <- U%*%t(U)%*%LL
    # update for next iteration
    a=svd(LL,nu=0,nv=0)$d[1]
    LL=LL/a
    dis3=sum((LL-LL.old)^2)
    LL.old <- LL

    # estimate RR0
    temp1 <- tensor(xx[1:(T-1),,,drop=FALSE],LL.old,2,2)  # (T-1) * q * p
    AA <- tensor(temp1,temp1,c(1,3),c(1,3))
    BB <- tensor(xx[2:T,,,drop=FALSE],temp1,c(1,2),c(1,3))
    RR <- BB%*%ginv(AA)
    U <- svd(RR%*%t(BB))$u[,1:k2]
    RR <- U%*%t(U)%*%RR
    # update for next iteration
    dis3=dis3+sum((RR-RR.old)^2)
    RR.old <- RR

    # update for the next iteration
    dis1 <- sqrt(sum((kronecker(t(RR),LL)-kronecker(t(RR.oold),LL.oold))^2))
    dis3 = sqrt(dis3)
    dis <- dis3
    iiter <- iiter + 1

    #print(max(abs(eigen(LL)$values)))
    #print(max(abs(eigen(RR)$values)))

    if(print.true==TRUE){
      print(dis)
      print(paste('iiter num=',iiter))
    }
  }
  a <- sqrt(sum(LL^2))
  LL <- LL / a
  RR <- RR * a
  res=xx[2:T,,,drop=FALSE] - aperm(tensor(tensor(xx[1:(T-1),,,drop=FALSE],RR,3,2),LL,2,2),c(1,3,2))
  Sig <- matrix(tensor(res,res,1,1),p*q)/(T-1)
  bic <- T*p*q*log(sum(res^2/(T*p*q))) ##+log(T*p*q)*(bic.penalty(p,k1)+bic.penalty(q,k2))
  cov <- matAR.RR.se(LL,RR,k1,k2,method="RRLSE",Sigma.e=Sig,RU1=diag(k1),RV1=diag(k1),RU2=diag(k2),RV2=diag(k2),mpower=100)
  sd.A1 <- sqrt(array(diag(cov$Sigma)[1:p^2], c(p,p))/T)
  sd.A2 <- sqrt(array(diag(cov$Sigma)[(p^2+1):(p^2+q^2)], c(q,q))/T)
  #sd <- list(sqrt(array(diag(cov$Sigma)[1:p^2], c(p,p))/T), sqrt(array(diag(cov$Sigma)[(p^2+1):(p^2+q^2)], c(q,q))/T))
  loading = list(U1=svd(LL)$u,V1=svd(LL)$v,U2=svd(RR)$u,V2=svd(RR)$v)
  return(list(A1=LL,A2=RR,loading=loading,res=res,Sig=Sig,BIC=bic,niter=iiter-1,cov=cov,sd.A1=sd.A1,sd.A2=sd.A2,data=xx))
}


MAR1.CC <- function(xx,k1,k2,A1.init=NULL,A2.init=NULL,Sigl.init=NULL,Sigr.init=NULL,niter=200,tol=1e-4,print.true = FALSE){
  # xx: T * p * q
  # X_t = LL X_{t-1} RR' + E_t ### NOTE! This function is written with RR'.
  # Sig = cov(vec(E_t)) = Sigr \otimes Sigl
  # optimization criterion is likelihood
  # iterative algorithm between LL <--> Sigma_l <--> RR <--> Sig_r
  # Return LL, RR, Sigl, Sigr
  dd=dim(xx)
  T <- dd[1]
  p <- dd[2]
  q <- dd[3]
  # init = MAR1.LS(xx, k1, k2)
  if(is.null(A1.init) | is.null(A2.init)){
    init = initializer(xx, k1, k2)
  }
  if(is.null(A1.init)){
    LL.old <- init$A1
  } else{
    LL.old <- A1.init
  }
  if(is.null(A2.init)){
    RR.old <- init$A2
  } else{
    RR.old <- A2.init
  }
  # init.sig <- initializer.sig(xx)
  if(is.null(Sigl.init)){
    Sigl.old <- diag(p)
  } else{
    Sigl.old <- Sigl.init
  }
  if(is.null(Sigr.init)){
    Sigr.old <- diag(q)
  } else{
    Sigr.old <- Sigr.init
  }
  Tol=tol*sqrt(p^2+q^2)
  dis <- 1
  iiter <- 1
  while(iiter <= niter & dis >= Tol){

    ## Save old
    LL.oold=LL.old
    RR.oold=RR.old
    Sigl.oold=Sigl.old
    Sigr.oold=Sigr.old

    # estimate LL0 and Sigl
    Sigr.inv.old <- ginv(Sigr.old)
    temp1 <- tensor(xx[1:(T-1),,,drop=FALSE],RR.old,3,2)  # (T-1) * p * q
    temp2 <- tensor(temp1,Sigr.inv.old,3,1)  # (T-1) * p * q
    AA <- tensor(temp1,temp2,c(1,3),c(1,3))
    BB <- tensor(xx[2:T,,,drop=FALSE],temp2,c(1,3),c(1,3))
    LL <- BB%*%ginv(AA)
    res <- xx[2:T,,,drop=FALSE] - aperm(tensor(tensor(xx[1:(T-1),,,drop=FALSE],RR.old,3,2),LL,2,2),c(1,3,2)) # (T-1) * p * q
    temp <- tensor(res,Sigr.inv.old,3,1) # (T-1) * p * q
    Sigl <- tensor(temp,res,c(1,3),c(1,3))/T/q
    Sigl.spec <- eigen(Sigl)
    Sigl.root <- Sigl.spec$vectors%*%diag(sqrt(Sigl.spec$values))%*%t(Sigl.spec$vectors)
    Sigl.root.inv <- Sigl.spec$vectors%*%diag(1/sqrt(Sigl.spec$values))%*%t(Sigl.spec$vectors)
    U <- svd(Sigl.root.inv%*%LL%*%t(BB)%*%Sigl.root.inv)$u[,1:k1]
    LL <- Sigl.root%*%U%*%t(U)%*%Sigl.root.inv%*%LL
    res <- xx[2:T,,,drop=FALSE] - aperm(tensor(tensor(xx[1:(T-1),,,drop=FALSE],RR.old,3,2),LL,2,2),c(1,3,2)) # (T-1) * p * q
    temp <- tensor(res,Sigr.inv.old,3,1) # (T-1) * p * q
    Sigl <- tensor(temp,res,c(1,3),c(1,3))/T/q
    # update for next iteration
    a=svd(LL,nu=0,nv=0)$d[1]
    LL=LL/a
    dis3=sum((LL-LL.old)^2)
    LL.old <- LL
    Sigl.old <- Sigl


    # estimate RR0 and Sigr
    Sigl.inv.old <- ginv(Sigl.old)
    temp1 <- tensor(xx[1:(T-1),,,drop=FALSE],LL.old,2,2)  # (T-1) * q * p
    temp2 <- tensor(temp1,Sigl.inv.old,3,1)  # (T-1) * q * p
    AA <- tensor(temp1,temp2,c(1,3),c(1,3))
    BB <- tensor(xx[2:T,,,drop=FALSE],temp2,c(1,2),c(1,3))
    RR <- BB%*%ginv(AA)
    res <- xx[2:T,,,drop=FALSE] - aperm(tensor(tensor(xx[1:(T-1),,,drop=FALSE],RR,3,2),LL.old,2,2),c(1,3,2)) # (T-1) * p * q
    temp <- tensor(res,Sigl.inv.old,2,1) # (T-1) * q * p
    Sigr <- tensor(temp,res,c(1,3),c(1,2))/T/p
    Sigr.spec <- eigen(Sigr)
    Sigr.root <- Sigr.spec$vectors%*%diag(sqrt(Sigr.spec$values))%*%t(Sigr.spec$vectors)
    Sigr.root.inv <- Sigr.spec$vectors%*%diag(1/sqrt(Sigr.spec$values))%*%t(Sigr.spec$vectors)
    U <- svd(Sigr.root.inv%*%RR%*%t(BB)%*%Sigr.root.inv)$u[,1:k2]
    RR <- Sigr.root%*%U%*%t(U)%*%Sigr.root.inv%*%RR
    res <- xx[2:T,,,drop=FALSE] - aperm(tensor(tensor(xx[1:(T-1),,,drop=FALSE],RR,3,2),LL.old,2,2),c(1,3,2)) # (T-1) * p * q
    temp <- tensor(res,Sigl.inv.old,2,1) # (T-1) * q * p
    Sigr <- tensor(temp,res,c(1,3),c(1,2))/T/p
    # update for next iteration
    dis3=dis3+sum((RR-RR.old)^2)
    RR.old <- RR
    Sigr.old <- Sigr


    a <- eigen(Sigl)$values[1]
    Sigl <- Sigl / a
    Sigr <- Sigr * a
    ### cat(eigen(Sigl)$values[1]," ",eigen(Sigr)$values[1], " ")

    # update for the next iteration
    dis1 <- sqrt(sum((kronecker(t(RR),LL)-kronecker(t(RR.oold),LL.oold))^2))
    ### cat(dis1," ")
    dis2 <- sqrt(sum((kronecker(Sigr,Sigl)-kronecker(Sigr.oold,Sigl.oold))^2))
    ### cat(dis2," ",dis3,"\n")
    dis3 = sqrt(dis3)
    ### dis <- max(dis1,dis2)
    dis <- dis3
    Sigr.old <- Sigr
    Sigl.old <- Sigl
    iiter <- iiter + 1
    if(print.true==TRUE){
      print(LL)
      print(RR)
    }
  }
  a <- sqrt(sum(LL^2))
  LL <- LL / a
  RR <- RR * a
  Sig <- matrix(tensor(res,res,1,1),p*q)/(T-1)
  #Sig <- kronecker(Sigr,Sigl)
  #cov <- matAR.RR.se(LL,RR,k1,k2,method="RRMLE",Sigma1=Sigl,Sigma2=Sigr,RU1=diag(k1),RV1=diag(k1),RU2=diag(k2),RV2=diag(k2),mpower=100)
  cov = MAR1.RRCC.SE(LL,RR,k1,k2,Sigl,Sigr,RU1=diag(k1),RV1=diag(k1),RU2=diag(k2),RV2=diag(k2),mpower=100)
  sd.A1 = sqrt(array(diag(cov$Sigma)[1:p^2], c(p,p))/T)
  sd.A2 = sqrt(array(diag(cov$Sigma)[(p^2+1):(p^2+q^2)], c(q,q))/T)
  #sd <- list(sqrt(array(diag(cov$Sigma)[1:p^2], c(p,p))/T), sqrt(array(diag(cov$Sigma)[(p^2+1):(p^2+q^2)], c(q,q))/T))
  loading = list(U1=svd(LL)$u,V1=svd(LL)$v,U2=svd(RR)$u,V2=svd(RR)$v)
  return(list(A1=LL,A2=RR,loading=loading,res=res,Sig1=Sigl,Sig2=Sigr,Sig=Sig,niter=iiter-1, cov=cov, sd.A1=sd.A1, sd.A2=sd.A2, data=xx))
}


tenAR.SE.LSE <- function(dim, r, p, K, t, AX, A, Sigma){
  # dim = dim; r=R; p=P; K=K; t=t; AX = AX; A=A.new; Sigma=Sig;
  pdim = prod(dim) # d1d2d3
  fdim = dim**2
  ndim <- sum(fdim) # d1^2+d2^2+d^3
  Gamma <- matrix(0,sum(r)*ndim,sum(r)*ndim)
  n = 0
  for (i in c(1:p)){
    for (j in c(1:r[i])){
      for (k in c(1:(K-1))){
        r1 <- matrix(0, sum(r)*ndim, 1)
        a = as.vector(A[[i]][[j]][[k]])
        r1[( n*ndim + sum(fdim[0:(k-1)]) + 1): (n*ndim + sum(fdim[0:k])),] = a
        Gamma = Gamma + r1 %*% t(r1)
        if (K==2){
          for (l in c(1:r[i])){
            if (l != j){
              r1 <- matrix(0, sum(r)*ndim, 1)
              a = as.vector(A[[i]][[l]][[k]])
              r1[( n*ndim + sum(fdim[0:(k-1)]) + 1): (n*ndim + sum(fdim[0:k])),] = a
              Gamma = Gamma + r1 %*% t(r1)
            }
          }
        }
      }
      n = n + 1
    }
  }
  WT = c(); Q = list(); perm = list(); size = list()
  for (i in c(1:p)){
    for (j in c(1:r[i])){
      for (k in c(1:K)){
        if (i == 1 & j == 1){  # only initialize Q once
          size[[k]] = c(dim[k], prod(dim[-k]), t-p)
          perm[[k]] = c(k+1, (1:K)[-k] + 1, 1)
          s = if (k == K) 1 else prod(dim[(k+1):K])
          if (k == 1){
            Q[[1]] = diag(prod(dim))
          } else if (k == K){
            Q[[K]] = pm(dim[K], prod(dim[1:(K-1)]))
          } else {
            Q[[k]] = kronecker(diag(s), pm(dim[k], prod(dim[1:(k-1)])))
          }
        }
        # AXX = abind::asub(AX[[i]][[j]][[k]], (1+p-i):(t-i), 1, drop=FALSE)
        AXX = AX[[i]][[j]][[k]]
        AXfold = array(aperm(AXX@data, perm[[k]]), size[[k]])
        AXI = apply(AXfold,3,function(x){kronecker(x, diag(dim[k])) %*% Q[[k]]})
        AXI.array <- array(AXI,c(dim[k]^2,pdim,t))
        WT <- abind(WT, AXI.array,along=1)
      }
    }
  }
  WT = aperm(WT, c(3,1,2))
  WSigma <- tensor(WT,Sigma,3,1) #t*(d1^2+d2^2+d^3)*(d1d2d3)
  EWSigmaWt <- tensor(WSigma,WT,c(3,1),c(3,1))/t
  H <- tensor(WT,WT,c(3,1),c(3,1))/t + Gamma #r(d1^2+d2^2+d^2)*r(d1^2+d2^2+d^3)
  Hinv <- ginv(H)
  Xi <- Hinv %*% EWSigmaWt %*% Hinv
  return(Xi/t)
}


tenAR.SE.MLE <- function(dim, r, p, K, t, AX, A, Sigma){
  # dim = dim; r=R; p=P; K=K; t=t; AX = AX; A=A.new; Sigma=Sig;
  pdim = prod(dim) # d1d2d3
  fdim = dim**2
  ndim <- sum(fdim) # d1^2+d2^2+d^3
  Gamma <- matrix(0,sum(r)*ndim,sum(r)*ndim)
  r1 <- matrix(0, sum(r)*ndim, 1)
  n = 0
  for (i in c(1:p)){
    for (j in c(1:r[i])){
      for (k in c(1:(K-1))){
        r1 <- matrix(0, sum(r)*ndim, 1)
        a = as.vector(A[[i]][[j]][[k]])
        r1[( n*ndim + sum(fdim[0:(k-1)]) + 1): (n*ndim + sum(fdim[0:k])),] = a
        Gamma = Gamma + r1 %*% t(r1)
        if (K==2){
          for (l in c(1:r[i])){
            if (l != j){
              r1 <- matrix(0, sum(r)*ndim, 1)
              a = as.vector(A[[i]][[l]][[k]])
              r1[( n*ndim + sum(fdim[0:(k-1)]) + 1): (n*ndim + sum(fdim[0:k])),] = a
              Gamma = Gamma + r1 %*% t(r1)
            }
          }
        }
      }
      n = n + 1
    }
  }
  WT = c(); Q = list(); perm = list(); size = list()
  for (i in c(1:p)){
    for (j in c(1:r[i])){
      for (k in c(1:K)){
        if (i == 1 & j == 1){  # only initialize Q once
          size[[k]] = c(dim[k], prod(dim[-k]), t-p)
          perm[[k]] = c(k+1, (1:K)[-k] + 1, 1)
          s = if (k == K) 1 else prod(dim[(k+1):K])
          if (k == 1){
            Q[[1]] = diag(prod(dim))
          } else if (k == K){
            Q[[K]] = pm(dim[K], prod(dim[1:(K-1)]))
          } else {
            Q[[k]] = kronecker(diag(s), pm(dim[k], prod(dim[1:(k-1)])))
          }
        }
        # AXX = abind::asub(AX[[i]][[j]][[k]], (1+p-i):(t-i), 1, drop=FALSE)
        AXX = AX[[i]][[j]][[k]]
        AXfold = array(aperm(AXX@data, perm[[k]]), size[[k]])
        AXI = apply(AXfold,3,function(x){kronecker(x, diag(dim[k])) %*% Q[[k]]})
        AXI.array <- array(AXI,c(dim[k]^2,pdim,t))
        WT <- abind(WT, AXI.array,along=1)
      }
    }
  }
  WT = aperm(WT, c(3,1,2))
  WSigma <- tensor(WT,solve(Sigma),3,1) 
  EWSigmaWt <- tensor(WSigma,WT,c(3,1),c(3,1))/t
  H <- EWSigmaWt + Gamma
  Hinv <- ginv(H)
  Xi <- Hinv %*% EWSigmaWt %*% Hinv
  return(Xi/t)
}


tenAR.bic <- function(xx, rmax=5){
  if (mode(xx) != "S4") {xx <- rTensor::as.tensor(xx)}
  dim <- xx@modes[-1]
  t <- xx@modes[[1]]
  ans <- c()
  for (r in c(1:rmax)){
    est <- tenAR.LS(xx, R=r, P=1)
    res <- est$res
    ans[r] <- IC(xx,res,r,t, dim)
  }
  which.min(ans)
}

#' Plot Matrix-Valued Time Series
#'
#' Plot matrix-valued time series, can be also used to plot a given slice of a tensor-valued time series.
#'@name mplot
#'@rdname mplot
#'@aliases mplot
#'@export
#'@importFrom graphics par
#'@param xx  \eqn{T \times d_1 \times d_2} matrix-valued time series. Note that the number of mode is 3, where the first mode is time.
#'@return a figure.
#'@examples
#' dim <- c(3,3,3)
#' xx <- tenAR.sim(t=50, dim, R=2, P=1, rho=0.5, cov='iid')
#' mplot(xx[1:30,,,1])
mplot <- function(xx){
  if (mode(xx) == "S4"){xx = xx@data}
  dim = dim(xx)
  time = array(c(1:dim[1]),dim[1])
  opar <- par(mfrow=c(dim[2],dim[3]),mai=0.05*c(1,1,1,1),oma=c(2,2,0,0))
  on.exit(par(opar))
  for(i in 1:dim[2]){
    for(j in 1:dim[3]){
      if(i!=dim[2] & j!=1){
        plot(time,xx[,i,j],type='l',xaxt='n',yaxt='n',ylim=range(xx[,i,]))
      }
      if(i!=dim[2] & j==1){
        plot(time,xx[,i,j],type='l',xaxt='n',ylim=range(xx[,i,]))
      }
      if(i==dim[2] & j!=1){
        plot(time,xx[,i,j],type='l',yaxt='n',ylim=range(xx[,i,]))
      }
      if(i==dim[2] & j==1){
        plot(time,xx[,i,j],type='l',ylim=range(xx[,i,]))
      }
    }
  }

}


#' Plot ACF of Matrix-Valued Time Series
#'
#' Plot ACF of matrix-valued time series, can be also used to plot ACF of a given slice of a tensor-valued time series.
#'@name mplot.acf
#'@rdname mplot.acf
#'@aliases mplot.acf
#'@export
#'@importFrom graphics par
#'@importFrom graphics plot
#'@importFrom stats acf
#'@param xx  \eqn{T \times d_1 \times d_2} matrix-valued time series. Note that the number of mode is 3, where the first mode is time.
#'@return a figure.
#'@examples
#' dim <- c(3,3,3)
#' xx <- tenAR.sim(t=50, dim, R=2, P=1, rho=0.5, cov='iid')
#' mplot.acf(xx[1:30,,,1])
mplot.acf <- function(xx){
  if (mode(xx) == "S4"){xx = xx@data}
  dim = dim(xx)
  opar <- par(mfrow=c(dim[2],dim[3]),mai=0.05*c(1,1,1,1),oma=c(2,2,0,0))
  on.exit(par(opar))
  for(i in 1:dim[2]){
    for(j in 1:dim[3]){
      if(i!=dim[2] & j!=1){
        acf(xx[,i,j])
      }
      if(i!=dim[2] & j==1){
        acf(xx[,i,j])
      }
      if(i==dim[2] & j!=1){
        acf(xx[,i,j])
      }
      if(i==dim[2] & j==1){
        acf(xx[,i,j])
      }
    }
  }
}


# Define S3 class for the base model
tenAR <- function(model) {
  structure(model, class = "tenAR")
}


#' Predict funcions for Tensor Autoregressive Models
#'
#' S3 method for the 'tenAR' class using the generic predict function. Prediction based on the tensor autoregressive model or reduced rank MAR(1) model. If \code{rolling = TRUE}, returns the rolling forecasts.
#'@name predict.tenAR
#'@rdname predict.tenAR
#'@aliases predict.tenAR
#'@export
#'@importFrom abind abind
#'@param object a model object returned by \code{tenAR.est()}.
#'@param n.ahead prediction horizon.
#'@param xx \eqn{T^{\prime} \times d_1 \times \cdots \times d_K} new tensor time series to be used for prediction. Must have at least \code{n.ahead} length.
#'@param rolling TRUE or FALSE, rolling forecast, is FALSE by default.
#'@param n0 only if \code{rolling = TRUE}, the starting point of rolling forecast.
#'@param ... Additional arguments passed to the method.
#'@return
#'a tensor time series of length \code{n.ahead} if \code{rolling = FALSE};
#'
#'a tensor time series of length \eqn{T^{\prime} - n_0 - n.ahead + 1} if \code{rolling = TRUE}.
#'@seealso 'predict.ar' or 'predict.arima'
#'@examples
#' set.seed(333)
#' dim <- c(2,2,2)
#' t = 20
#' xx <- tenAR.sim(t, dim, R=2, P=1, rho=0.5, cov='iid')
#' est <- tenAR.est(xx, R=1, P=1, method="LSE")
#' pred <- predict(est, n.ahead = 1)
#' # rolling forcast
#' n0 = t - min(50,t/2)
#' pred.rolling <- predict(est, n.ahead = 5, xx = xx, rolling=TRUE, n0)
#'
#' # prediction for reduced rank MAR(1) model
#' dim <- c(2,2)
#' t = 20
#' xx <- tenAR.sim(t, dim, R=1, P=1, rho=0.5, cov='iid')
#' est <- matAR.RR.est(xx, method="RRLSE", k1=1, k2=1)
#' pred <- predict(est, n.ahead = 1)
#' # rolling forcast
#' n0 = t - min(50,t/2)
#' pred.rolling <- predict(est, n.ahead = 5, rolling=TRUE, n0=n0)
predict.tenAR <- function(object, n.ahead=1, xx=NULL, rolling=FALSE, n0=NULL, ...) {
  if (is.null(xx)) {xx <- object$data}
  if (is.null(object$SIGMA)){method = "LSE"} else {method = "MLE"}
  if (!is.null(object$A1)){
    A <- list(list(list(object$A1, object$A2)))
    if (is.null(object$Sig1)){
      method = "RRLSE"
    } else {
      method = "RRMLE"
    }
  } else if (!is.null(object$coef)){
    A <- object$coef
    method = "VAR"
  } else {
    A <- object$A
  }
  if (mode(xx) != "S4") {xx <- rTensor::as.tensor(xx)}
  if (rolling == TRUE){
    return(predRolling(A, xx@data, n.ahead, method, n0))
  }
  P <- length(A)
  R <- sapply(c(1:P), function(l){length(A[[l]])})
  K <- xx@num_modes - 1
  dim <- xx@modes
  ttt <- (dim[1]+1):(dim[1]+n.ahead)
  for(tt in ttt){
    L1 = 0
    for (l in c(1:P)){
      if (R[l] == 0) next
      L1 <- L1 + Reduce("+",lapply(c(1:R[l]), function(n) {rTensor::ttl(abind::asub(xx, tt-l, 1, drop=FALSE), A[[l]][[n]], (c(1:K) + 1))}))
    }
    xx <- as.tensor(abind(xx@data, L1@data, along=1))
  }
  return(abind::asub(xx@data, ttt, 1, drop=FALSE))
}


predRolling <- function(A, xx, n.ahead, method, n0){
  if (method == "RRLSE" | method == "RRMLE"){
    k1 <- rankMatrix(A[[1]][[1]][[1]])
    k2 <- rankMatrix(A[[1]][[1]][[2]])
  }
  P <- length(A)
  dim <- dim(xx)[-1]
  t <- dim(xx)[1]
  if (method == "VAR"){
    yy <- apply(xx, MARGIN=1, as.vector)
  } else {
    R <- sapply(c(1:P), function(l){length(A[[l]])})
    K <- length(dim)
  }
  if(is.null(n0)){n0 = t - min(50,t/2)}
  ttt <- (n0):(t - n.ahead)
  for(tt in ttt){
    tti <- tt - ttt[1] + 1
    print(paste("rolling forcast t =", tti))
    if (method == "RRLSE"){
      model <- MAR1.RR(abind::asub(xx, 1:tt, 1, drop=FALSE), k1, k2)
      A <- list(list(list(model$A1, model$A2)))
    } else if (method == "RRMLE"){
      model <- MAR1.CC(abind::asub(xx, 1:tt, 1, drop=FALSE), k1, k2)
      A <- list(list(list(model$A1, model$A2)))
    } else if (method == "VAR"){
      model = tenAR.est(abind::asub(xx, 1:tt, 1, drop=FALSE), P=P, method="VAR")
      A <- model$coef
    } else {
      model = tenAR.est(abind::asub(xx, 1:tt, 1, drop=FALSE), R, c(P,0), method)
      A <- model$A
    }
    
    L1 = 0
    for (l in c(1:P)){
      if (method == "VAR"){
        L1 <- L1 + array(A[[l]] %*% yy[,tt-l+1], c(1,dim(xx)[-1])) 
      } else {
        if (R[l] == 0) next
        L1 <- L1 + Reduce("+",lapply(c(1:R[l]), function(n) {rTensor::ttl(abind::asub(rTensor::as.tensor(xx), tt-l+1, 1, drop=FALSE), A[[l]][[n]], (c(1:K) + 1))}))@data
      }
    }

    if (tti == 1){xx.pred = L1} else {xx.pred = abind(xx.pred, L1, along=1)}
  }
  return(xx.pred)
}