R/HDSreg.R

#' @name HDSReg
#' @title High dimensional stochastic regression with latent factors
#' @description \code{HDSReg()} considers a multivariate time series model which
#' represents a high dimensional vector process as a sum of three terms: a
#' linear regression of some observed regressors, a linear combination of some
#' latent and serially correlated factors, and a vector white noise:\deqn{{\bf
#' y}_t = {\bf Dz}_t + {\bf Ax}_t + {\boldsymbol {\epsilon}}_t,} where \eqn{{\bf
#' y}_t} and \eqn{{\bf z}_t} are, respectively, observable \eqn{p\times 1} and
#' \eqn{m \times 1} time series, \eqn{{\bf x}_t} is an \eqn{r \times 1} latent
#' factor process, \eqn{{\boldsymbol{\epsilon}}_t \sim
#' \mathrm{WN}({\boldsymbol{0}},{\bf \Sigma}_{\epsilon}) } is a white noise with
#' zero mean and covariance matrix \eqn{{\bf \Sigma}_{\epsilon}} and
#' \eqn{{\boldsymbol{\epsilon}}_t} is uncorrelated with \eqn{({\bf z}_t, {\bf
#' x}_t)}, \eqn{{\bf D}} is an unknown regression coefficient matrix, and
#' \eqn{{\bf A}} is an unknown factor loading matrix. This procedure proposed in
#' Chang, Guo and Yao (2015) aims to estimate the unknown regression coefficient
#' matrix \eqn{{\bf D}}, the number of factors \eqn{r} and the factor loading
#' matrix \eqn{{\bf A}}.
#'
#' @param Y \eqn{{\bf Y} = \{{\bf y}_1, \dots , {\bf y}_n \}'}, a data matrix
#'   with \eqn{n} rows and \eqn{p} columns, where \eqn{n} is the sample size and
#'   \eqn{p} is the dimension of \eqn{{\bf y}_t}.
#' @param Z \eqn{{\bf Z} = \{{\bf z}_1, \dots , {\bf z}_n \}'}, a data matrix
#'   representing some observed regressors with \eqn{n} rows and \eqn{m}
#'   columns, where \eqn{n} is the sample size and \eqn{m} is the dimension of
#'   \eqn{{\bf z}_t}.
#' @param D A \eqn{p\times m} regression coefficient matrix \eqn{\widetilde{\bf
#'   D}}. If \code{D = NULL} (the default), our procedure will estimate
#'   \eqn{{\bf D}} first and let \eqn{\widetilde{\bf D}} be the estimate of
#'   \eqn{{\bf D}}. If \code{D} is given by \proglang{R} users, then
#'   \eqn{\widetilde{\bf D}={\bf D}}.
#' @param lag.k Time lag \eqn{k_0} used to calculate the nonnegative definte
#'   matrix \eqn{ \widehat{\mathbf{M}}}: \deqn{\widehat{\mathbf{M}}\ =\
#'   \sum_{k=1}^{k_0}\widehat{\mathbf{\Sigma}}_{\eta}(k)\widehat{\mathbf{\Sigma}}_{\eta}(k)',
#'   } where \eqn{\widehat{\bf \Sigma}_{\eta}(k)} is the sample autocovariance
#'   of \eqn{ {\boldsymbol {\eta}}_t = {\bf y}_t - \widetilde{\bf D}{\bf z}_t}
#'   at lag \eqn{k}.
#' @param twostep Logical. If \code{FALSE} (the default), then standard
#'   procedures (see \code{\link{factors}}) will be implemented to estimate
#'   \eqn{r} and \eqn{{\bf A}}. If \code{TRUE}, then a two step estimation
#'   procedure (see \code{\link{factors}}) will be implemented to estimate
#'   \eqn{r} and \eqn{{\bf A}}.

#' @seealso \code{\link{factors}}.
#' @return An object of class "HDSReg" is a list containing the following
#'   components:
#'
#'   \item{factor_num}{The estimated number of factors \eqn{\hat{r}}.}
#'   \item{reg.coff.mat}{The estimated \eqn{p \times m} regression coefficient
#'   matrix \eqn{\widetilde{\bf D}} if \code{D} is not given.}
#'   \item{loading.mat}{The estimated \eqn{p \times m} factor loading matrix
#'   \eqn{{\bf \widehat{A}}}.}
#' @references Chang, J., Guo, B. & Yao, Q. (2015).  \emph{High dimensional
#'   stochastic regression with latent factors, endogeneity and nonlinearity},
#'   Journal of Econometrics, Vol. 189, pp. 297–312.
#' @examples
#' n <- 400
#' p <- 200
#' m <- 2
#' r <- 3
#' X <- mat.or.vec(n,r)
#' x1 <- arima.sim(model=list(ar=c(0.6)),n=n)
#' x2 <- arima.sim(model=list(ar=c(-0.5)),n=n)
#' x3 <- arima.sim(model=list(ar=c(0.3)),n=n)
#' X <- cbind(x1,x2,x3)
#' X <- t(X)
#'
#' Z <- mat.or.vec(m,n)
#' S1 <- matrix(c(5/8,1/8,1/8,5/8),2,2)
#' Z[,1] <- c(rnorm(m))
#' for(i in c(2:n)){
#'   Z[,i] <- S1%*%Z[, i-1] + c(rnorm(m))
#' }
#' D <- matrix(runif(p*m, -2, 2), ncol=m)
#' A <- matrix(runif(p*r, -2, 2), ncol=r)
#' eps <- mat.or.vec(n, p)
#' eps <- matrix(rnorm(n*p), p, n)
#' Y <- D %*% Z + A %*% X + eps
#' Y <- t(Y)
#' Z <- t(Z)
#' res1 <- HDSReg(Y,Z,D,lag.k=2)
#' res2 <- HDSReg(Y,Z,lag.k=2)
#' @useDynLib HDTSA
#' @importFrom Rcpp sourceCpp
#' @importFrom Rcpp evalCpp
#' @import Rcpp
#' @export
HDSReg <- function (Y,Z,D=NULL,lag.k=1,twostep=FALSE) {
  Y_n <- nrow(Y)
  Y_p <- ncol(Y)
  Z_n <- nrow(Z)
  Z_p <- ncol(Z)
  storage.mode(Y_n) <- "integer"
  storage.mode(Y_p) <- "integer"
  storage.mode(Z_n) <- "integer"
  storage.mode(Z_p) <- "integer"
  
  #first step: estimate D (Least square) #without endogeneity and nolinearity
  if(missing(D)) {
    D <- solve(MatMult(t(Z), Z))
    D <- MatMult(D, MatMult(t(Z), Y))
    D <- t(D)
  }
  
  #estimate yt=Dzt+nt
  
  eta <- t(Y)-MatMult(D, t(Z))
  eta <- t(eta)
  factor_list <- factors(eta, lag.k, twostep)
  r <- factor_list$factor_num
  loading.mat <- factor_list$loading.mat
  outlist <- list(reg.coff.mat=D, factor_num=r, loading.mat=loading.mat)
  class(outlist) <- c("HDSReg")
  return(outlist)
}

Try the HDTSA package in your browser

Any scripts or data that you put into this service are public.

HDTSA documentation built on Jan. 7, 2023, 5:26 p.m.