QD Data Set

Documented in tp_apc

#' @title Tall-Project Imputation of Missing Value in Panel Data
#'
#' @description
#' \code{tp_apc} imputes the missing values in a given panel data using the
#' method of "Tall-Project".
#'
#' @import stats
#' @export
#'
#' @param X a matrix of size T by N with missing values.
#' @param kmax integer, indicating the maximum number of factors.
#' @param center logical, indicating whether or not X should be demeaned
#' @param standardize logical, indicating whether or not X should be scaled.
#' @param re_estimate logical, indicating whether or not output factors,
#' `Fhat`, `Lamhat`, `Dhat`, and `Chat`, should be re-estimated from the imputed data.
#'
#' @return a list of elements:
#' \item{Fhat}{estimated F}
#' \item{Lamhat}{estimated Lambda}
#' \item{Dhat}{estimated D}
#' \item{Chat}{euqals Fhat x Lamhat'}
#' \item{ehat}{equals Xhat - Chat}
#' \item{data}{X with missing data imputed}
#' \item{X}{the original data with missing values}
#' \item{kmax}{the maximum number of factors}
#' \item{center}{logical, indicating whether or not X was demeaned in the algorithm}
#' \item{standardize}{logical, indicating whether or not X was scaled in the algorithm}
#' \item{re_estimate}{logical, indicating whether or not output factors,
#' `Fhat`, `Lamhat`, `Dhat`, and `Chat`, were re-estimated from the imputed data}
#'
#' @author Yankang (Bennie) Chen <yankang.chen@@yale.edu>
#' @author Serena Ng <serena.ng@@columbia.edu>
#' @author Jushan Bai <jushan.bai@@columbia.edu>
#'
#' @references
#' Ercument Cahan, Jushan Bai, and Serena Ng (2021),
#' \emph{Factor-Based Imputation of Missing Values and Covariances in Panel Data of Large Dimensions}.
#' \url{https://arxiv.org/abs/2103.03045}



tp_apc <- function(X, kmax, center = FALSE, standardize = FALSE,
                   re_estimate = TRUE) {
  # Error checking
  if (! is.logical(center))
    stop("'center' must be logical.")
  if (! is.logical(standardize))
    stop("'standardize' must be logical.")
  if (! is.logical(re_estimate))
    stop("'re_estimate' must be logical.")
  if ((! center) & (standardize))
    stop("The option 'center = FALSE, standardize = TRUE' is not available.")
  if (! is.matrix(X))
    try(X <- as.matrix(X))
  if (! is.numeric(kmax))
    stop("'kmax' must be an integer.")
  if (kmax > min(nrow(X), ncol(X)))
    stop("'kmax' must be smaller than the size of X.")

  # Clear memory and create output object
  gc()
  out <- list()

  out$X <- X
  out$kmax <- kmax
  out$center <- center
  out$standardize <- standardize
  out$re_estimate <- re_estimate


  T <- nrow(X)
  N <- ncol(X)
  rownames(X) <- 1:T
  colnames(X) <- 1:N

  missing <- is.na(X)
  goodT <- rowSums(is.na(X)) == 0
  goodN <- colSums(is.na(X)) == 0
  T1 <- sum(goodT)
  N1 <- sum(goodN)
  mu1 <- matrix(rep(colMeans(X, na.rm = TRUE), T), nrow = T, ncol = N, byrow = TRUE)
  sd1 <- matrix(rep(apply(X, 2, stats::sd, na.rm = TRUE), T), nrow = T, ncol = N, byrow = TRUE)


  if (center & standardize){
    # demean and standardize

    XT <- (X[,goodN] - mu1[,goodN]) / sd1[,goodN]
    XN <- (X - mu1) / sd1

    bnXT <- fbi::apc(XT, kmax)
    Fhat <- bnXT$Fhat
    Dhat <- diag(bnXT$d)
    Lamhat <- matrix(rep(0, N*kmax), nrow = N, ncol = kmax)

    for (i in 1:N) {
      goodTi <- missing[, i] == FALSE
      Fn1 <- Fhat[goodTi, ]
      Reg <- Fn1
      P <- solve(t(Reg) %*% Reg) %*% t(Reg)
      Lamhat[i, ] <- P %*% XN[goodTi, i]
    }

    Chat <- Fhat %*% t(Lamhat)
    Xhat <- X   # estimated data
    Xhat[missing] <- Chat[missing] * sd1[missing] + mu1[missing]

    if (re_estimate){
      mu_hat <- matrix(rep(colMeans(Xhat), T), nrow = T, ncol = N, byrow = TRUE)
      sd_hat <- matrix(rep(apply(Xhat, 2, stats::sd), T), nrow = T, ncol = N, byrow = TRUE)
      Xhat_scaled <- (Xhat - mu_hat) / sd_hat

      reest <- fbi::apc(Xhat_scaled, kmax)
      out$Fhat <- reest$Fhat
      out$Lamhat <- reest$Lamhat
      out$Dhat <- reest$Dhat
      out$Chat <- reest$Chat * sd_hat + mu_hat
      data <- X
      data[missing] <- out$Chat[missing]
      out$data <- data

    } else {
      out$data <- Xhat
      out$Fhat <- Fhat
      out$Lamhat <- Lamhat
      out$Dhat <- Dhat
      out$Chat <- Chat * sd1 + mu1
    }


  } else if (center & (!standardize)){
    # only demean, do not standardize

    XT <- X[,goodN] - mu1[,goodN]
    XN <- X - mu1

    bnXT <- fbi::apc(XT, kmax)
    Fhat <- bnXT$Fhat
    Dhat <- diag(bnXT$d)
    Lamhat <- matrix(rep(0, N*kmax), nrow = N, ncol = kmax)

    for (i in 1:N) {
      goodTi <- missing[, i] == FALSE
      Fn1 <- Fhat[goodTi, ]
      Reg <- Fn1
      P <- solve(t(Reg) %*% Reg) %*% t(Reg)
      Lamhat[i, ] <- P %*% XN[goodTi, i]
    }

    Chat <- Fhat %*% t(Lamhat)
    Xhat <- X   # estimated data
    Xhat[missing] <- Chat[missing] + mu1[missing]

    if (re_estimate){
      mu_hat <- matrix(rep(colMeans(Xhat), T), nrow = T, ncol = N, byrow = TRUE)
      Xhat_scaled <- Xhat - mu_hat

      reest <- fbi::apc(Xhat_scaled, kmax)
      out$Fhat <- reest$Fhat
      out$Lamhat <- reest$Lamhat
      out$Dhat <- reest$Dhat
      out$Chat <- reest$Chat + mu_hat
      data <- X
      data[missing] <- out$Chat[missing]
      out$data <- data

    } else {
      out$data <- Xhat
      out$Fhat <- Fhat
      out$Lamhat <- Lamhat
      out$Dhat <- Dhat
      out$Chat <- Chat + mu1
    }


  } else {
    # no demeaning or standardizing
    XT <- X[,goodN]
    XN <- X

    bnXT <- fbi::apc(XT, kmax)
    Fhat <- bnXT$Fhat
    Dhat <- diag(bnXT$d)
    Lamhat <- matrix(rep(0, N*(kmax+1)), nrow = N, ncol = kmax+1)

    for (i in 1:N) {
      goodTi <- missing[, i] == FALSE
      Fn1 <- Fhat[goodTi, ]
      lenTi <- sum(goodTi, na.rm = TRUE)
      Reg <- cbind(matrix(rep(1, lenTi), nrow = lenTi, ncol = 1), Fn1)
      P <- solve(t(Reg) %*% Reg) %*% t(Reg)
      Lamhat[i, ] <- P %*% XN[goodTi, i]
    }

    Chat <- cbind(matrix(rep(1, T), nrow = T, ncol = 1), Fhat) %*% t(Lamhat)
    Xhat <- X   # estimated data
    Xhat[missing] <- Chat[missing]

    if (re_estimate){
      reest <- fbi::apc(Xhat, kmax)
      out$Fhat <- reest$Fhat
      out$Lamhat <- reest$Lamhat
      out$Dhat <- reest$Dhat
      out$Chat <- reest$Chat
      data <- X
      data[missing] <- out$Chat[missing]
      out$data <- data

    } else {
      out$data <- Xhat
      out$Fhat <- Fhat
      out$Lamhat <- Lamhat[,2:(kmax+1)]
      out$Dhat <- Dhat
      out$Chat <- Chat
    }

  }

  out$ehat <- out$data - out$Chat

  class(out) <- c("list", "tp")
  return(out)

}