dffm: Approximate Functional Factor Model

Documented in DNS.forecast fts.VARforecast

#' Functional Time Series Forecasting using VAR Models
#'
#' This function implements Vector Autoregression (VAR) forecasting for functional time series using preprocessed data from "fpcaobj" objects generated by fda.preprocess (FPC) or fts.cumAC (cumulative autocovariances). It allows for both in-sample fit evaluation and out-of-sample forecasting with the option to specify the model as individual autoregressive (AR) processes or a VAR process.
#'
#' @param fdaobj An object of class "fdaobj", typically the output from `fda.preprocess` or `fts.cumAC`.
#' @param K Integer specifying the number of factors to consider in the forecast model.
#' @param p Integer defining the number of lags to include in the VAR model.
#' @param AR A logical flag indicating whether to model individual AR(p) processes (TRUE) for each factor, or a collective VAR(p) model (FALSE). Default is FALSE.
#' @param start Optional integer specifying the start index for a restricted sample period.
#' @param end Optional integer specifying the end index for a restricted sample period.
#' @param h Forecast horizon. If h=0, an in-sample fit is returned. If h > 0, an out-of-sample h-step forecast is generated. Default is h=1.
#'
#' @return
#' The function returns different outputs based on the value of h.
#' If h > 0: Returns the h-step ahead forecasted functional time series curve.
#' If h = 0: Returns a list containing:
#' \item{curve.predict}{The in-sample predicted curves.}
#' \item{factors.predict}{The in-sample predicted factors.}
#' \item{factors}{The actual factors from the input object.}
#' \item{MSE}{The Mean Squared Error of the in-sample predicted curves.}
#' \item{VARmatrix}{The estimated VAR matrix.}
#'
#' @export
#' @examples
#' # Load, preprocess data, and perform in-sample prediction
#' fed = load.fed()
#' fdaobj = fda.preprocess(data = fed)
#' in_sample_fit = fts.VARforecast(fdaobj, h=0)
#' # Perform 1-step ahead prediction
#' one_step_ahead = fts.VARforecast(fdaobj, h=1)
fts.VARforecast = function(fdaobj, K = 3, p = 1, AR = FALSE, start = NULL, end = NULL, h = 1){
  ## #################################################################################
  ## fdaobj must be of class "FPCA.obj" or "cumAC.obj"
  ## K = number of factors
  ## p = number of lags
  ## AR = TRUE if individual AR(p) processes instead of a VAR(p) should be estimated
  ## start = optional start index for a restricted sample
  ## end = optional end index for a restricted sample
  ## if h=0: in-sample fit is returned, h > 0: out-of-sample h-step forecast is returned
  ## #################################################################################

  ## check input
  if(!(class(fdaobj) == "fdaobj")){
    stop("Please insert an object of class fdaobj, typically generated by the function fda.preprocess or fts.cumAC.")
  }
  loadings = fdaobj$eigenfunctions[,1:K,drop=F]
  factors = fdaobj$scores.centered[,1:K,drop=F]
  n = dim(fdaobj$scores.centered)[1]

  ## check start and end, restrict factors to specified period
  if(is.null(start)){
    start = 1
  } else {
    if((start < 1) || (start > n)){
      stop("Please specify valid start.")
    }
  }
  if(is.null(end)){
    end = n
  } else {
    if((end < 1) || (end > n)){
      stop("Please specify valid end")
    }
  }
  if(is.ts(factors)){
    factors = window(factors, start = time(factors)[start], end = time(factors)[end])
  } else {
    factors = window(factors, start = start, end = end)
  }

  ## Estimate AR or VAR model with intercept
  if(AR == TRUE){
    ## AR(p) model with intercept
    get.ARcoefficients = function(l){
      AR.LHS = embed(factors[,l],p+1)[,1]
      AR.RHS = embed(factors[,l],p+1)[,-1,drop=F]
      lm(AR.LHS~AR.RHS-1)
    }
    AR.fits = lapply(1:K, get.ARcoefficients)
    ## in-sample predicted factors
    factors.predict = sapply(AR.fits, fitted.values)
    ## coefficients in VAR matrix representation
    coef = matrix(sapply(AR.fits, coefficients), nrow=p)
    # intercept = coef[1,]
    A = list()
    for(i in 1:p) A[[i]] = diag(coef[i,], nrow = K)
    VARmatrix = do.call(cbind, A)
  } else {
    ## VAR(p) model with intercept
    VAR.LHS = embed(factors, p+1)[,1:K]
    VAR.RHS = embed(factors, p+1)[,-(1:K)]
    VAR.fit = lm(VAR.LHS ~ VAR.RHS-1)
    ## in-sample predicted factors
    factors.predict = matrix(fitted.values(VAR.fit), nrow = dim(factors)[1]-p)
    ## coefficients
    VARmatrix = t(matrix(VAR.fit$coefficients, ncol = K))
  }
  ## curve prediction
  curve.predict = factors.predict %*% t(loadings) + matrix(rep(fdaobj$meanfunction,dim(factors.predict)[1]), nrow = dim(factors.predict)[1], byrow = TRUE)
  ## assign column names and time series structure
  colnames(factors.predict) = paste0("F",1:K)
  colnames(VARmatrix) = paste0("VARp",rep(1:p, each = K),"K",rep(1:K,p))
  if(is.ts(factors)){
    factors.predict = ts(factors.predict, start = time(factors)[p+1], frequency = frequency(factors))
    curve.predict = ts(curve.predict, start = time(factors)[p+1], frequency = frequency(factors))
  }
  ## in-sample prediction errors
  if(is.ts(fdaobj$densedata)){
    inputdata = window(fdaobj$densedata, start = time(fdaobj$densedata)[start+p], end=time(fdaobj$densedata)[end])
    factors = ts(factors, start = )
  } else {
    inputdata = window(fdaobj$densedata, start = start+p, end=end)
  }
  prederrors = inputdata-curve.predict
  ## binwidth for rectangular numerical integration based on workinggrid
  binwidth = (rev(fdaobj$workinggrid)[1]-(fdaobj$workinggrid[1]))/length(fdaobj$workinggrid)
  ## L2-based MSE of prediction errors
  MSE = mean(rowSums(prederrors^2)*binwidth)
  ## return
  if(h == 0){
    out = list(
      "curve.predict" = curve.predict,
      "factors.predict" = factors.predict,
      "factors" = factors,
      "MSE" = MSE,
      "VARmatrix" = VARmatrix
    )
  } else {
    out = fts.oosForecast(VARmatrix, factors, loadings, fdaobj$meanfunction, h=h)
  }
  return(out)
}


fts.oosForecast = function(VARmatrix, factors, loadings, meanfunction, h=1){
  p = dim(VARmatrix)[2]/dim(VARmatrix)[1]
  for(i in 1:h){
    factors = rbind(factors, tail(embed(factors,p),1) %*% t(VARmatrix))
  }
  forecast = tail(factors,1) %*% t(loadings) + meanfunction
}




#' Dynamic Nelson-Siegel Forecasting for Yield Curves
#'
#' `DNS.forecast` implements the Dynamic Nelson-Siegel (DNS) model of Diebold and Li (2006) to forecast yield curves. The function accommodates both Vector Autoregression (VAR) and Autoregressive (AR) models. This function is designed to work with preprocessed functional data objects (`fdaobj`).
#'
#' @param fdaobj An object of class `fdaobj`.
#' @param p Integer defining the number of lags to include in the VAR model.
#' @param AR A logical flag indicating whether to model individual AR(p) processes (TRUE) for each factor, or a collective VAR(p) model (FALSE). Default is FALSE.
#' @param obsdata A logical flag indicating whether to use the raw data (TRUE) or the preprocessed data (FALSE) from `fdaobj`. Default is FALSE.
#' @param start Optional integer specifying the start index for a restricted sample period.
#' @param end Optional integer specifying the end index for a restricted sample period.
#' @param h An integer specifying the forecast horizon. If h=0, an in-sample fit is returned. For h > 0, an out-of-sample h-step forecast is generated. Default is h=1.
#'
#' @return
#' The function returns different outputs based on the value of h.
#' If h > 0: Returns the h-step ahead forecasted curve.
#' If h = 0: Returns a list containing:
#' \item{curve.predict}{The in-sample predicted curves.}
#' \item{factors.predict}{The in-sample predicted factors.}
#' \item{NSloadings}{The Nelson Siegel loadings.}
#' \item{betas}{The estimated betas (factors) in the Nelson-Siegel model.}
#' \item{VARmatrix}{The estimated VAR matrix.}
#'
#' @export
#' @references
#' * Diebold, F. X., & Li, C. (2006). Forecasting the term structure of government bond yields. Journal of Econometrics, 130(2), 337-364.

#' @examples
#' # Load, preprocess data, and perform in-sample prediction
#' fed = load.fed()
#' fdaobj = fda.preprocess(data = fed)
#' in_sample_fit = DNS.forecast(fdaobj, h=0)
#'
#' # Perform 1-step ahead prediction
#' one_step_ahead = DNS.forecast(fdaobj, h=1)
DNS.forecast = function(fdaobj, p=1, AR=FALSE, obsdata = FALSE, start = NULL, end = NULL, h = 1){
  ## check input
  if(!(class(fdaobj) == "fdaobj")){
    stop("Please insert an object of class fdaobj, typically generated by the function fda.preprocess or fts.cumAC.")
  }
  ## if h=0: in-sample fit is returned, h > 0: out-of-sample h-step forecast is returned
  if(obsdata == TRUE){
    data = fdaobj$raw.data
    maturities = fdaobj$observationgrid
  } else {
    data = fdaobj$densedata
    maturities = fdaobj$workinggrid
  }
  ## restrict data to specified period
  if(is.null(start)) start = 1
  if(is.null(end)) end = dim(fdaobj$densedata)[1]
  Tstart = time(data)[start]
  Tend = time(data)[end]
  data = window(data, start = Tstart, end = Tend)

  NS = matrix(nrow=length(maturities), ncol=3)
  lambda = 0.0609
  for (i in 1:length(maturities)){
    NS[i, 1] = 1
    NS[i, 2] = (1-exp(-lambda*maturities[i]))/(lambda * maturities[i])
    NS[i, 3] = (1-exp(-lambda*maturities[i]))/(lambda * maturities[i]) - exp(- lambda * maturities[i])}
  beta = matrix(nrow=dim(data)[1], ncol = 3)
  for (t in 1:dim(data)[1]){
    beta[t,] = lm(data[t,] ~ NS - 1)$coefficients
  }
  if(AR==TRUE){
    ## AR(p) model with intercept
    get.ARcoefficients = function(l){
      AR.LHS = embed(beta[,l],p+1)[,1]
      AR.RHS = embed(beta[,l],p+1)[,-1,drop=F]
      lm(AR.LHS~AR.RHS)
    }
    AR.fits = lapply(1:3, get.ARcoefficients)
    ## in-sample predicted factors
    factors.predict = sapply(AR.fits, fitted.values)
    ## coefficients in VAR matrix representation
    coef = sapply(AR.fits, coefficients)
    intercept = coef[1,]
    A = list()
    for(i in 1:p) A[[i]] = diag(coef[i+1,], nrow = 3)
    VARmatrix = do.call(cbind, A)
  } else {
    ## VAR(p) model with intercept
    VAR.LHS = embed(beta, p+1)[,1:3]
    VAR.RHS = embed(beta, p+1)[,-(1:3)]
    VAR.fit = lm(VAR.LHS ~ VAR.RHS)
    ## in-sample predicted factors
    factors.predict = matrix(fitted.values(VAR.fit), nrow = dim(beta)[1]-p)
    ## coefficients
    intercept = matrix(VAR.fit$coefficients, ncol = 3)[1,]
    VARmatrix = t(matrix(VAR.fit$coefficients, ncol = 3)[-1,])
  }
  ## curve prediction
  curve.predict = factors.predict %*% t(NS)
  ## assign column names and time series structure
  colnames(factors.predict) = paste0("F",1:3)
  colnames(VARmatrix) = paste0("VARp",rep(1:p, each = 3),"K",rep(1:3,p))
  factors.predict = ts(factors.predict, start = time(data)[p+1], frequency = frequency(data))
  curve.predict = ts(curve.predict, start = time(data)[p+1], frequency = frequency(data))
  betas = ts(beta, start = time(data)[p+1], frequency = frequency(data))
  ## return
  if(h == 0){
    out = list(
      "curve.predict" = curve.predict,
      "factors.predict" = factors.predict,
      "NSloadings" = NS,
      "betas" = betas,
      "intercept" = intercept,
      "VARmatrix" = VARmatrix
    )
  } else {
    out = DNS.oosForecast(VARmatrix, intercept, betas, NS, h=h)
  }
  return(out)
}

DNS.oosForecast = function(VARmatrix, intercept, factors, loadings, h=1){
  p = dim(VARmatrix)[2]/dim(VARmatrix)[1]
  for(i in 1:h){
    factors = rbind(factors, intercept + tail(embed(factors,p),1) %*% t(VARmatrix))
  }
  forecast = tail(factors,1) %*% t(loadings)
}