R/simulation.R
In mtarm: Bayesian Estimation of Multivariate Threshold Autoregressive Models

Documented in simtar

#'
#' @title Simulation of multivariate time series from a TAR model
#'
#' @description This function simulates multivariate time series generated by a user-specified
#' Threshold Autoregressive (TAR) model.
#'
#' @param n A positive integer specifying the length of the simulated output series.
#' @param k A positive integer specifying the dimension of the multivariate output series.
#' @param ars A list defining the TAR model structure, composed of four elements: the number
#' of regimes (\code{nregim}), the autoregressive order (\code{p}), and the maximum
#' lags of the exogenous (\code{q}) and threshold (\code{d}) series within each
#' regime. This object can be validated using the \code{ars()} function.
#' @param Intercept An optional logical indicating whether an intercept term is included
#' in each regime.
#' @param delay An optional non-negative integer specifying the delay parameter of the
#' threshold series. By default, \code{delay} is set to \code{0}.
#' @param trend An optional character string specifying the degree of deterministic time
#' trend to be included in each regime. Available options are a linear trend
#' (\code{"linear"}), a quadratic trend (\code{"quadratic"}), or no trend
#' (\code{"none"}). By default, \code{trend} is set to \code{"none"}.
#' @param nseason An optional integer, greater than or equal to 2, specifying the number of
#' seasonal periods. When provided, \code{nseason - 1} seasonal dummy variables are added to
#' the regressors within each regime. By default, \code{nseason} is set to \code{NULL},
#' thereby indicating that the TAR model has no seasonal effects.
#' @param thresholds A numeric vector of length \code{nregim-1} containing the threshold values,
#' sorted in ascending order.
#' @param parms A list with one sublist per regime. Each sublist contains two matrices: the
#' first matrix corresponds to the location parameters, and the second matrix
#' corresponds to the scale parameters of the model.
#' @param t.series A matrix containing the values of the threshold series.
#' @param ex.series A matrix containing the values of the multivariate exogenous series.
#' @param dist An optional character string specifying the multivariate distribution chosen
#' to model the noise process. Supported options include Gaussian
#' (\code{"Gaussian"}), Student-\eqn{t} (\code{"Student-t"}), Slash
#' (\code{"Slash"}), Symmetric Hyperbolic (\code{"Hyperbolic"}), Laplace
#' (\code{"Laplace"}), and Contaminated Normal (\code{"Contaminated normal"}).
#' By default, \code{dist} is set to \code{"Gaussian"}.
#' @param skewness An optional numeric vector specifying the skewness parameters of the
#' noise distribution, when applicable.
#' @param extra An optional value specifying the extra parameter of the noise distribution,
#' when required.
#' @param setar An optional positive integer indicating which component of the output
#' series is used as the threshold variable. By default, \code{setar} is set
#' to \code{NULL}, indicating that the model is not of SETAR type.
#' @param Verbose An optional logical indicating whether a description of the simulated
#'  TAR model should be printed. By default, \code{Verbose} is set to \code{TRUE}.
#' @return A \code{data.frame} containing the simulated multivariate output series, and, if
#' specified, the threshold series and multivariate exogenous series.
#' @references Vanegas, L.H. and Calderón, S.A. and Rondón, L.M. (2025) Bayesian estimation of a multivariate tar model when the
#'             noise process distribution belongs to the class of gaussian variance mixtures. International Journal
#'             of Forecasting.
#' @export simtar
#' @examples
#' \donttest{
#' ###### Simulation of a trivariate TAR model with two regimes
#' n <- 2000
#' k <- 3
#' myars <- ars(nregim=2,p=c(1,2))
#' Z <- as.matrix(arima.sim(n=n+max(myars$p),list(ar=c(0.5))))
#' probs <- sort((0.6 + runif(myars$nregim-1)*0.8)*c(1:(myars$nregim-1))/myars$nregim)
#' dist <- "Student-t"; extra <- 6
#' parms <- list()
#' for(j in 1:myars$nregim){
#'     np <- 1 + myars$p[j]*k
#'     parms[[j]] <- list()
#'     parms[[j]]$location <- c(ifelse(runif(np*k)<=0.5,1,-1)*rbeta(np*k,shape1=4,shape2=16))
#'     parms[[j]]$location <- matrix(parms[[j]]$location,np,k)
#'     parms[[j]]$scale <- rgamma(k,shape=1,scale=1)*diag(k)
#' }
#' thresholds <- quantile(Z,probs=probs)
#' out1 <- simtar(n=n, k=k, ars=myars, parms=parms, thresholds=thresholds,
#'                t.series=Z, dist=dist, extra=extra)
#' str(out1)
#'
#' fit1 <- mtar(~ Y1 + Y2 + Y3 | Z, data=out1, ars=myars, dist=dist,
#'              n.burn=2000, n.sim=3000, n.thin=2)
#' summary(fit1)
#'
#' ###### Simulation of a trivariate VAR model
#' n <- 2000
#' k <- 3
#' myars <- ars(nregim=1,p=2)
#' dist <- "Slash"; extra <- 2
#' parms <- list()
#' for(j in 1:myars$nregim){
#'     np <- 1 + myars$p[j]*k
#'     parms[[j]] <- list()
#'     parms[[j]]$location <- c(ifelse(runif(np*k)<=0.5,1,-1)*rbeta(np*k,shape1=4,shape2=16))
#'     parms[[j]]$location <- matrix(parms[[j]]$location,np,k)
#'     parms[[j]]$scale <- rgamma(k,shape=1,scale=1)*diag(k)
#' }
#' out2 <- simtar(n=n, k=k, ars=myars, parms=parms, dist=dist, extra=extra)
#' str(out2)
#'
#' fit2 <- mtar(~ Y1 + Y2 + Y3, data=out2, ars=myars, dist=dist,
#'              n.burn=2000, n.sim=3000, n.thin=2)
#' summary(fit2)
#'
###### Simulation of a trivariate SETAR model with two regimes
#' n <- 5000
#' k <- 3
#' myars <- ars(nregim=2,p=c(1,2))
#' dist <- "Laplace"
#' parms <- list()
#' for(j in 1:myars$nregim){
#'     np <- 1 + myars$p[j]*k
#'     parms[[j]] <- list()
#'     parms[[j]]$location <- c(ifelse(runif(np*k)<=0.5,1,-1)*rbeta(np*k,shape1=4,shape2=16))
#'     parms[[j]]$location <- matrix(parms[[j]]$location,np,k)
#'     parms[[j]]$scale <- rgamma(k,shape=1,scale=1)*diag(k)
#' }
#' out3 <- simtar(n=n, k=k, ars=myars, parms=parms, delay=2,
#'                thresholds=-1, dist=dist, setar=2)
#' str(out3)
#'
#' fit3 <- mtar(~ Y1 + Y2 + Y3, data=out3, ars=myars, dist=dist,
#'              n.burn=2000, n.sim=3000, n.thin=2, setar=2)
#' summary(fit3)
#'
#' }
#'
simtar <- function(n,k=2,ars=ars(),Intercept=TRUE,trend=c("none","linear","quadratic"),nseason=NULL,
                   parms,delay=0,thresholds=NULL,t.series=NULL,ex.series=NULL,dist=c("Gaussian",
                   "Student-t","Hyperbolic","Laplace","Slash","Contaminated normal","Skew-Student-t",
                   "Skew-normal"),skewness=NULL,extra=NULL,setar=NULL,Verbose=TRUE){
  # Match the selected distribution and trend options
  dist <- match.arg(dist)
  trend <- match.arg(trend)
  # Check logical arguments for validity and correct length
  if(!is.logical(Intercept) | length(Intercept)!= 1) stop("'Intercept' must be a single logical value",call.=FALSE)
  if(!is.logical(Verbose) | length(Verbose)!= 1) stop("'Verbose' must be a single logical value",call.=FALSE)
  # Check validity of the number of components of the output series
  if(k<=0 | k!=floor(k)) stop("The value of the argument 'k' must be a positive integer!",call.=FALSE)
  # Check validity of the sample size n
  if(n<=0 | n!=floor(n)) stop("The value of the argument 'n' must be a positive integer!",call.=FALSE)
  # Validate seasonal component
  if(!is.null(nseason)){
     if(nseason!=floor(nseason) | nseason<2) stop("The value of the argument 'nseason' must be an integer higher than 1",call.=FALSE)
  }
  # Seasonal effects require an intercept
  if(!is.null(nseason)) if(!Intercept) stop("The argument 'nseason' is not applicable when 'Intercept=FALSE'!",call.=FALSE)
  # Number of deterministic components: intercept, trend, seasonality
  deterministic <- Intercept + switch(trend,"linear"=1,"quadratic"=2) + ifelse(is.null(nseason),0,nseason-1)
  regim <- ars$nregim
  # Checks specific to TAR/SETAR models
  if(regim > 1){
     if(delay<0 | delay!=floor(delay)) stop("The value of the argument 'delay' must be a non-negative integer!",call.=FALSE)
     if(delay==0 & !is.null(setar)) stop("For SETAR models the value of the argument 'delay' must be a positive integer!",call.=FALSE)
     if(is.null(t.series) & is.null(setar)) stop("For TAR models the argument 't.series' is required!",call.=FALSE)
     if(is.null(thresholds)) stop("For TAR and SETAR models the argument 'thresholds' is required!",call.=FALSE) else thresholds <- sort(thresholds)
     if(length(thresholds)!=regim-1)
        stop(paste0("The length of the argument 'thresholds' must be ",regim-1,", and the length of the argument supplied is ",length(thresholds)),call.=FALSE)
  }
  # Maximum required lag
  ps <- max(ars$p,ars$q,ars$d,delay)
  # Validate threshold variable length for TAR models
  if(regim > 1 & is.null(setar)){
     if(length(t.series)!=n+ps)
        stop(paste0("The length of the argument 't.series' must be ",n+ps,", and the length of the argument supplied is ",length(t.series)),call.=FALSE)
  }
  # Validate exogenous series for ARX terms
  if(max(ars$q)>0){
     if(is.null(ex.series)) stop("An exogenous series is required!",call.=FALSE)
     if(nrow(ex.series)!=n+ps)
        stop(paste0("The number of rows of 'ex.series' must be ",n+ps,", and the number of rows of the argument supplied is ",nrow(ex.series)),call.=FALSE)
     r <- ncol(ex.series)
  }else r <- 0
  # Validate extra distribution-specific parameters
  if(dist %in% c("Student-t","Hyperbolic","Slash","Contaminated normal","Skew-Student-t")){
     if(is.null(extra))
        stop("For 'Student-t', 'Hyperbolic', 'Slash', 'Contaminated normal' and 'Skew-Student-t' distributions an extra parameter value must be specified!",call.=FALSE)
     if(dist %in% c("Student-t","Skew-Student-t","Hyperbolic","Slash") & extra[1]<=0)
        stop("For 'Student-t', 'Skew-Student-t', 'Hyperbolic', and 'Slash' distributions the extra parameter value must be positive!",call.=FALSE)
     if(dist=="Contaminated normal" & (length(extra)!=2 | any(extra<=0) | any(extra>=1)))
        stop("For 'Contaminated normal' distribution the extra parameter must be a 2-dimensional vector with values in the interval (0,1)!",call.=FALSE)
  }
  # Validate skewness parameters for skewed distributions
  if(dist %in% c("Skew-normal","Skew-Student-t") & (is.null(skewness) | length(skewness)!=k)){
     stop(paste0("For 'Skew-normal' and 'Skew-Student-t' distributions an ",k,"-dimensional skewness parameter must be specified!"),call.=FALSE)
     skewness <- matrix(skewness,k,1)
  }
  # Time index used for trend construction
  t.ids <- matrix(seq(ps+1,ps+n),n,1)
  # Construct deterministic design matrix (intercept and trend)
  name0 <- detnam <- "Intercept"
  switch(trend,
         "none"={Xd <- matrix(1,n,1)},
         "linear"={Xd <- matrix(cbind(1,t.ids),n,2)
                   name0 <- c(name0,"linear")
                   detnam <- c(detnam,"a linear time trend")},
         "quadratic"={Xd <- matrix(cbind(1,t.ids,t.ids^2),n,3)
                 name0 <- c(name0,"linear","quadratic")
                 detnam <- c(detnam,"a quadratic time trend")})
  # Add seasonal dummies if required
  if(!is.null(nseason)){
     Itil <- matrix(diag(nseason)[,-1],nseason,nseason-1)
     Xd <- cbind(Xd,t(matrix(apply(t.ids,1,function(x) Itil[x%%nseason + 1,]),nseason-1,n)))
     name0 <- c(name0,paste0("season.",2:nseason))
     detnam <- c(detnam,paste0(nseason," seasonal periods"))
  }
  # Remove intercept if not requested
  if(!Intercept){
     Xd <- as.matrix(Xd[,-1])
     name0 <- name0[-1]
     detnam <- detnam[-1]
  }
  if(Verbose){
     cat("\n\nSample size          :",n," time points")
     cat("\nOutput Series        :",paste0("Y",1:k,collapse = " | "))
     if(ars$nregim > 1) cat("\nThreshold Series     :",paste0(ifelse(is.null(setar),"Z",paste0("Y",setar))," with a delay equal to ",delay))
     if(max(ars$q)>0) cat("\nExogenous Series     :",paste0("X",1:r,collapse=" | "))
     cat("\nError Distribution   :",switch(dist,
                                           "Gaussian"=,"Laplace"=dist,
                                           "Student-t"=,"Slash"=,"Hyperbolic"=,"Contaminated normal"=paste0(dist,"(",paste0(extra,collapse=","),")"),
                                           "Skew-normal"=paste0(dist,"((",paste0(skewness,collapse=","),"))"),
                                           "Skew-Student-t"=paste0("Skew-Student-t(",extra,",",paste0("(",paste0(skewness,collapse=","),")"),")")
                                           ))
     cat("\nNumber of regimes    :",ars$nregim)
     cat("\nDeterministics       :",ifelse(length(detnam)>1,paste(detnam,collapse=" + "),detnam))
     if(min(ars$p)==max(ars$p)) cat("\nAutoregressive orders:",paste0(ars$p[1]," in each regime"))
     else cat("\nAutoregressive orders:",paste(ars$p,collapse=", "))
     if(max(ars$q)>0){
        if(min(ars$q)==max(ars$q)) cat("\nMaximum lags for ES  :",paste0(ars$q[1]," in each regime"))
        else cat("\nMaximum lags for ES  :",paste(ars$q,collapse=", "))
     }
     if(max(ars$d)>0){
        if(min(ars$d)==max(ars$d)) cat("\nMaximum lags for TS  :",paste0(ars$d[1]," in each regime"))
        else cat("\nMaximum lags for TS  :",paste(ars$d,collapse=", "))
     }
     if(ars$nregim > 1){
       thresholds1 <- paste0(c("(-Inf",paste0("(",thresholds)),",",c(paste0(thresholds,"]"),"Inf)"))
       d <- data.frame(thresholds1)
       rownames(d) <- paste("Regime",1:nrow(d))
       colnames(d) <- " "
       cat("\n\nThresholds")
       print(d)
     }
     cat("\n")
  }
  # Validation of parameter matrices dimensions and definite-positiveness of scale matrices
  for(i in 1:regim){
      parms2 <- vector()
      name <- name0
      count <- 0
      if(ncol(parms[[i]]$location)!=k | nrow(parms[[i]]$location)!=(ncol(Xd)+ars$p[i]*k+ars$q[i]*r+ars$d[i]))
         stop(paste0("The dimension of the location matrix in regime ",i," must be ",(deterministic+ars$p[i]*k+ars$q[i]*r+ars$d[i]),"X",k,"!"),call.=FALSE)
      if(ncol(Xd)>0){
         parms2 <- cbind(parms2,t(matrix(parms[[i]]$location[1:ncol(Xd),],ncol(Xd),k)),NA)
         name <- c(name,"")
         count <- count + ncol(Xd)
      }
      for(ii in 1:ars$p[i]){
          parms2 <- cbind(parms2,t(parms[[i]]$location[(count+1):(count+k),]),NA)
          name <- c(name,c(paste0("phi_",ii),rep("",k-1)),"")
          count <- count + k
      }
      if(ars$q[i]>0){
         for(ii in 1:ars$q[i]){
             parms2 <- cbind(parms2,t(parms[[i]]$location[(count+1):(count+r),]),NA)
             name <- c(name,c(paste0("beta_",ii),rep("",r-1)),"")
             count <- count + r
         }
      }
      if(ars$d[i]>0){
         for(ii in 1:ars$d[i]){
             parms2 <- cbind(parms2,t(parms[[i]]$location[(count+1):(count+1),]),NA)
             name <- c(name,paste0("delta_",ii),"")
             count <- count + 1
         }
      }
      if(Verbose){
         colnames(parms2) <- name
         rownames(parms2) <- paste0("Y",1:k)
         cat("\nRegime ",i,":\n")
         cat("\nAutoregressive coefficients\n")
         print(parms2,na.print="|")
      }
      parms[[i]]$scale2  <- try(chol(parms[[i]]$scale),silent=TRUE)
      if(!is.matrix(parms[[i]]$scale2)) stop(paste0("The scale matrix in regime ",i," is not positive definite!"),call.=FALSE)
      if(Verbose){
         colnames(parms[[i]]$scale) <- rownames(parms[[i]]$scale) <- rownames(parms2)
         cat("\nScale parameter\n")
         print(parms[[i]]$scale)
      }
  }
  # Initialize the simulated series
  myseries <- matrix(rnorm((n+ps)*k),n+ps,k)
  regimenis <- matrix(NA,n+ps,1)
  # Main simulation loop
  for(i in 1:n){
      current <- ps + i
      # Determine the current regime
      if(regim > 1){
         if(!is.null(setar)) t.series.c <- myseries[current-delay,setar]
         else t.series.c <- t.series[current-delay]
         regimeni <- cut(t.series.c,breaks=c(-Inf,thresholds,Inf),labels=1:regim)
      }else regimeni <- 1
      regimenis[current] <- regimeni
      # Build the regressor vector
      X <-  Xd[i,]
      for(j in 1:ars$p[regimeni]) X <- c(X,myseries[current-j,])
      if(ars$q[regimeni] > 0) for(j in 1:ars$q[regimeni]) X <- c(X,ex.series[current-j,])
      if(ars$d[regimeni] > 0) for(j in 1:ars$d[regimeni]) if(!is.null(setar)) X <- c(X,myseries[current-j,setar]) else X <- c(X,t.series[current-j])
      # Compute conditional mean
      Theta <- parms[[regimeni]]$location
      mu <- colSums(matrix(X,nrow(Theta),ncol(Theta))*Theta)
      # Generate latent scale mixture variable
      u <- switch(dist,
                  "Gaussian"=,
                  "Skew-normal"=1,
                  "Student-t"=,
                  "Skew-Student-t"={1/rgamma(1,shape=extra/2,rate=extra/2)},
                  "Slash"={1/rbeta(1,shape1=extra/2,shape2=1)},
                  "Contaminated normal"={ifelse(runif(1)<=extra[1],1/extra[2],1)},
                  "Laplace"={rexp(1,rate=1/8)},
                  "Hyperbolic"={rgig(n=1,lambda=1,chi=1,psi=extra^2)})
      # Skewness offset if applicable
      if(dist %in% c("Skew-normal","Skew-Student-t")) offset <- sqrt(u)*matrix(skewness*qnorm(0.5 + runif(k)*0.5),k,1)
      else offset <- matrix(0,k,1)
      # Generate the observation
      myseries[current,] <- crossprod(parms[[regimeni]]$scale2,matrix(sqrt(u)*rnorm(k),k,1)) + matrix(mu,k,1) + offset
  }
  # Remove auxiliary Cholesky factors
  for(i in 1:regim) parms[[i]]$scale2 <- NULL
  # Assemble the output data frame
  datos <- data.frame(cbind(myseries,regimenis))
  colnames(datos) <- c(paste0("Y",1:k),"Regime")
  # Add threshold variable if it is present
  if(regim > 1 & is.null(setar)) datos <- data.frame(datos,Z=t.series) else datos <- data.frame(datos)
  # Add exogenous variables if they are present
  if(max(ars$q)>0){
     colnames(ex.series) <- paste0("X",1:r)
     datos <- data.frame(datos,ex.series)
  }
  # Return the simulated dataset
  return(datos)
}
#'
#'