R/model-arima.R

Defines functions zeligARMAbreakforecaster zeligARMAlongrun zeligARMAnextstep simacf zeligArimaWrapper

Documented in simacf zeligArimaWrapper zeligARMAbreakforecaster zeligARMAlongrun zeligARMAnextstep

#' Autoregressive and Moving-Average Models with Integration for Time-Series Data
#'
#' Warning: \code{summary} does not work with timeseries models after
#' simulation.
#'
#' @param formula a symbolic representation of the model to be
#'   estimated, in the form \code{y ~ x1 + x2}, where \code{y} is the
#'   dependent variable and \code{x1} and \code{x2} are the explanatory
#'   variables, and \code{y}, \code{x1}, and \code{x2} are contained in the
#'   same dataset. (You may include more than two explanatory variables,
#'   of course.) The \code{+} symbol means ``inclusion'' not
#'   ``addition.'' You may also include interaction terms and main
#'   effects in the form \code{x1*x2} without computing them in prior
#'   steps; \code{I(x1*x2)} to include only the interaction term and
#'   exclude the main effects; and quadratic terms in the form
#'   \code{I(x1^2)}.
#' @param model the name of a statistical model to estimate.
#'   For a list of supported models and their documentation see:
#'   \url{http://docs.zeligproject.org/articles/}.
#' @param data the name of a data frame containing the variables
#'   referenced in the formula or a list of multiply imputed data frames
#'   each having the same variable names and row numbers (created by
#'   \code{Amelia} or \code{\link{to_zelig_mi}}).
#' @param ... additional arguments passed to \code{zelig},
#'   relevant for the model to be estimated.
#' @param by a factor variable contained in \code{data}. If supplied,
#'   \code{zelig} will subset
#'   the data frame based on the levels in the \code{by} variable, and
#'   estimate a model for each subset. This can save a considerable amount of
#'   effort. For example, to run the same model on all fifty states, you could
#'   use: \code{z.out <- zelig(y ~ x1 + x2, data = mydata, model = 'ls',
#'   by = 'state')} You may also use \code{by} to run models using MatchIt
#'   subclasses.
#' @param cite If is set to 'TRUE' (default), the model citation will be printed
#'   to the console.
#' @param ts The name of the variable containing the time indicator. This should be passed in as
#'     a string. If this variable is not provided, Zelig will assume that the data is already
#'     ordered by time.
#' @param cs Name of a variable that denotes the cross-sectional element of the data, for example,
#'  country name in a dataset with time-series across different countries. As a variable name,
#'  this should be in quotes. If this is not provided, Zelig will assume that all observations
#'  come from the same unit over time, and should be pooled, but if provided, individual models will
#'  be run in each cross-section.
#'  If \code{cs} is given as an argument, \code{ts} must also be provided. Additionally, \code{by}
#'  must be \code{NULL}.
#' @param order A vector of length 3 passed in as \code{c(p,d,q)} where p represents the order of the
#'     autoregressive model, d represents the number of differences taken in the model, and q represents
#'     the order of the moving average model.
#' @details
#' Currently only the Reference class syntax for time series. This model does not accept
#' Bootstraps or weights.
#'
#'
#' @return Depending on the class of model selected, \code{zelig} will return
#'   an object with elements including \code{coefficients}, \code{residuals},
#'   and \code{formula} which may be summarized using
#'   \code{summary(z.out)} or individually extracted using, for example,
#'   \code{coef(z.out)}. See
#'   \url{http://docs.zeligproject.org/articles/getters.html} for a list of
#'   functions to extract model components. You can also extract whole fitted
#'   model objects using \code{\link{from_zelig_model}}.
#' @examples
#' data(seatshare)
#' subset <- seatshare[seatshare$country == "UNITED KINGDOM",]
#' ts.out <- zarima$new()
#' ts.out$zelig(unemp ~ leftseat, order = c(1, 0, 1), data = subset)
#'
#' # Set fitted values and simulate quantities of interest
#' ts.out$setx(leftseat = 0.75)
#' ts.out$setx1(leftseat = 0.25)
#' ts.out$sim()
#'
#' @seealso Vignette: \url{http://docs.zeligproject.org/articles/zelig_arima.html}
#' @import methods
#' @export Zelig-arima
#' @exportClass Zelig-arima
#'
#' @include model-zelig.R
#' @include model-timeseries.R

zarima <- setRefClass("Zelig-arima",
                      contains = "Zelig-timeseries")

zarima$methods(
    initialize = function() {
        callSuper()
        .self$name <- "arima"
        .self$link <- "identity"
        #.self$family <- "gaussian"
        .self$fn <- quote(zeligArimaWrapper)
        #.self$linkinv <- eval(call(.self$family, .self$link))$linkinv
        .self$description <- "Autoregressive Moving-Average Models for Time-Series Data"
        # JSON
        .self$outcome <- "continuous"
        .self$wrapper <- "timeseries"
    }
)

zarima$methods(
  qi = function(simparam, mm, mm1=NULL){

    myorder <- eval(.self$zelig.call$order)
    mycoef <- coef(.self$zelig.out$z.out[[1]])
    sd <- sqrt(.self$zelig.out$z.out[[1]]$sigma2)

    ## Check mm and mm1.  Particularly for issues surrounding intercept.
    rebuildMM <- function(simparam, x){
      xnames <- colnames(x)
      snames <- colnames(simparam)
      ## parameter "intercept" can be spelt "(Intercept)"" in model matrix
      if("(Intercept)" %in% xnames){
        flag <- xnames == "(Intercept)"
        xnames[flag] <- "intercept"
        colnames(x)[flag]<- "intercept" # this is equivalent to: colnames(x) <- xnames
      }
      ## "intercept" can be included in model matrix when not an estimated parameter (for example in models with integration)
      xnamesflag <- xnames %in% snames
      x <- x[, xnamesflag, drop=FALSE]
      return(x)
    }

    mm <- rebuildMM(simparam, mm)
    if(!is.null(mm1)){
      mm1 <- rebuildMM(simparam, mm1)
    }


    ## Make ACF
    acf <- simacf(coef=mycoef, order=myorder, params=simparam, alpha=0.05)
    acf.length <- length(acf$expected.acf)
    t1 <- 2*acf.length
    t2 <- 2*acf.length


    if((.self$bsetx1)||(.self$bsetx && !.self$bsetx1)){             # could also check if mm1 is NULL
      # zeligARMAbreakforecaster() calls zeligARMAlongrun() internally
      #  return(y.shock = yseries, y.innovation = y.innov, ev.shock = evseries, ev.innovation = ev.innov)
      yseries <- zeligARMAbreakforecaster(y.init=NULL, x=mm, x1=mm1, simparam=simparam, order=myorder, sd=sd, t1=t1, t2=t2)
      # maybe check nrow(yseries)=t1 + t2 ?

      pv <- yseries$y.innovation[t1,]                # could use either $innovation or $shock here
      pv.shortrun <- yseries$y.innovation[t1+1,]     # could use either $innovation or $shock here
      pv.longrun <- yseries$y.innovation[t1+t2,]     # must use $innovation here

      # Remember, these are expectations using the same simparam in each expectation.
      ev <- yseries$ev.innovation[t1,]
      ev.shortrun <- yseries$ev.innovation[t1+1,]
      ev.longrun <- yseries$ev.innovation[t1+t2,]

      return(list(acf = acf, ev = ev, pv = pv, pv.shortrun=pv.shortrun, pv.longrun=pv.longrun, ev.shortrun=ev.shortrun, ev.longrun=ev.longrun,
                pvseries.shock=yseries$y.shock, pvseries.innovation=yseries$y.innovation,
                evseries.shock=yseries$ev.shock, evseries.innovation=yseries$ev.innovation))

    }else{
      # just call zeligARMAlongrun()
      yseries <- zeligARMAlongrun(y.init=NULL, x=mm, simparam=simparam, order=myorder, sd=sd)
      pv <- yseries$y[1,]   # zeligARMAlongrun returns the series in reverse order to zeligARMAbreakforecaster
      # Remember, these are expectations using the same simparam in each expectation:
      ev <- yseries$ev[1,]
      return(list(acf = acf, ev = ev, pv = pv))
    }
  }
)

zarima$methods(
    mcfun = function(x, b0=0, b1=1, ..., sim=TRUE){
        mu <- exp(b0 + b1 * x)
        if(sim){
            y <- rnorm(n=length(x), mean=mu)
            return(y)
        }else{
            return(mu)
        }
    }
)

#' Estimation wrapper function for arima models, to easily fit with Zelig architecture
#' @keywords internal

zeligArimaWrapper <- function(formula, order = c(1, 0, 0), ... ,
                              include.mean = TRUE, data){

    # Using with():
    # myArimaCall <- quote( arima(x=, order =, xreg= ) )
    # output <- with(data, myArimaCall )


    # Using arima() directly:
    mf <- model.frame(formula, data)

    acf3 <- as.character(formula[[3]])

    yflag <- names(mf) %in% all.vars(formula[-3])
    xflag <- names(mf) %in% all.vars(formula[-2])
    myx <- as.matrix(mf[,yflag, drop = FALSE])  # could use get_all_vars()
    is_varying(as.vector(myx), msg = 'Dependent variable does not vary for at least one of the cases.')
    myxreg <- as.matrix(mf[,xflag, drop = FALSE])

    if (("1" %in% acf3 ) & ("-" %in% acf3 )){
        include.mean <- FALSE
    }

    output <- stats::arima(x = myx, order = order, xreg = myxreg,
                           include.mean = include.mean, ...)

}


#' Construct Autocorrelation Function from Zelig object and simulated parameters
#' @keywords internal

simacf <- function(coef, order, params, alpha = 0.5){

    #order <- eval(.self$zelig.call$order)
    myar <- myma <- myar.seq <- myma.seq <- NULL

    if(order[1]>0){
        arnames <- paste("ar", 1:order[1], sep="")
        myar <- coef[arnames]
        myar.seq <- params[, arnames, drop=FALSE]
    }

    if(order[3]>0){
        manames <- paste("ma", 1:order[3], sep="")
        myma <- coef[manames]
        myma.seq <- params[, manames, drop=FALSE]
    }

    mylag.max<-10  # Need to set automatically.

    n.sims<-nrow(params)
    expected.acf <- ARMAacf(ar=myar, ma=myma, lag.max=mylag.max)
    acf.history<-matrix(NA, nrow=n.sims, ncol=length(expected.acf))      # length(expected.acf) = mylag.max +1
    for(i in 1:n.sims){
        acf.history[i,] <- ARMAacf(ar=myar.seq[i,], ma=myma.seq[i,], lag.max=mylag.max)
    }


    # Define functions to compute confidence intervals for each column in a matrix
    ci.matrix <- function(x, alpha) {
        pos.hi <- max(round((1-(alpha/2))*nrow(x)), 1)
        pos.low <-max(round((alpha/2)*nrow(x)), 1)

        ci.lower <- ci.upper <- rep(NA, ncol(x))
        for(i in 1:ncol(x)){
            temp<-sort(x[,i])
            ci.lower[i]<-temp[pos.low]
            ci.upper[i]<-temp[pos.hi]
        }
        return(list(ci.lower=ci.lower, ci.upper=ci.upper))
    }
    ci.acf <- ci.matrix(x=acf.history, alpha=0.05)

    return(list(expected.acf=expected.acf, ci.acf=ci.acf, sims.acf=acf.history))
}


#' Construct Simulated Next Step in Dynamic Series
#' @keywords internal

zeligARMAnextstep <- function(yseries=NULL, xseries, wseries=NULL, beta, ar=NULL, i=NULL, ma=NULL, sd){

    ## Check inputs
    # t is obs across time
    # s is sims
    # k is covariates
    # order is (p,q,r)
    # assume yseries (t x sims), xseries (t x k), wseries (t x s), beta (s x k), ar (s x p), ma (s x r) are matrix
    # assume sd is scalar

    ## Could construct these by using known order more deliberatively

    if(is.vector(yseries)){
        #print("warning: yseries is vector")
        yseries <- matrix(yseries, nrow=1)        # Assume if y is a vector, that we are only running one simulation chain of y, so y is (t x 1)
    }
    if(is.vector(xseries)){
        #print("warning: xseries is vector")
        xseries <- matrix(xseries, nrow=1)        # Assume if x is a vector, that there are no lagged terms, so x is (1 x k)
    }
    if(is.vector(wseries)){
        #print("warning: wseries is vector")
        wseries <- matrix(wseries, nrow=1)        # Assume if w is a vector, that we are only running one simulation chain of y, so w is (t x 1)
    }
    if(is.vector(beta)){
        #print("warning: beta is vector")
        beta <- matrix(beta, ncol=1)
    }
    if(is.vector(ar)){
        #print("warning: ar is vector")
        ar <- matrix(ar, ncol=1)
    }
    if(is.vector(ma)){
        #print("warning: ma is vector")
        ma <- matrix(ma, ncol=1)
    }

    ar.term <- function(yseries, ar, n){
        yshort <- yseries[1:ncol(ar), , drop=FALSE]           # because we only need the diagonal of a square matrix, we can avoid full matrix multiplication
        return( rowSums( ar * t(yshort) ) )       # diag[(s x p) . (p x s)] = diag[(s x s)] = (s x 1)
    }
    xt.term <- function(xseries, beta){
        return( as.vector(beta %*% t(xseries)) )  # (s x k) . t(1 x k) = (s x 1)
    }
    ma.term <- function(wseries, ma){
        wshort <- wseries[1:ncol(ma), , drop=FALSE]
        return( rowSums( ma * t(wshort)) )        # diag[(s x r) . (r x s)] = diag[(s x s)] = (s x 1)
    }

    n.sims <- ncol(yseries)
    w <- rnorm(n=n.sims, mean=0, sd=sd)
    y <- xt.term(xseries,beta) + w              # conformable if xt is vector and w vector
    if(!is.null(ar)){
        y <- y + ar.term(yseries,ar)              # conformable if y vector and ar vector
    }
    if(!is.null(ma)){
        y <- y + ma.term(wseries,ma)              # conformable if y vector and ma vector
    }

    exp.y <- y - w                              # one interpretation of an EV QI:  E(y| l(w), l(y))
    return(list(y=y, w=w, exp.y=exp.y))
}


#' Calculate the Long Run Exquilibrium for Fixed X
#' @keywords internal

zeligARMAlongrun <- function(y.init=NULL, x, simparam, order, sd, tol=NULL, burnin=20){
    if(is.null(tol)){
        tol<-0.01
    }
    ar <- i <- ma <- NULL

    ## Ensure parameter simulations in same order as model matrix
    xnames <- colnames(x)
    beta <- simparam[,xnames]

    ## Extract AR and MA terms
    if(order[1]>0){
        arnames <- paste("ar", 1:order[1], sep="")
        ar <- simparam[,arnames]
    }
    if(order[3]>0){
        manames <- paste("ma", 1:order[3], sep="")
        ma <- simparam[,manames]
    }
    timepast <- max(order[1],order[3])

    n.sims <- nrow(simparam)

    if(is.vector(x)){
        x<-matrix(x,nrow=1, ncol=length(x))
    }

    if(is.null(y.init)){
        if (!is.matrix(beta)) beta <- matrix(beta, ncol = 1)
        betabar <- t(apply(beta, 2, mean))
        y.init <- x %*% t(beta)
    }

    yseries <- matrix(y.init, nrow=timepast, ncol=n.sims, byrow=TRUE)
    wseries <- matrix(rnorm(n=timepast*n.sims), nrow=timepast, ncol=n.sims)
    evseries <- matrix(NA, nrow=timepast, ncol=n.sims)

    finished <- FALSE
    count <- 0
    while(!finished){
        y <- zeligARMAnextstep(yseries=yseries[1:timepast, ], xseries=x,
                               wseries=wseries[1:timepast, ], beta = beta,
                               ar = ar, i = i, ma = ma, sd = sd)
        yseries <- rbind(y$y, yseries)
        wseries <- rbind(y$w, wseries)
        evseries<- rbind(y$exp.y, evseries)

        #diff <- mean(abs(y.1 - y.0))  # Eventually need to determine some automated stopping rule
        count <- count+1
        finished <- count > burnin #| (diff < tol)
    }

    return(list(y.longrun=yseries, w.longrun=wseries, ev.longrun=evseries))
}


#' Construct Simulated Series with Internal Discontinuity in X
#' @keywords internal

zeligARMAbreakforecaster <- function(y.init=NULL, x, x1, simparam, order, sd, t1=5, t2=10){

    longrun.out <- zeligARMAlongrun(y.init=y.init, x=x, simparam=simparam, order=order, sd=sd)
    yseries  <- longrun.out$y.longrun
    wseries  <- longrun.out$w.longrun
    evseries <- longrun.out$ev.longrun

    ## Ensure parameter simulations in same order as model matrix
    xnames <- colnames(x)
    beta <- simparam[,xnames]

    ## Extract AR and MA terms
    ar <- i <- ma <- NULL
    if(order[1]>0){
        arnames <- paste("ar", 1:order[1], sep="")
        ar <- simparam[,arnames]
    }
    if(order[3]>0){
        manames <- paste("ma", 1:order[3], sep="")
        ma <- simparam[,manames]
    }
    timepast <- max(order[1],order[3]) # How many steps backward are needed in the series  --  could we be more precise?

    # Take a step at covariates x
    for(i in 2:t1){
        nextstep <- zeligARMAnextstep(yseries=yseries[1:timepast, ], xseries=x, wseries=wseries[1:timepast, ], beta=beta, ar=ar, i=i, ma=ma, sd=sd)
        yseries  <- rbind(nextstep$y, yseries)   # Could just change arguments so nextstep(nextstep) doesn't need to copy elsewhere.
        wseries  <- rbind(nextstep$w, wseries)
        evseries <- rbind(nextstep$exp.y, evseries)
    }

    # Introduce shock
    nextstep <- zeligARMAnextstep(yseries=yseries[1:timepast, ], xseries=x1, wseries=wseries[1:timepast, ], beta=beta, ar=ar, i=i, ma=ma, sd=sd)
    yseries  <- rbind(nextstep$y, yseries)   # Could just change arguments so nextstep(nextstep) doesn't need to copy elsewhere.
    wseries  <- rbind(nextstep$w, wseries)
    evseries <- rbind(nextstep$exp.y, evseries)

    y.innov  <- yseries
    w.innov  <- wseries  # Note: sequence of stocastic terms are going to depart now
    ev.innov <- evseries

    for(i in 2:t2){
        # Take further steps at covariates x1 (an introduction of an innovation)
        nextstep <- zeligARMAnextstep(yseries=y.innov[1:timepast, ], xseries=x1, wseries=w.innov[1:timepast, ], beta=beta, ar=ar, i=i, ma=ma, sd=sd)
        y.innov  <- rbind(nextstep$y, y.innov)  # Could just change arguments so nextstep(nextstep) doesn't need to copy elsewhere.
        w.innov  <- rbind(nextstep$w, w.innov)
        ev.innov <- rbind(nextstep$exp.y, ev.innov)

        # And take steps returning to old covariates (an introduction of a shock)
        nextstep <- zeligARMAnextstep(yseries=yseries[1:timepast, ], xseries=x, wseries=wseries[1:timepast, ], beta=beta, ar=ar, i=i, ma=ma, sd=sd)
        yseries  <- rbind(nextstep$y, yseries)   # Could just change arguments so nextstep(nextstep) doesn't need to copy elsewhere.
        wseries  <- rbind(nextstep$w, wseries)
        evseries <- rbind(nextstep$exp.y, evseries)

    }

    yseries <- yseries[1:(t1 + t2), ]  # Truncate series to last periods, removing burn-in to equilibrium
    y.innov <- y.innov[1:(t1 + t2), ]
    evseries <- evseries[1:(t1 + t2), ]
    ev.innov <- ev.innov[1:(t1 + t2), ]

    yseries <- yseries[nrow(yseries):1,]  # Change y to conventional row ordering by time before returning
    y.innov <- y.innov[nrow(y.innov):1,]
    evseries <- evseries[nrow(evseries):1, ]
    ev.innov <- ev.innov[nrow(ev.innov):1, ]

    return(list(y.shock = yseries, y.innovation = y.innov, ev.shock = evseries, ev.innovation = ev.innov))
}
IQSS/Zelig documentation built on Dec. 11, 2023, 1:51 a.m.