inars: INARS(1) Model for Integer-Valued Time Series

Documented in inars

#' @name inars
#'
#' @title INARS(1) Modelling of Integer-Valued Time Series
#'
#' @description Fit the integer-valued autoregressive process of order 1
#' with signed binomial thinning to a univariate time series that assumes
#' integer values in Z = {..., -2, -1, 0, 1, 2, ...}.
#'
#' @param formula A representation of the model to be fitted. In the case
#'     of fit without regressors, the systematic component must be ~ 1.
#' @param data The dataset.
#' @param innovation The assumed distribution for the innovation process.
#'     The skew discrete Laplace (\code{"SDL"}), Skellam (\code{"SK"}) and
#'     Discrete logistic (\code{"DLOG"}) distributions are  available.
#' @param method  Estimation method of the parameters. Maximum conditional
#'     likelihood (\code{"CML"} - default) or method of moments
#'     (\code{"MM"}) estimates can be used.
#' @param optim.control List of control parameters for \code{optim}.
#' @param ... further arguments to be passed to \code{optim}.
#'
#' @return The \code{inars} function returns an object of class
#'      "\code{inars}", which consists of a list with the following
#'      components:
#'  \describe{
#'     \item{estimates}{ The parameters estimates.}
#'     \item{fitted.values}{ One-step ahead conditional mean forecasts.}
#'     \item{residuals}{ A vector of raw residuals (Yt - E(Yt | Yt-1)).}
#'     \item{vcov}{ Asymptotic covariance matrix of the maximum likelihood
#'         estimators of all parameters in the model obtained from the
#'         Hessian matrix if \code{hessian = TRUE}. However,
#'         if the moment estimator is used, then \code{vcov = NULL}.}
#'     \item{logLik}{ Conditional log-likelihood of the fitted model,
#'         if \code{method = "CML"}.}
#'     \item{nobs}{ Number of observations.}
#'     \item{innovation}{ The assumed distribution for the innovation
#'         process.}
#'     \item{method}{ Estimation method of the parameters.}
#'     \item{call}{ The function call.}
#'     \item{formula}{ The formula used to specify the model in
#'         \code{inars}.}
#'     \item{response, x}{ The response vector, and the model matrix.}
#'     \item{optim}{ Output from the \link[stats]{optim} call for
#'         maximizing the conditional log-likelihood, if
#'         \code{method = "CML"}.}
#'     \item{converged}{ logical indicating successful convergence of
#'         optim, if \code{method = "CML"}.}
#'     \item{optim.control}{ The control arguments passed to the
#'         \link[stats]{optim} call, if \code{method = "CML"}.}
#'    }
#'
#' @references
#' Medeiros, R. M. R. & Bourguignon, M. (2021).
#'
#' Kim, H. Y., & Park, Y. (2008). A non-stationary integer-valued
#'     autoregressive model. Statistical papers, 49, 485.
#'
#' Andersson, J., & Karlis, D. (2014). A parametric time series model with
#'     covariates for integers in Z. Statistical Modelling, 14, 135--156.
#'
#' @author Rodrigo M. R. Medeiros <\email{rodrigo.matheus@live.com}>
#'
#' @examples
#' \dontrun{
#' # Dataset: tick
#' data(tick)
#' head(tick)
#'
#' # Observed time series
#' y <-  ts(tick$tick)
#'
#' ########################
#' # Descriptive analysis #
#' #######################
#'
#' summary(y)
#' sd(y)
#'
#' # Distribution
#' barplot(table(y), xlab = "Ticks", ylab = "Frequency")
#'
#' # Line plot
#' plot(y, xlab = "Time", ylab = "Ticks")
#'
#' # Autocorrelation and partial autocorrelation functions
#' acf(y, main="", xlim=c(1.2,30)); axis(side=1,at=1,labels = "1")
#' pacf(y,main="")
#'
#' ##########################
#' # Fit of the INARS model #
#' #########################
#'
#' # Without regressors
#'
#' # SDL
#' fit.sdl <- inars(tick ~ 1, data = tick)
#'
#' # Skellam
#' fit.sk <- inars(tick ~ 1, data = tick, innovation = "SK")
#'
#' # Discrete logistic
#' fit.dlog <- inars(tick ~ 1, data = tick, innovation = "DLOG")
#'
#' summary(fit.sdl)
#' summary(fit.sk)
#' summary(fit.dlog)
#'
#' # AIC and BIC comparison
#' data.frame(innovation = c("SDL", "SK", "DLOG"),
#'            AIC = c(AIC(fit.sdl), AIC(fit.sk), AIC(fit.dlog)),
#'            BIC = c(BIC(fit.sdl), BIC(fit.sk), BIC(fit.dlog)))
#'
#' # With regressor 'spread'
#'
#' # SDL
#' fit2.sdl <- inars(tick ~ spread, data = tick)
#'
#' # Skellam
#' fit2.sk <- inars(tick ~ spread, data = tick, innovation = "SK")
#'
#' # Discrete logistic
#' fit2.dlog <- inars(tick ~ spread, data = tick, innovation = "DLOG")
#'
#' summary(fit2.sdl)
#' summary(fit2.sk)
#' summary(fit2.dlog)
#'
#' data.frame(innovation = c("SDL", "SK", "DLOG"),
#'            AIC = c(AIC(fit2.sdl), AIC(fit2.sk), AIC(fit2.dlog)),
#'            BIC = c(BIC(fit2.sdl), BIC(fit2.sk), BIC(fit2.dlog)))
#'
#' # Residual analysis
#' plot(fit2.sdl)
#' plot(fit2.sk)
#' plot(fit2.dlog)
#' }
NULL
#' @rdname inars
#' @export
inars <- function(formula, data, innovation = c("SDL", "SK", "DLOG"),
                  method = c("CML", "MM"),
                  optim.control = inars_control(...), ...)
{

  ## Model call
  cl <- match.call()
  if (missing(data))
    data <- environment(formula)
  mf <- stats::model.frame(formula = formula, data = data)
  X <- stats::model.matrix(attr(mf, "terms"), data = mf)
  y <- stats::model.response(mf)

  ## Definitions
  innovation <- match.arg(innovation, c("SDL", "SK", "DLOG"))
  method <- match.arg(method, c("CML", "MM"))
  p <- NCOL(X)
  n <- length(y)

  ## Fit
  opt <- inars_est(y, X, innovation, method, optim.control = optim.control)

  ## Estimates
  alpha <- opt$par[1]
  beta <- opt$par[2:(p + 1)]
  disp <- opt$par[p + 2]
  estimates <- matrix(c(alpha, beta, disp), 1, p + 2)

  ## Fitted values (one-step ahead conditional mean forecasts)
  mu <- X%*%beta
  fitted.values <- estimates[1] * y[1:(n - 1)] + mu[2:n]

  ## Residuals
  r <- as.numeric(y[2:n] - (alpha * y[1:(n - 1)]))

  ## Covariance matrix
  vcov <- NULL
  if(optim.control$hessian & method == "CML"){
    vcov <- solve(-as.matrix(opt$hessian))
  }

  ## Names
  ifelse(p > 1,
         parnames <- c("(Intercept)", colnames(X)[2:p]),
         parnames <- "mu")
  colnames(estimates) <- c("alpha", parnames, "disp")

  ## Out
  out <- list(estimates = estimates,
              fitted.values = fitted.values,
              residuals = r,
              vcov = vcov,
              logLik = opt$value,
              nobs = n,
              innovation = innovation,
              method = method,
              call = cl,
              formula = formula,
              response = y,
              x = X)


  if (method == "CML"){
    ## Convergence status
    out$optim <- opt
    out$converged <- opt$convergence == 0
    optim.control$start <- NULL
    ifelse(innovation == "SDL",
           optim.control$optim.method <- optim.control$optim.method,
           optim.control$optim.method <- "Nelder-Mead")
    out$optim.control <- optim.control

  }


  class(out) <- "inars"
  out
}