R/newarima2.R

Defines functions auto.arima myarima newmodel arima.string summary.Arima nsdiffs CHtest calcOCSBCritVal OCSBtest checkarima is.constant

Documented in auto.arima is.constant nsdiffs summary.Arima

#' Fit best ARIMA model to univariate time series
#'
#' Returns best ARIMA model according to either AIC, AICc or BIC value. The
#' function conducts a search over possible model within the order constraints
#' provided.
#'
#' The default arguments are designed for rapid estimation of models for many time series.
#' If you are analysing just one time series, and can afford to take some more time, it
#' is recommended that you set \code{stepwise=FALSE} and \code{approximation=FALSE}.
#'
#' The number of seasonal differences is sometimes poorly chosen. If your data shows strong
#' seasonality, try setting \code{D=1} rather than relying on the automatic selection of \code{D}.
#'
#' Non-stepwise selection can be slow, especially for seasonal data. The stepwise
#' algorithm outlined in Hyndman and Khandakar (2008) is used except that the default
#' method for selecting seasonal differences is now the OCSB test rather than
#' the Canova-Hansen test. There are also some other minor variations to the
#' algorithm described in Hyndman and Khandakar (2008).
#'
#'
#' @param y a univariate time series
#' @param d Order of first-differencing. If missing, will choose a value based
#' on KPSS test.
#' @param D Order of seasonal-differencing. If missing, will choose a value
#' based on OCSB test.
#' @param max.p Maximum value of p
#' @param max.q Maximum value of q
#' @param max.P Maximum value of P
#' @param max.Q Maximum value of Q
#' @param max.order Maximum value of p+q+P+Q if model selection is not
#' stepwise.
#' @param max.d Maximum number of non-seasonal differences
#' @param max.D Maximum number of seasonal differences
#' @param start.p Starting value of p in stepwise procedure.
#' @param start.q Starting value of q in stepwise procedure.
#' @param start.P Starting value of P in stepwise procedure.
#' @param start.Q Starting value of Q in stepwise procedure.
#' @param stationary If \code{TRUE}, restricts search to stationary models.
#' @param seasonal If \code{FALSE}, restricts search to non-seasonal models.
#' @param ic Information criterion to be used in model selection.
#' @param stepwise If \code{TRUE}, will do stepwise selection (faster).
#' Otherwise, it searches over all models. Non-stepwise selection can be very
#' slow, especially for seasonal models.
#' @param trace If \code{TRUE}, the list of ARIMA models considered will be
#' reported.
#' @param approximation If \code{TRUE}, estimation is via conditional sums of
#' squares and the information criteria used for model selection are
#' approximated. The final model is still computed using maximum likelihood
#' estimation. Approximation should be used for long time series or a high
#' seasonal period to avoid excessive computation times.
#' @param truncate An integer value indicating how many observations to use in
#' model selection. The last \code{truncate} values of the series are used to
#' select a model when \code{truncate} is not \code{NULL} and
#' \code{approximation=TRUE}. All observations are used if either
#' \code{truncate=NULL} or \code{approximation=FALSE}.
#' @param xreg Optionally, a vector or matrix of external regressors, which
#' must have the same number of rows as \code{y}.
#' @param test Type of unit root test to use. See \code{\link{ndiffs}} for
#' details.
#' @param seasonal.test This determines which seasonal unit root test is used.
#' See \code{\link{nsdiffs}} for details.
#' @param allowdrift If \code{TRUE}, models with drift terms are considered.
#' @param allowmean If \code{TRUE}, models with a non-zero mean are considered.
#' @param lambda Box-Cox transformation parameter. Ignored if NULL. Otherwise,
#' data transformed before model is estimated.
#' @param biasadj Use adjusted back-transformed mean for Box-Cox
#' transformations. If TRUE, point forecasts and fitted values are mean
#' forecast. Otherwise, these points can be considered the median of the
#' forecast densities.
#' @param parallel If \code{TRUE} and \code{stepwise = FALSE}, then the
#' specification search is done in parallel. This can give a significant
#' speedup on mutlicore machines.
#' @param num.cores Allows the user to specify the amount of parallel processes
#' to be used if \code{parallel = TRUE} and \code{stepwise = FALSE}. If
#' \code{NULL}, then the number of logical cores is automatically detected and
#' all available cores are used.
#' @param x Deprecated. Included for backwards compatibility.
#' @param ... Additional arguments to be passed to \code{\link[stats]{arima}}.
#' @return Same as for \code{\link{Arima}}
#' @author Rob J Hyndman
#' @seealso \code{\link{Arima}}
#' @references Hyndman, R.J. and Khandakar, Y. (2008) "Automatic time series
#' forecasting: The forecast package for R", \emph{Journal of Statistical
#' Software}, \bold{26}(3).
#' @keywords ts
#' @examples
#' fit <- auto.arima(WWWusage)
#' plot(forecast(fit,h=20))
#'
#' @export
auto.arima <- function(y, d=NA, D=NA, max.p=5, max.q=5,
                       max.P=2, max.Q=2, max.order=5, max.d=2, max.D=1,
                       start.p=2, start.q=2, start.P=1, start.Q=1,
                       stationary=FALSE, seasonal=TRUE, ic=c("aicc", "aic", "bic"),
                       stepwise=TRUE, trace=FALSE,
                       approximation=(length(x) > 150 | frequency(x) > 12),
                       truncate=NULL, xreg=NULL,
                       test=c("kpss", "adf", "pp"), seasonal.test=c("ocsb", "ch"),
                       allowdrift=TRUE, allowmean=TRUE, lambda=NULL, biasadj=FALSE,
                       parallel=FALSE, num.cores=2, x=y, ...) {
  # Only non-stepwise parallel implemented so far.
  if (stepwise == TRUE & parallel == TRUE) {
    warning("Parallel computer is only implemented when stepwise=FALSE, the model will be fit in serial.")
    parallel <- FALSE
  }

  series <- deparse(substitute(y))
  x <- as.ts(x)
  if (NCOL(x) > 1) {
    stop("auto.arima can only handle univariate time series")
  }

  # Check for constant data
  if (is.constant(x)) {
    if (allowmean) {
      fit <- Arima(x, order = c(0, 0, 0), fixed = mean(x, na.rm = TRUE), ...)
    } else {
      fit <- Arima(x, order = c(0, 0, 0), include.mean = FALSE, ...)
    }
    fit$x <- x
    fit$series <- series
    fit$call <- match.call()
    fit$call$x <- data.frame(x = x)
    fit$constant <- TRUE
    return(fit)
  }
  ic <- match.arg(ic)
  test <- match.arg(test)
  seasonal.test <- match.arg(seasonal.test)

  # Use AIC if npar <= 3
  # AICc won't work for tiny samples.
  serieslength <- length(x)
  if (serieslength <= 3L) {
    ic <- "aic"
  }

  # Only consider non-seasonal models
  if (seasonal) {
    m <- frequency(x)
  } else {
    m <- 1
  }
  if (m < 1) {
    # warning("I can't handle data with frequency less than 1. Seasonality will be ignored.")
    m <- 1
  }
  else {
    m <- round(m)
  } # Avoid non-integer seasonal periods

  max.p <- min(max.p, floor(serieslength / 3))
  max.q <- min(max.q, floor(serieslength / 3))
  max.P <- min(max.P, floor(serieslength / 3 / m))
  max.Q <- min(max.Q, floor(serieslength / 3 / m))

  orig.x <- x

  if (!is.null(lambda)) {
    x <- BoxCox(x, lambda)
    attr(lambda, "biasadj") <- biasadj
  }

  # Check xreg and do regression if necessary
  if (!is.null(xreg)) {
    # Make sure it is a matrix with column names
    nmxreg <- deparse(substitute(xreg))
    xregg <- as.matrix(xreg)
    if (ncol(xregg) == 1 & length(nmxreg) > 1) {
      nmxreg <- "xreg"
    }
    if (is.null(colnames(xregg))) {
      colnames(xregg) <- if (ncol(xregg) == 1) {
        nmxreg
      } else {
        paste(nmxreg, 1:ncol(xregg), sep = "")
      }
    }

    # Check that xreg is not rank deficient
    # First check if any columns are constant
    constant_columns <- apply(xregg, 2, is.constant)
    if (any(constant_columns)) { # Remove first one
      xregg <- xregg[, -which(constant_columns)[1]]
    }

    # Now check if it is rank deficient
    sv <- svd(na.omit(cbind(rep(1, NROW(xregg)), xregg)))$d
    if (min(sv) / sum(sv) < .Machine$double.eps) {
      stop("xreg is rank deficient")
    }

    # Finally find residuals from regression in order
    # to estimate appropriate level of differencing
    j <- !is.na(x) & !is.na(rowSums(xregg))
    xx <- x
    xx[j] <- residuals(lm(x ~ xregg))
  }
  else {
    xx <- x
    xregg <- NULL
  }

  # Choose order of differencing
  if (stationary) {
    d <- D <- 0
  }
  if (m == 1) {
    D <- max.P <- max.Q <- 0
  } else if (is.na(D)) {
    D <- nsdiffs(xx, m = m, test = seasonal.test, max.D = max.D)
    # Make sure xreg is not null after differencing
    if (D > 0 & !is.null(xregg)) {
      diffxreg <- diff(xregg, differences = D, lag = m)
      if (any(apply(diffxreg, 2, is.constant))) {
        D <- D - 1
      }
    }
  }
  if (D > 0) {
    dx <- diff(xx, differences = D, lag = m)
  } else {
    dx <- xx
  }
  if (!is.null(xregg)) {
    if (D > 0) {
      diffxreg <- diff(xregg, differences = D, lag = m)
    } else {
      diffxreg <- xregg
    }
  }
  if (is.na(d)) {
    d <- ndiffs(dx, test = test, max.d = max.d)
    # Make sure xreg is not null after differencing
    if (d > 0 & !is.null(xregg)) {
      diffxreg <- diff(diffxreg, differences = d, lag = 1)
      if (any(apply(diffxreg, 2, is.constant))) {
        d <- d - 1
      }
    }
  }

  # Check number of differences selected
  if (D >= 2) {
    warning("Having more than one seasonal differences is not recommended. Please consider using only one seasonal difference.")
  } else if (D + d > 2) {
    warning("Having 3 or more differencing operations is not recommended. Please consider reducing the total number of differences.")
  }

  if (d > 0) {
    dx <- diff(dx, differences = d, lag = 1)
  }

  if (is.constant(dx)) {
    if (is.null(xreg)) {
      if (D > 0) {
        fit <- Arima(x, order = c(0, d, 0), seasonal = list(order = c(0, D, 0), period = m), include.constant = TRUE, fixed = mean(dx, na.rm = TRUE), ...)
      } else if (d < 2) {
        fit <- Arima(x, order = c(0, d, 0), include.constant = TRUE, fixed = mean(dx, na.rm = TRUE), ...)
      } else {
        stop("Data follow a simple polynomial and are not suitable for ARIMA modelling.")
      }
    }
    else # Perfect regression
    {
      if (D > 0) {
        fit <- Arima(x, order = c(0, d, 0), seasonal = list(order = c(0, D, 0), period = m), xreg = xreg, ...)
      } else {
        fit <- Arima(x, order = c(0, d, 0), xreg = xreg, ...)
      }
    }
    fit$x <- orig.x
    fit$series <- series
    fit$call <- match.call()
    fit$call$x <- data.frame(x = x)
    return(fit)
  }

  if (m > 1) {
    if (max.P > 0) {
      max.p <- min(max.p, m - 1)
    }
    if (max.Q > 0) {
      max.q <- min(max.q, m - 1)
    }
  }

  # Find constant offset for AIC calculation using white noise model
  if (approximation) {
    if (!is.null(truncate)) {
      tspx <- tsp(x)
      if (length(x) > truncate) {
        x <- ts(tail(x, truncate), end = tspx[2], frequency = tspx[3])
      }
    }
    if (D == 0) {
      fit <- try(stats::arima(x, order = c(0, d, 0), xreg = xreg, ...), silent = TRUE)
    } else {
      fit <- try(stats::arima(
        x, order = c(0, d, 0), seasonal = list(order = c(0, D, 0), period = m),
        xreg = xreg, ...
      ), silent = TRUE)
    }
    if (!is.element("try-error", class(fit))) {
      offset <- -2 * fit$loglik - serieslength * log(fit$sigma2)
    } else # Not sure this should ever happen
    {
      # warning("Unable to calculate AIC offset")
      offset <- 0
    }
  }
  else {
    offset <- 0
  }

  allowdrift <- allowdrift & (d + D) == 1
  allowmean <- allowmean & (d + D) == 0

  constant <- allowdrift | allowmean

  if (!stepwise) {
    bestfit <- search.arima(
      x, d, D, max.p, max.q, max.P, max.Q, max.order, stationary,
      ic, trace, approximation, xreg = xreg, offset = offset,
      allowdrift = allowdrift, allowmean = allowmean,
      parallel = parallel, num.cores = num.cores, ...
    )
    bestfit$call <- match.call()
    bestfit$call$x <- data.frame(x = x)
    bestfit$lambda <- lambda
    bestfit$x <- orig.x
    bestfit$series <- series
    bestfit$fitted <- fitted(bestfit)
    return(bestfit)
  }

  # Starting model
  if (length(x) < 10L) {
    start.p <- min(start.p, 1L)
    start.q <- min(start.q, 1L)
    start.P <- 0L
    start.Q <- 0L
  }
  p <- start.p <- min(start.p, max.p)
  q <- start.q <- min(start.q, max.q)
  P <- start.P <- min(start.P, max.P)
  Q <- start.Q <- min(start.Q, max.Q)

  results <- matrix(NA, nrow = 100, ncol = 8)

  if (approximation & trace) {
    cat("\n Fitting models using approximations to speed things up...\n")
  }

  bestfit <- myarima(x, order = c(p, d, q), seasonal = c(P, D, Q), constant = constant, ic, trace, approximation, offset = offset, xreg = xreg, ...)
  results[1, ] <- c(p, d, q, P, D, Q, constant, bestfit$ic)
  # Null model with possible constant
  fit <- myarima(x, order = c(0, d, 0), seasonal = c(0, D, 0), constant = constant, ic, trace, approximation, offset = offset, xreg = xreg, ...)
  results[2, ] <- c(0, d, 0, 0, D, 0, constant, fit$ic)
  if (fit$ic < bestfit$ic) {
    bestfit <- fit
    p <- q <- P <- Q <- 0
  }
  # Basic AR model
  if (max.p > 0 | max.P > 0) {
    fit <- myarima(x, order = c(max.p > 0, d, 0), seasonal = c((m > 1) & (max.P > 0), D, 0), constant = constant, ic, trace, approximation, offset = offset, xreg = xreg, ...)
    results[3, ] <- c(1, d, 0, m > 1, D, 0, constant, fit$ic)
    if (fit$ic < bestfit$ic) {
      bestfit <- fit
      p <- (max.p > 0)
      P <- (m > 1) & (max.P > 0)
      q <- Q <- 0
    }
  }
  # Basic MA model
  if (max.q > 0 | max.Q > 0) {
    fit <- myarima(x, order = c(0, d, max.q > 0), seasonal = c(0, D, (m > 1) & (max.Q > 0)), constant = constant, ic, trace, approximation, offset = offset, xreg = xreg, ...)
    results[4, ] <- c(0, d, 1, 0, D, m > 1, constant, fit$ic)
    if (fit$ic < bestfit$ic) {
      bestfit <- fit
      p <- P <- 0
      Q <- (m > 1) & (max.Q > 0)
      q <- (max.q > 0)
    }
  }
  k <- 4
  # Null model with no constant
  if (constant) {
    fit <- myarima(x, order = c(0, d, 0), seasonal = c(0, D, 0), constant = FALSE, ic, trace, approximation, offset = offset, xreg = xreg, ...)
    results[5, ] <- c(0, d, 0, 0, D, 0, 0, fit$ic)
    if (fit$ic < bestfit$ic) {
      bestfit <- fit
      p <- q <- P <- Q <- 0
    }
    k <- 5
  }

  startk <- 0
  while (startk < k & k < 94) {
    startk <- k
    if (P > 0 & newmodel(p, d, q, P - 1, D, Q, constant, results[1:k, ])) {
      k <- k + 1
      fit <- myarima(x, order = c(p, d, q), seasonal = c(P - 1, D, Q), constant = constant, ic, trace, approximation, offset = offset, xreg = xreg, ...)
      results[k, ] <- c(p, d, q, P - 1, D, Q, constant, fit$ic)
      if (fit$ic < bestfit$ic) {
        bestfit <- fit
        P <- (P - 1)
      }
    }
    if (P < max.P & newmodel(p, d, q, P + 1, D, Q, constant, results[1:k, ])) {
      k <- k + 1
      fit <- myarima(x, order = c(p, d, q), seasonal = c(P + 1, D, Q), constant = constant, ic, trace, approximation, offset = offset, xreg = xreg, ...)
      results[k, ] <- c(p, d, q, P + 1, D, Q, constant, fit$ic)
      if (fit$ic < bestfit$ic) {
        bestfit <- fit
        P <- (P + 1)
      }
    }
    if (Q > 0 & newmodel(p, d, q, P, D, Q - 1, constant, results[1:k, ])) {
      k <- k + 1
      fit <- myarima(x, order = c(p, d, q), seasonal = c(P, D, Q - 1), constant = constant, ic, trace, approximation, offset = offset, xreg = xreg, ...)
      results[k, ] <- c(p, d, q, P, D, Q - 1, constant, fit$ic)
      if (fit$ic < bestfit$ic) {
        bestfit <- fit
        Q <- (Q - 1)
      }
    }
    if (Q < max.Q & newmodel(p, d, q, P, D, Q + 1, constant, results[1:k, ])) {
      k <- k + 1
      fit <- myarima(x, order = c(p, d, q), seasonal = c(P, D, Q + 1), constant = constant, ic, trace, approximation, offset = offset, xreg = xreg, ...)
      results[k, ] <- c(p, d, q, P, D, Q + 1, constant, fit$ic)
      if (fit$ic < bestfit$ic) {
        bestfit <- fit
        Q <- (Q + 1)
      }
    }
    if (Q > 0 & P > 0 & newmodel(p, d, q, P - 1, D, Q - 1, constant, results[1:k, ])) {
      k <- k + 1
      fit <- myarima(x, order = c(p, d, q), seasonal = c(P - 1, D, Q - 1), constant = constant, ic, trace, approximation, offset = offset, xreg = xreg, ...)
      results[k, ] <- c(p, d, q, P - 1, D, Q - 1, constant, fit$ic)
      if (fit$ic < bestfit$ic) {
        bestfit <- fit
        Q <- (Q - 1)
        P <- (P - 1)
      }
    }
    if (Q < max.Q & P < max.P & newmodel(p, d, q, P + 1, D, Q + 1, constant, results[1:k, ])) {
      k <- k + 1
      fit <- myarima(x, order = c(p, d, q), seasonal = c(P + 1, D, Q + 1), constant = constant, ic, trace, approximation, offset = offset, xreg = xreg, ...)
      results[k, ] <- c(p, d, q, P + 1, D, Q + 1, constant, fit$ic)
      if (fit$ic < bestfit$ic) {
        bestfit <- fit
        Q <- (Q + 1)
        P <- (P + 1)
      }
    }

    if (p > 0 & newmodel(p - 1, d, q, P, D, Q, constant, results[1:k, ])) {
      k <- k + 1
      fit <- myarima(x, order = c(p - 1, d, q), seasonal = c(P, D, Q), constant = constant, ic, trace, approximation, offset = offset, xreg = xreg, ...)
      results[k, ] <- c(p - 1, d, q, P, D, Q, constant, fit$ic)
      if (fit$ic < bestfit$ic) {
        bestfit <- fit
        p <- (p - 1)
      }
    }
    if (p < max.p & newmodel(p + 1, d, q, P, D, Q, constant, results[1:k, ])) {
      k <- k + 1
      fit <- myarima(x, order = c(p + 1, d, q), seasonal = c(P, D, Q), constant = constant, ic, trace, approximation, offset = offset, xreg = xreg, ...)
      results[k, ] <- c(p + 1, d, q, P, D, Q, constant, fit$ic)
      if (fit$ic < bestfit$ic) {
        bestfit <- fit
        p <- (p + 1)
      }
    }
    if (q > 0 & newmodel(p, d, q - 1, P, D, Q, constant, results[1:k, ])) {
      k <- k + 1
      fit <- myarima(x, order = c(p, d, q - 1), seasonal = c(P, D, Q), constant = constant, ic, trace, approximation, offset = offset, xreg = xreg, ...)
      results[k, ] <- c(p, d, q - 1, P, D, Q, constant, fit$ic)
      if (fit$ic < bestfit$ic) {
        bestfit <- fit
        q <- (q - 1)
      }
    }
    if (q < max.q & newmodel(p, d, q + 1, P, D, Q, constant, results[1:k, ])) {
      k <- k + 1
      fit <- myarima(x, order = c(p, d, q + 1), seasonal = c(P, D, Q), constant = constant, ic, trace, approximation, offset = offset, xreg = xreg, ...)
      results[k, ] <- c(p, d, q + 1, P, D, Q, constant, fit$ic)
      if (fit$ic < bestfit$ic) {
        bestfit <- fit
        q <- (q + 1)
      }
    }
    if (q > 0 & p > 0 & newmodel(p - 1, d, q - 1, P, D, Q, constant, results[1:k, ])) {
      k <- k + 1
      fit <- myarima(x, order = c(p - 1, d, q - 1), seasonal = c(P, D, Q), constant = constant, ic, trace, approximation, offset = offset, xreg = xreg, ...)
      results[k, ] <- c(p - 1, d, q - 1, P, D, Q, constant, fit$ic)
      if (fit$ic < bestfit$ic) {
        bestfit <- fit
        q <- (q - 1)
        p <- (p - 1)
      }
    }
    if (q < max.q & p < max.p & newmodel(p + 1, d, q + 1, P, D, Q, constant, results[1:k, ])) {
      k <- k + 1
      fit <- myarima(x, order = c(p + 1, d, q + 1), seasonal = c(P, D, Q), constant = constant, ic, trace, approximation, offset = offset, xreg = xreg, ...)
      results[k, ] <- c(p + 1, d, q + 1, P, D, Q, constant, fit$ic)
      if (fit$ic < bestfit$ic) {
        bestfit <- fit
        q <- (q + 1)
        p <- (p + 1)
      }
    }
    if (allowdrift | allowmean) {
      if (newmodel(p, d, q, P, D, Q, !constant, results[1:k, ])) {
        k <- k + 1
        fit <- myarima(x, order = c(p, d, q), seasonal = c(P, D, Q), constant = !constant, ic, trace, approximation, offset = offset, xreg = xreg, ...)
        results[k, ] <- c(p, d, q, P, D, Q, !constant, fit$ic)
        if (fit$ic < bestfit$ic) {
          bestfit <- fit
          constant <- !constant
        }
      }
    }
  }

  # Refit using ML if approximation used for IC
  if (approximation & !is.null(bestfit$arma)) {
    if (trace) {
      cat("\n\n Now re-fitting the best model(s) without approximations...\n")
    }
    icorder <- order(results[, 8])
    nmodels <- sum(!is.na(results[, 8]))
    for (i in 1:nmodels)
    {
      k <- icorder[i]
      fit <- myarima(
        x, order = c(results[k, 1], d, results[k, 3]),
        seasonal = c(results[k, 4], D, results[k, 6]),
        constant = results[k, 7] == 1,
        ic, trace, approximation = FALSE, xreg = xreg, ...
      )
      if (fit$ic < Inf) {
        bestfit <- fit
        break
      }
    }
  }

  # Nothing fitted
  if (bestfit$ic == Inf) {
    if (trace) {
      cat("\n")
    }
    stop("No suitable ARIMA model found")
  }

  # Return best fit

  bestfit$x <- orig.x
  bestfit$series <- series
  bestfit$ic <- NULL
  bestfit$call <- match.call()
  bestfit$call$x <- data.frame(x = x)
  bestfit$lambda <- lambda
  bestfit$fitted <- fitted(bestfit)

  if (trace) {
    cat("\n\n Best model:", arima.string(bestfit, padding = TRUE), "\n\n")
  }

  return(bestfit)
}


# Calls arima from stats package and adds data to the returned object
# Also allows refitting to new data
# and drift terms to be included.
myarima <- function(x, order = c(0, 0, 0), seasonal = c(0, 0, 0), constant=TRUE, ic="aic", trace=FALSE, approximation=FALSE, offset=0, xreg=NULL, ...) {
  n <- length(x)
  m <- frequency(x)
  use.season <- (sum(seasonal) > 0) & m > 0
  diffs <- order[2] + seasonal[2]
  if (approximation) {
    method <- "CSS"
  } else {
    method <- "CSS-ML"
  }
  if (diffs == 1 & constant) {
    xreg <- cbind(drift = 1:length(x), xreg)
    if (use.season) {
      suppressWarnings(fit <- try(stats::arima(x = x, order = order, seasonal = list(order = seasonal, period = m), xreg = xreg, method = method, ...), silent = TRUE))
    } else {
      suppressWarnings(fit <- try(stats::arima(x = x, order = order, xreg = xreg, method = method, ...), silent = TRUE))
    }
  }
  else {
    if (use.season) {
      suppressWarnings(fit <- try(stats::arima(x = x, order = order, seasonal = list(order = seasonal, period = m), include.mean = constant, method = method, xreg = xreg, ...), silent = TRUE))
    } else {
      suppressWarnings(fit <- try(stats::arima(x = x, order = order, include.mean = constant, method = method, xreg = xreg, ...), silent = TRUE))
    }
  }
  if (is.null(xreg)) {
    nxreg <- 0
  } else {
    nxreg <- ncol(as.matrix(xreg))
  }
  if (!is.element("try-error", class(fit))) {
    nstar <- n - order[2] - seasonal[2] * m
    if (diffs == 1 & constant) {
      # fitnames <- names(fit$coef)
      # fitnames[length(fitnames)-nxreg] <- "drift"
      # names(fit$coef) <- fitnames
      fit$xreg <- xreg
    }
    npar <- length(fit$coef) + 1
    if (approximation) {
      fit$aic <- offset + nstar * log(fit$sigma2) + 2 * npar
    }
    if (!is.na(fit$aic)) {
      fit$bic <- fit$aic + npar * (log(nstar) - 2)
      fit$aicc <- fit$aic + 2 * npar * (npar + 1) / (nstar - npar - 1)
      fit$ic <- switch(ic, bic = fit$bic, aic = fit$aic, aicc = fit$aicc)
    }
    else {
      fit$aic <- fit$bic <- fit$aicc <- fit$ic <- Inf
    }
    # Adjust residual variance to be unbiased
    fit$sigma2 <- sum(fit$residuals ^ 2, na.rm = TRUE) / (nstar - npar + 1)

    # Check for unit roots
    minroot <- 2
    if (order[1] + seasonal[1] > 0) {
      testvec <- fit$model$phi
      k <- abs(testvec) > 1e-8
      if (sum(k) > 0) {
        last.nonzero <- max(which(k))
      } else {
        last.nonzero <- 0
      }
      if (last.nonzero > 0) {
        testvec <- testvec[1:last.nonzero]
        minroot <- min(minroot, abs(polyroot(c(1, -testvec))))
      }
    }
    if (order[3] + seasonal[3] > 0) {
      testvec <- fit$model$theta
      k <- abs(testvec) > 1e-8
      if (sum(k) > 0) {
        last.nonzero <- max(which(k))
      } else {
        last.nonzero <- 0
      }
      if (last.nonzero > 0) {
        testvec <- testvec[1:last.nonzero]
        minroot <- min(minroot, abs(polyroot(c(1, testvec))))
      }
    }
    if (minroot < 1 + 1e-2) { # Previously 1+1e-3
      fit$ic <- Inf
    } # Don't like this model
    if (trace) {
      cat("\n", arima.string(fit, padding = TRUE), ":", fit$ic)
    }
    fit$xreg <- xreg

    return(structure(fit, class = c("ARIMA", "Arima")))
  }
  else {
    # Catch errors due to unused arguments
    if (length(grep("unused argument", fit)) > 0L) {
      stop(fit[1])
    }

    if (trace) {
      cat("\n ARIMA(", order[1], ",", order[2], ",", order[3], ")", sep = "")
      if (use.season) {
        cat("(", seasonal[1], ",", seasonal[2], ",", seasonal[3], ")[", m, "]", sep = "")
      }
      if (constant & (order[2] + seasonal[2] == 0)) {
        cat(" with non-zero mean")
      } else if (constant & (order[2] + seasonal[2] == 1)) {
        cat(" with drift        ")
      } else if (!constant & (order[2] + seasonal[2] == 0)) {
        cat(" with zero mean    ")
      } else {
        cat("                   ")
      }
      cat(" :", Inf)
    }
    return(list(ic = Inf))
  }
}

newmodel <- function(p, d, q, P, D, Q, constant, results) {
  n <- nrow(results)
  for (i in 1:n)
  {
    if (identical(c(p, d, q, P, D, Q, constant), results[i, 1:7])) {
      return(FALSE)
    }
  }
  return(TRUE)
}

arima.string <- function(object, padding=FALSE) {
  order <- object$arma[c(1, 6, 2, 3, 7, 4, 5)]
  m <- order[7]
  result <- paste("ARIMA(", order[1], ",", order[2], ",", order[3], ")", sep = "")
  if (m > 1 & sum(order[4:6]) > 0) {
    result <- paste(result, "(", order[4], ",", order[5], ",", order[6], ")[", m, "]", sep = "")
  }
  if (padding & m > 1 & sum(order[4:6]) == 0) {
    result <- paste(result, "         ", sep = "")
    if (m <= 9) {
      result <- paste(result, " ", sep = "")
    } else if (m <= 99) {
      result <- paste(result, "  ", sep = "")
    } else {
      result <- paste(result, "   ", sep = "")
    }
  }
  if (!is.null(object$xreg)) {
    if (NCOL(object$xreg) == 1 & is.element("drift", names(object$coef))) {
      result <- paste(result, "with drift        ")
    } else {
      result <- paste("Regression with", result, "errors")
    }
  }
  else {
    if (is.element("constant", names(object$coef)) | is.element("intercept", names(object$coef))) {
      result <- paste(result, "with non-zero mean")
    } else if (order[2] == 0 & order[5] == 0) {
      result <- paste(result, "with zero mean    ")
    } else {
      result <- paste(result, "                  ")
    }
  }
  if (!padding) {
    # Strip trailing spaces
    result <- gsub("[ ]*$", "", result)
  }
  return(result)
}

#' @export
summary.Arima <- function(object, ...) {
  print(object)
  cat("\nTraining set error measures:\n")
  print(accuracy(object))
}



# Number of seasonal differences
#' @rdname ndiffs
#'
#' @examples
#' nsdiffs(log(AirPassengers))
#'
#' @export
nsdiffs <- function(x, m=frequency(x), test=c("ocsb", "ch"), max.D=1) {
  if (is.constant(x)) {
    return(0)
  }

  test <- match.arg(test)
  if (m == 1) {
    stop("Non seasonal data")
  } else if (m < 1) {
    warning("I can't handle data with frequency less than 1. Seasonality will be ignored.")
    return(0)
  }

  D <- 0
  if (test == "ch") {
    dodiff <- CHtest(x, m)
  } else {
    dodiff <- OCSBtest(x, m)
  }

  while (dodiff == 1 & D < max.D) {
    D <- D + 1
    x <- diff(x, lag = m)
    if (is.constant(x)) {
      return(D)
    }
    if (test == "ch") {
      dodiff <- CHtest(x, m)
    } else {
      dodiff <- OCSBtest(x, m)
    }
  }
  return(D)
}

CHtest <- function(x, m) {
  m <- round(m) # Avoid non-integer seasonal period
  if (length(x) < 2 * m + 5) {
    return(0)
  }
  chstat <- SD.test(x, m)
  crit.values <- c(
    0.4617146, 0.7479655, 1.0007818, 1.2375350, 1.4625240, 1.6920200, 1.9043096, 2.1169602,
    2.3268562, 2.5406922, 2.7391007
  )
  if (m <= 12) {
    D <- as.numeric(chstat > crit.values[m - 1])
  } else if (m == 24) {
    D <- as.numeric(chstat > 5.098624)
  } else if (m == 52) {
    D <- as.numeric(chstat > 10.341416)
  } else if (m == 365) {
    D <- as.numeric(chstat > 65.44445)
  } else {
    D <- as.numeric(chstat > 0.269 * m ^ (0.928))
  }
  return(D)
}

# Return critical values for OCSB test at 5% level
# Approximation based on extensive simulations.
calcOCSBCritVal <- function(seasonal.period) {
  log.m <- log(seasonal.period)
  return(-0.2937411 * exp(-0.2850853 * (log.m - 0.7656451) + (-0.05983644) * ((log.m - 0.7656451) ^ 2)) - 1.652202)
}


OCSBtest <- function(time.series, period) {
  period <- round(period) # Avoid non-integer seasonal period
  if (length(time.series) < (2 * period + 5)) {
    # warning("Time series too short for seasonal differencing")
    return(0)
  }

  # Compute (1-B)(1-B^m)y_t
  seas.diff.series <- diff(time.series, lag = period, differences = 1)
  diff.series <- diff(seas.diff.series, lag = 1, differences = 1)

  # Compute (1-B^m)y_{t-1}
  y.one <- time.series[2:length(time.series)]
  y.one <- diff(y.one, lag = period, differences = 1)

  # Compute (1-B)y_{t-m}
  y.two <- time.series[(1 + period):length(time.series)]
  y.two <- diff(y.two, lag = 1, differences = 1)

  # Make all series of the same length and matching time periods
  y.one <- y.one[(1 + period):(length(y.one) - 1)]
  y.two <- y.two[2:(length(y.two) - period)]
  diff.series <- diff.series[(1 + period + 1):(length(diff.series))]
  contingent.series <- diff.series

  x.reg <- cbind(y.one, y.two)
  diff.series <- ts(data = diff.series, frequency = period)

  regression <- try(Arima(diff.series, order = c(3, 0, 0), seasonal = list(order = c(1, 0, 0), period = period), xreg = x.reg), silent = TRUE)

  if (is.element("try-error", class(regression)) | tryCatch(checkarima(regression), error = function(e) TRUE)) {
    regression <- try(Arima(diff.series, order = c(3, 0, 0), seasonal = list(order = c(0, 0, 0), period = period), xreg = x.reg), silent = TRUE)

    if (is.element("try-error", class(regression)) | tryCatch(checkarima(regression), error = function(e) TRUE)) {
      regression <- try(Arima(diff.series, order = c(2, 0, 0), seasonal = list(order = c(0, 0, 0), period = period), xreg = x.reg), silent = TRUE)

      if (is.element("try-error", class(regression)) | tryCatch(checkarima(regression), error = function(e) TRUE)) {
        regression <- try(Arima(diff.series, order = c(1, 0, 0), seasonal = list(order = c(0, 0, 0), period = period), xreg = x.reg), silent = TRUE)

        if (is.element("try-error", class(regression)) | tryCatch(checkarima(regression), error = function(e) TRUE)) {
          regression <- try(lm(contingent.series ~ y.one + y.two - 1, na.action = NULL), silent = TRUE)
          if (is.element("try-error", class(regression))) {
            stop("The OCSB regression model cannot be estimated")
          }
          # Check if perfect regression. In that case, safest to do no differencing
          meanratio <- mean(abs(regression$residuals), na.rm = TRUE) / mean(abs(contingent.series), na.rm = TRUE)
          if (is.nan(meanratio)) {
            return(0)
          }
          if (meanratio < 1e-10) {
            return(0)
          }
          # Proceed to do OCSB test.
          reg.summary <- summary(regression)
          reg.coefs <- reg.summary$coefficients
          y.two.pos <- grep("y.two", rownames(reg.coefs), fixed = TRUE)
          if (length(y.two.pos) != 0) {
            y.two <- reg.coefs[y.two.pos, 3]
          } else {
            y.two <- NA
          }

          if ((is.nan(y.two)) | (is.infinite(y.two)) | (is.na(y.two)) | (is.element("try-error", class(regression)))) {
            return(1)
          } else {
            return(as.numeric(y.two >= calcOCSBCritVal(period)))
          }
        }
      }
    }
  }

  suppressWarnings(se <- sqrt(diag(regression$var.coef)))
  y.two <- regression$coef[names(regression$coef) == "y.two"] / se[names(se) == "y.two"]

  return(as.numeric(y.two >= calcOCSBCritVal(period)))
}

# Check that Arima object has positive coefficient variances without returning warnings
checkarima <- function(object) {
  suppressWarnings(test <- any(is.nan(sqrt(diag(object$var.coef)))))
  return(test)
}



#' Is an object constant?
#'
#' Returns true if the object's numerical values do not vary.
#'
#'
#' @param x object to be tested
#' @export
is.constant <- function(x) {
  x <- as.numeric(x)
  y <- rep(x[1], length(x))
  return(identical(x, y))
}
a1967fa/f-cast documentation built on May 29, 2019, 12:05 a.m.