R/outliers-tstatistics.R
In tsoutliers: Detection of Outliers in Time Series

Documented in outliers.tstatistics

##NOTE
#see return the absolute value of the t-statistics

##NOTE
# "pars" is not required for the innovational outlier
# see if some relevant simplication can be done if "types" includes only "IO"

##NOTE 
# "coefhat" is required by "outliers.regressors" called from "locate.outliers.iloop"

# types of outliers
# types = c("IO", "AO", "LS", "TC", "SLS")

outliers.tstatistics <- function(pars, resid, types = c("AO", "LS", "TC"), 
  sigma = NULL, delta = 0.7) #n.start = 50
  UseMethod("outliers.tstatistics")

outliers.tstatistics.default <- function(pars, resid, types = c("AO", "LS", "TC"), 
  sigma = NULL, delta = 0.7) #n.start = 50
{
  msg <- paste("argument", sQuote("pars"), "should be of class", 
    sQuote("ArimaPars"), #"or", sQuote("stsmSS"), 
    "\n       see", sQuote("?coefs2poly"), "in package", sQuote("tsoutliers"))#,
    #"or", sQuote("?char2numeric"), "in package", sQuote("stsm"))
  
  if (anyNA(resid))
    warning("the series \'resid\' contain NAs;\n", 
    "Potential outliers before the last NA may be overlooked")

  # if "pars" is a list, deduce the type of time series model 
  # looking at the names of the elements in "pars"

  if (inherits(pars, "list"))
  {
    tmp <- names(pars)
    if (all(c("arcoefs", "macoefs") %in% tmp)) {
      pars <- structure(pars, class = "ArimaPars")
    } else 
    if (all(c("Z", "T", "H") %in% tmp)) {
      pars <- structure(pars, class = "stsmSS")
    } else
      # if (!identical(pars, "IO"))
      # "pars" is not required for the innovational outlier
      stop(msg)

    outliers.tstatistics(pars = pars, resid = resid, types = types, 
      sigma = sigma, delta = delta) #n.start = n.start

  } else
    stop(msg)
}

outliers.tstatistics.ArimaPars <- function(pars, resid, types = c("AO", "LS", "TC"), 
  sigma = NULL, delta = 0.7) #n.start = 50
{
  # "n.start" is not used for "ArimaPars"

  n <- length(resid)
  res <- array(dim = c(n, length(types), 2), 
    dimnames = list(NULL, types, c("coefhat", "tstat")))

  if (is.null(sigma))
    sigma <- 1.483 * quantile(abs(resid - quantile(resid, probs = 0.5)), 
      probs = 0.5, na.rm = TRUE)

  ar <- pars$arcoefs
  ma <- pars$macoefs

  if (any(c("AO", "LS", "TC", "SLS") %in% types))
  {
    picoefs <- c(1, ARMAtoMA(-ma, -ar, n - 1))
    #NOTE using na.omit() here would remove possible NA values in the 
    # middle of the sample, not just the NAs generated by filter()
    #ao.xy <- as.vector(na.omit(
    #  filter(x = c(resid, rep(0, n - 1)), filter = rev(picoefs), method = "conv", sides = 1)))
    ao.xy <- filter(x = c(resid, rep(0, n - 1)), filter = rev(picoefs), method = "conv", sides = 1)
    ao.xy <- as.vector(ao.xy[-seq_along(picoefs[-1])])

    if (any(c("LS", "TC", "SLS") %in% types))
      rev.ao.xy <- rev(ao.xy)
  }

  if ("IO" %in% types)
  {
    #IOres <- cbind(coefhat = resid, tstat = resid / sigma)
    res[,"IO","coefhat"] <- resid
    res[,"IO","tstat"] <- resid / sigma
  } #else 
    #IOres <- NULL

  if ("AO" %in% types)
  {
    #xx <- rev(cumsum(picoefs^2))
    #AOres <- ao.xy / (sigma * sqrt(xx))
    xxinv <- rev(1 / cumsum(picoefs^2))
    coefhat <- ao.xy * xxinv
    #tstat <- coefhat / (sigma * sqrt(xxinv))
    #AOres <- cbind(coefhat, tstat)
    res[,"AO","coefhat"] <- coefhat
    res[,"AO","tstat"] <- coefhat / (sigma * sqrt(xxinv))
  } #else 
    #AOres <- NULL

  if ("LS" %in% types)
  {
    #dinvf <- diffinv(picoefs)[-1]
    #xx <- rev(cumsum(dinvf^2))
    #xy <- rev(diffinv(rev.ao.xy)[-1])
    #LSres <- xy / (sigma * sqrt(xx))
    xy <- rev(diffinv(rev.ao.xy)[-1])
    dinvf <- diffinv(picoefs)[-1]
    xxinv <- rev(1 / cumsum(dinvf^2))
    coefhat <- xy * xxinv
    #tstat <- coefhat / (sigma * sqrt(xxinv))
    #LSres <- cbind(coefhat, tstat)
    res[,"LS","coefhat"] <- coefhat
    res[,"LS","tstat"] <- coefhat / (sigma * sqrt(xxinv))
  } #else 
    #LSres <- NULL

  if ("TC" %in% types)
  {
    #dinvf <- filter(x = picoefs, filter = delta, method = "rec")
    #xx <- rev(cumsum(dinvf^2))   
    #xy <- rev(filter(x = rev.ao.xy, filter = delta, method = "rec"))
    #TCres <- xy / (sigma * sqrt(xx))
    xy <- rev(filter(x = rev.ao.xy, filter = delta, method = "rec"))
    dinvf <- filter(x = picoefs, filter = delta, method = "rec")
    xxinv <- rev(1 / cumsum(dinvf^2))
    coefhat <- xy * xxinv
    #tstat <- coefhat / (sigma * sqrt(xxinv))
    #TCres <- cbind(coefhat, tstat)
    res[,"TC","coefhat"] <- coefhat
    res[,"TC","tstat"] <- coefhat / (sigma * sqrt(xxinv))
  } #else 
    #TCres <- NULL

  if ("SLS" %in% types)
  {
    s <- frequency(resid)
    #dinvf <- diffinv(picoefs, lag = s)[-seq_len(s)]
    #xx <- rev(cumsum(dinvf^2))
    #xy <- rev(diffinv(rev.ao.xy, lag = s)[-seq_len(s)])
    #SLSres <- xy / (sigma * sqrt(xx))
    id <- seq_len(s)
    xy <- rev(diffinv(rev.ao.xy, lag = s)[-id])
    dinvf <- diffinv(picoefs, lag = s)[-id]
    xxinv <- rev(1 / cumsum(dinvf^2))
    coefhat <- xy * xxinv
    #tstat <- coefhat / (sigma * sqrt(xxinv))
    #SLSres <- cbind(coefhat, tstat)
    res[,"SLS","coefhat"] <- coefhat
    res[,"SLS","tstat"] <- coefhat / (sigma * sqrt(xxinv))
  } #else 
    #SLSres <- NULL

  res
}

# outliers.tstatistics.stsmSS <- function(pars, resid, types = c("AO", "LS", "TC"), 
#   sigma = NULL, delta = 0.7, n.start = 50)
# {
#   n <- length(resid)
#   res <- array(dim = c(n, length(types), 2), 
#     dimnames = list(NULL, types, c("coefhat", "tstat")))
# 
#   if (is.null(sigma))
#     sigma <- 1.483 * quantile(abs(resid - quantile(resid, probs = 0.5)), 
#       probs = 0.5, na.rm = TRUE)
# 
# ##FIXME TODO "SLS"
# 
#   if (any(c("AO", "LS", "TC") %in% types))
#   {
#     I <- ts(rep(0, length(resid) + n.start - 1), 
#       start = start(resid), frequency = frequency(resid))
#     I[n.start] <- 1
#     tmp <- KFKSDS::KF(I, pars)
# 
#     fao <- tmp$v[-seq.int(n.start-1)]
#     #NOTE using na.omit() here would remove possible NA values in the 
#     # middle of the sample, not just the NAs generated by filter()
#     #ao.xy <- as.vector(na.omit(
#     #  filter(x = c(resid, rep(0, length(resid) - 1)), 
#     #    filter = rev(fao), method = "conv", sides = 1)))
#     ao.xy <- filter(x = c(resid, rep(0, n - 1)), 
#         filter = rev(fao), method = "conv", sides = 1)
#     ao.xy <- as.vector(ao.xy[-seq_along(fao[-1])])
# 
#     if (any(c("LS", "TC") %in% types))
#       rev.ao.xy <- rev(ao.xy)
#   }
# 
#   # IO
# 
#   if ("IO" %in% types)
#   {
#     res[,"IO","coefhat"] <- resid
#     res[,"IO","tstat"] <- resid / sigma
#   }
# 
#   # AO
# 
#   if ("AO" %in% types)
#   {
#     #xx <- rev(cumsum(fao^2))
#     #AOres <- ao.xy / (sigma * sqrt(xx))
#     xxinv <- rev(1 / cumsum(fao^2))
#     coefhat <- ao.xy * xxinv
#     res[,"AO","coefhat"] <- coefhat
#     res[,"AO","tstat"] <- coefhat / (sigma * sqrt(xxinv))
#   }
# 
#   # LS
# 
#   if ("LS" %in% types)
#   {
#     #xy <- rev(diffinv(rev.ao.xy)[-1])
#     #xx <- rev(cumsum(diffinv(fao)[-1]^2))
#     #LSres <- xy / (sigma * sqrt(xx))
#     xy <- rev(diffinv(rev.ao.xy)[-1])
#     dinvf <- diffinv(fao)[-1]
#     xxinv <- rev(1 / cumsum(dinvf^2))
#     coefhat <- xy * xxinv
#     res[,"LS","coefhat"] <- coefhat
#     res[,"LS","tstat"] <- coefhat / (sigma * sqrt(xxinv))
#   }
# 
#   # TC
# 
#   if ("TC" %in% types)
#   {
#     #xy <- rev(filter(x = rev.ao.xy, filter = delta, method = "rec"))
#     #dinvf <- filter(x = fao, filter = delta, method = "rec")
#     #xx <- rev(cumsum(dinvf^2))
#     #TCres <- xy / (sigma * sqrt(xx))
#     xy <- rev(filter(x = rev.ao.xy, filter = delta, method = "rec"))
#     dinvf <- filter(x = fao, filter = delta, method = "rec")
#     xxinv <- rev(1 / cumsum(dinvf^2))
#     coefhat <- xy * xxinv
#     res[,"TC","coefhat"] <- coefhat
#     res[,"TC","tstat"] <- coefhat / (sigma * sqrt(xxinv))
#   }
# 
#   # SLS
# 
#   if ("SLS" %in% types)
#   {
# ##FIXME TODO (remove note in Rd file when done)
# 
#     #res[,"SLS","coefhat"] <- coefhat
#     #res[,"SLS","tstat"] <- coefhat / (sigma * sqrt(xxinv))
#     warning(paste("currently", sQuote("SLS"), "is not considered with", 
#       sQuote("'stsm'")))
#   }
# 
#   res
# }