##NOTE
# with tsmethod = "stsm", the element "xreg" could be defined in the
# object "stsm" instead of using argument "xreg" in "tso0". However,
# it is more convenient to let the function "stsm::stsmFit" handle this element;
# in this way, the same arguments are passed to "stats::arima" and "stsmFit"
# and the code is simplified here avoiding "if" statements depending on "tsmethod"
tso <- function(y, xreg = NULL, cval = NULL, delta = 0.7, n.start = 50,
types = c("AO", "LS", "TC"), # c("IO", "AO", "LS", "TC", "SLS")
maxit = 1, maxit.iloop = 4, cval.reduce = 0.14286,
remove.method = c("en-masse", "bottom-up"),
remove.cval = NULL,
tsmethod = c("auto.arima", "arima", "stsm"),
args.tsmethod = NULL, args.tsmodel = NULL, logfile = NULL)
{
tsmethod <- match.arg(tsmethod)
remove.method <- match.arg(remove.method)
attr.y <- attributes(y)
n <- length(y)
yname <- deparse(substitute(y))
#stopifnot(is.ts(y))
if (!is.null(args.tsmethod$xreg))
{
if (is.null(xreg))
{
# check if external regressors were defined through "args.tsmethod"
# instead of using argument "xreg"
xreg <- args.tsmethod$xreg
args.tsmethod$xreg <- NULL # this removes element "xreg" from the list
} else {
# check if external regressors were defined both
# in "xreg" and "args.tsmethod$xreg" but with different values
if (!identical(xreg, args.tsmethod$xreg))
{
warning(paste("non-null", sQuote("args.tsmethod$xreg"), "was ignored; argument ",
sQuote("xreg"), "was used instead"))
} # else # "xreg" was defined twice with the same values (no warning)
args.tsmethod$xreg <- NULL # this removes element "xreg" from the list
}
}
if (tsmethod == "stsm")
{
if (is.null(args.tsmodel$model))
args.tsmodel$model <- ifelse(frequency(y) == 1, "local-level", "BSM")
##FIXME these defaults only if stsm.method = "maxlik.fd.scoring"
if (is.null(args.tsmodel$ssd))
args.tsmodel$ssd <- TRUE
if (is.null(args.tsmodel$sgfc))
args.tsmodel$sgfc <- TRUE
# let "stsm::stsmFit" handle "xreg", not here
y <- do.call("stsm.model", args = c(list(y = y), args.tsmodel))
#ylist <- list(m = m)
} #else
#ylist <- list(x = y) # m <- y
# if "ylist" or "m <- y" were used, then the "if" statement below where "fit" is
# created could be avoided using "do.call(tsmethod, args = c(x = m, list())"
# but this involves storing two identical objects ("y" and "m" or "ylist")
# default arguments
if (is.null(args.tsmethod))
{
args.tsmethod <- switch(tsmethod,
"auto.arima" = list(allowdrift = FALSE, ic = "bic"),
"arima" = list(order = c(0, 1, 1), seasonal = list(order = c(0, 1, 1))),
"stsm" = list(stsm.method = "maxlik.td.optim", method = "L-BFGS-B",
KF.version = "KFKSDS", KF.args = list(P0cov = TRUE), gr = "numerical")) #hessian = TRUE
#list(stsm.method = "maxlik.fd.scoring", step = NULL, information = "expected"))
}
# default critical value
# the same is done in functions "locate.outliers.oloop" and "remove.outliers"
# "cval" is passed as a non-null value from tso() to those functions
# but keep there this block so that default value is used when those functions
# are called outside tso()
if (is.null(cval))
{
#n <- length(y)
if (n <= 50) {
cval <- 3
} else
if (n >= 450) {
cval <- 4
} else
cval <- round(3 + 0.0025 * (n - 50), 2)
}
cval0 <- cval
if (is.null(remove.cval))
remove.cval <- cval
# "res0" is used below to generate the output,
# "res" is overwritten until no more outliers are found
# "res0" is also used if maxit = 1
res0 <- res <- tso0(x = y, xreg = xreg, cval = cval,
delta = delta, n.start = n.start,
types = types, maxit.iloop = maxit.iloop,
remove.method = remove.method, remove.cval = remove.cval,
tsmethod = tsmethod, args.tsmethod = args.tsmethod,
logfile = logfile)
fit.wo.outliers <- res$fit0 # model without outliers (if maxit>1 res0 may change)
moall <- res$outliers
outtimes <- res$times
iter <- 1
cval <- round(cval * (1 - cval.reduce), 2)
if (nrow(moall) > 1)
while (iter < maxit)
{
##FIXME see move res0 <- res after if(...) break
if (tsmethod == "stsm")
{
##FIXME TODO create stsm object based on res$yadj as done above
warning("currently ", sQuote("maxit"), " > 1 is not allowed for ", sQuote("tsmethod=\"stsm\""))
break
}
# save "res" to have a copy of the last fitted model, res$fit;
# if in the current run no outliers are found then
# tso0() does not return the fitted model
res0 <- res
res <- tso0(x = res$yadj, xreg = xreg, cval = cval,
delta = delta, n.start = n.start,
types = types, maxit.iloop = maxit.iloop,
remove.method = remove.method, remove.cval = remove.cval,
tsmethod = tsmethod, args.tsmethod = args.tsmethod,
logfile = logfile)
##FIXME check
#discard (remove) duplicates and outliers at consecutive type points (if any)
#
#do not discard according to abs(t-stat) because the detection of outliers
#are based on res$yadj (not the original series); discarding an outlier
#from a previous iteration would require changing the current res$yadj
#
#discard outliers at an observation where an outlier (of the same or other type)
#was detected in a previous iteration
id <- which(res$outliers[,"ind"] %in% res0$outliers[,"ind"])
if (length(id) > 0)
res$outliers <- res$outliers[id,]
#discard consecutive outliers of any type, keep the outlier from previous iterations
id <- which(apply(outer(res$outliers[,"ind"], res0$outliers[,"ind"], "-"), MARGIN=1,
FUN = function(x) any(x == 1)))
if (length(id) > 0)
res$outliers <- res$outliers[id,]
if (nrow(res$outliers) == 0)
break
moall <- rbind(moall, res$outliers)
outtimes <- c(outtimes, res$times)
iter <- iter + 1
}
# final model given the detected outliers
if (nrow(moall) > 0)
{
#NOTE 'pars' is relevant only for innovational outliers,
#when 'maxit'>1, see if it would be better to use 'res' instead of 'res0',
#preferably it should be based on 'pars' from a model for the original data
#rather than the series adjusted for outliers
pars <- switch(tsmethod,
"auto.arima" = , "arima" = coefs2poly(res0$fit),
"stsm" = stsm::char2numeric(res0$fit$model))
# 'xreg': input regressor variables such as calendar effects (if any)
# 'xreg.outl': outliers regressor variables detected above (if any)
# 'xregall': all regressors ('xreg' and 'xreg.outl')
xreg.outl <- outliers.effects(mo = moall, n = n, weights = FALSE, delta = delta,
pars = pars, n.start = n.start, freq = frequency(y))
xregall <- cbind(xreg, xreg.outl)
nms.outl <- colnames(xreg.outl)
colnames(xregall) <- c(colnames(xreg), nms.outl)
##NOTE
# rerunning "auto.arima" (model selection) may not be necessary at this point
if (tsmethod == "stsm") {
fit <- do.call("stsmFit", args = c(list(x = y, xreg = xregall), args.tsmethod))
} else {
fit <- do.call(tsmethod, args = c(list(x = y, xreg = xregall), args.tsmethod))
# this is for proper printing of results from "auto.arima" and "arima"
fit$series <- yname
}
id <- colnames(xreg.outl)
if (tsmethod == "stsm")
{
##FIXME TODO
#if xregall!=xreg.outl (i.e. argument xreg is not NULL)
# xregcoefs <- fit$xreg$coef
# stde <- fit$xreg$stde
# if (is.null(stde))
# stde <- sqrt(diag(vcov(fit, type = "optimHessian")))
xregcoefs <- fit$pars[id]
tstats <- xregcoefs / fit$std.errors[id]
} else { # method "auto.arima", "arima"
xregcoefs <- coef(fit)[id]
tstats <- xregcoefs / sqrt(diag(fit$var.coef)[id])
}
moall[,"coefhat"] <- xregcoefs
moall[,"tstat"] <- tstats
oeff <- xreg.outl %*% cbind(xregcoefs)
attributes(oeff) <- attr.y #attributes(y)
yadj <- if(is.ts(y)) y - oeff else y@y - oeff
} else { # no outliers detected
fit <- fit.wo.outliers
oeff <- NULL
yadj <- if(is.ts(y)) y else y@y
}
structure(list(outliers = moall, y = if(is.ts(y)) y else y@y, yadj = yadj,
cval = cval0, fit = fit, effects = oeff, times = outtimes),
class = "tsoutliers")
}
tso0 <- function(x, xreg = NULL, cval = 3.5, delta = 0.7, n.start = 50,
types = c("AO", "LS", "TC"), maxit.iloop = 4,
remove.method = c("en-masse", "bottom-up"),
remove.cval = NULL,
tsmethod = c("auto.arima", "arima", "stsm"), args.tsmethod = NULL,
args.tsmodel = NULL, logfile = NULL)
{
# "x" can be either a "ts" object or a "stsm" object;
# if !inherits(x, "stsm") then two identical objects are stored ("x" and "y")
y <- if(is.ts(x)) { x } else x@y
#remove.method <- match.arg(remove.method)
#tsmethod <- match.arg(tsmethod)
#remove.method <- match.arg(remove.method)
fitmethod <- gsub("stsm", "stsmFit", tsmethod)
if (is.null(remove.cval))
remove.cval <- cval
# fit time series model
fit.wo.outliers <-
fit <- do.call(fitmethod, args = c(list(x = x, xreg = xreg), args.tsmethod))
#fit$series <- deparse(substitute(y))
if (!is.null(logfile))
{
cat(paste("model selection:\n"), file = logfile, append = FALSE)
capture.output(fit, file = logfile, append = TRUE)
}
# identify and locate prospective outliers by type
# given a fitted time series model
stage1 <- locate.outliers.oloop(y = y, fit = fit, types = types, cval = cval,
maxit.iloop = maxit.iloop, delta = delta, n.start = n.start, logfile = logfile)
# choose and fit the model including the outlier regressors detected so far
# (the weights of the outliers is fine tuned, to see it
# compare 'moall[,"coefhat"]' with 'coef(fit)["oeffi"]') then
# remove the outliers detected so far if they are not significant in the new model/fit
if (nrow(stage1$outliers) > 0)
{
stage2 <- remove.outliers(x = stage1, y = y, cval = remove.cval,
method = remove.method, delta = delta, n.start = n.start,
tsmethod.call = fit$call, fdiff = NULL, logfile = logfile)
#moall <- stage2$outliers
stopifnot(ncol(stage2$xreg) == length(stage2$xregcoefs))
} else
stage2 <- list(xreg = NULL, fit = stage1$fit)
# final outliers and
# original series adjusted for the outlier effects
if (!is.null(stage2$xreg))
{
# stage2$fit$xreg is not returned by arima()
moall <- stage2$outliers
##NOTE changed 2016Nov12 after changes in remove.outliers(), "moall" is updated there
#moall[,"coefhat"] <- stage2$xregcoefs
#moall[,"tstat"] <- stage2$xregtstats
oeff <- stage2$xreg %*% cbind(stage2$xregcoefs)
attributes(oeff) <- attributes(y)
yadj <- y - oeff
moall <- moall[,c("type", "ind", "coefhat", "tstat")]
outtimes <- time(y)[moall[,"ind"]]
if (frequency(y) > 1)
outseason <- formatC(as.vector(cycle(y)[moall[,"ind"]]),
width = 2, flag="0")
moall <- cbind(moall[,c("type", "ind")],
"time" = if (frequency(y) > 1) paste(floor(outtimes),
outseason, sep = ":") else outtimes,
moall[,c("coefhat","tstat")])
oind <- order(moall[,"ind"])
moall <- moall[oind,]
outtimes <- outtimes[oind]
rownames(moall) <- NULL
} else { # no outliers detected
oeff <- NULL
yadj <- y
moall <- data.frame(array(dim = c(0, 4)))
colnames(moall) <- c("type", "ind", "coefhat", "tstat")
outtimes <- NULL
}
if (!is.null(logfile))
{
msg <- paste("\nfinal outliers\n")
cat(msg, file = logfile, append = TRUE)
capture.output(moall, file = logfile, append = TRUE)
}
structure(list(outliers = moall, y = y, yadj = yadj, cval = cval,
fit0 = fit.wo.outliers, # initial model fitted without outliers
fit = stage2$fit, effects = oeff, times = outtimes),
class = "tsoutliers")
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.