Nothing
library(bsts)
library(testthat)
test_that("Prediction with olddata, but without regression.", {
data(AirPassengers)
y <- log(AirPassengers)
ss <- AddLocalLinearTrend(list(), y)
ss <- AddSeasonal(ss, y, nseasons = 12)
model <- bsts(y, state.specification = ss, niter = 200, ping = -1)
full.pred <- predict(model, horizon = 12, burn = 100)
training <- window(y, end = c(1959, 12))
small.model <- bsts(training, state.specification = ss, niter = 200, ping = -1)
pred.holdout <- predict(small.model, horizon = 12)
expect_equal(length(pred.holdout$original.series), length(y) - 12)
updated.pred <- predict(small.model, horizon = 12, olddata = y)
expect_equal(updated.pred$original.series, y)
expect_equal(length(updated.pred$original.series), length(full.pred$original.series))
})
test_that("Prediction with olddata, and with regression. ", {
data(iclaims)
training <- initial.claims[1:402, ]
holdout1 <- initial.claims[403:450, ]
holdout2 <- initial.claims[451:456, ]
ss <- AddLocalLinearTrend(list(), training$iclaimsNSA)
ss <- AddSeasonal(ss, training$iclaimsNSA, nseasons = 52)
model <- bsts(iclaimsNSA ~ ., state.specification = ss, data =
training, niter = 100)
## Predict the holdout set given the training set.
## This is really fast, because we can use saved state from the MCMC
## algorithm.
pred.full <- predict(model, newdata = rbind(holdout1, holdout2))
expect_equal(length(pred.full), 5)
expect_equal(names(pred.full), c("mean", "median", "interval",
"distribution", "original.series"))
expect_equal(length(pred.full$mean), 54)
expect_true(is.matrix(pred.full$distribution))
expect_equal(ncol(pred.full$distribution), 54)
## Predict holdout 2, given training and holdout1.
## This is much slower because we need to re-filter the 'olddata' before
## simulating the predictions.
pred.update <- predict(model, newdata = holdout2,
olddata = rbind(training, holdout1))
expect_equal(length(pred.update), 5)
expect_equal(names(pred.update), c("mean", "median", "interval",
"distribution", "original.series"))
expect_equal(length(pred.update$mean), 6)
expect_true(is.matrix(pred.update$distribution))
expect_equal(ncol(pred.update$distribution), 6)
})
test_that("Off by one error is solved", {
# Predictions for regression models experienced an off-by-one error. This
# test demonstrates that the error is solved.
## Simulate data. The pattern is a just a day-of-week pattern with
## substantial day-to-day variation.
N = 7*52
dateseq <- seq(as.Date("2000-01-02"), length.out = N, by = "days")
proportions <- c(0.005, 0.14, 0.17, 0.23, 0.265, 0.18, 0.01)
pMatrix <- matrix(0, ncol = 1, nrow = N)
for (i in 1:7) {
day.indicator <- which(weekdays(dateseq, FALSE) == weekday.names[i])
pMatrix[day.indicator, 1] = proportions[i]
}
set.seed(102)
weekly.sim <- arima.sim(list(ma=-0.1), n = 52, sd = 0.01)
weekly.sim <- exp(diffinv(weekly.sim, differences = 1, xi = log(200)))[-1]
# plot(weekly.sim, type = "l")
seqindx <- c(seq(1, N, by = 7), N + 1)
daily.sim.allocate <- do.call(rbind, lapply(1:(length(seqindx) - 1), function(i){
cbind(pMatrix[seqindx[i]:(seqindx[i + 1] - 1)] * weekly.sim[i])
}))
# add a holiday effect
independence <- which(dateseq == "2000-07-04")
daily.sim.allocate[independence] <- daily.sim.allocate[independence] * 0.1
xreg <- rep(0, N)
xreg[independence] <- 1
# colnames(xreg) <- "independence"
dat <- data.frame(y = as.numeric(daily.sim.allocate), independence = xreg,
days = weekdays(dateseq), stringsAsFactors = FALSE)
rownames(dat) <- dateseq
# dat <- zoo(dat, dateseq)
# plot(dat[,1], type = "l")
tail(dat[1:(independence+24),])
## estimation part
spec <- AddLocalLevel(list(), y = as.numeric(dat[1:(independence+24),1]))
spec <- AddSeasonal(spec, y = as.numeric(dat[1:(independence+24),1]),
nseasons = 7)
train <- 1:(independence + 24)
# estimate with regressor and intercept
niter <- 250
seed <- 8675309
mod1 <- bsts(y~independence, data = dat[train, 1:2],
state.specification = spec, niter = niter, seed = seed)
# estimate with regressor and no intercept
## mod2 <- bsts(y~independence-1, data = dat[train, 1:2],
## state.specification = spec, niter = niter, seed = seed)
# estimate with no regressor and no intercept
mod3 <- bsts(as.numeric(dat[train, 1]),
state.specification = spec, niter = niter, seed = seed)
## forecast part
test <- (independence + 25):N
horizon <- length(test)
# the newdata has only zeros.
newdata <- dat[test, 2, drop = FALSE]
p1 <- predict(mod1, newdata = newdata, seed = seed)
# p2 <- predict(mod2, newdata = newdata, seed = seed)
p3 <- predict(mod3, horizon = horizon, seed = seed)
# Comparison of output. The predictions from models 1 and 2 (which have a
# regression component) are off by one when compared to the model sans
# regression component.
comparison <- data.frame(
day = weekdays(as.Date(rownames(dat[test, 2, drop = FALSE]))),
reg1 = colMeans(p1$distribution),
# reg2 = colMeans(p2$distribution),
ts = colMeans(p3$distribution),
actual = dat[test, 1])
## > head(comparison)
## day reg1 reg2 ts actual
## 1 Saturday 3.034669 3.001731 2.038222 2.153567
## 2 Sunday 1.971062 1.922798 1.437442 1.074628
## 3 Monday 30.622732 30.597797 29.827300 30.089592
## 4 Tuesday 36.785562 36.711367 34.199968 36.537362
## 5 Wednesday 49.437153 49.367559 48.581829 49.432902
## 6 Thursday 56.878702 56.844302 55.889856 56.955300
# In the error state the first row showed reg1 and reg2 with very large values
# because they were using an off-by-1 seasonal effect
expect_true(all(abs(comparison$reg1 - comparison$ts) < 4))
expect_true(cor(comparison$reg1, comparison$ts) > .98)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.