Nothing
#' @import compiler
#' @import quantreg
#' @import foreach
#' @importFrom Matrix Matrix Diagonal sparseMatrix bdiag .solve.dgC.chol
#' @importFrom stats optim qnorm quantile time
#' @importFrom utils tail head
# library(doMC)
# registerDoMC(8) #Number of CPU cores
ensure <- function(...) stopifnot(...)
initialize <- cmpfun(function(env, nRows, nCols, len) {
env$d <- c(nRows, nCols)
env$ra <- numeric(len)
env$ia <- integer(len)
env$ja <- integer(len)
env$l <- 0L
})
append <- cmpfun(function(env, val, i, j) {
b <- env$l + 1L
env$l <- env$l + length(val)
env$ra[b:env$l] <- val
env$ia[b:env$l] <- i
env$ja[b:env$l] <- j
})
dims <- cmpfun(function(env, nRows, nCols) {
env$d <- c(nRows, nCols)
})
last <- cmpfun(function(x) {
tail(x, n = 1)
})
first <- cmpfun(function(x) {
head(x, n = 1)
})
# One dimensional territory belonging to a seasonal knot (for all seasonal knots).
# Territory is the half sum of all lengths of all segments attached to the knot
# (there can be more or less than two attached to a knot if the seasonal
# structure is not a circle or a segment).
sTerritory <- cmpfun(function(seasonalStructure) {
sKnots <- seasonalStructure$sKnots
sKnotsV <- sort(unlist(sKnots))
segments <- seasonalStructure$segments
lefts <- sapply(segments, FUN = min)
rights <- sapply(segments, FUN = max)
sKnotsNoLefts <- lapply(sKnots, FUN = function(k) {
k[!(k %in% lefts)]
})
sKnotsNoRights <- lapply(sKnots, FUN = function(k) {
k[!(k %in% rights)]
})
lDistsSum <- sapply(sKnotsNoLefts, FUN = function(k) {
ifelse(length(k) == 0, 0, sum(sapply(k, FUN = function(kk) {
kk - max(sKnotsV[sKnotsV < kk])
})))
})
rDistsSum <- sapply(sKnotsNoRights, FUN = function(k) {
ifelse(length(k) == 0, 0, sum(sapply(k, FUN = function(kk) {
min(sKnotsV[sKnotsV > kk]) - kk
})))
})
return((lDistsSum + rDistsSum) / 2)
})
# Seasonal weights taking into account the territory around a seasonal knot and the norm
sWeights <- cmpfun(function(seasonalStructure, norm = 2) {
return(sTerritory(seasonalStructure)^(1 / norm))
})
tTerritory <- cmpfun(function(timeKnots) {
distances <- diff(timeKnots)
return((c(0, distances) + c(distances, 0)) / 2)
})
tWeights <- cmpfun(function(timeKnots, norm = 2) {
return(tTerritory(timeKnots)^(1 / norm))
})
# Transforms reduced season matrix into full season matrix.
# The source and the result are in vectorised forms (seasons are changing quicker).
# Reduced seasonal matrix is a seasonal matrix where one season is omitted as input value
# (since value of that season can be calculated as negative of weighted sum of other values)
seasonalTransformer <- cmpfun(function(nKnots, seasonalStructure) {
sKnots <- seasonalStructure$sKnots
nSKnots <- length(sKnots)
weights <- sTerritory(seasonalStructure)
lw <- length(weights)
m <- rbind(Diagonal(n = nSKnots - 1), Matrix(data = -weights[-lw] / weights[lw], nrow = 1, sparse = TRUE))
l <- list(m)
for (i in 1:nKnots) {
l[[i]] <- m
}
return(bdiag(l))
})
# Transposes vectorised full seasonal matrix (to make time changing quicker in the representation)
seasonalTransposer <- cmpfun(function(nKnots, nSKnots) {
m <- new.env(parent = .GlobalEnv)
initialize(env = m, nRows = nKnots * nSKnots, nCols = nKnots * nSKnots, len = nKnots * nSKnots)
for (t in 1:nKnots) {
for (s in 1:nSKnots) {
append(m, 1, (s - 1) * nKnots + t, (t - 1) * nSKnots + s)
}
}
return(sparseMatrix(x = m$ra[1L:m$l], i = as.integer(m$ia[1L:m$l]), j = as.integer(m$ja[1L:m$l]), dims = as.integer(m$d)))
})
# Example data:
# All connection points must be knots as well.
# Distances from any connection point to the nearest knots must be same for every connection point.
# data = c(1,3,4,1,2,3,4,1,2,3,5,1,2,3,4,5)
# times = c(1,3,4,5.5,12,16,20,21,22,22.2,23,24,27,28,30,32)
# seasons = c(0,3,5,8,11,16,20,0,3,5,8,12,213,222,100,110)
# timeKnots = c(0,3,5.5,17,33)
# ttLambda = 0.7
# ssLambda = 0.02
# stLambda = 0.9
# lambdas = c(ttLambda, ssLambda, stLambda)
# seasonalStructure = list(segments = list(c(0,24), c(100,124), c(212,224), c(312,324)),
# sKnots = list(c(0,24,324),4,8,c(12, 212),16,20,c(100,124,224),104,108,c(112,312),116,120,216,220,316,320)
# )
# predictor = list(data = data, times = times, seasons = seasons, timeKnots = timeKnots, seasonalStructure = seasonalStructure, lambdas = lambdas)
checkPredictor <- cmpfun(function(predictor) {
tkn <- predictor$timeKnots
tm <- predictor$times
ss <- predictor$seasonalStructure
seg <- ss$segments
skn <- ss$sKnots
if (length(predictor$data) != length(predictor$times)) {
cat("\nVector lengths of data and times should be same...")
return(FALSE)
}
if (is.null(tkn) && is.null(predictor$seasons) && is.null(ss)) {
return(TRUE)
}
if (length(unlist(skn)) != length(unique(unlist(skn)))) {
cat("\nPoints in sKnots should be mentioned once...")
return(FALSE)
}
if (!all(unlist(seg) %in% unlist(skn))) {
cat("\nPoints in segments should be mentioned in sKnots...")
return(FALSE)
}
if (length(predictor$seasons) != length(predictor$times)) {
cat("\nVector lengths of seasons and times should be same...")
return(FALSE)
}
if (min(tm) < min(tkn)) {
cat("\nThe first time knot should be before the first observation time...")
return(FALSE)
}
if (max(tm) > max(tkn)) {
cat("\nThe last time knot should be beyond the last observation time...")
return(FALSE)
}
return(TRUE)
})
# ensure(checkPredictor(predictor))
# Too slow
# knotToIndex = cmpfun(function(knot, sKnots) { which(sapply(sKnots, FUN = function(k) {knot %in% k})) })
# Twice faster than above
knotToIndex <- cmpfun(function(knot, sKnots) {
for (k in seq_along(sKnots)) if (knot %in% sKnots[[k]]) {
return(k)
}
return(0)
})
# The fastest
knotToIndex2 <- cmpfun(function(knot, flatKnots, indexes) {
indexes[flatKnots == knot]
})
# Creates a matrix which distributes any value between knots
# to the corresponding values at knots and any value at a knot
# is applied to this knot directly.
getInfluenceMatrix <- cmpfun(function(seasons, sKnots) {
m <- new.env(parent = .GlobalEnv)
initialize(m, nRows = length(seasons), nCols = length(sKnots), len = 2 * length(seasons))
flatKnots <- unlist(sKnots)
sIndexes <- unlist(lapply(seq_along(sKnots), FUN = function(i) rep(i, length(sKnots[[i]]))))
for (i in seq_along(seasons)) {
s <- seasons[i]
if (s %in% flatKnots) {
append(m, 1, i, knotToIndex2(s, flatKnots, sIndexes))
} else {
sLeft <- max(flatKnots[flatKnots <= s])
sRight <- min(flatKnots[flatKnots > s])
d <- sRight - sLeft
append(m, (sRight - s) / d, i, knotToIndex2(sLeft, flatKnots, sIndexes))
append(m, (s - sLeft) / d, i, knotToIndex2(sRight, flatKnots, sIndexes))
}
}
return(sparseMatrix(x = m$ra[1L:m$l], i = as.integer(m$ia[1L:m$l]), j = as.integer(m$ja[1L:m$l]), dims = as.integer(m$d)))
})
# Creates a matrix which takes values at seasonal and time knots and interpolates
# these values at coordinates of the observed points.
seasonalPredictorConstructor <- cmpfun(function(predictor) {
if (!checkPredictor(predictor)) stop(paste0('Predictor "', predictor$name, '" has wrong structure...'))
data <- predictor$data
times <- predictor$times
seasons <- predictor$seasons
segments <- predictor$seasonalStructure$segments
nRows <- length(data)
tKnots <- predictor$timeKnots
nKnots <- length(tKnots)
seasonalStructure <- predictor$seasonalStructure
sKnots <- seasonalStructure$sKnots
nSKnots <- length(sKnots)
nCols <- length(tKnots) * length(sKnots)
# Static predictor: single coefficient to estimate without any regularization
if (is.null(tKnots) && is.null(seasons) && is.null(seasonalStructure)) {
return(Matrix(data = data, nrow = length(data), ncol = 1, sparse = TRUE))
}
msw <- getInfluenceMatrix(seasons, sKnots)
mtw <- getInfluenceMatrix(times, tKnots)
m <- Matrix(data = 0, nrow = nRows, ncol = nCols, sparse = TRUE)
iCol <- 1
nSCol <- dim(msw)[2]
for (t in seq_along(tKnots)) {
m[, iCol:(iCol + nSCol - 1)] <- Diagonal(x = mtw[, t]) %*% msw
iCol <- iCol + nSCol
}
mData <- Diagonal(x = data)
if (nSKnots == 1) {
result <- mData %*% m
} else {
mTr <- seasonalTransformer(nKnots, seasonalStructure)
result <- (mData %*% m) %*% mTr
}
return(result)
})
# Creates a sparse matrix which takes first discrete differences of a vector
diff1 <- cmpfun(function(nCols) {
return(diff(Diagonal(nCols)))
})
# Creates a sparse matrix which takes second discrete differences of a vector
diff2 <- cmpfun(function(nCols) {
return(diff1(nCols - 1) %*% diff1(nCols))
})
vector2Derivatives <- cmpfun(function(knots, weights) {
ensure(length(knots) - 2 == length(weights))
m <- new.env(parent = .GlobalEnv)
nCols <- length(knots)
nRows <- nCols - 2
initialize(env = m, nRows = nRows, nCols = nCols, len = nRows * 3)
for (i in 1:nRows) {
delta1 <- knots[i + 1] - knots[i]
delta2 <- knots[i + 2] - knots[i + 1]
delta <- knots[i + 2] - knots[i]
append(m, 2 * weights[i] * c(1 / (delta1 * delta), -1 / (delta1 * delta2), 1 / (delta2 * delta)), c(i, i, i), c(i, i + 1, i + 2))
}
return(sparseMatrix(x = m$ra[1L:m$l], i = as.integer(m$ia[1L:m$l]), j = as.integer(m$ja[1L:m$l]), dims = as.integer(m$d)))
})
lrKnots <- cmpfun(function(seasonalStructure) {
segments <- seasonalStructure$segments
lefts <- sapply(segments, FUN = min)
rights <- sapply(segments, FUN = max)
sKnots <- seasonalStructure$sKnots
sKnotsV <- sort(unlist(sKnots))
lKnots <- lapply(sKnots, FUN = function(k) {
sapply(k, FUN = function(kk) {
ifelse(kk %in% lefts, NA, max(sKnotsV[sKnotsV < kk]))
})
})
rKnots <- lapply(sKnots, FUN = function(k) {
sapply(k, FUN = function(kk) {
ifelse(kk %in% rights, NA, min(sKnotsV[sKnotsV > kk]))
})
})
return(list(lKnots = lKnots, rKnots = rKnots))
})
# Creates a matrix which translates a seasonal column to the second derivatives
# applied to that column. Since the seasonal structure/topology can be different
# from cycle, the procedure is rather complicated and takes into account cases
# when a node in the seasonal graph can have none/multiple incoming vertexes and/or
# none/multiple outcoming vertexes.
cycle2Derivatives <- cmpfun(function(seasonalStructure, norm = 2) {
sKnots <- seasonalStructure$sKnots
m <- new.env(parent = .GlobalEnv)
nCols <- nSKnots <- length(sKnots)
initialize(env = m, nRows = -1, nCols = nCols, len = nSKnots * 3)
lr <- lrKnots(seasonalStructure)
lKnots <- lr$lKnots
rKnots <- lr$rKnots
nlKnots <- sapply(lKnots, FUN = function(k) sum(!is.na(k)))
nrKnots <- sapply(rKnots, FUN = function(k) sum(!is.na(k)))
nRow <- 0
for (i in 1:nSKnots) {
for (jPrev in seq_along(lKnots[[i]])) {
lk <- lKnots[[i]][jPrev]
if (is.na(lk)) next
for (jNext in seq_along(rKnots[[i]])) {
rk <- rKnots[[i]][jNext]
if (is.na(rk)) next
k1 <- sKnots[[i]][jPrev]
d1 <- abs(k1 - lk)
k2 <- sKnots[[i]][jNext]
d2 <- abs(rk - k2)
d <- d1 + d2
w <- ((d1 / nrKnots[i] + d2 / nlKnots[i]) / 2)^(1 / norm)
nRow <- nRow + 1
append(m, 2 * w * c(1 / (d1 * d), -1 / (d1 * d2), 1 / (d2 * d)), c(nRow, nRow, nRow), c(knotToIndex(lk, sKnots), i, knotToIndex(rk, sKnots)))
}
}
}
dims(env = m, nRows = nRow, nCols = nCols)
return(sparseMatrix(x = m$ra[1L:m$l], i = as.integer(m$ia[1L:m$l]), j = as.integer(m$ja[1L:m$l]), dims = as.integer(m$d)))
})
# Creates a matrix to calculate regularization values
# when second derivatives are taken along seasonal dimension.
ssRegulariser <- cmpfun(function(predictor, norm = 2) {
seasonalStructure <- predictor$seasonalStructure
dataLength <- length(predictor$data)
knots <- predictor$timeKnots
sKnots <- seasonalStructure$sKnots
nKnots <- length(knots)
nSKnots <- length(sKnots)
m <- cycle2Derivatives(seasonalStructure, norm)
weights <- tWeights(knots, norm) # tWeights (not sWeights) because width along t dimension should be taken into account
ensure(length(weights) == nKnots)
listM <- list()
for (i in 1:nKnots) {
listM[[i]] <- weights[i] * m
}
m1 <- bdiag(listM)
m2 <- seasonalTransformer(nKnots, seasonalStructure)
return(m1 %*% m2)
})
# Creates a matrix to calculate regularization values
# when second derivatives are taken along time dimension.
ttRegulariser <- cmpfun(function(predictor, norm = 2) {
seasonalStructure <- predictor$seasonalStructure
timeKnots <- predictor$timeKnots
sKnots <- seasonalStructure$sKnots
nKnots <- length(timeKnots)
nSKnots <- length(sKnots)
m <- vector2Derivatives(timeKnots, tWeights(timeKnots, norm)[c(-1, -nKnots)])
weights <- sWeights(seasonalStructure, norm) # sWeights (not tWeights) because width along s dimension should be taken into account
listM <- list()
for (i in 1:nSKnots) {
listM[[i]] <- weights[i] * m
}
m1 <- bdiag(listM)
if (nSKnots >= 2) {
m2 <- seasonalTransposer(nKnots, nSKnots)
m3 <- seasonalTransformer(nKnots, seasonalStructure)
return(m1 %*% (m2 %*% m3))
} else {
return(m1)
}
})
# Translates full seasonal matrix coordinates to vector positions
translST <- cmpfun(function(s, t, nSKnots) {
return((t - 1) * nSKnots + (s - 1) %% nSKnots + 1)
})
# Creates a matrix to calculate regularization values
# when second derivatives are taken along s and t dimensions.
stRegulariser <- cmpfun(function(predictor, norm = 2) {
timeKnots <- predictor$timeKnots
seasonalStructure <- predictor$seasonalStructure
sKnots <- seasonalStructure$sKnots
nKnots <- length(timeKnots)
nSKnots <- length(sKnots)
m <- new.env(parent = .GlobalEnv)
nCols <- nKnots * nSKnots
initialize(m, nRows = -1, nCols = nCols, len = 4 * (nKnots - 1) * nSKnots)
lrk <- lrKnots(seasonalStructure)
rKnots <- lrk$rKnots
nRow <- 0
for (t in 1:(nKnots - 1)) {
for (s in 1:nSKnots) {
for (ss in seq_along(rKnots[[s]])) {
rKnot <- rKnots[[s]][ss]
if (is.na(rKnot)) next
knot <- sKnots[[s]][ss]
tDistance <- abs(timeKnots[t + 1] - timeKnots[t])
sDistance <- abs(rKnot - knot)
area <- tDistance * sDistance
rKnotInd <- knotToIndex(rKnot, sKnots)
nRow <- nRow + 1
append(
m,
(area^(1 / norm - 1)) * c(1, -1, -1, 1),
c(nRow, nRow, nRow, nRow),
c(translST(s, t, nSKnots), translST(rKnotInd, t, nSKnots), translST(s, t + 1, nSKnots), translST(rKnotInd, t + 1, nSKnots))
)
}
}
}
dims(env = m, nRows = nRow, nCols = nCols)
m1 <- sparseMatrix(x = m$ra[1L:m$l], i = as.integer(m$ia[1L:m$l]), j = as.integer(m$ja[1L:m$l]), dims = as.integer(m$d))
m2 <- seasonalTransformer(nKnots, seasonalStructure)
return(m1 %*% m2)
})
# Creates regularization matrix for one predictor.
# norm parameter specifies which norm is used L2 or L1. It affects how distance between knots should affect knot weigts.
# lambdas0or1 is a "technical" parameter and when it is TRUE then lambdas are not taken into account. It is used to create
# a "template" matrix to be later adjusted by multiplying by appropriate lambdas. It is done for performance.
predictorRegulariser <- cmpfun(function(predictor, norm = 2, lambdas0or1 = FALSE) {
nKnots <- length(predictor$timeKnots)
nSKnots <- length(predictor$seasonalStructure$sKnots)
l1 <- l2 <- l3 <- 0
result <- Matrix(data = 0, nrow = 0, ncol = max(nKnots, 1) * max(nSKnots - 1, 1)) # Empty matrix,
# it has 0 rows when l1 = l2 = l3 = 0. The only information it passes in this case is the number of columns.
if (predictor$lambdas[1] != 0) {
reg <- ifelse(lambdas0or1, 1, predictor$lambdas[1]) * ttRegulariser(predictor, norm)
l1 <- dim(reg)[1]
result <- rbind(result, reg)
}
if (predictor$lambdas[2] != 0) {
reg <- ifelse(lambdas0or1, 1, predictor$lambdas[2]) * ssRegulariser(predictor, norm)
l2 <- dim(reg)[1]
result <- rbind(result, reg)
}
if (predictor$lambdas[3] != 0) {
reg <- ifelse(lambdas0or1, 1, predictor$lambdas[3]) * stRegulariser(predictor, norm)
l3 <- dim(reg)[1]
result <- rbind(result, reg)
}
return(list(matrix = result, lengths = c(l1, l2, l3)))
})
# The "constructor" matrix is responsible for reconstruction the data from the parameters.
constructorMatrix <- function(predictors) {
l <- list()
i <- 1
predictorSeats <- list()
j <- 1
startCol <- 1
for (predictor in predictors) {
m <- seasonalPredictorConstructor(predictor)
nCol <- ncol(m)
predictorSeats[[j]] <- c(start = startCol, length = nCol)
startCol <- startCol + nCol
j <- j + 1
l[[i]] <- m
i <- i + 1
}
return(list(matrix = do.call(cbind, l), seats = predictorSeats))
}
# The "regulariser" matrix is responsible for penalty calculations.
regulariserMatrix <- cmpfun(function(predictors, norm = 2, lambdas0or1 = FALSE) {
l <- list()
i <- 1
seats <- list()
startRow <- 1
for (predictor in predictors) {
pr <- predictorRegulariser(predictor, norm, lambdas0or1)
l[[i]] <- pr$matrix
pl <- pr$lengths
seats[[i]] <- list(start = startRow, lengths = pl)
startRow <- startRow + sum(pl)
i <- i + 1
}
return(list(matrix = bdiag(l), seats = seats))
})
# Calculates a design matrix. The design matrix consists of two matrices one on top of the other:
# "constructor" matrix and "regulariser".
# The first one is responsible for reconstruction the data from the parameters.
# The second is resposible for penalty calculating the penalty corresponding to the parameters.
designMatrix <- cmpfun(function(predictors, norm = 2) {
cm <- constructorMatrix(predictors)$matrix
rm <- regulariserMatrix(predictors, norm)$matrix
return(rbind(cm, rm))
})
targetVector <- cmpfun(function(data, designMatrix) {
return(c(data, rep(0, nrow(designMatrix) - length(data))))
})
# Calculates lower and upper confidence/forecasting intervals.
getLowerUpper <- cmpfun(function(data, covMatrix, constr, range, confidence) {
if (is.null(covMatrix) || is.null(confidence)) {
return(list())
}
sigmas <- sqrt(diag(constr %*% covMatrix[range, range] %*% t(constr)))
lower <- upper <- NULL
for (conf in confidence) {
q1 <- qnorm(p = conf, mean = data, sd = sigmas)
q2 <- qnorm(p = 1 - conf, mean = data, sd = sigmas)
lower <- cbind(lower, pmin(q1, q2))
upper <- cbind(upper, pmax(q1, q2))
}
return(list(lower = lower, upper = upper))
})
extract <- cmpfun(function(coef, resid, covMatrix, constructorMatrix, seats, predictors, confidence = NULL) {
predictorsData <- list()
for (i in seq_along(predictors)) {
range <- seats[[i]]["start"]:(seats[[i]]["start"] + seats[[i]]["length"] - 1)
beta <- coef[range]
constr <- constructorMatrix[, range, drop = FALSE]
data <- as.vector(constr %*% beta)
predictorsData[[i]] <- c(list(data = data, beta = beta), getLowerUpper(data, covMatrix, constr, range, confidence))
}
data <- as.vector(constructorMatrix %*% coef)
forecastData <- c(list(data = data, beta = coef), getLowerUpper(data, covMatrix, constructorMatrix, seq_along(coef), confidence))
return(list(predictors = predictorsData, random = list(data = resid), forecast = forecastData))
})
STRDesign <- function(predictors, norm = 2) {
cm <- constructorMatrix(predictors)
rm <- regulariserMatrix(predictors, norm, lambdas0or1 = TRUE)
return(list(cm = cm, rm = rm, predictors = predictors, norm = norm))
}
lambdaMatrix <- function(lambdas, seats) {
ensure(length(lambdas) == length(seats))
v <- NULL
for (i in seq_along(lambdas)) {
v <- c(v, rep(lambdas[[i]]$lambdas[1], seats[[i]]$lengths[1]), rep(lambdas[[i]]$lambdas[2], seats[[i]]$lengths[2]), rep(lambdas[[i]]$lambdas[3], seats[[i]]$lengths[3]))
}
return(Diagonal(x = v))
}
# Returns covariance (diagonal) matrix of errors/residuals.
getISigma <- function(resid, firstLength, seats) {
d <- rep(mean(head(resid, firstLength)^2), firstLength)
resid <- tail(resid, -firstLength)
for (i in seq_along(seats)) {
for (j in seq_along(seats[[i]]$lengths)) {
if (seats[[i]]$lengths[j] > 0) {
start <- seats[[i]]$start
lBefore <- sum(seats[[i]]$lengths[seq_len(j - 1)])
range <- (start + lBefore):(start + lBefore + seats[[i]]$lengths[j] - 1)
d <- c(d, rep(mean(resid[range]^2), length(range)))
}
}
}
return(Diagonal(length(d), d))
}
#' @title STR decomposition
#' @description Seasonal-Trend decomposition of time series data using Regression.
#' @seealso \code{\link{AutoSTR}}
#' @inheritParams data
#' @inheritParams predictors
#' @inheritParams strDesign
#' @inheritParams lambdas
#' @inheritParams confidence
#' @inheritParams solver
#' @inheritParams reportDimensionsOnly
#' @inheritParams trace
#' @templateVar class STR
#' @templateVar topLevel1 \item \strong{cvMSE} -- optional cross validated (leave one out) Mean Squared Error.
#' @templateVar topLevel2 \strong{}
#' @templateVar topLevel3 \strong{}
#' @templateVar topLevel4 \strong{}
#' @templateVar topLevel5 \item \strong{method} -- always contains string \code{"STRmodel"} for this function.
#' @template returnValue
#' @references Dokumentov, A., and Hyndman, R.J. (2022)
#' STR: Seasonal-Trend decomposition using Regression,
#' \emph{INFORMS Journal on Data Science}, 1(1), 50-62.
#' \url{https://robjhyndman.com/publications/str/}
#' @examples
#' n <- 50
#' trendSeasonalStructure <- list(segments = list(c(0, 1)), sKnots = list(c(1, 0)))
#' ns <- 5
#' seasonalStructure <- list(
#' segments = list(c(0, ns)),
#' sKnots = c(as.list(1:(ns - 1)), list(c(ns, 0)))
#' )
#' seasons <- (0:(n - 1)) %% ns + 1
#' trendSeasons <- rep(1, length(seasons))
#' times <- seq_along(seasons)
#' data <- seasons + times / 4
#' plot(times, data, type = "l")
#' timeKnots <- times
#' trendData <- rep(1, n)
#' seasonData <- rep(1, n)
#' trend <- list(
#' data = trendData, times = times, seasons = trendSeasons,
#' timeKnots = timeKnots, seasonalStructure = trendSeasonalStructure, lambdas = c(1, 0, 0)
#' )
#' season <- list(
#' data = seasonData, times = times, seasons = seasons,
#' timeKnots = timeKnots, seasonalStructure = seasonalStructure, lambdas = c(10, 0, 0)
#' )
#' predictors <- list(trend, season)
#'
#' str1 <- STRmodel(data, predictors)
#' plot(str1)
#'
#' data[c(3, 4, 7, 20, 24, 29, 35, 37, 45)] <- NA
#' plot(times, data, type = "l")
#' str2 <- STRmodel(data, predictors)
#' plot(str2)
#'
#' @author Alexander Dokumentov
#' @export
STRmodel <- function(data, predictors = NULL, strDesign = NULL, lambdas = NULL,
confidence = NULL, # confidence = c(0.8, 0.95)
solver = c("Matrix", "cholesky"),
reportDimensionsOnly = FALSE,
trace = FALSE) {
if (is.null(strDesign) && !is.null(predictors)) {
strDesign <- STRDesign(predictors, norm = 2)
}
if (is.null(lambdas)) {
lambdas <- predictors
}
if (is.null(strDesign)) stop("(strDesign and lambdas) or predictors should be provided...")
if (any(confidence <= 0 | confidence >= 1)) {
stop("confidence must be between 0 and 1")
}
cm <- strDesign$cm
rm <- strDesign$rm
lm <- lambdaMatrix(lambdas, rm$seats)
design <- rbind(cm$matrix, lm %*% rm$matrix)
if (trace) {
cat("\nDesign matrix dimensions: ")
cat(dim(design))
cat("\n")
}
if (reportDimensionsOnly) {
return(NULL)
}
noNA <- !is.na(as.vector(data))
y <- as.vector(data)[noNA]
X <- design[c(noNA, rep(TRUE, nrow(design) - length(noNA))), , drop = FALSE] # noNA should be extended with TRUE values to keep rows resposible for regularisation
if (trace) {
cat("X matrix (NA removed) dimensions: ")
cat(dim(X))
cat("\n")
}
C <- cm$matrix[noNA, , drop = FALSE]
CC <- cm$matrix
if (is.null(confidence)) {
coef <- lmSolver(X, y, type = solver[1], method = solver[2], trace = trace)
dataHat <- CC %*% coef
if (is.null(predictors)) predictors <- strDesign$predictors
components <- extract(
coef = as.vector(coef), resid = as.vector(data) - as.vector(dataHat),
covMatrix = NULL, constructorMatrix = cm$matrix, seats = cm$seats,
predictors = predictors, confidence = NULL
)
result <- list(output = components, input = list(data = data, predictors = predictors, lambdas = lambdas), method = "STRmodel")
class(result) <- "STR"
return(result)
} else {
XtXinv <- solve(crossprod(X))
partialB <- XtXinv %*% t(C)
partialH <- C %*% partialB
partialDiagH <- diag(partialH)
coef <- as.vector(partialB %*% y)
yHat <- as.vector(C %*% coef)
yHatPlus <- as.vector(X %*% coef)
dataHat <- as.vector(CC %*% coef)
resid <- y - yHat
yPlus <- c(y, rep(0, length(yHatPlus) - length(y)))
residPlus <- yPlus - yHatPlus
ISigma <- getISigma(residPlus, length(y), rm$seats)
cvResid <- resid / (1 - partialDiagH)
cvMSE <- sum((cvResid)^2) / length(resid) # Estimated covarience of the errors
# Sigma = cvMSE * (XtXinv %*% crossprod(C) %*% XtXinv) # Covariance matrix of the parameters
Sigma <- crossprod((sqrt(ISigma) %*% X) %*% XtXinv) # Covariance matrix of the parameters
if (is.null(predictors)) predictors <- strDesign$predictors
components <- extract(as.vector(coef), as.vector(data) - as.vector(dataHat), Sigma, cm$matrix, cm$seats, predictors, confidence)
result <- list(output = components, input = list(data = data, predictors = predictors, lambdas = lambdas), cvMSE = cvMSE, method = "STRmodel")
class(result) <- "STR"
return(result)
}
}
nFoldSTRCV <- function(n,
trainData, fcastData, completeData = NULL, # parameter completeData is required only for iterative methods
trainC, fcastC, completeC,
regMatrix, regSeats, lambdas,
solver = c("Matrix", "cholesky"), trace = FALSE, iterControl = list(maxiter = 20, tol = 1E-6)) {
SSE <- 0
l <- 0
lm <- lambdaMatrix(lambdas, regSeats)
R <- lm %*% regMatrix
e <- NULL
if (solver[1] == "iterative") {
if (solver[2] %in% c("cg-chol", "lsmr-chol", "lsmr")) {
e <- new.env(parent = .GlobalEnv)
}
if (solver[2] %in% c("cg-chol", "lsmr-chol")) {
noNA <- !is.na(completeData)
y <- completeData[noNA]
C <- completeC[noNA, ]
X <- rbind(C, R)
coef0 <- try(lmSolver(X, y, type = solver[1], method = solver[2], env = e, iterControl = iterControl, trace = trace), silent = !trace)
if ("try-error" %in% class(coef0)) {
if (trace) cat("\nError in lmSolver... iterative solvers without preconditioners will be used...\n")
}
}
}
resultList <- list()
# for(i in rev(1:n)) {
resultList <- foreach(i = 1:n) %dopar% {
noNA <- !is.na(trainData[[i]])
y <- (trainData[[i]])[noNA]
C <- (trainC[[i]])[noNA, ]
X <- rbind(C, R)
coef <- try(lmSolver(X, y, type = solver[1], method = solver[2], env = e, iterControl = iterControl, trace = trace), silent = !trace)
if ("try-error" %in% class(coef)) {
if (trace) cat("\nError in lmSolver...\n")
# return(Inf)
# next
c(SSE = Inf, l = 1)
# c(SSE = 0, l = 0)
} else {
fcast <- fcastC[[i]] %*% coef
resid <- fcastData[[i]] - as.vector(fcast)
# resultList[[length(resultList) + 1]] = c(SSE = sum(resid^2, na.rm = TRUE), l = sum(!is.na(resid)))
c(SSE = sum(resid^2, na.rm = TRUE), l = sum(!is.na(resid)))
}
}
for (i in seq_along(resultList)) {
SSE <- SSE + resultList[[i]][1]
l <- l + resultList[[i]][2]
}
if (l == 0) {
return(Inf)
}
return(SSE / l)
}
extractPattern <- function(predictors) {
pattern <- NULL
for (i in seq_along(predictors)) {
pattern <- c(pattern, predictors[[i]]$lambdas > 0)
}
return(pattern)
}
extractP <- function(predictors, pattern) {
lambdas <- NULL
for (i in seq_along(predictors)) {
lambdas <- c(lambdas, predictors[[i]]$lambdas)
}
return(lambdas[pattern])
}
createLambdas <- function(p, pattern, original) {
ensure(length(pattern) %% 3 == 0)
ensure(length(pattern) == length(original))
# pLong = rep(0, length(pattern))
pLong <- original
pLong[pattern] <- p
i <- 1
l <- list()
for (i in seq_len(length(pattern) %/% 3)) {
pInd <- (i - 1) * 3 + 1
l[[i]] <- list(lambdas = pLong[pInd:(pInd + 2)])
}
return(l)
}
#' @rdname STR
#' @name STR
#' @title Automatic STR decomposition
#' @description Automatically selects parameters for an STR decomposition of time series data.
#'
#' If a parallel backend is registered for use before \code{STR} call,
#' \code{STR} will use it for n-fold cross validation computations.
#'
#' @seealso \code{\link{STRmodel}} \code{\link{RSTRmodel}} \code{\link{AutoSTR}}
#' @inheritParams data
#' @inheritParams predictors
#' @inheritParams confidence
#' @inheritParams robust
#' @inheritParams lambdas
#' @inheritParams pattern
#' @inheritParams nFold
#' @inheritParams reltol
#' @inheritParams gapCV
#' @inheritParams solver
#' @inheritParams nMCIter
#' @inheritParams control
#' @inheritParams trace
#' @param iterControl Control parameters for some experimental features.
#' This should not be used by an ordinary user.
#' @templateVar class STR
#' @templateVar topLevel1 \item \strong{cvMSE} -- optional cross validated (leave one out) Mean Squared Error.
#' @templateVar topLevel2 \item \strong{optim.CV.MSE} or \strong{optim.CV.MAE} -- best cross validated Mean Squared Error or Mean Absolute Error (n-fold) achieved during minimisation procedure.
#' @templateVar topLevel3 \item \strong{nFold} -- the input \code{nFold} parameter.
#' @templateVar topLevel4 \item \strong{gapCV} -- the input \code{gapCV} parameter.
#' @templateVar topLevel5 \item \strong{method} -- contains strings \code{"STR"} or \code{"RSTR"} depending on used method.
#' @template returnValue
#' @examples
#' \donttest{
#' TrendSeasonalStructure <- list(
#' segments = list(c(0, 1)),
#' sKnots = list(c(1, 0))
#' )
#' WDSeasonalStructure <- list(
#' segments = list(c(0, 48), c(100, 148)),
#' sKnots = c(as.list(c(1:47, 101:147)), list(c(0, 48, 100, 148)))
#' )
#'
#' TrendSeasons <- rep(1, nrow(electricity))
#' WDSeasons <- as.vector(electricity[, "WorkingDaySeasonality"])
#'
#' Data <- as.vector(electricity[, "Consumption"])
#' Times <- as.vector(electricity[, "Time"])
#' TempM <- as.vector(electricity[, "Temperature"])
#' TempM2 <- TempM^2
#'
#' TrendTimeKnots <- seq(from = head(Times, 1), to = tail(Times, 1), length.out = 116)
#' SeasonTimeKnots <- seq(from = head(Times, 1), to = tail(Times, 1), length.out = 24)
#'
#' TrendData <- rep(1, length(Times))
#' SeasonData <- rep(1, length(Times))
#'
#' Trend <- list(
#' name = "Trend",
#' data = TrendData,
#' times = Times,
#' seasons = TrendSeasons,
#' timeKnots = TrendTimeKnots,
#' seasonalStructure = TrendSeasonalStructure,
#' lambdas = c(1500, 0, 0)
#' )
#' WDSeason <- list(
#' name = "Dayly seas",
#' data = SeasonData,
#' times = Times,
#' seasons = WDSeasons,
#' timeKnots = SeasonTimeKnots,
#' seasonalStructure = WDSeasonalStructure,
#' lambdas = c(0.003, 0, 240)
#' )
#' StaticTempM <- list(
#' name = "Temp Mel",
#' data = TempM,
#' times = Times,
#' seasons = NULL,
#' timeKnots = NULL,
#' seasonalStructure = NULL,
#' lambdas = c(0, 0, 0)
#' )
#' StaticTempM2 <- list(
#' name = "Temp Mel^2",
#' data = TempM2,
#' times = Times,
#' seasons = NULL,
#' timeKnots = NULL,
#' seasonalStructure = NULL,
#' lambdas = c(0, 0, 0)
#' )
#' Predictors <- list(Trend, WDSeason, StaticTempM, StaticTempM2)
#'
#' elec.fit <- STR(
#' data = Data,
#' predictors = Predictors,
#' gapCV = 48 * 7
#' )
#'
#' plot(elec.fit,
#' xTime = as.Date("2000-01-11") + ((Times - 1) / 48 - 10),
#' forecastPanels = NULL
#' )
#'
#' #########################################################
#'
#' n <- 70
#' trendSeasonalStructure <- list(segments = list(c(0, 1)), sKnots = list(c(1, 0)))
#' ns <- 5
#' seasonalStructure <- list(
#' segments = list(c(0, ns)),
#' sKnots = c(as.list(1:(ns - 1)), list(c(ns, 0)))
#' )
#' seasons <- (0:(n - 1)) %% ns + 1
#' trendSeasons <- rep(1, length(seasons))
#' times <- seq_along(seasons)
#' data <- seasons + times / 4
#' set.seed(1234567890)
#' data <- data + rnorm(length(data), 0, 0.2)
#' data[20] <- data[20] + 3
#' data[50] <- data[50] - 5
#' plot(times, data, type = "l")
#' timeKnots <- times
#' trendData <- rep(1, n)
#' seasonData <- rep(1, n)
#' trend <- list(
#' data = trendData, times = times, seasons = trendSeasons,
#' timeKnots = timeKnots, seasonalStructure = trendSeasonalStructure, lambdas = c(1, 0, 0)
#' )
#' season <- list(
#' data = seasonData, times = times, seasons = seasons,
#' timeKnots = timeKnots, seasonalStructure = seasonalStructure, lambdas = c(1, 0, 1)
#' )
#' predictors <- list(trend, season)
#' rstr <- STR(data, predictors,
#' reltol = 0.0000001, gapCV = 10,
#' confidence = 0.95, nMCIter = 400, robust = TRUE
#' )
#' plot(rstr)
#' }
#' @author Alexander Dokumentov
#' @references Dokumentov, A., and Hyndman, R.J. (2022)
#' STR: Seasonal-Trend decomposition using Regression,
#' \emph{INFORMS Journal on Data Science}, 1(1), 50-62.
#' \url{https://robjhyndman.com/publications/str/}
#' @export
STR <- function(data, predictors,
confidence = NULL,
robust = FALSE,
lambdas = NULL,
pattern = extractPattern(predictors), nFold = 5, reltol = 0.005, gapCV = 1,
solver = c("Matrix", "cholesky"),
nMCIter = 100,
control = list(nnzlmax = 1000000, nsubmax = 300000, tmpmax = 50000),
trace = FALSE,
iterControl = list(maxiter = 20, tol = 1E-6)) {
if (robust) {
return(
RSTR_(
data = data, predictors = predictors,
confidence = confidence,
nMCIter = nMCIter,
lambdas = lambdas,
pattern = pattern, nFold = nFold,
reltol = reltol, gapCV = gapCV,
control = control,
trace = FALSE
)
)
} else {
return(
STR_(
data = data, predictors = predictors,
confidence = confidence, lambdas = lambdas,
pattern = pattern, nFold = nFold,
reltol = reltol, gapCV = gapCV,
solver = solver, trace = trace, iterControl = iterControl
)
)
}
}
STR_ <- function(
data, predictors,
confidence = NULL, lambdas = NULL,
pattern = extractPattern(predictors), nFold = 5, reltol = 0.005, gapCV = 1,
solver = c("Matrix", "cholesky"),
trace = FALSE,
ratioGap = 1e6, # Ratio to define bounds for one-dimensional search
iterControl = list(maxiter = 20, tol = 1E-6)) {
if (any(confidence <= 0 | confidence >= 1)) stop("confidence must be between 0 and 1")
if (gapCV < 1) stop("gapCV must be greater or equal to 1")
if (getDoParWorkers() <= 1) registerDoSEQ() # A way to avoid warning from %dopar% when no parallel backend is registered
f <- function(p) {
p <- exp(p) # Optimisation is on log scale
if (trace) {
cat("\nParameters = [")
cat(p)
cat("]\n")
}
newLambdas <- createLambdas(p, pattern = pattern, original = origP)
cv <- nFoldSTRCV(
n = nFold,
trainData = trainData, fcastData = fcastData, completeData = data,
trainC = trainC, fcastC = fcastC, completeC = C,
regMatrix = regMatrix, regSeats = regSeats,
lambdas = newLambdas,
solver = solver,
trace = trace,
iterControl = iterControl
)
if (trace) {
cat("CV = ")
cat(format(cv, digits = 16))
cat("\n")
}
return(cv)
}
lData <- length(data)
if (nFold * gapCV > lData) {
stop(paste0(
"nFold*gapCV should be less or equal to the data length.\nnFold = ",
nFold, "\ngapCV = ", gapCV, "\nlength of the data = ", lData
))
}
subInds <- lapply(1:nFold, FUN = function(i) sort(unlist(lapply(1:gapCV, FUN = function(j) seq(from = (i - 1) * gapCV + j, to = lData, by = nFold * gapCV)))))
complInds <- lapply(subInds, FUN = function(s) setdiff(1:lData, s))
strDesign <- STRDesign(predictors)
C <- strDesign$cm$matrix
fcastC <- lapply(subInds, FUN = function(si) C[si, ])
trainC <- lapply(complInds, FUN = function(ci) C[ci, ])
fcastData <- lapply(subInds, FUN = function(si) data[si])
trainData <- lapply(complInds, FUN = function(ci) data[ci])
rm <- strDesign$rm
regMatrix <- rm$matrix
regSeats <- rm$seats
if (!is.null(lambdas)) {
initP <- extractP(lambdas, pattern)
origP <- abs(extractP(lambdas, rep(TRUE, length(pattern))))
} else {
initP <- extractP(predictors, pattern)
origP <- abs(extractP(predictors, rep(TRUE, length(pattern))))
}
# cat("\ninitP: "); cat(initP)
# if(length(initP) == 1) {
# lower = initP/ratioGap
# upper = initP*ratioGap
# cat("\nlower: "); cat(lower)
# cat("\nupper: "); cat(upper)
# }
# cat("\n")
# Optimisation is performed on log scale
optP <- optim(
par = log(initP),
fn = f,
method = ifelse(length(initP) > 1, "Nelder-Mead", "Brent"),
lower = ifelse(length(initP) > 1, -Inf, log(initP / ratioGap)),
upper = ifelse(length(initP) > 1, Inf, log(initP * ratioGap)),
control = list(reltol = reltol)
)
newLambdas <- createLambdas(exp(optP$par), pattern = pattern, original = origP)
result <- STRmodel(data, strDesign = strDesign, lambdas = newLambdas, confidence = confidence, trace = trace)
result$optim.CV.MSE <- optP$value
result$nFold <- nFold
result$gapCV <- gapCV
result$method <- "STR"
return(result)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.