# R/STR.R

# Defines functions STR_ STR createLambdas extractP extractPattern nFoldSTRCV STRmodel getISigma lambdaMatrix STRDesign constructorMatrix ensure

# Documented in STR STRmodel

#' @import compiler
#' @import quantreg
#' @import foreach
#' @importFrom Matrix Matrix Diagonal sparseMatrix bdiag .solve.dgC.chol
#' @importFrom stats optim qnorm quantile time
#' @importFrom utils tail head
# library(doMC)
# registerDoMC(8) #Number of CPU cores

# Thin alias of stopifnot(): aborts when any supplied condition is not all TRUE.
ensure <- function(...) {
  stopifnot(...)
}

# Resets an environment used as a sparse-triplet accumulator:
# d = matrix dimensions, ra/ia/ja = value/row/column buffers of capacity len,
# l = number of slots currently filled.
initialize <- cmpfun(function(env, nRows, nCols, len) {
  env$d <- c(nRows, nCols)
  env$ra <- vector("numeric", len)
  env$ia <- vector("integer", len)
  env$ja <- vector("integer", len)
  env$l <- 0L
})

# Writes a batch of triplet entries (values val at rows i, columns j) into the
# next free slots of the accumulator and advances the fill pointer env$l.
append <- cmpfun(function(env, val, i, j) {
  from <- env$l + 1L
  env$l <- env$l + length(val)
  env$ra[from:env$l] <- val
  env$ia[from:env$l] <- i
  env$ja[from:env$l] <- j
})

# Overwrites the stored dimensions of a triplet accumulator (used after loops
# where the final row count is only known at the end).
dims <- cmpfun(function(env, nRows, nCols) {
  newDims <- c(nRows, nCols)
  env$d <- newDims
})

# Final element of x (empty result when x is empty).
last <- cmpfun(function(x) tail(x, n = 1L))

# Initial element of x (empty result when x is empty).
first <- cmpfun(function(x) head(x, n = 1L))

# One dimensional territory belonging to a seasonal knot (for all seasonal knots).
# Territory is the half sum of all lengths of all segments attached to the knot
# (there can be more or less than two attached to a knot if the seasonal
# structure is not a circle or a segment).
# One dimensional territory belonging to each seasonal knot: half the sum of
# the lengths of all segments attached to the knot (a knot can have more or
# fewer than two attached segments when the seasonal structure is not a
# circle or a single segment).
# Fix: the scalar conditions previously used ifelse(); plain if/else is the
# correct construct for a length-1 test and avoids ifelse()'s coercion rules.
sTerritory <- cmpfun(function(seasonalStructure) {
  sKnots <- seasonalStructure$sKnots
  sKnotsV <- sort(unlist(sKnots))
  segments <- seasonalStructure$segments

  # Segment boundaries: knots at a left boundary have no left neighbour,
  # knots at a right boundary have no right neighbour.
  lefts <- sapply(segments, FUN = min)
  rights <- sapply(segments, FUN = max)

  sKnotsNoLefts <- lapply(sKnots, FUN = function(k) {
    k[!(k %in% lefts)]
  })
  sKnotsNoRights <- lapply(sKnots, FUN = function(k) {
    k[!(k %in% rights)]
  })

  # Sum of distances to the nearest knot on the left, per knot group.
  lDistsSum <- sapply(sKnotsNoLefts, FUN = function(k) {
    if (length(k) == 0) {
      0
    } else {
      sum(sapply(k, FUN = function(kk) {
        kk - max(sKnotsV[sKnotsV < kk])
      }))
    }
  })
  # Sum of distances to the nearest knot on the right, per knot group.
  rDistsSum <- sapply(sKnotsNoRights, FUN = function(k) {
    if (length(k) == 0) {
      0
    } else {
      sum(sapply(k, FUN = function(kk) {
        min(sKnotsV[sKnotsV > kk]) - kk
      }))
    }
  })

  return((lDistsSum + rDistsSum) / 2)
})

# Seasonal weights taking into account the territory around a seasonal knot and the norm
# Seasonal knot weights: the territory around each seasonal knot raised to
# 1/norm (norm = 2 for L2 penalties, 1 for L1).
sWeights <- cmpfun(function(seasonalStructure, norm = 2) {
  sTerritory(seasonalStructure)^(1 / norm)
})

# Territory of each time knot: half the summed lengths of its adjacent gaps
# (boundary knots only have one adjacent gap).
tTerritory <- cmpfun(function(timeKnots) {
  gaps <- diff(timeKnots)
  (c(0, gaps) + c(gaps, 0)) / 2
})

# Time knot weights: the territory around each time knot raised to 1/norm.
tWeights <- cmpfun(function(timeKnots, norm = 2) {
  tTerritory(timeKnots)^(1 / norm)
})

# Transforms reduced season matrix into full season matrix.
# The source and the result are in vectorised forms (seasons are changing quicker).
# Reduced seasonal matrix is a seasonal matrix where one season is omitted as input value
# (since value of that season can be calculated as negative of weighted sum of other values)
# Transforms a reduced season matrix into a full season matrix, both in
# vectorised form (seasons changing quicker). The reduced form omits one
# season whose value is the negative weighted sum of the others; the extra
# bottom row of each block reconstructs it.
# Fix: the old code seeded l with list(m) and then refilled it in a 1:nKnots
# loop (redundant first element; 1:0 iteration when nKnots == 0);
# rep(list(m), nKnots) builds the same block list safely in one step.
seasonalTransformer <- cmpfun(function(nKnots, seasonalStructure) {
  sKnots <- seasonalStructure$sKnots
  nSKnots <- length(sKnots)
  weights <- sTerritory(seasonalStructure)
  lw <- length(weights)
  # Per-time-knot block: identity over the kept seasons plus one row that
  # recovers the omitted season from the weighted sum constraint.
  m <- rbind(
    Diagonal(n = nSKnots - 1),
    Matrix(data = -weights[-lw] / weights[lw], nrow = 1, sparse = TRUE)
  )

  # One copy of the block for every time knot.
  return(bdiag(rep(list(m), nKnots)))
})

# Transposes vectorised full seasonal matrix (to make time changing quicker in the representation)
# Transposes a vectorised full seasonal matrix (making time change quicker in
# the representation): a permutation matrix mapping position
# (t-1)*nSKnots + s to position (s-1)*nKnots + t.
# Fix: the entries were accumulated one at a time through the env/append
# machinery in a double loop; the index vectors are fully vectorisable, which
# is both simpler and O(n) without per-entry function calls.
seasonalTransposer <- cmpfun(function(nKnots, nSKnots) {
  n <- nKnots * nSKnots
  # Enumerate all (t, s) pairs, s varying fastest (matching the old loops).
  tIdx <- rep(seq_len(nKnots), each = nSKnots)
  sIdx <- rep.int(seq_len(nSKnots), nKnots)
  return(sparseMatrix(
    x = rep(1, n),
    i = (sIdx - 1L) * nKnots + tIdx,
    j = (tIdx - 1L) * nSKnots + sIdx,
    dims = c(n, n)
  ))
})

# Example data:
# All connection points must be knots as well.
# Distances from any connection point to the nearest knots must be same for every connection point.
# data = c(1,3,4,1,2,3,4,1,2,3,5,1,2,3,4,5)
# times = c(1,3,4,5.5,12,16,20,21,22,22.2,23,24,27,28,30,32)
# seasons = c(0,3,5,8,11,16,20,0,3,5,8,12,213,222,100,110)
# timeKnots = c(0,3,5.5,17,33)
# ttLambda = 0.7
# ssLambda = 0.02
# stLambda = 0.9
# lambdas = c(ttLambda, ssLambda, stLambda)
# seasonalStructure = list(segments = list(c(0,24), c(100,124), c(212,224), c(312,324)),
#                          sKnots = list(c(0,24,324),4,8,c(12, 212),16,20,c(100,124,224),104,108,c(112,312),116,120,216,220,316,320)
#                          )
# predictor = list(data = data, times = times, seasons = seasons, timeKnots = timeKnots, seasonalStructure = seasonalStructure, lambdas = lambdas)

# Validates a predictor's structure. Prints an explanatory message and returns
# FALSE on the first violated rule; returns TRUE for a valid predictor.
# A "static" predictor (no time knots, seasons, or seasonal structure) only
# needs matching data/times lengths.
checkPredictor <- cmpfun(function(predictor) {
  fail <- function(msg) {
    cat(msg)
    FALSE
  }
  timeKnots <- predictor$timeKnots
  obsTimes <- predictor$times
  sStruct <- predictor$seasonalStructure
  if (length(predictor$data) != length(predictor$times)) {
    return(fail("\nVector lengths of data and times should be same..."))
  }
  # Static predictor: nothing else to validate.
  if (is.null(timeKnots) && is.null(predictor$seasons) && is.null(sStruct)) {
    return(TRUE)
  }
  allSKnots <- unlist(sStruct$sKnots)
  if (length(allSKnots) != length(unique(allSKnots))) {
    return(fail("\nPoints in sKnots should be mentioned once..."))
  }
  if (!all(unlist(sStruct$segments) %in% allSKnots)) {
    return(fail("\nPoints in segments should be mentioned in sKnots..."))
  }
  if (length(predictor$seasons) != length(predictor$times)) {
    return(fail("\nVector lengths of seasons and times should be same..."))
  }
  if (min(obsTimes) < min(timeKnots)) {
    return(fail("\nThe first time knot should be before the first observation time..."))
  }
  if (max(obsTimes) > max(timeKnots)) {
    return(fail("\nThe last time knot should be beyond the last observation time..."))
  }
  TRUE
})

# ensure(checkPredictor(predictor))

# Too slow
# knotToIndex = cmpfun(function(knot, sKnots) { which(sapply(sKnots, FUN = function(k) {knot %in% k})) })

# Twice faster than above
# Index of the knot group in sKnots containing the given knot value;
# 0 when the value belongs to no group. Short-circuits on the first hit.
knotToIndex <- cmpfun(function(knot, sKnots) {
  Position(function(k) knot %in% k, sKnots, nomatch = 0)
})

# The fastest
# Fast variant: looks up the group index of a knot value in a pre-flattened
# knot vector with a parallel vector of group indexes.
knotToIndex2 <- cmpfun(function(knot, flatKnots, indexes) {
  indexes[which(flatKnots == knot)]
})

# Creates a matrix which distributes any value between knots
# to the corresponding values at knots and any value at a knot
# is applied to this knot directly.
# Creates a matrix which distributes any value between knots to the values at
# the two surrounding knots (linear interpolation), while a value exactly at a
# knot is applied to that knot directly.
getInfluenceMatrix <- cmpfun(function(seasons, sKnots) {
  acc <- new.env(parent = .GlobalEnv)
  initialize(
    acc,
    nRows = length(seasons),
    nCols = length(sKnots),
    len = 2 * length(seasons)
  )

  flatKnots <- unlist(sKnots)
  # Group index of each flattened knot value.
  sIndexes <- rep(seq_along(sKnots), times = lengths(sKnots))
  for (row in seq_along(seasons)) {
    s <- seasons[row]
    if (s %in% flatKnots) {
      # Exactly on a knot: full weight goes to that knot.
      append(acc, 1, row, knotToIndex2(s, flatKnots, sIndexes))
    } else {
      # Between knots: split weight linearly between the two neighbours.
      sLeft <- max(flatKnots[flatKnots <= s])
      sRight <- min(flatKnots[flatKnots > s])
      gap <- sRight - sLeft
      append(acc, (sRight - s) / gap, row, knotToIndex2(sLeft, flatKnots, sIndexes))
      append(acc, (s - sLeft) / gap, row, knotToIndex2(sRight, flatKnots, sIndexes))
    }
  }
  sparseMatrix(
    x = acc$ra[1L:acc$l],
    i = as.integer(acc$ia[1L:acc$l]),
    j = as.integer(acc$ja[1L:acc$l]),
    dims = as.integer(acc$d)
  )
})

# Creates a matrix which takes values at seasonal and time knots and interpolates
# these values at coordinates of the observed points.
seasonalPredictorConstructor <- cmpfun(function(predictor) {
  # Builds the "constructor" matrix of one predictor: it maps coefficients at
  # (time knot, seasonal knot) pairs to fitted values at the observed
  # (time, season) coordinates, scaled by the predictor's data column.
  if (!checkPredictor(predictor)) {
    stop(paste0('Predictor "', predictor$name, '" has wrong structure...'))
  }
  data <- predictor$data
  times <- predictor$times
  seasons <- predictor$seasons
  segments <- predictor$seasonalStructure$segments
  nRows <- length(data)
  tKnots <- predictor$timeKnots
  nKnots <- length(tKnots)
  seasonalStructure <- predictor$seasonalStructure
  sKnots <- seasonalStructure$sKnots
  nSKnots <- length(sKnots)
  nCols <- length(tKnots) * length(sKnots)

  # Static predictor: single coefficient to estimate without any regularization
  if (is.null(tKnots) && is.null(seasons) && is.null(seasonalStructure)) {
    return(Matrix(data = data, nrow = length(data), ncol = 1, sparse = TRUE))
  }

  # Interpolation weights along the seasonal and time dimensions.
  msw <- getInfluenceMatrix(seasons, sKnots)
  mtw <- getInfluenceMatrix(times, tKnots)

  m <- Matrix(data = 0, nrow = nRows, ncol = nCols, sparse = TRUE)

  # Fill column blocks per time knot: each block is the seasonal weights
  # scaled by that time knot's influence on every observation.
  iCol <- 1
  nSCol <- dim(msw)[2]
  for (t in seq_along(tKnots)) {
    m[, iCol:(iCol + nSCol - 1)] <- Diagonal(x = mtw[, t]) %*% msw
    iCol <- iCol + nSCol
  }

  # Scale rows by the predictor data; for a non-trivial seasonal structure,
  # compose with the reduced-to-full season transformer.
  mData <- Diagonal(x = data)
  if (nSKnots == 1) {
    result <- mData %*% m
  } else {
    mTr <- seasonalTransformer(nKnots, seasonalStructure)
    result <- (mData %*% m) %*% mTr
  }
  return(result)
})

# Creates a sparse matrix which takes first discrete differences of a vector
# Sparse first-difference operator: an (nCols-1) x nCols matrix whose product
# with a vector gives its consecutive differences.
diff1 <- cmpfun(function(nCols) diff(Diagonal(nCols)))

# Creates a sparse matrix which takes second discrete differences of a vector
# Sparse second-difference operator: first differences applied twice,
# an (nCols-2) x nCols matrix.
diff2 <- cmpfun(function(nCols) {
  diff(diff(Diagonal(nCols)))
})

# Weighted second-derivative operator for values at (possibly irregular)
# knots: row i approximates 2 * weights[i] * f''(knots[i+1]) by the standard
# divided-difference formula over knots[i], knots[i+1], knots[i+2].
# Fixes: (1) fully vectorised instead of building triplets one row at a time
# through the env/append machinery; (2) the old `for (i in 1:nRows)` failed
# when length(knots) == 2 (nRows == 0 gave the 1:0 sequence); seq_len-based
# indexing returns an empty 0 x nCols matrix instead.
vector2Derivatives <- cmpfun(function(knots, weights) {
  stopifnot(length(knots) - 2 == length(weights))

  nCols <- length(knots)
  nRows <- nCols - 2
  idx <- seq_len(nRows)

  delta1 <- knots[idx + 1] - knots[idx]       # left gap
  delta2 <- knots[idx + 2] - knots[idx + 1]   # right gap
  delta <- knots[idx + 2] - knots[idx]        # full span

  # Per row the three stencil values at columns i, i+1, i+2, laid out
  # column-group first to match rep(idx, 3) / c(idx, idx+1, idx+2).
  vals <- 2 *
    rep(weights, 3) *
    c(1 / (delta1 * delta), -1 / (delta1 * delta2), 1 / (delta2 * delta))

  return(sparseMatrix(
    x = vals,
    i = rep(idx, 3),
    j = c(idx, idx + 1L, idx + 2L),
    dims = c(nRows, nCols)
  ))
})

# For every seasonal knot value, the nearest knot value to its left and to
# its right; NA where no neighbour exists (the value is a segment boundary).
# Fix: the scalar conditions previously used ifelse(); plain if/else is the
# correct construct for a length-1 test and avoids ifelse()'s coercion rules.
lrKnots <- cmpfun(function(seasonalStructure) {
  segments <- seasonalStructure$segments
  lefts <- sapply(segments, FUN = min)
  rights <- sapply(segments, FUN = max)

  sKnots <- seasonalStructure$sKnots
  sKnotsV <- sort(unlist(sKnots))

  # Nearest knot on the left of each knot value; NA at left boundaries.
  lKnots <- lapply(sKnots, FUN = function(k) {
    sapply(k, FUN = function(kk) {
      if (kk %in% lefts) NA else max(sKnotsV[sKnotsV < kk])
    })
  })
  # Nearest knot on the right of each knot value; NA at right boundaries.
  rKnots <- lapply(sKnots, FUN = function(k) {
    sapply(k, FUN = function(kk) {
      if (kk %in% rights) NA else min(sKnotsV[sKnotsV > kk])
    })
  })
  return(list(lKnots = lKnots, rKnots = rKnots))
})

# Creates a matrix which translates a seasonal column to the second derivatives
# applied to that column. Since the seasonal structure/topology can be different
# from cycle, the procedure is rather complicated and takes into account cases
# when a node in the seasonal graph can have none/multiple incoming vertexes and/or
# none/multiple outcoming vertexes.
cycle2Derivatives <- cmpfun(function(seasonalStructure, norm = 2) {
  # Sparse second-difference operator over the seasonal graph. One row is
  # produced for every (left neighbour, right neighbour) pair of every knot,
  # so knots with several incoming/outgoing edges contribute several rows.
  sKnots <- seasonalStructure$sKnots
  m <- new.env(parent = .GlobalEnv)
  # nRows = -1 is a placeholder; the true row count is only known after the
  # loop and is set via dims() below. len is a capacity hint only — R grows
  # the buffers automatically if more entries are appended.
  nCols <- nSKnots <- length(sKnots)
  initialize(env = m, nRows = -1, nCols = nCols, len = nSKnots * 3)

  lr <- lrKnots(seasonalStructure)
  lKnots <- lr$lKnots
  rKnots <- lr$rKnots
  # Count of non-NA left/right neighbours per knot (shares the territory
  # weight between multiple edges of the same knot).
  nlKnots <- sapply(lKnots, FUN = function(k) sum(!is.na(k)))
  nrKnots <- sapply(rKnots, FUN = function(k) sum(!is.na(k)))

  nRow <- 0
  for (i in 1:nSKnots) {
    for (jPrev in seq_along(lKnots[[i]])) {
      lk <- lKnots[[i]][jPrev]
      if (is.na(lk)) {
        next # no left neighbour at this boundary
      }
      for (jNext in seq_along(rKnots[[i]])) {
        rk <- rKnots[[i]][jNext]
        if (is.na(rk)) {
          next # no right neighbour at this boundary
        }

        # Distances from knot i (via its jPrev/jNext representatives)
        # to its left and right neighbours.
        k1 <- sKnots[[i]][jPrev]
        d1 <- abs(k1 - lk)

        k2 <- sKnots[[i]][jNext]
        d2 <- abs(rk - k2)

        d <- d1 + d2
        # Edge weight: territory split across the knot's edges, then raised
        # to 1/norm (presumably mirrors sWeights; confirm against paper).
        w <- ((d1 / nrKnots[i] + d2 / nlKnots[i]) / 2)^(1 / norm)
        nRow <- nRow + 1
        # Standard divided-difference second-derivative stencil.
        append(
          m,
          2 * w * c(1 / (d1 * d), -1 / (d1 * d2), 1 / (d2 * d)),
          c(nRow, nRow, nRow),
          c(knotToIndex(lk, sKnots), i, knotToIndex(rk, sKnots))
        )
      }
    }
  }

  dims(env = m, nRows = nRow, nCols = nCols)
  return(sparseMatrix(
    x = m$ra[1L:m$l],
    i = as.integer(m$ia[1L:m$l]),
    j = as.integer(m$ja[1L:m$l]),
    dims = as.integer(m$d)
  ))
})

# Creates a matrix to calculate regularization values
# when second derivatives are taken along seasonal dimension.
# Creates a matrix to calculate regularization values when second derivatives
# are taken along the seasonal dimension: one weighted copy of the seasonal
# second-difference operator per time knot, composed with the reduced-to-full
# season transformer.
# Fixes: removed unused locals (dataLength, sKnots, nSKnots) and replaced the
# unsafe 1:nKnots block loop with seq_len-based lapply.
ssRegulariser <- cmpfun(function(predictor, norm = 2) {
  seasonalStructure <- predictor$seasonalStructure
  knots <- predictor$timeKnots
  nKnots <- length(knots)

  # Second-derivative operator along the seasonal dimension.
  m <- cycle2Derivatives(seasonalStructure, norm)

  weights <- tWeights(knots, norm) # tWeights (not sWeights) because width along t dimension should be taken into account

  ensure(length(weights) == nKnots)

  # One weighted copy of the operator per time knot.
  m1 <- bdiag(lapply(seq_len(nKnots), function(i) weights[i] * m))
  m2 <- seasonalTransformer(nKnots, seasonalStructure)
  return(m1 %*% m2)
})

# Creates a matrix to calculate regularization values
# when second derivatives are taken along time dimension.
# Creates a matrix to calculate regularization values when second derivatives
# are taken along the time dimension: one weighted copy of the time
# second-difference operator per seasonal knot.
ttRegulariser <- cmpfun(function(predictor, norm = 2) {
  seasonalStructure <- predictor$seasonalStructure
  timeKnots <- predictor$timeKnots
  nKnots <- length(timeKnots)
  nSKnots <- length(seasonalStructure$sKnots)

  # Time second-difference operator, weighted at interior time knots only.
  m <- vector2Derivatives(timeKnots, tWeights(timeKnots, norm)[c(-1, -nKnots)])

  weights <- sWeights(seasonalStructure, norm) # sWeights (not tWeights) because width along s dimension should be taken into account

  blocks <- lapply(seq_len(nSKnots), function(i) weights[i] * m)
  m1 <- bdiag(blocks)

  if (nSKnots >= 2) {
    # Reorder to season-major layout, then map reduced seasons to full ones.
    m2 <- seasonalTransposer(nKnots, nSKnots)
    m3 <- seasonalTransformer(nKnots, seasonalStructure)
    m1 %*% (m2 %*% m3)
  } else {
    m1
  }
})

# Translates full seasonal matrix coordinates to vector positions
# Translates full seasonal matrix coordinates (season s, time t) to the
# position in the vectorised representation; s wraps modulo nSKnots.
translST <- cmpfun(function(s, t, nSKnots) {
  sWrapped <- (s - 1) %% nSKnots + 1
  (t - 1) * nSKnots + sWrapped
})

# Creates a matrix to calculate regularization values
# when second derivatives are taken along s and t dimensions.
stRegulariser <- cmpfun(function(predictor, norm = 2) {
  # Rows penalising the mixed time-season differences: for every pair of
  # adjacent time knots and every seasonal edge (knot -> right neighbour)
  # one row with the (+1, -1, -1, +1) cross-difference pattern, scaled by
  # the cell area raised to (1/norm - 1).
  timeKnots <- predictor$timeKnots
  seasonalStructure <- predictor$seasonalStructure
  sKnots <- seasonalStructure$sKnots
  nKnots <- length(timeKnots)
  nSKnots <- length(sKnots)

  m <- new.env(parent = .GlobalEnv)
  nCols <- nKnots * nSKnots
  # nRows = -1 is a placeholder; the true count is set via dims() after the loop.
  initialize(m, nRows = -1, nCols = nCols, len = 4 * (nKnots - 1) * nSKnots)

  lrk <- lrKnots(seasonalStructure)
  rKnots <- lrk$rKnots

  nRow <- 0
  for (t in 1:(nKnots - 1)) {
    for (s in 1:nSKnots) {
      for (ss in seq_along(rKnots[[s]])) {
        rKnot <- rKnots[[s]][ss]
        if (is.na(rKnot)) {
          next # segment boundary: no right neighbour, no cross term
        }
        knot <- sKnots[[s]][ss]
        tDistance <- abs(timeKnots[t + 1] - timeKnots[t])
        sDistance <- abs(rKnot - knot)
        area <- tDistance * sDistance
        rKnotInd <- knotToIndex(rKnot, sKnots)
        nRow <- nRow + 1
        # Cross-difference over the four corners of the (t, s) grid cell.
        append(
          m,
          (area^(1 / norm - 1)) * c(1, -1, -1, 1),
          c(nRow, nRow, nRow, nRow),
          c(
            translST(s, t, nSKnots),
            translST(rKnotInd, t, nSKnots),
            translST(s, t + 1, nSKnots),
            translST(rKnotInd, t + 1, nSKnots)
          )
        )
      }
    }
  }
  dims(env = m, nRows = nRow, nCols = nCols)

  m1 <- sparseMatrix(
    x = m$ra[1L:m$l],
    i = as.integer(m$ia[1L:m$l]),
    j = as.integer(m$ja[1L:m$l]),
    dims = as.integer(m$d)
  )
  # Map reduced season coefficients to the full seasonal grid.
  m2 <- seasonalTransformer(nKnots, seasonalStructure)
  return(m1 %*% m2)
})

# Creates regularization matrix for one predictor.
# norm parameter specifies which norm is used, L2 or L1. It affects how the distance between knots should affect knot weights.
# lambdas0or1 is a "technical" parameter and when it is TRUE then lambdas are not taken into account. It is used to create
# a "template" matrix to be later adjusted by multiplying by appropriate lambdas. It is done for performance.
# Creates the regularization matrix for one predictor by stacking the
# tt/ss/st blocks whose lambdas are non-zero.
# lambdas0or1 = TRUE builds a lambda-free "template" (blocks scaled by 1)
# that is later rescaled via lambdaMatrix() — done once for performance.
# Returns list(matrix = stacked rows, lengths = c(tt, ss, st) row counts).
# Fix: scalar ifelse() replaced with if/else — the correct construct for a
# length-1 condition, avoiding ifelse()'s vectorised coercion rules.
predictorRegulariser <- cmpfun(function(
  predictor,
  norm = 2,
  lambdas0or1 = FALSE
) {
  nKnots <- length(predictor$timeKnots)
  nSKnots <- length(predictor$seasonalStructure$sKnots)
  l1 <- l2 <- l3 <- 0
  result <- Matrix(
    data = 0,
    nrow = 0,
    ncol = max(nKnots, 1) * max(nSKnots - 1, 1)
  ) # Empty matrix,
  # it has 0 rows when l1 = l2 = l3 = 0. The only information it passes in this case is the number of columns.
  if (predictor$lambdas[1] != 0) {
    reg <- (if (lambdas0or1) 1 else predictor$lambdas[1]) *
      ttRegulariser(predictor, norm)
    l1 <- dim(reg)[1]
    result <- rbind(result, reg)
  }
  if (predictor$lambdas[2] != 0) {
    reg <- (if (lambdas0or1) 1 else predictor$lambdas[2]) *
      ssRegulariser(predictor, norm)
    l2 <- dim(reg)[1]
    result <- rbind(result, reg)
  }
  if (predictor$lambdas[3] != 0) {
    reg <- (if (lambdas0or1) 1 else predictor$lambdas[3]) *
      stRegulariser(predictor, norm)
    l3 <- dim(reg)[1]
    result <- rbind(result, reg)
  }
  return(list(matrix = result, lengths = c(l1, l2, l3)))
})

# The "constructor" matrix is responsible for reconstruction the data from the parameters.
# The "constructor" matrix is responsible for reconstructing the data from
# the parameters: the per-predictor constructor matrices bound column-wise,
# plus each predictor's (start, length) column range ("seats").
constructorMatrix <- function(predictors) {
  mats <- vector("list", length(predictors))
  seats <- vector("list", length(predictors))
  startCol <- 1
  for (k in seq_along(predictors)) {
    mk <- seasonalPredictorConstructor(predictors[[k]])
    nCol <- ncol(mk)
    seats[[k]] <- c(start = startCol, length = nCol)
    startCol <- startCol + nCol
    mats[[k]] <- mk
  }
  list(matrix = do.call(cbind, mats), seats = seats)
}

# The "regulariser" matrix is responsible for penalty calculations.
# The "regulariser" matrix is responsible for penalty calculations: the
# per-predictor regularisers assembled block-diagonally, plus each
# predictor's starting row and per-block (tt, ss, st) row counts ("seats").
regulariserMatrix <- cmpfun(function(
  predictors,
  norm = 2,
  lambdas0or1 = FALSE
) {
  mats <- vector("list", length(predictors))
  seats <- vector("list", length(predictors))
  startRow <- 1
  for (k in seq_along(predictors)) {
    pr <- predictorRegulariser(predictors[[k]], norm, lambdas0or1)
    mats[[k]] <- pr$matrix
    seats[[k]] <- list(start = startRow, lengths = pr$lengths)
    startRow <- startRow + sum(pr$lengths)
  }
  return(list(matrix = bdiag(mats), seats = seats))
})

# Calculates a design matrix. The design matrix consists of two matrices one on top of the other:
# "constructor" matrix and "regulariser".
# The first one is responsible for reconstruction the data from the parameters.
# The second is responsible for calculating the penalty corresponding to the parameters.
# Design matrix: the "constructor" rows (data reconstruction) stacked on top
# of the "regulariser" rows (penalty terms).
designMatrix <- cmpfun(function(predictors, norm = 2) {
  rbind(
    constructorMatrix(predictors)$matrix,
    regulariserMatrix(predictors, norm)$matrix
  )
})

# Target vector for the regression: the data followed by zeros for every
# regularisation row of the design matrix.
targetVector <- cmpfun(function(data, designMatrix) {
  nPad <- nrow(designMatrix) - length(data)
  c(data, rep(0, nPad))
})

# Calculates lower and upper confidence/forecasting intervals.
# Calculates lower and upper confidence/forecasting intervals; one column per
# confidence level. Returns an empty list when no covariance matrix or no
# confidence levels are supplied.
getLowerUpper <- cmpfun(function(data, covMatrix, constr, range, confidence) {
  if (is.null(covMatrix) || is.null(confidence)) {
    return(list())
  }
  # Standard deviations of the fitted values over the selected columns.
  sigmas <- sqrt(diag(constr %*% covMatrix[range, range] %*% t(constr)))
  bounds <- lapply(confidence, function(conf) {
    qa <- qnorm(p = conf, mean = data, sd = sigmas)
    qb <- qnorm(p = 1 - conf, mean = data, sd = sigmas)
    list(lower = pmin(qa, qb), upper = pmax(qa, qb))
  })
  list(
    lower = do.call(cbind, lapply(bounds, `[[`, "lower")),
    upper = do.call(cbind, lapply(bounds, `[[`, "upper"))
  )
})

extract <- cmpfun(function(
  coef,
  resid,
  covMatrix,
  constructorMatrix,
  seats,
  predictors,
  confidence = NULL
) {
  # Splits the fitted coefficient vector into per-predictor components.
  # seats[[i]] is a named vector c(start = ..., length = ...) giving the i-th
  # predictor's column range in constructorMatrix; covMatrix/confidence are
  # optional and only drive the interval computation.
  predictorsData <- list()
  for (i in seq_along(predictors)) {
    range <- seats[[i]]["start"]:(seats[[i]]["start"] +
      seats[[i]]["length"] -
      1)
    beta <- coef[range]
    constr <- constructorMatrix[, range, drop = FALSE]
    # Component fit: this predictor's columns applied to its coefficients.
    data <- as.vector(constr %*% beta)
    predictorsData[[i]] <- c(
      list(data = data, beta = beta),
      # Empty list (no lower/upper entries) when covMatrix/confidence is NULL.
      getLowerUpper(data, covMatrix, constr, range, confidence)
    )
  }
  # Overall fit/forecast from all coefficients at once.
  data <- as.vector(constructorMatrix %*% coef)
  forecastData <- c(
    list(data = data, beta = coef),
    getLowerUpper(
      data,
      covMatrix,
      constructorMatrix,
      seq_along(coef),
      confidence
    )
  )
  return(list(
    predictors = predictorsData,
    random = list(data = resid),
    forecast = forecastData
  ))
})

# Precomputes the constructor and (lambda-free template) regulariser matrices
# for a predictor set, so repeated fits only need to rescale by lambdas.
STRDesign <- function(predictors, norm = 2) {
  list(
    cm = constructorMatrix(predictors),
    rm = regulariserMatrix(predictors, norm, lambdas0or1 = TRUE),
    predictors = predictors,
    norm = norm
  )
}

# Diagonal matrix of smoothing parameters: each predictor's three lambdas
# repeated over the row counts of its tt/ss/st regulariser blocks (seats).
# Fix: the diagonal was grown with c() inside a loop (quadratic copying);
# unlist(lapply(...)) builds it in one pass.
lambdaMatrix <- function(lambdas, seats) {
  ensure(length(lambdas) == length(seats))
  v <- unlist(lapply(seq_along(lambdas), function(i) {
    c(
      rep(lambdas[[i]]$lambdas[1], seats[[i]]$lengths[1]),
      rep(lambdas[[i]]$lambdas[2], seats[[i]]$lengths[2]),
      rep(lambdas[[i]]$lambdas[3], seats[[i]]$lengths[3])
    )
  }))
  return(Diagonal(x = v))
}

# Returns covariance (diagonal) matrix of errors/residuals.
# Returns covariance (diagonal) matrix of errors/residuals.
getISigma <- function(resid, firstLength, seats) {
  # Per-section variance estimates: the first firstLength residuals (the data
  # part) share the mean of their squares; then every non-empty regularisation
  # section (per predictor, per tt/ss/st block) shares the mean square of its
  # own rows. seats[[i]] carries start and lengths as built by regulariserMatrix.
  d <- rep(mean(head(resid, firstLength)^2), firstLength)
  resid <- tail(resid, -firstLength) # keep only the regularisation residuals
  for (i in seq_along(seats)) {
    for (j in seq_along(seats[[i]]$lengths)) {
      if (seats[[i]]$lengths[j] > 0) {
        start <- seats[[i]]$start
        # Rows of earlier blocks of the same predictor precede block j.
        lBefore <- sum(seats[[i]]$lengths[seq_len(j - 1)])
        range <- (start + lBefore):(start + lBefore + seats[[i]]$lengths[j] - 1)
        d <- c(d, rep(mean(resid[range]^2), length(range)))
      }
    }
  }
  return(Diagonal(length(d), d))
}

#' @title STR decomposition
#' @description Seasonal-Trend decomposition of time series data using Regression.
#' @seealso \code{\link{AutoSTR}}
#' @inheritParams data
#' @inheritParams predictors
#' @inheritParams strDesign
#' @inheritParams lambdas
#' @inheritParams confidence
#' @inheritParams solver
#' @inheritParams reportDimensionsOnly
#' @inheritParams trace
#' @return A structure containing input and output data.
#' It is an \strong{S3} class \code{STR}, which is a list with the following components:
#' \itemize{
#' \item \strong{output} -- contains decomposed data. It is a list of three components:
#' \itemize{
#' \item \strong{predictors} -- a list of components where each component
#' corresponds to the input predictor. Every such component is a list containing the following:
#' \itemize{
#' \item \strong{data} -- fit/forecast for the corresponding predictor (trend, seasonal component, flexible or seasonal predictor).
#' \item \strong{beta} -- beta coefficients of the fit of the corresponding predictor.
#' \item \strong{lower} -- optional (if requested) matrix of lower bounds of confidence intervals.
#' \item \strong{upper} -- optional (if requested) matrix of upper bounds of confidence intervals.
#' }
#' \item \strong{random} -- a list with one component \strong{data}, which contains residuals of the model fit.
#' \item \strong{forecast} -- a list with two components:
#' \itemize{
#' \item \strong{data} -- fit/forecast for the model.
#' \item \strong{beta} -- beta coefficients of the fit.
#' \item \strong{lower} -- optional (if requested) matrix of lower bounds of confidence intervals.
#' \item \strong{upper} -- optional (if requested) matrix of upper bounds of confidence intervals.
#' }
#' }
#' \item \strong{input} -- input parameters and lambdas used for final calculations.
#' \itemize{
#' \item \strong{data} -- input data.
#' \item \strong{predictors} - input predictors.
#' \item \strong{lambdas} -- smoothing parameters used for final calculations (same as input lambdas for STR method).
#' }
#' \item \strong{cvMSE} -- optional cross validated (leave one out) Mean Squared Error.
#' \item \strong{method} -- always contains string \code{"STRmodel"} for this function.
#' }
#' @references Dokumentov, A., and Hyndman, R.J. (2022)
#' STR: Seasonal-Trend decomposition using Regression,
#' \emph{INFORMS Journal on Data Science}, 1(1), 50-62.
#' \url{https://robjhyndman.com/publications/str/}
#' @examples
#' n <- 50
#' trendSeasonalStructure <- list(segments = list(c(0, 1)), sKnots = list(c(1, 0)))
#' ns <- 5
#' seasonalStructure <- list(
#'   segments = list(c(0, ns)),
#'   sKnots = c(as.list(1:(ns - 1)), list(c(ns, 0)))
#' )
#' seasons <- (0:(n - 1)) %% ns + 1
#' trendSeasons <- rep(1, length(seasons))
#' times <- seq_along(seasons)
#' data <- seasons + times / 4
#' plot(times, data, type = "l")
#' timeKnots <- times
#' trendData <- rep(1, n)
#' seasonData <- rep(1, n)
#' trend <- list(
#'   data = trendData, times = times, seasons = trendSeasons,
#'   timeKnots = timeKnots, seasonalStructure = trendSeasonalStructure, lambdas = c(1, 0, 0)
#' )
#' season <- list(
#'   data = seasonData, times = times, seasons = seasons,
#'   timeKnots = timeKnots, seasonalStructure = seasonalStructure, lambdas = c(10, 0, 0)
#' )
#' predictors <- list(trend, season)
#'
#' str1 <- STRmodel(data, predictors)
#' plot(str1)
#'
#' data[c(3, 4, 7, 20, 24, 29, 35, 37, 45)] <- NA
#' plot(times, data, type = "l")
#' str2 <- STRmodel(data, predictors)
#' plot(str2)
#'
#' @author Alexander Dokumentov
#' @export

STRmodel <- function(
  data,
  predictors = NULL,
  strDesign = NULL,
  lambdas = NULL,
  confidence = NULL, # confidence = c(0.8, 0.95)
  solver = c("Matrix", "cholesky"),
  reportDimensionsOnly = FALSE,
  trace = FALSE
) {
  # Derive the design from the predictors when it is not supplied explicitly.
  if (is.null(strDesign) && !is.null(predictors)) {
    strDesign <- STRDesign(predictors, norm = 2)
  }
  # Each predictor carries its own lambdas, so predictors double as the
  # default lambda specification.
  if (is.null(lambdas)) {
    lambdas <- predictors
  }
  if (is.null(strDesign)) {
    stop("(strDesign and lambdas) or predictors should be provided...")
  }
  # any(logical(0)) is FALSE, so a NULL confidence passes this check.
  if (any(confidence <= 0 | confidence >= 1)) {
    stop("confidence must be between 0 and 1")
  }
  cm <- strDesign$cm
  rm <- strDesign$rm
  # NOTE: lm/rm here are local matrices, masking stats::lm and base::rm.
  lm <- lambdaMatrix(lambdas, rm$seats)
  # Full design: constructor rows on top, lambda-scaled regulariser rows below.
  design <- rbind(cm$matrix, lm %*% rm$matrix)
  if (trace) {
    cat("\nDesign matrix dimensions: ")
    cat(dim(design))
    cat("\n")
  }
  if (reportDimensionsOnly) {
    return(NULL)
  }

  # Drop observations with NA data from the fit; regularisation rows are kept.
  noNA <- !is.na(as.vector(data))
  y <- as.vector(data)[noNA]
  X <- design[c(noNA, rep(TRUE, nrow(design) - length(noNA))), , drop = FALSE] # noNA should be extended with TRUE values to keep rows responsible for regularisation
  if (trace) {
    cat("X matrix (NA removed) dimensions: ")
    cat(dim(X))
    cat("\n")
  }
  C <- cm$matrix[noNA, , drop = FALSE] # constructor rows at observed points only
  CC <- cm$matrix # constructor rows at all points (including NA positions)

  if (is.null(confidence)) {
    # Fast path: point estimates only, no covariance computations.
    coef <- lmSolver(X, y, type = solver[1], method = solver[2], trace = trace)
    dataHat <- CC %*% coef

    if (is.null(predictors)) {
      predictors <- strDesign$predictors
    }
    components <- extract(
      coef = as.vector(coef),
      resid = as.vector(data) - as.vector(dataHat),
      covMatrix = NULL,
      constructorMatrix = cm$matrix,
      seats = cm$seats,
      predictors = predictors,
      confidence = NULL
    )
    result <- list(
      output = components,
      input = list(data = data, predictors = predictors, lambdas = lambdas),
      method = "STRmodel"
    )
    class(result) <- "STR"
    return(result)
  } else {
    # Full path: explicit normal equations to obtain the coefficient
    # covariance matrix needed for confidence/forecast intervals.
    XtXinv <- solve(crossprod(X))
    partialB <- XtXinv %*% t(C)
    partialH <- C %*% partialB # hat matrix restricted to observed rows
    partialDiagH <- diag(partialH)

    coef <- as.vector(partialB %*% y)
    yHat <- as.vector(C %*% coef)
    yHatPlus <- as.vector(X %*% coef)
    dataHat <- as.vector(CC %*% coef)
    resid <- y - yHat
    # Regularisation rows have target 0, so pad y with zeros.
    yPlus <- c(y, rep(0, length(yHatPlus) - length(y)))
    residPlus <- yPlus - yHatPlus
    ISigma <- getISigma(residPlus, length(y), rm$seats)
    # Leave-one-out CV residuals via the diagonal of the hat matrix.
    cvResid <- resid / (1 - partialDiagH)
    cvMSE <- sum((cvResid)^2) / length(resid) # Estimated covariance of the errors
    # Sigma = cvMSE * (XtXinv %*% crossprod(C) %*% XtXinv) # Covariance matrix of the parameters
    Sigma <- crossprod((sqrt(ISigma) %*% X) %*% XtXinv) # Covariance matrix of the parameters

    if (is.null(predictors)) {
      predictors <- strDesign$predictors
    }
    components <- extract(
      as.vector(coef),
      as.vector(data) - as.vector(dataHat),
      Sigma,
      cm$matrix,
      cm$seats,
      predictors,
      confidence
    )
    result <- list(
      output = components,
      input = list(data = data, predictors = predictors, lambdas = lambdas),
      cvMSE = cvMSE,
      method = "STRmodel"
    )
    class(result) <- "STR"
    return(result)
  }
}

nFoldSTRCV <- function(
  n,
  trainData,
  fcastData,
  completeData = NULL, # parameter completeData is required only for iterative methods
  trainC,
  fcastC,
  completeC,
  regMatrix,
  regSeats,
  lambdas,
  solver = c("Matrix", "cholesky"),
  trace = FALSE,
  iterControl = list(maxiter = 20, tol = 1E-6)
) {
  # n-fold cross validation: fits the model on each training fold and scores
  # the forecast fold; returns the pooled mean squared error (Inf when no
  # fold produced usable residuals). Folds run in parallel via foreach.
  SSE <- 0
  l <- 0
  lm <- lambdaMatrix(lambdas, regSeats)
  # Lambda-scaled regularisation rows shared by all folds.
  R <- lm %*% regMatrix

  e <- NULL
  if (solver[1] == "iterative") {
    # Shared environment lets iterative solvers reuse state across folds.
    if (solver[2] %in% c("cg-chol", "lsmr-chol", "lsmr")) {
      e <- new.env(parent = .GlobalEnv)
    }
    if (solver[2] %in% c("cg-chol", "lsmr-chol")) {
      # Warm-start/preconditioner pass on the complete data; a failure only
      # downgrades to unpreconditioned iterative solvers, it does not abort.
      noNA <- !is.na(completeData)
      y <- completeData[noNA]
      C <- completeC[noNA, ]
      X <- rbind(C, R)
      coef0 <- try(
        lmSolver(
          X,
          y,
          type = solver[1],
          method = solver[2],
          env = e,
          iterControl = iterControl,
          trace = trace
        ),
        silent = !trace
      )
      if ("try-error" %in% class(coef0)) {
        if (trace) {
          cat(
            "\nError in lmSolver... iterative solvers without preconditioners will be used...\n"
          )
        }
      }
    }
  }

  resultList <- list()
  # for(i in rev(1:n)) {
  # Each fold yields c(SSE, l); a failed solve contributes c(Inf, 1) so the
  # pooled MSE becomes Inf rather than silently dropping the fold.
  resultList <- foreach(i = 1:n) %dopar%
    {
      noNA <- !is.na(trainData[[i]])
      y <- (trainData[[i]])[noNA]
      C <- (trainC[[i]])[noNA, ]
      X <- rbind(C, R)
      coef <- try(
        lmSolver(
          X,
          y,
          type = solver[1],
          method = solver[2],
          env = e,
          iterControl = iterControl,
          trace = trace
        ),
        silent = !trace
      )
      if ("try-error" %in% class(coef)) {
        if (trace) {
          cat("\nError in lmSolver...\n")
        }
        # return(Inf)
        # next
        c(SSE = Inf, l = 1)
        # c(SSE = 0, l = 0)
      } else {
        # Score the held-out fold.
        fcast <- fcastC[[i]] %*% coef
        resid <- fcastData[[i]] - as.vector(fcast)
        # resultList[[length(resultList) + 1]] = c(SSE = sum(resid^2, na.rm = TRUE), l = sum(!is.na(resid)))
        c(SSE = sum(resid^2, na.rm = TRUE), l = sum(!is.na(resid)))
      }
    }
  # Pool sums of squares and counts across folds.
  for (i in seq_along(resultList)) {
    SSE <- SSE + resultList[[i]][1]
    l <- l + resultList[[i]][2]
  }
  if (l == 0) {
    return(Inf)
  }
  return(SSE / l)
}

# Logical mask over all lambdas (three per predictor) marking which are
# active (> 0); NULL for an empty predictor list.
extractPattern <- function(predictors) {
  unlist(lapply(predictors, function(p) p$lambdas > 0))
}

# Concatenates all predictors' lambdas and keeps only those selected by the
# logical pattern (the active lambdas to be optimised).
extractP <- function(predictors, pattern) {
  allLambdas <- unlist(lapply(predictors, function(p) p$lambdas))
  allLambdas[pattern]
}

# Rebuilds the per-predictor lambda list from an optimised parameter vector p:
# positions flagged by pattern take values from p, the rest keep the original
# values; the flat vector is then split into triples, one per predictor.
# Fix: removed the dead `i <- 1` assignment that preceded the seq_len loop.
createLambdas <- function(p, pattern, original) {
  ensure(length(pattern) %% 3 == 0)
  ensure(length(pattern) == length(original))

  # Start from the original lambdas and overwrite the optimised positions.
  pLong <- original
  pLong[pattern] <- p
  l <- list()
  for (i in seq_len(length(pattern) %/% 3)) {
    pInd <- (i - 1) * 3 + 1
    l[[i]] <- list(lambdas = pLong[pInd:(pInd + 2)])
  }
  return(l)
}

#' @rdname STR
#' @name STR
#' @title Automatic STR decomposition
#' @description Automatically selects parameters for an STR decomposition of time series data.
#'
#' If a parallel backend is registered for use before \code{STR} call,
#' \code{STR} will use it for n-fold cross validation computations.
#'
#' @seealso \code{\link{STRmodel}} \code{\link{RSTRmodel}} \code{\link{AutoSTR}}
#' @inheritParams data
#' @inheritParams predictors
#' @inheritParams confidence
#' @inheritParams robust
#' @inheritParams lambdas
#' @inheritParams pattern
#' @inheritParams nFold
#' @inheritParams reltol
#' @inheritParams gapCV
#' @inheritParams solver
#' @inheritParams nMCIter
#' @inheritParams control
#' @inheritParams trace
#' @param iterControl Control parameters for some experimental features.
#' This should not be used by an ordinary user.
#' @return A structure containing input and output data.
#' It is an \strong{S3} class \code{STR}, which is a list with the following components:
#' \itemize{
#' \item \strong{output} -- contains decomposed data. It is a list of three components:
#' \itemize{
#' \item \strong{predictors} -- a list of components where each component
#' corresponds to the input predictor. Every such component is a list containing the following:
#' \itemize{
#' \item \strong{data} -- fit/forecast for the corresponding predictor (trend, seasonal component, flexible or seasonal predictor).
#' \item \strong{beta} -- beta coefficients of the fit of the corresponding predictor.
#' \item \strong{lower} -- optional (if requested) matrix of lower bounds of confidence intervals.
#' \item \strong{upper} -- optional (if requested) matrix of upper bounds of confidence intervals.
#' }
#' \item \strong{random} -- a list with one component \strong{data}, which contains residuals of the model fit.
#' \item \strong{forecast} -- a list with two components:
#' \itemize{
#' \item \strong{data} -- fit/forecast for the model.
#' \item \strong{beta} -- beta coefficients of the fit.
#' \item \strong{lower} -- optional (if requested) matrix of lower bounds of confidence intervals.
#' \item \strong{upper} -- optional (if requested) matrix of upper bounds of confidence intervals.
#' }
#' }
#' \item \strong{input} -- input parameters and lambdas used for final calculations.
#' \itemize{
#' \item \strong{data} -- input data.
#' \item \strong{predictors} -- input predictors.
#' \item \strong{lambdas} -- smoothing parameters used for final calculations (same as input lambdas for STR method).
#' }
#' \item \strong{cvMSE} -- optional cross validated (leave one out) Mean Squared Error.
#' \item \strong{optim.CV.MSE} or \strong{optim.CV.MAE} -- best cross validated Mean Squared Error or Mean Absolute Error (n-fold) achieved during minimisation procedure.
#' \item \strong{nFold} -- the input \code{nFold} parameter.
#' \item \strong{gapCV} -- the input \code{gapCV} parameter.
#' \item \strong{method} -- contains strings \code{"STR"} or \code{"RSTR"} depending on used method.
#' }
#' @examples
#' \donttest{
#' TrendSeasonalStructure <- list(
#'   segments = list(c(0, 1)),
#'   sKnots = list(c(1, 0))
#' )
#' WDSeasonalStructure <- list(
#'   segments = list(c(0, 48), c(100, 148)),
#'   sKnots = c(as.list(c(1:47, 101:147)), list(c(0, 48, 100, 148)))
#' )
#'
#' TrendSeasons <- rep(1, nrow(electricity))
#' WDSeasons <- as.vector(electricity[, "WorkingDaySeasonality"])
#'
#' Data <- as.vector(electricity[, "Consumption"])
#' Times <- as.vector(electricity[, "Time"])
#' TempM <- as.vector(electricity[, "Temperature"])
#' TempM2 <- TempM^2
#'
#' TrendTimeKnots <- seq(from = head(Times, 1), to = tail(Times, 1), length.out = 116)
#' SeasonTimeKnots <- seq(from = head(Times, 1), to = tail(Times, 1), length.out = 24)
#'
#' TrendData <- rep(1, length(Times))
#' SeasonData <- rep(1, length(Times))
#'
#' Trend <- list(
#'   name = "Trend",
#'   data = TrendData,
#'   times = Times,
#'   seasons = TrendSeasons,
#'   timeKnots = TrendTimeKnots,
#'   seasonalStructure = TrendSeasonalStructure,
#'   lambdas = c(1500, 0, 0)
#' )
#' WDSeason <- list(
#'   name = "Dayly seas",
#'   data = SeasonData,
#'   times = Times,
#'   seasons = WDSeasons,
#'   timeKnots = SeasonTimeKnots,
#'   seasonalStructure = WDSeasonalStructure,
#'   lambdas = c(0.003, 0, 240)
#' )
#' StaticTempM <- list(
#'   name = "Temp Mel",
#'   data = TempM,
#'   times = Times,
#'   seasons = NULL,
#'   timeKnots = NULL,
#'   seasonalStructure = NULL,
#'   lambdas = c(0, 0, 0)
#' )
#' StaticTempM2 <- list(
#'   name = "Temp Mel^2",
#'   data = TempM2,
#'   times = Times,
#'   seasons = NULL,
#'   timeKnots = NULL,
#'   seasonalStructure = NULL,
#'   lambdas = c(0, 0, 0)
#' )
#' Predictors <- list(Trend, WDSeason, StaticTempM, StaticTempM2)
#'
#' elec.fit <- STR(
#'   data = Data,
#'   predictors = Predictors,
#'   gapCV = 48 * 7
#' )
#'
#' plot(elec.fit,
#'   xTime = as.Date("2000-01-11") + ((Times - 1) / 48 - 10),
#'   forecastPanels = NULL
#' )
#'
#' #########################################################
#'
#' n <- 70
#' trendSeasonalStructure <- list(segments = list(c(0, 1)), sKnots = list(c(1, 0)))
#' ns <- 5
#' seasonalStructure <- list(
#'   segments = list(c(0, ns)),
#'   sKnots = c(as.list(1:(ns - 1)), list(c(ns, 0)))
#' )
#' seasons <- (0:(n - 1)) %% ns + 1
#' trendSeasons <- rep(1, length(seasons))
#' times <- seq_along(seasons)
#' data <- seasons + times / 4
#' set.seed(1234567890)
#' data <- data + rnorm(length(data), 0, 0.2)
#' data[20] <- data[20] + 3
#' data[50] <- data[50] - 5
#' plot(times, data, type = "l")
#' timeKnots <- times
#' trendData <- rep(1, n)
#' seasonData <- rep(1, n)
#' trend <- list(
#'   data = trendData, times = times, seasons = trendSeasons,
#'   timeKnots = timeKnots, seasonalStructure = trendSeasonalStructure, lambdas = c(1, 0, 0)
#' )
#' season <- list(
#'   data = seasonData, times = times, seasons = seasons,
#'   timeKnots = timeKnots, seasonalStructure = seasonalStructure, lambdas = c(1, 0, 1)
#' )
#' predictors <- list(trend, season)
#' rstr <- STR(data, predictors,
#'   reltol = 0.0000001, gapCV = 10,
#'   confidence = 0.95, nMCIter = 400, robust = TRUE
#' )
#' plot(rstr)
#' }
#' @author Alexander Dokumentov
#' @references Dokumentov, A., and Hyndman, R.J. (2022)
#' STR: Seasonal-Trend decomposition using Regression,
#' \emph{INFORMS Journal on Data Science}, 1(1), 50-62.
#' \url{https://robjhyndman.com/publications/str/}
#' @export

# Public entry point for automatic STR decomposition (see roxygen block
# above). Dispatches to the robust (RSTR_) or least-squares (STR_)
# implementation depending on `robust`.
STR <- function(
  data,
  predictors,
  confidence = NULL,
  robust = FALSE,
  lambdas = NULL,
  pattern = extractPattern(predictors),
  nFold = 5,
  reltol = 0.005,
  gapCV = 1,
  solver = c("Matrix", "cholesky"),
  nMCIter = 100,
  control = list(nnzlmax = 1000000, nsubmax = 300000, tmpmax = 50000),
  trace = FALSE,
  iterControl = list(maxiter = 20, tol = 1E-6)
) {
  if (robust) {
    return(
      RSTR_(
        data = data,
        predictors = predictors,
        confidence = confidence,
        nMCIter = nMCIter,
        lambdas = lambdas,
        pattern = pattern,
        nFold = nFold,
        reltol = reltol,
        gapCV = gapCV,
        control = control,
        # Forward the user's trace setting (was hard-coded to FALSE,
        # silently ignoring `trace` in robust mode).
        trace = trace
      )
    )
  } else {
    return(
      STR_(
        data = data,
        predictors = predictors,
        confidence = confidence,
        lambdas = lambdas,
        pattern = pattern,
        nFold = nFold,
        reltol = reltol,
        gapCV = gapCV,
        solver = solver,
        trace = trace,
        iterControl = iterControl
      )
    )
  }
}

# Internal work-horse for the non-robust STR method.
# Selects the smoothing parameters (lambdas) by minimising n-fold
# cross-validated MSE with stats::optim (search on the log scale), then
# fits the final model with STRmodel using the optimal lambdas.
STR_ <- function(
  data,
  predictors,
  confidence = NULL,
  lambdas = NULL,
  pattern = extractPattern(predictors),
  nFold = 5,
  reltol = 0.005,
  gapCV = 1,
  solver = c("Matrix", "cholesky"),
  trace = FALSE,
  ratioGap = 1e6, # Ratio to define bounds for one-dimensional search
  iterControl = list(maxiter = 20, tol = 1E-6)
) {
  # NULL confidence passes through: any(logical(0)) is FALSE.
  if (any(confidence <= 0 | confidence >= 1)) {
    stop("confidence must be between 0 and 1")
  }
  if (gapCV < 1) {
    stop("gapCV must be greater or equal to 1")
  }

  if (getDoParWorkers() <= 1) {
    registerDoSEQ()
  } # A way to avoid warning from %dopar% when no parallel backend is registered

  # Objective for optim: cross-validated MSE as a function of the
  # log-lambdas being searched. Closes over the fold data/design
  # matrices prepared below.
  f <- function(p) {
    p <- exp(p) # Optimisation is on log scale
    if (trace) {
      cat("\nParameters = [")
      cat(p)
      cat("]\n")
    }
    # Expand the searched subset back into full lambda triples.
    newLambdas <- createLambdas(p, pattern = pattern, original = origP)
    cv <- nFoldSTRCV(
      n = nFold,
      trainData = trainData,
      fcastData = fcastData,
      completeData = data,
      trainC = trainC,
      fcastC = fcastC,
      completeC = C,
      regMatrix = regMatrix,
      regSeats = regSeats,
      lambdas = newLambdas,
      solver = solver,
      trace = trace,
      iterControl = iterControl
    )
    if (trace) {
      cat("CV = ")
      cat(format(cv, digits = 16))
      cat("\n")
    }
    return(cv)
  }

  lData <- length(data)
  if (nFold * gapCV > lData) {
    stop(paste0(
      "nFold*gapCV should be less or equal to the data length.\nnFold = ",
      nFold,
      "\ngapCV = ",
      gapCV,
      "\nlength of the data = ",
      lData
    ))
  }
  # Fold i holds out gapCV-long runs of observations, interleaved across
  # the series with period nFold*gapCV; complInds are the complementary
  # (training) indices for each fold.
  subInds <- lapply(1:nFold, FUN = function(i) {
    sort(unlist(lapply(1:gapCV, FUN = function(j) {
      seq(from = (i - 1) * gapCV + j, to = lData, by = nFold * gapCV)
    })))
  })
  complInds <- lapply(subInds, FUN = function(s) setdiff(1:lData, s))

  # Build the design once; per-fold matrices/data are row subsets of it.
  strDesign <- STRDesign(predictors)
  C <- strDesign$cm$matrix
  fcastC <- lapply(subInds, FUN = function(si) C[si, ])
  trainC <- lapply(complInds, FUN = function(ci) C[ci, ])
  fcastData <- lapply(subInds, FUN = function(si) data[si])
  trainData <- lapply(complInds, FUN = function(ci) data[ci])
  rm <- strDesign$rm
  regMatrix <- rm$matrix
  regSeats <- rm$seats

  # Starting point: optimise only the lambdas selected by `pattern`;
  # origP keeps the full (absolute) lambda vector for the non-searched
  # positions. Explicit `lambdas` argument overrides those embedded in
  # the predictors.
  if (!is.null(lambdas)) {
    initP <- extractP(lambdas, pattern)
    origP <- abs(extractP(lambdas, rep(TRUE, length(pattern))))
  } else {
    initP <- extractP(predictors, pattern)
    origP <- abs(extractP(predictors, rep(TRUE, length(pattern))))
  }

  #  cat("\ninitP: "); cat(initP)
  #   if(length(initP) == 1) {
  #     lower = initP/ratioGap
  #     upper = initP*ratioGap
  #     cat("\nlower: "); cat(lower)
  #     cat("\nupper: "); cat(upper)
  #  }
  #  cat("\n")

  # Optimisation is performed on log scale.
  # Brent (1-D case) requires finite bounds, hence the ratioGap-derived
  # interval; Nelder-Mead (multi-D) ignores bounds, so +/-Inf is passed.
  optP <- optim(
    par = log(initP),
    fn = f,
    method = ifelse(length(initP) > 1, "Nelder-Mead", "Brent"),
    lower = ifelse(length(initP) > 1, -Inf, log(initP / ratioGap)),
    upper = ifelse(length(initP) > 1, Inf, log(initP * ratioGap)),
    control = list(reltol = reltol)
  )
  newLambdas <- createLambdas(
    exp(optP$par),
    pattern = pattern,
    original = origP
  )

  # Final fit on the complete data with the CV-optimal lambdas.
  result <- STRmodel(
    data,
    strDesign = strDesign,
    lambdas = newLambdas,
    confidence = confidence,
    trace = trace
  )
  result$optim.CV.MSE <- optP$value
  result$nFold <- nFold
  result$gapCV <- gapCV
  result$method <- "STR"
  return(result)
}

# Try the stR package in your browser
#
# Any scripts or data that you put into this service are public.
#
# stR documentation built on Sept. 9, 2025, 5:36 p.m.