# R/DataPrepare.R

# Defines functions: data.prepare

# Documented in: data.prepare

#' Data preparation
#'
#' @description Generates the covariate matrix for the requested data
#' structure, adds a per-series intercept, generates the response from the
#' covariates and splits everything into training and test pieces.
#'
#' @details The covariates are generated over \code{nsize + horizon} time
#' points by \code{XgenSimple()} (\code{'simple'}), \code{XgenCorr()}
#' (\code{'block1'}), \code{XgenCorr2()} (\code{'block2'}) or, for any other
#' value of \code{datatype}, \code{Xperiodic67()}. The response and the lagged
#' ("extended") covariate and response matrices are then produced by
#' \code{y.present()}.
#'
#' The training pieces are the first \code{nsize - 5} rows of the output of
#' \code{y.present()}; the test pieces are its last \code{horizon} rows. The
#' offset of 5 is fixed inside the function.
#'
#' @param datatype a string. Indicates the data structure, whether the series
#' are correlated or not. Choose from 'simple', 'block1', 'block2'. Any other
#' value selects the periodic generator.
#' @param parameters.of.X a list. Always read: \code{nsize}, \code{sigmax}.
#' Read for 'simple' and periodic data: \code{nseries}, \code{A}, \code{x0}.
#' Read for 'block1' and 'block2': \code{A.block}, \code{p.block},
#' \code{x0.block}. Read for periodic data only: \code{levelweekend},
#' \code{noiseweekend}.
#' @param parameters.of.Y a list with components \code{beta}, \code{sigmay},
#' \code{lagx} and \code{lagy}, passed on to \code{y.present()}.
#' @param parameter.of.estimation a list with components \code{full.index}
#' (a list of column-index vectors), \code{lag.to.estX} (which elements of
#' \code{full.index} to use) and \code{lag.to.estY} (number of leading columns
#' of the extended response matrix to use).
#' @param horizon a number. Forecast horizon, we use 7 in the example
#' @param intercept a numeric vector of offsets added to the columns of X.
#' Either a single value, which is applied to every series, or a vector with
#' at least one entry per series (entries beyond the number of series are
#' ignored). 0 by default.
#'
#' @return a list of components
#'
#' \item{X}{a matrix. Original covariate matrix with the intercept added, each column is one series. Note that it's both training and test period: hence the length is nsize + horizon}
#' \item{X.for.fcst}{a matrix. Training period of covariate matrix, length is nsize}
#' \item{Xtr}{a matrix. Shifted training X given required time lags (first nsize - 5 rows), 5 lags in our example: hence 10*5 columns}
#' \item{Ytr}{a matrix. Shifted response matrix (first nsize - 5 rows), used for residual fitting}
#' \item{Ytr.single}{a vector. Response in training period (first nsize - 5 values)}
#' \item{Xind}{a vector. Columns of the extended covariate matrix used for estimation, i.e. the elements of \code{full.index} selected by \code{lag.to.estX}}
#' \item{yind}{a vector. Columns of the extended response matrix used for estimation, i.e. 1 to \code{lag.to.estY}}
#' \item{Xte}{a matrix. Shifted test X (last \code{horizon} rows), 10*5 columns in our example}
#' \item{Yte.single}{a vector. Test response (last \code{horizon} values)}
#' \item{Yte}{a matrix. Test shifted response matrix (last \code{horizon} rows)}
#'
#'
#' @export
#'
#' @examples
#' # later



data.prepare <- function(datatype,
                         parameters.of.X,
                         parameters.of.Y,
                         parameter.of.estimation,
                         horizon,
                         intercept = 0) {

  # Number of rows at the end of the training period that are left out of the
  # training pieces (the fixed "nsize - 5" rule).
  n.dropped <- 5

  nsize <- parameters.of.X$nsize
  if (!is.numeric(nsize) || length(nsize) != 1L || nsize <= n.dropped) {
    stop("`parameters.of.X$nsize` must be a single number greater than ",
         n.dropped, ".", call. = FALSE)
  }

  # Covariates span the training period plus the forecast horizon.
  total.len <- nsize + horizon

  # ------------ generate x

  if (datatype == "simple") {

    X <- XgenSimple(p = parameters.of.X$nseries,
                    A = parameters.of.X$A,
                    x0 = parameters.of.X$x0,
                    t = total.len,
                    sigmax = parameters.of.X$sigmax)$X

  } else if (datatype == "block1") {  # for 3-3-4

    X <- XgenCorr(t = total.len,
                  sigmax = parameters.of.X$sigmax,
                  A.block = parameters.of.X$A.block,
                  p.block = parameters.of.X$p.block,
                  x0.block = parameters.of.X$x0.block)

  } else if (datatype == "block2") {  # for 6-4

    X <- XgenCorr2(t = total.len,
                   sigmax = parameters.of.X$sigmax,
                   A.block = parameters.of.X$A.block,
                   p.block = parameters.of.X$p.block,
                   x0.block = parameters.of.X$x0.block)

  } else {  # periodic

    X <- Xperiodic67(p = parameters.of.X$nseries,
                     A = parameters.of.X$A,
                     x0 = parameters.of.X$x0,
                     t = total.len,
                     sigmax = parameters.of.X$sigmax,
                     levelweekend = parameters.of.X$levelweekend,
                     noiseweekend = parameters.of.X$noiseweekend)$X
  }

  # ------------ add the per-series intercept (same step for every datatype)
  n.series <- ncol(X)
  if (length(intercept) == 1L) {
    # A single value applies to every series.
    intercept <- rep(intercept, n.series)
  } else if (length(intercept) < n.series) {
    # Indexing past the end would silently fill the extra series with NA.
    stop("`intercept` must have length 1 or at least ", n.series,
         " (one entry per series), not ", length(intercept), ".",
         call. = FALSE)
  }
  for (i in seq_len(n.series)) {
    X[, i] <- X[, i] + intercept[i]
  }

  # ------------- take out the X used for forecast: training period only, so
  # it has nsize rows, never nsize + horizon
  X.for.fcst <- X[seq_len(nsize), , drop = FALSE]

  # ------------- generate y
  dataY <- y.present(beta = parameters.of.Y$beta,
                     sigmay = parameters.of.Y$sigmay,
                     X,
                     lagx = parameters.of.Y$lagx,
                     lagy = parameters.of.Y$lagy)

  # -------------- fix index for estimation
  Xind <- unlist(
    parameter.of.estimation$full.index[parameter.of.estimation$lag.to.estX]
  )
  yind <- seq_len(parameter.of.estimation$lag.to.estY)

  # Training pieces: use nsize - 5 rows instead of nsize. drop = FALSE keeps
  # the matrix shape even when a single column is selected.
  train.rows <- seq_len(nsize - n.dropped)
  Xtr <- dataY$ExtendX[train.rows, Xind, drop = FALSE]
  Ytr.single <- dataY$Y[train.rows]
  Ytr <- dataY$ExtendY[train.rows, yind, drop = FALSE]

  # Test pieces: the last `horizon` rows, so the training set keeps a whole
  # number of observations.
  Xte <- utils::tail(dataY$ExtendX[, Xind, drop = FALSE], horizon)
  Yte.single <- utils::tail(dataY$Y, horizon)
  Yte <- utils::tail(dataY$ExtendY[, yind, drop = FALSE], horizon)

  list(X = X,
       X.for.fcst = X.for.fcst,
       Xtr = Xtr,
       Ytr = Ytr,
       Ytr.single = Ytr.single,
       Xind = Xind,
       yind = yind,
       Xte = Xte,
       Yte.single = Yte.single,
       Yte = Yte)
}
# yymmhaha/PackPaper1 documentation built on May 24, 2019, 8:55 a.m.