# R/FcLSTM.R

# res = FcLSTM(DataVec)
#
# DESCRIPTION
# In simple words, a feedforward network that feeds input not only forward from layer to layer
# but also in a loop back to specific layers is called recurrent.
# LSTM is an improvement for the case of 'vanishing gradients'.
# The procedure of this method works as follows:
# We start out with centered and scaled time series data (not strictly necessary:
# we just need a time series varying in the interval [-1,1]) provided as a numerical
# vector (hence equidistant). Furthermore, one has to set a forecast horizon ForecastHorizon.
#
# INPUT
# DataVec             [1:n] numerical vector of regular (equidistant) time series data.
# SplitAt             Index at which DataVec is divided into train and test data. If not given, n is used.
# ForecastHorizon     Scalar defining the number of timesteps to forecast ahead.
# Seasonality         Main seasonality of the data, used for generating batches of data. Default is 28.
# Scaled              TRUE (default): data is automatically scaled to [-1,1].
# ErrorLoss           Error measure for the loss function, either "MRD","SRD","MSE","MAE".
# Epochs              Number of epochs to train the model, see epochs in fit in keras.
# Neurons             Number of units per layer, see units in keras::layer_lstm.
# ActivationFunction  Defines the activation function to use, please see [Goodfellow, 2016] for details.
# RecurrentActivation Defines the recurrent activation function to use, please see [Goodfellow, 2016] for details.
# Batch_size          Number of samples per gradient update, see batch_size in fit in keras.
#                     The batch size is the number of data samples in one forward/backward pass of an RNN before
#                     a weight update.
#                     The batch size should not be chosen too high in relation to ForecastHorizon.
# OPTIONAL
# Time                [1:n] character vector of time stamps with the same length as DataVec.
# PlotIt              FALSE (default): do nothing. TRUE: plots the forecast versus the validation set.
# Silent              If FALSE, prints various outputs of keras. Default is TRUE.
# ...                 Further arguments for keras::layer_lstm
#
# OUTPUT
# Model               Pointer to an ANN model generated by keras, the model is not directly available in R
# FitStats            Output of fit in keras
# Forecast            Forecast generated by the ANN model, where we put in the last portion of the training set
#                     (of length Seasonality) as data to predict from. The test data stays untouched.
# TestData            [(k+1):n] vector, the part of DataVec not used in the model
# TestTime            [(k+1):n] vector, time of DataVec not used in the model
# TrainData           [1:k] vector, the part of DataVec used in the model
# TrainTime           [1:k] vector, time of training data, if given
# ForecastTrain       [1:k] vector, forecasted values using TrainData
#
# DETAILS
# In this approach the recurrent ANN has several internal parameters set as defined in deep learning,
# see [Goodfellow, 2016] for details. The last layer is a densely-connected NN layer within a time_distributed layer.
# Currently only one hidden layer is set.
# The epochs are the total number of forward/backward pass iterations. Typically more epochs improve model performance,
# unless overfitting occurs, at which point the validation accuracy/loss no longer improves.
# Data should be scaled to [-1,1] with a "sound" distribution, see [Goodfellow, 2016; Mörchen, 2006].
# Gradients vanish if inputs between zero and one are multiplied several times, because then the gradient can shrink to zero.
# The result is that the weights would not change significantly in a recurrent ANN of many layers ('deep learning').
# ErrorLoss defines the objective function to be minimized, see loss in compile in keras,
# if you want to use a pre-coded function. You can also put in a custom loss function if you write it in keras
# backend syntax (e.g., tensor_srd). The 'Adam' optimizer is chosen here [Kingma/Ba, 2014].
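#
# EXAMPLE
# A minimal sketch of a call (assuming keras and tensorflow are installed and
# configured; the toy series below is hypothetical):
#   DataVec = sin(seq(0, 20, length.out = 200)) + rnorm(200, sd = 0.1)
#   res = FcLSTM(DataVec, SplitAt = 180, ForecastHorizon = 1,
#                Seasonality = 28, Epochs = 10)
#   res$Forecast   # rolling one-step forecasts over the test set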
#
# Author: MCT



FcLSTM=function(DataVec,
                SplitAt, ForecastHorizon, Seasonality = 28, Scaled = TRUE,
                ErrorLoss = "MSE", Epochs = 100, Neurons = 28,
                ActivationFunction = "relu", RecurrentActivation = "sigmoid",
                Batch_size = 1, Time, PlotIt = FALSE, Silent = TRUE, ...){
  
  # ToDo: for later multivariate forecasting
  NumberFeatures=1
  # ToDo: these presumably need to be adapted for ForecastHorizon > 1
  batch_size_ts_gen=1
  time_steps=1
  
  
  if(!is.logical(Scaled)) {
    warning('Input for Scaled is not logical. Setting Scaled to TRUE')
    Scaled = TRUE
  }
  if(!is.logical(Silent)) {
    warning('Input for Silent is not logical. Setting Silent to TRUE')
    Silent = TRUE
  }
  if(!(is.numeric(Seasonality) && length(Seasonality) == 1 && Seasonality >= 0)) {
    warning('Seasonality should be a positive scalar. Setting Seasonality to 28')
    Seasonality = 28
  }
  if(!(is.numeric(Epochs) && length(Epochs) == 1 && Epochs >= 0)) {
    warning('Epochs should be a positive scalar. Setting Epochs to 100')
    Epochs = 100
  }
  if(!(is.numeric(Neurons) && length(Neurons) == 1 && Neurons >= 0)) {
    warning('Neurons should be a positive scalar. Setting Neurons to 28')
    Neurons = 28
  }

  
  if(missing(SplitAt)) {
    warning('Input for SplitAt was not given. Setting SplitAt to length(DataVec)')
    SplitAt = length(DataVec)
  }
  if(!(is.numeric(Batch_size) && length(Batch_size) == 1 && Batch_size >= 1 && Batch_size <= SplitAt)) {
    warning('Batch_size should be a positive scalar smaller than or equal to the training data length. Setting Batch_size to 1')
    Batch_size = 1
  }
  if(missing(ForecastHorizon)) {
    warning('Input for ForecastHorizon was not given. Setting ForecastHorizon to 1')
    ForecastHorizon = 1
  }
  if(missing(Time)) {
    Time = 1:length(DataVec)
    warning('No input for Time was given, will use 1:length(DataVec) for Time')
  }
  
  inputs = checkInputForecasting(DataVec, Time, SplitAt, ForecastHorizon, PlotIt)
  DataVec = inputs$DataVec
  Time = inputs$Time
  SplitAt = inputs$SplitAt
  ForecastHorizon = inputs$ForecastHorizon
  PlotIt = inputs$PlotIt
  
  n = length(DataVec)
  
  errorMessage = packageMissingError("keras", n, SplitAt)
  if(errorMessage != FALSE){
    return(
      list(
        Model = NULL,
        FitStats = NULL,
        Forecast = rep(NaN, ForecastHorizon),
        TestData = DataVec[SplitAt:n],
        TestTime = Time[SplitAt:n],
        TrainData = DataVec[1:SplitAt],
        TrainTime = Time[1:SplitAt],
        ForecastTrain = rep(NaN, SplitAt),
        Info = errorMessage
      )
    )
  }
  errorMessage = packageMissingError("tensorflow", n, SplitAt)
  if(errorMessage != FALSE){
    return(
      list(
        Model = NULL,
        FitStats = NULL,
        Forecast = rep(NaN, ForecastHorizon),
        TestData = DataVec[SplitAt:n],
        TestTime = Time[SplitAt:n],
        TrainData = DataVec[1:SplitAt],
        TrainTime = Time[1:SplitAt],
        ForecastTrain = rep(NaN, SplitAt),
        Info = errorMessage
      )
    )
  }
  
  
  ###############################################################
  # Functions for the keras backend calculating the sum root    #
  # difference error. The plain R version of this error can't   #
  # be used here due to a bug in the tensorflow backend.        #
  ###############################################################
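  # tensor_srd computes SRD(x, y) = sum_i sqrt(|x_i - y_i|);
  # tensor_mrd computes the mean analogue MRD(x, y) = mean_i sqrt(|x_i - y_i|).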
  tensor_srd = function(x,y) {
    #require(tensorflow)
    requireNamespace('tensorflow')
    requireNamespace('keras')
    return(
      keras::k_sum(
        keras::k_sqrt(
          keras::k_abs(
            x - y
          )
        )
      )
    )
  }
  
  tensor_mrd = function(x,y) {
    #require(tensorflow)
    requireNamespace('tensorflow')
    requireNamespace('keras')
    return(
      keras::k_mean(
        keras::k_sqrt(
          keras::k_abs(
            x - y
          )
        )
      )
    )
  }
  
  
  switch(ErrorLoss,
         "MRD"={
           loss_function=tensor_mrd
         },
         "SRD"={
           loss_function=tensor_srd
         },
         "MSE"={
           loss_function='mean_squared_error'
         },
         "MAE"={
           loss_function="mean_absolute_error"
         },
         {
           stop('FcLSTM: Please select correct ErrorLoss')
         })

  if(isTRUE(Scaled)){
    toRange=function(data, lower, upper){
        # taken from dbt.Transforms: linearly rescales each column of data to [lower, upper]
        data <- as.matrix(data)
        if(lower == upper){
          stop('interval width can not be 0!')
        }
        if(lower > upper){ # swap the bounds if they were given in the wrong order
          temp  <- upper
          upper <- lower
          lower <- temp
        }
        n <- dim(data)[1]
        d <- dim(data)[2]
        if((n == 1) & (d > 1)){ # row vector to column vector
          data <- t(data)
          wasRowVector <- 1
        }else{
          wasRowVector <- 0
        }
        nRow <- dim(data)[1]
        nCol <- dim(data)[2]
        min <- apply(data, 2, min, na.rm = TRUE)
        min <- matrix(min, nRow, nCol, byrow = TRUE)
        max <- apply(data, 2, max, na.rm = TRUE)
        max <- matrix(max, nRow, nCol, byrow = TRUE)
        range <- max - min
        range[range == 0] <- 1 # avoid division by zero for constant columns
        scaleData <- (data - min) / range
        scaleData <- lower + scaleData * (upper - lower)
        if(wasRowVector == 1){
          scaleData = t(scaleData)
        }
        return(scaleData)
      }
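    # A quick sanity check of toRange on a hypothetical toy input:
    #   toRange(c(0, 5, 10), -1, 1)  # returns c(-1, 0, 1) as a column matrix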
    DataVec=toRange(DataVec,-1,1)
  }

  
  #V=TSAT::SplitPercentageTS(DataVec,Time,Percentage = Percentage)
  #V=TSAT::SplitPercentageTS(DataVec,Time,Percentage = length(DataVec)/SplitAt*100)
  N = length(DataVec)
  split = N - SplitAt
  train = head(DataVec, N - split)
  test = tail(DataVec, split)
  TimeTrain= head(Time, N - split)
  TimeTest= tail(Time, split)
  
  #train=V$TrainingSet
  #test=V$TestSet
  #TimeTrain=V$TrainingTime
  #TimeTest=V$TestTime
  # Tensor Format:
  #   
  #   Predictors (X) must be a 3D Array with dimensions: [samples, timesteps, features]: The first dimension is the length of values, 
  # the second is the number of time steps (lags), and the third is the number of predictors (1 if univariate or n if multivariate)
  # Outcomes/Targets (y) must be a 2D Array with dimensions: [samples, timesteps]: The first dimension is the length of values and the second is the number of time steps (lags)
  #
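  # A minimal illustration of that layout (hypothetical shapes): a univariate
  # series yielding 100 samples with Seasonality = 28 lags would be arranged as
  #   X = array(<lagged windows>, dim = c(100, 28, 1)) # [samples, timesteps, features]
  #   y = matrix(<targets>, nrow = 100, ncol = 1)      # [samples, timesteps]
  # keras::timeseries_generator below builds such batches from train on the fly.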
  generator = keras::timeseries_generator(data = train,targets = train,length=Seasonality,batch_size=batch_size_ts_gen)

  model <- keras::keras_model_sequential()
  keras::layer_lstm(
    model,
    units            = Neurons,
    input_shape      = c(time_steps, NumberFeatures),
    return_sequences = TRUE,
    activation=ActivationFunction,
    recurrent_activation=RecurrentActivation,...
  )
  keras::time_distributed(model,keras::layer_dense(units = 1))
  keras::compile(model,loss=loss_function, optimizer='adam') # Adam: a stochastic gradient descent variant [Kingma/Ba, 2014]
  if(isFALSE(Silent)){
    summary(model)
    verbose=1
  }else{
    verbose=0
  }
  
  out = keras::fit(
    model,
    generator,
    batch_size = Batch_size,
    epochs = Epochs,verbose=verbose
  )
  
  pred_out1 = predict(model,train)
  #plot(train,type="l")
  #points(as.numeric(pred_out1),type="l",col="red")
  
  currentpred=c()
  #take the last seasonality batch to predict the forecast horizon
  training=tail(train,Seasonality)
  future_forecast=c()
  for(i in 1:length(test)){
    currentpred = head(predict(model,array(data = tail(training,Seasonality),c(1,1,1))),ForecastHorizon)
    if(ForecastHorizon ==1)
      future_forecast=c(future_forecast,currentpred)
    else
      future_forecast[[i]]=currentpred
    
    #rolling forecast
    #take one point of the test set to predict again the next seasonality
    training=tail(c(training,test[i]),Seasonality)
  }
  
  if(is.list(future_forecast)){
    names(future_forecast)=TimeTest
  }
  
  if (PlotIt) {
    if(ForecastHorizon == 1){
      future_forecast_cur = future_forecast
      get('plotEvaluationFilteredTS',
          envir = asNamespace('TSAT'),
          inherits = FALSE)(TimeTest, test, future_forecast_cur, FALSE)
    }else{
      #ToDo
    }
  }
  return(list(
    Model = model,
    FitStats = out,
    Forecast = future_forecast,
    TestData = test,
    TestTime = TimeTest,
    TrainData = train,
    TrainTime = TimeTrain,
    ForecastTrain = pred_out1
  ))
  
}