R/models.R

# forecasting model interface
# 1) training function
#     - train set
#     - column names
#     - parameters
#     - whatever else is needed to make the model run
#     - return a list with the trained model (SVM), name of the load column 
# and order of the load column
# 2) prediction function
#     - object created by the training function
#     - prediction dataset
# 3) notes
#     - The training and prediction functions do not care about training and 
# testing set, the splitting must be done before calling them
#     - They also do not compute any performance inidcators
#     - the predict function shouold predict only 1 step into the future,
# a function is provided to iterate this function automatically to forecast
# longer into the future
#     - the predict function should return a vector value, not a dataset

# build the necessary columns to call predict_one
build_columns <- function(tse, m, ...) UseMethod('build_columns', m)
# predict one step, does not build the columns (for internal use only)
predict_one <- function(tse, m, ...) UseMethod('predict_one', m)
get_X <- function(m) UseMethod('get_X', m)
get_X_order <- function(m) UseMethod('get_X_order', m)

# predicting n-steps
# interface for n steps
# build_columns, predict_one, get_X, get_X_order
# interface with data

#------------------------------------------------------------------------------#
# Iterated forecasting
# applies to any model that uses past values (all forecasting models)

# Intuitive explanation
# predict order 2
# is X.f2 already there? if yes exit
# D orders of X values : 1,2,3 & 4
# n (= 2) minus D orders : 1, 0, -1, -2
# columns needed -> X.f1.d1, X.d2, X.d3, X.d4
# create X.f1
# create X.f1.d1
# [new dset] map X.f1.d1 to X.d1, ...
# call predict on new dset -> create X.f2
# return


#' Takes a model that can predict one and iterates it n steps into the future
#' .
#' @description This is the core function of the forecasting model interface.
#' The forecasting model interface allows models that use previous values to be
#' iterated. We only need to define the basic functions: build_columns and
#' predict_one : a function that predicts one step into the future. Any such
#' model can be iterated to predict n-steps into the future using the predict_n
#' function.
#' 
#' @param m A forecasting model a.k.a. a model implementing the forecasting
#' model interface composed of the build_columns, predict_one, get_X_col,
#' get_X_order
#' @param n A positive integer indicating how many steps should the algorithm
#' be iterated
#' @return The return value is a data.frame of the algorithm applied to the
#' data. There are n column and the number of rows is the same as the one in
#' tse. The column names are f1, f2,... fn. f1 is the prediction according to
#' the algorithm m. f2 is the prediction according to the algorithm iterated
#' once, etc.
predict_n <- function(tse, m, n) {
  # right now we haven't implemented iterating an algorithm
  # which doesn't use the first past value
  stopifnot(min(get_X_order(m)) == 1)
  
  # build the necessary columns once and for all
  # then start the iterations (recursion)
  tse %>% build_columns(m) %>%
    predict_n_helper(m, n, NULL)
}

#' helper used by predict_n function
#' @details Internal use only?
predict_n_helper <- function(tse, m, n, predicted) {
  # no prediction to do for negative value of n
  if(n<=0) return(predicted)
  # the value to predict has already be computed -> abort mission :)
  if(paste0('f', n) %in% colnames(predicted)) return(predicted)
  
  # base case n = 1
  if(n==1) {
    predicted <- data.frame(f1 = predict_one(tse, m))
    return(predicted)
  }

  # comppute which forecasts we need
  forecasting_orders <- n - get_X_order(m)
  forecasting_orders %>% print
  # compute the necessary forecasted iterations
  # according to the forecasting orders
  for(k in forecasting_orders)
    predicted <- predict_n_helper(tse, m, k, predicted)
  
  print('hello')
  predicted %>% head %>% print
  predicted %>% str %>% print
  print('bye')
  
  # compute the lagged version of the forecasted
  # iterations and map them into the input dataset
  for(i in 1:length(get_X_order(m)))
    if(forecasting_orders[i] > 0)
      tse[, paste0(get_X(m), '.d', get_X_order(m)[i])] <- 
    # power.d1 <- f1.d1
    lag(predicted[, paste0('f', forecasting_orders[i])], get_X_order(m)[i]) 
  
  # compute the predicted value of the nth iterated algorithm
  predicted[, paste0('f', n)] <- predict_one(tse %>% update_available, m)
  
  predicted
}

#' Returns one forecast for a tse, using the smallest order possible
#' 
#' @param m A trained (forecasting) model
#' @param n Number of steps into the future we want to predict
best_forecast <- function(tse, m, n) {
  # input checks
  stopifnot(length(n) == 1)
  stopifnot(n > 0)
  
  predicted <- predict_n(tse, m, n)
  bf <- predicted$f1
  for(i in 2:n)
    bf[is.na(bf)] <- predicted[is.na(bf), paste0('f', i)]
  
  bf
}

#------------------------------------------------------------------------------#
# performance

#' Compute speed performance of algorithms
#' @param train_flag A boolean vector masking the training set
#' @param train_time Time taken to train the model in seconds
#' @param pred_time Time taken by the model to predict, in seconds
#' @param pred_size Size of the dataset that the model has predicted
#' @return A named vector with the main speed indicators: train time,
#' train size, prediction time, prediction test size, prediction time per 
#' point
perf_speed <- function(train_flag, train_time, pred_time, pred_size) {
  c('train time' = train_time,
     'train size' = sum(train_flag),
     'pred time' = pred_time,
     'pred set size'= pred_size,
     'pred time per point' = pred_time / pred_size)
}


#' Compute accuracy of algorithms
#' 
#' @param actual A vector of actual values. Can contain NAs
#' @param predicted A data.frame of predicted values. Each column 
#' being one forecast. This is especially usefull for forecasting algorithm 
#' which have different forecasting accuracy according to their iterated power.
#' 
#' @return Returns a data.frame with 2 columns: r2 and MAPE and 1 row for each
#' column in predicted
perf_accuracy <- function(actual, predicted) {
  # type of predicted must be a data.frame
  stopifnot(is.data.frame(predicted))
  # type of actual and predicted must be the same
  stopifnot(identical(length(actual), dim(predicted)[1]))
  
  # compute the accuracy for each column of predicted
  r2_r <- NULL
  mape_r <- NULL
  for(col in predicted) {
    r2_r <- c(r2_r, r2(actual, col))
    mape_r <- c(mape_r, mape(actual, col))
  }
  
  # put the results into a data.frame and return
  data.frame(r2 = r2_r, mape = mape_r, row.names = colnames(predicted))
}


#' Compute the explained variance or "r squared" statistic
#' @details Removes NA values so that it always returns a value even when the
#' forecasting algorithm or the base values has gaps.
r2 <- function(actual, predicted) {
  1 - var(actual-predicted, na.rm = TRUE) / var(actual, na.rm = TRUE)
}

#' Compute the Mean Absolute Proportional Error (MAPE)
#' 
#' @description  MAPE is a performance indicator of goodness of fit.
#' @details Removes NA values so that it always returns a value even when the
#' forecasting algorithm or the base values has gaps.
mape <- function(actual, predicted) {
  # remove zero values in the actual, so has to avoid infinite values
  zero_flag <- actual == 0
  actual[zero_flag] <- NA_real_
  predicted[zero_flag] <- NA_real_
  # compute the MAPE
  mean(abs(actual - predicted) / actual, na.rm = TRUE) # remove NAs
}
EBlonkowski/timeseries documentation built on May 6, 2019, 2:57 p.m.