timeseries:

#------------------------------------------------------------------------------#
# SVM model with gap filling -> does not output any NAs

#' SVM regression for forecasting
#' 
#' @description This models uses SVM to predict the future values of a variable.
#' It takes into account the day of the week and gap-filling: even if the input
#' values have NAs it will produce complete output
shuya_svm_gf <- function(tse, gamma, cost, split, X_col, ..., 
                      .orders_list = NULL) {
  message('[shuya-svm-gf] hi! How are you?')
  
  if(missing(.orders_list))
    orders <- list(...)
  else orders <- .orders_list
  
  # input checks
  stopifnot(X_col %in% names(orders)) # must give the orders of the X_col
  # all the orders must match some column in the tse
  stopifnot(names(orders) %in% colnames(tse)) 
  # all orders must be non-negative
  stopifnot(all(c(orders, recursive = TRUE) >= 0)) 
  # split must be logical
  stopifnot(is.logical(split))
  
  # construct lagged columns and remove useless ones
  tse$temp.group_by <- factor(tse$time.wday)
  tse <- tse %>%
    subcol(c(names(orders), 'temp.group_by')) %>% # remove useless columns
    D_multi(orders_list = orders)  # construct the lagged columns  
  if(!split) 
    tse <- select(tse, -temp.group_by)
  
  # remove the time columns, since the algorithm does not use them
  # and keep only the columns with all values
  data <- as.data.frame(tse)[tse$available, get_value_cols(tse)]
  rownames(data) <- NULL
  print('training data')
  data %>% head(15) %>% print
  
  # train the SVM
  model <- list(svm = svm(paste(X_col, '~ .') %>% as.formula, data = data, 
                           gamma = gamma, cost = cost))
  #model <- list(svm = lm(paste(X_col, '~ .') %>% as.formula, data = data))
                
  # train the daily_cluster_model that will be used for load gap filling
  model$dcm_gf <- daily_cluster_model(tse, group_by = 'wd_s_sph', X_col)
  
  # save some metadata
  model$X_col <- X_col
  model$X_order <- orders[[X_col]]
  model$parameters <- c(gamma = gamma, cost = cost)
  model$orders = orders
  model$split = split
  
  structure(model, class = 'shuya_svm_gf')
}

predict.shuya_svm_gf <- function(m, tse) {
  build_columns(tse, m) %>% predict_one(m) # so clean wow
}

# build the lagged columns for the algorithm
build_columns.shuya_svm_gf <- function(tse, m) {
  # build the column used to split the model
  tse$temp.group_by <- factor(tse$time.wday)
  # gap fill the predictor variables, not X_col
  for(col in setdiff(names(m$orders), m$X_col))
    tse[, col] <- fill_col(tse, col)
  # gap fill the X_col using dcm
  xcol <- tse[, m$X_col]
  xcol[is.na(xcol)] <- predict(m$dcm_gf, tse)[is.na(xcol)]
  tse[, m$X_col] <- xcol
  tse <- tse %>%
    subcol(c(names(m$orders), 'temp.group_by')) %>% # remove useless columns
    D_multi(orders_list = m$orders) %>%  # construct the lagged columns 
    # remove non-lagged X_col as it is not used for forecasting
    select_(paste0('-', m$X_col)) %>% 
    update_available
    
  if(!m$split)
    tse <- select(tse, -temp.group_by)
  
  # validity check
  stopifnot(is.tse(tse))

  tse
}

# predict one step, assuming all the lagged columns are already there
predict_one.shuya_svm_gf <- function(tse, m) {
  message('[predict_one] hi')
  as.data.frame(tse)[tse$available, get_value_cols(tse)] %>% head(15) %>% print
  result <- rep(NA_real_, dim(tse)[1])
  result[tse$available] <- predict(m$svm, 
                                   as.data.frame(tse)[tse$available, 
                                                      get_value_cols(tse) ])
  
  result
}

# which one is the X (or forecasted) columns?
get_X.shuya_svm_gf <- function(m) { return(m$X_col) }

# which lagged version of the forecasted columns are we using?
get_X_order.shuya_svm_gf <- function(m) { return(m$X_order) }