R/par_catch_all.R

Defines functions mean_prediction all_in_one_time_series values_replaced forecast_function best_forecast_type extract_model_fit_forecast par_time_series_catch

Documented in all_in_one_time_series best_forecast_type extract_model_fit_forecast forecast_function mean_prediction par_time_series_catch values_replaced

## data goes into a list of arrays

#' The big dawg that will forecast with the best of them
#'
#' @param original list of time series you want to forecast
#' @param freq character string indicating "month" for monthly etc.
#' @param steps integer that explains how far out you want forecast(needed for DLM)
#' @param dlmPoly integer for the order of polynomial you want in dlm, default = 2
#' @param dlmSeas integer for seasonal effect in dlm, default is 12 for monthly
#' @param num.cores integer for the number of cores you want to use
#' @param error either is "smape" or "mape", default is "mape"
#' @param xreg list of matrix/array of exogenous regressors you want to use to forecast only applicable to certain models
#' @param a.a.args list of arguments you want to change
#' @param ets.args list of arguments you want to change
#' @param tbats.args list of arguments you want to change
#' @param n.n.args list of arguments you want to change
#'
#' @return list of models for eacg ts
#' @export
#'
#' @examples \dontrun{
#' test <- replicate(5, list(ldeaths))
#' test_model <- par_time_series_catch(test, num.cores = 12)
#'}
#'
par_time_series_catch <- function(original, freq = "month", steps = 3, dlmPoly = 2, dlmSeas = 12,  num.cores = 2, error = "mape", xreg = NULL, a.a.args = list(NULL),
                                  ets.args = list(NULL), tbats.args = list(NULL),  n.n.args = list(NULL)){
  auto_args <- list(max.p = 5, max.q = 5, max.P = 2,
                    max.Q = 2, max.order = 5, max.d = 2, max.D = 1, start.p = 2,
                    start.q = 2, start.P = 1, start.Q = 1, stationary = FALSE,
                    seasonal = TRUE, nmodels = 94)
  ets_args <- list(model = "ZZZ", damped = NULL, alpha = NULL, beta = NULL,
                   gamma = NULL, phi = NULL, additive.only = FALSE, lambda = NULL,
                   biasadj = FALSE)
  tbats_args <- list(use.box.cox = NULL, use.trend = NULL,
                     use.damped.trend = NULL, seasonal.periods = NULL,
                     use.arma.errors = TRUE)
  n_n_args <- list(P = 1, repeats = 20, xreg = NULL, lambda = NULL,
                   model = NULL, subset = NULL, scale.inputs = TRUE)

  ## modifying lists to update code
  auto_args <- utils::modifyList(auto_args, a.a.args)
  ets_args <- utils::modifyList(ets_args, ets.args)
  tbats_args <- utils::modifyList(tbats_args, tbats.args)
  n_n_args <- utils::modifyList(n_n_args, n.n.args)
  startDate = zoo::as.Date(format(lubridate::date_decimal(utils::head(zoo::index(original[[1]]),1)), "%Y-%m-%d"))

  if (class(original)!="list"){
    print("Put the data into a list and retry dummy")
    model_list <- NULL
    stop()
  } else {
    x <- lapply(original, utils::head, -steps)
    x <- lapply(x, stats::as.ts)
    non_ts <- unlist(lapply(x, stats::is.ts))
    if (FALSE %in% non_ts){
      print("Your list contains non time-series, please change them all to time-series")
      stop()
    }
    ##modelling functions
    cl <- parallel::makeCluster(getOption("cl.cores", num.cores))
    par_auto <- parallel::parLapply(cl,x,forecast::auto.arima, max.p = auto_args$max.p, max.q = auto_args$max.q, max.P = auto_args$max.P,
                                    max.Q = auto_args$max.Q, max.order = auto_args$max.order, max.d = auto_args$max.d, max.D = auto_args$max.D,
                                    start.p = auto_args$start.p, start.q = auto_args$start.q, start.P = auto_args$start.P, start.Q = auto_args$start.Q,
                                    stationary = auto_args$stationary, seasonal = auto_args$seasonal, nmodels = auto_args$nmodels)
    parallel::stopCluster(cl)
    cl <- parallel::makeCluster(getOption("cl.cores", num.cores))
    par_ets <- parallel::parLapply(cl, x, forecast::ets, model = ets_args$model, damped = ets_args$damped, alpha = ets_args$alpha, beta = ets_args$beta,
                                   gamma = ets_args$gamma, phi = ets_args$phi, additive.only = ets_args$additive.only, lambda = ets_args$lambda,
                                   biasadj = ets_args$biasadj)
    parallel::stopCluster(cl)
    cl <- parallel::makeCluster(getOption("cl.cores", num.cores))
    par_tbats <- parallel::parLapply(cl, x, forecast::tbats, use.box.cox = tbats_args$use.box.cox, use.trend = tbats_args$use.trend, use.damped.trend = tbats_args$use.damped.trend,
                                     seasonal.periods = tbats_args$seasonal.periods, use.arma.errors = tbats_args$use.arma.errors)
    parallel::stopCluster(cl)
    cl <- parallel::makeCluster(getOption("cl.cores", num.cores))
    par_hybrid <- parallel::parLapply(cl, x, forecastHybrid::hybridModel,  a.args = auto_args, e.args = ets_args, t.args = tbats_args, n.args = n_n_args)
    parallel::stopCluster(cl)
    cl <- parallel::makeCluster(getOption("cl.cores", num.cores))
    par_dlm <- parallel::parLapply(cl, x, dlmPara, dlmPoly = dlmPoly, dlmSeas = dlmSeas, steps = steps, freq = freq)
    parallel::stopCluster(cl)
    model_list <- c(par_auto, par_ets, par_tbats, par_hybrid, par_dlm)
  }
  print("Modelling Done")
  model_forecast <- extract_model_fit_forecast(model_list, steps = steps, xreg = xreg, freq = freq, num.cores = num.cores)
  print("Forecasting Done")
  best_model <- best_forecast_type(model_forecast, steps = steps, origin = original)
  print("Picking Best Model")
  if (error=="mape"){
    error_df <- best_model[[2]]
  } else if (error=="smape"){
    error_df <- best_model[[1]]
  } else if (error=="mase"){
    error_df <- best_model[[3]]
  } else {
    print("Not a valid error metric")
    stop()
  }

  models <- colnames(error_df)
  orig_x <- lapply(original, stats::window, start = lubridate::decimal_date(startDate))
  cl <- parallel::makeCluster(getOption("cl.cores", num.cores))
  models_final <- parallel::parLapply(cl, orig_x, forecast_function, models, freq, steps, dlmPoly, dlmSeas, num.cores, error, xreg, a.a.args,
                                      ets.args, tbats.args, n.n.args)
  parallel::stopCluster(cl)
  print("Forecasting with Best Model")
  best_model_forecast <- dplyr::bind_cols(models_final)
  colnames(best_model_forecast) <- models
  return(best_model_forecast)
}

#'DLM building function that models and forecast a dlm model for each TS in the par_catch_all cal
#'
#' @param x ts
#' @param dlmPoly polynomial order used in model.build
#' @param dlmSeas seasonal portion
#' @param steps steps ahead
#' @param freq "month"
#'
#' @return array of fitted + forecast
#' @export
#'
#' @examples \dontrun{
#' test <- replicate(5, list(ldeaths))
#' test_model <- par_time_series_catch(test, num.cores = 12)
#' }


dlmPara <- function ( x, dlmPoly = dlmPoly, dlmSeas = dlmSeas, steps = steps , freq = freq){
  model.build <- function(p) {
    return(
      dlm::dlmModPoly(dlmPoly, dV=p[1], dW=p[2:3]) +
        dlm::dlmModSeas(dlmSeas, dV=p[4])
    )
  }
  model.mle <- dlm::dlmMLE(x, parm=c(0.1, 0, 1, 1), build=model.build)
  model.fit <- model.build(model.mle$par)
  if (freq == "month"){
    time <- 12
  } else if (freq == "day") {
    time <- 365
  } else if (freq == "week") {
    time <- 52
  } else if (freq == "quarter") {
    time <- 4
  } else { time <- 1}
  model.filtered <- dlm::dlmFilter(x, model.fit)
  model.smoothed <- dlm::dlmSmooth(x, model.fit)
  model.forecast <- dlm::dlmForecast(model.filtered, nAhead=steps)
  xf <- seq(max(zoo::index(x)), max(zoo::index(x))+steps/time, 1/time)
  xf <- xf[(-1)]
  aa <- model.forecast$a[,-1]*(-1)
  aa <- cbind(model.forecast$a[,1], aa)
  a <- drop(model.forecast$a%*%t(dlm::FF(model.fit)))
  a <- c(x,a)
  class(a) <- "dlm"
  return(a)
}

#' Extract the numeric array of fitted values with the forecast added on to the end.
#'
#' @param x list of models from par_catch_all
#' @param steps integer of how many steps out you want to forecast
#' @param xreg list of matrix/array of exogenous regressors you want to use to forecast only applicable to certain models
#' @param freq string of characters indicating e.g. "month" for monthly etc
#' @param num.cores integer for the number of cores you want to use
#' @return list of numeric arrays with forecasts appended
#' @export
#'
#' @examples \dontrun{
#' test <- replicate(5, list(ldeaths))
#' test_model <- par_time_series_catch(test, num.cores = 12)
#' }

#'
extract_model_fit_forecast <- function(x, steps = 3, xreg = NULL, freq = "month", num.cores = 2){
  ## forecasts are put into lists based on forecasting type
  aa_ets_tbats = list()
  dlm = list()
  hybrid = list()
  for (i in 1:(length(x)*(3/5))){
    aa_ets_tbats[[i]] <- x[[i]]
  }
  for (i in (length(x)*(3/5)+1):(length(x)*(4/5))){
    hybrid[[i]] <- x[[i]]
  }
  hybrid <- hybrid[lengths(hybrid) != 0]
  for (i in (length(x)*(4/5)+1):length(x)){
    dlm[[i]] <- x[[i]]
  }
  dlm <- dlm[lengths(dlm) != 0]
  cl <- parallel::makeCluster(getOption("cl.cores", num.cores))
  final_forecast <- parallel::parLapply(cl, aa_ets_tbats, forecast::forecast, h=steps, xreg=xreg)
  parallel::stopCluster(cl)
  final_hybrid <- lapply(hybrid, forecast::forecast, h=steps, xreg=xreg, PI=FALSE)
  final_dlm <- dlm
  forecast_combine <- c(final_forecast, final_hybrid)
  final_reg <- lapply(forecast_combine, function(x){c(x$x, x$mean)})
  final <- c(final_reg, final_dlm)
  return(final)
}


#' Picks the best model based on mape and smape and outputs a list
#'
#' @param model_forecast output from extract_model_fit_forecast
#' @param steps numeric based on how far you want to look back and ahead
#' @param origin series you are comparing agianst
#'
#' @return list of data frames with mape and smape models
#' @export
#'
#' @examples \dontrun{
#' test <- replicate(5, list(ldeaths))
#' test_model <- par_time_series_catch(test, num.cores = 12)
#' }
#'
best_forecast_type <- function(model_forecast, steps = steps, origin = NULL){
  ## use a list of models
  forecasts <- length(model_forecast)/5
  best_mape <- list()
  best_smape <- list()
  best_mase <- list()
  for (q in 1:forecasts){
    seq_list <- seq(q, length(model_forecast), by = forecasts)
    first_ts <- suppressMessages(cbind(dplyr::bind_cols(model_forecast[seq_list]), as.numeric(origin[[q]])))
    colnames(first_ts) <- c("Auto Arima", "ETS", "TBATS", "Hybrid", "DLM", "Original")
    mape <- NA
    smape <- NA
    mase <- NA
    for (i in 1:5){
      mape[i] <- Metrics::mape(utils::tail(as.numeric(first_ts$Original),steps), utils::tail(as.numeric(first_ts[,i]),steps))
      smape[i] <- Metrics::smape(utils::tail(as.numeric(first_ts$Original),steps), utils::tail(as.numeric(first_ts[,i]),steps))
      mase[i] <- Metrics::mase(utils::tail(as.numeric(first_ts$Original),steps), utils::tail(as.numeric(first_ts[,i]),steps), step_size = steps)
    }

    minmape <- match(min(mape), mape)
    best_forecastmape <- as.data.frame(as.double(first_ts[,minmape]))
    colnames(best_forecastmape) <- paste(c(names(first_ts)[minmape]), q, sep =", " )
    best_mape[[q]] <- best_forecastmape
    minsmape <- match(min(smape), smape)
    best_forecastsmape <- as.data.frame(as.double(first_ts[,minsmape]))
    colnames(best_forecastsmape) <-  paste(c(names(first_ts)[minsmape]), q, sep =", " )
    best_smape[[q]] <- best_forecastsmape
    minmase <- match(min(mase), mase)
    best_forecastmase <- as.data.frame(as.double(first_ts[,minmase]))
    colnames(best_forecastmase) <-  paste(c(names(first_ts)[minmase]), q, sep =", " )
    best_mase[[q]] <- best_forecastmase

  }

  smape_df <- dplyr::bind_cols(best_smape)
  mape_df <- dplyr::bind_cols(best_mape)
  mase_df <- dplyr::bind_cols(best_mase)

  error_list <- list("smape" = smape_df, "mape" = mape_df, "mase" = mase_df)
  return(error_list)
}

#' forecast function that forecasts out how many steps you look forward
#'
#' @param orig_x list of time series you want to forecast
#' @param models list of models
#' @param freq character string indicating "month" for monthly etc.
#' @param steps integer that explains how far out you want forecast(needed for DLM)
#' @param dlmPoly integer for the order of polynomial you want in dlm, default = 2
#' @param dlmSeas integer for seasonal effect in dlm, default is 12 for monthly
#' @param num.cores integer for the number of cores you want to use
#' @param error either is "smape" or "mape", default is "mape"
#' @param xreg list of matrix/array of exogenous regressors you want to use to forecast only applicable to certain models
#' @param a.a.args list of arguments you want to change
#' @param ets.args list of arguments you want to change
#' @param tbats.args list of arguments you want to change
#' @param n.n.args list of arguments you want to change
#'
#' @return list of models for models
#' @export
#'
#' @examples \dontrun{
#' test <- replicate(5, list(ldeaths))
#' test_model <- par_time_series_catch(test, num.cores = 12)
#'}
#'
forecast_function <- function(orig_x, models, freq = freq, steps = steps, dlmPoly = dlmPoly, dlmSeas = dlmSeas,  num.cores = num.cores, error = error, xreg = xreg, a.a.args = a.a.args,
                              ets.args = ets.args, tbats.args = tbats.args, n.n.args = n.n.args) {
  auto_args <- list(max.p = 5, max.q = 5, max.P = 2,
                    max.Q = 2, max.order = 5, max.d = 2, max.D = 1, start.p = 2,
                    start.q = 2, start.P = 1, start.Q = 1, stationary = FALSE,
                    seasonal = TRUE, nmodels = 94)
  ets_args <- list(model = "ZZZ", damped = NULL, alpha = NULL, beta = NULL,
                   gamma = NULL, phi = NULL, additive.only = FALSE, lambda = NULL,
                   biasadj = FALSE)
  tbats_args <- list(use.box.cox = NULL, use.trend = NULL,
                     use.damped.trend = NULL, seasonal.periods = NULL,
                     use.arma.errors = TRUE)
  n_n_args <- list(P = 1, repeats = 20, xreg = NULL, lambda = NULL,
                   model = NULL, subset = NULL, scale.inputs = TRUE)
  auto_args <- utils::modifyList(auto_args, a.a.args)
  ets_args <- utils::modifyList(ets_args, ets.args)
  tbats_args <- utils::modifyList(tbats_args, tbats.args)
  n_n_args <- utils::modifyList(n_n_args, n.n.args)
  i <- parent.frame()$i[]

  models <- models[i]

  if ( grepl("Auto Arima",models)){
    model <- forecast::auto.arima(orig_x, max.p = auto_args$max.p, max.q = auto_args$max.q, max.P = auto_args$max.P,
                                  max.Q = auto_args$max.Q, max.order = auto_args$max.order, max.d = auto_args$max.d, max.D = auto_args$max.D,
                                  start.p = auto_args$start.p, start.q = auto_args$start.q, start.P = auto_args$start.P, start.Q = auto_args$start.Q,
                                  stationary = auto_args$stationary, seasonal = auto_args$seasonal, nmodels = auto_args$nmodels)
    forecast_model <- forecast::forecast(model, h=steps, xreg=xreg)
    forecast_model <- c(forecast_model$x, forecast_model$mean)
  } else if ( grepl("ETS",models)){
    model <- forecast::ets(orig_x, model = ets_args$model, damped = ets_args$damped, alpha = ets_args$alpha, beta = ets_args$beta,
                           gamma = ets_args$gamma, phi = ets_args$phi, additive.only = ets_args$additive.only, lambda = ets_args$lambda,
                           biasadj = ets_args$biasadj)
    forecast_model <- forecast::forecast(model, h=steps, xreg=xreg)
    forecast_model <- c(forecast_model$x, forecast_model$mean)
  } else if (grepl("TBATS",models)){
    model <- forecast::tbats(orig_x, use.box.cox = tbats_args$use.box.cox, use.trend = tbats_args$use.trend, use.damped.trend = tbats_args$use.damped.trend,
                             seasonal.periods = tbats_args$seasonal.periods, use.arma.errors = tbats_args$use.arma.errors)
    forecast_model <- forecast::forecast(model, h=steps, xreg=xreg)
    forecast_model <- c(forecast_model$x, forecast_model$mean)
  } else if ( grepl("Hybrid",models)){
    model <- forecastHybrid::hybridModel(orig_x ,  a.args = auto_args, e.args = ets_args, t.args = tbats_args, n.args = n_n_args)
    forecast_model <- forecast::forecast(model, h=steps, xreg=xreg)
    forecast_model <- c(forecast_model$x, forecast_model$mean)
  } else {
    dlmPara <- function ( x, dlmPoly = dlmPoly, dlmSeas = dlmSeas, steps = steps , freq = freq){
      model.build <- function(p) {
        return(
          dlm::dlmModPoly(dlmPoly, dV=p[1], dW=p[2:3]) +
            dlm::dlmModSeas(dlmSeas, dV=p[4])
        )
      }
      model.mle <- dlm::dlmMLE(x, parm=c(0.1, 0, 1, 1), build=model.build)
      model.fit <- model.build(model.mle$par)
      if (freq == "month"){
        time <- 12
      } else if (freq == "day") {
        time <- 365
      } else if (freq == "week") {
        time <- 52
      } else if (freq == "quarter") {
        time <- 4
      } else { time <- 1}
      model.filtered <- dlm::dlmFilter(x, model.fit)
      model.smoothed <- dlm::dlmSmooth(x, model.fit)
      model.forecast <- dlm::dlmForecast(model.filtered, nAhead=steps)
      xf <- seq(max(zoo::index(x)), max(zoo::index(x))+steps/time, 1/time)
      xf <- xf[(-1)]
      aa <- model.forecast$a[,-1]*(-1)
      aa <- cbind(model.forecast$a[,1], aa)
      a <- drop(model.forecast$a%*%t(dlm::FF(model.fit)))
      a <- c(x,a)
      class(a) <- "dlm"
      return(a)
    }
    forecast_model <- dlmPara(orig_x,  dlmPoly = dlmPoly, dlmSeas = dlmSeas, steps = steps, freq = freq)

  }
  return(forecast_model)
}


#' tells you what values got changed in your dataframe
#'
#' @param new dataframe that has imputed values
#' @param old dataframe of same dim with original values
#'
#' @return prints whats changed from new to old
#' @export
#'
#' @examples \dontrun{
#' values_replaced(mtcars, mtcars*.2)
#'}
values_replaced <- function(new, old){
  if (dim(new)[1]==dim(old)[1] && dim(new)[2]==dim(old)[2]){
  } else {
    stop("Dimensions are different for the dataframes")
  }
  for (j in 1:NCOL(new)){
    for (i in 1:NROW(new)){
      if (is.na(new[i,j])|is.nan(new[i,j])|is.infinite(new[i,j])|is.null(new[i,j])){
        print(paste(colnames(new)[j], "at row",i, "is missing!" ))
      } else if (is.na(old[i,j])|is.nan(old[i,j])|is.infinite(old[i,j])|is.null(old[i,j])){
        print(paste(colnames(new)[j], "at row",i, "changed by",(new[i,j]-0)))
      } else if (new[i,j]==old[i,j]){
      } else {
        print(paste(colnames(new)[j], "at row",i, "changed by",(new[i,j]-old[i,j])))
      }
    }
  }
}

#'
#' God version of prediction
#' @importFrom dplyr "%>%"
#'
#' @param original list of time series you want to forecast
#' @param freq character string indicating "month" for monthly etc.
#' @param steps integer that explains how far out you want forecast(needed for DLM)
#' @param dlmPoly integer for the order of polynomial you want in dlm, default = 2
#' @param dlmSeas integer for seasonal effect in dlm, default is 12 for monthly
#' @param num.cores integer for the number of cores you want to use
#' @param error error method you want to use for backtesting
#' @param xreg list of matrix/array of exogenous regressors you want to use to forecast only applicable to certain models
#' @param a.a.args list of arguments you want to change
#' @param ets.args list of arguments you want to change
#' @param tbats.args list of arguments you want to change
#' @param n.n.args list of arguments you want to change
#' @param m how many times you would like it to impute values
#'
#' @return a data frame with your series original values plus the forecasts with added steps
#' @export
#'
#' @examples \dontrun{
#' all_in_one_time_series(mtcars)
#'}
all_in_one_time_series <- function(original, freq = "month", steps = 3, dlmPoly = 2, dlmSeas = 12,  num.cores = 2, error = "mape", xreg = NULL, a.a.args = list(NULL),
                                   ets.args = list(NULL), tbats.args = list(NULL),  n.n.args = list(NULL),  m = 10){

  if (class(original)!="data.frame"){
    stop("data isnt in data frame, please change it!")
  }

  ifelse(sapply(original, is.numeric),NA ,stop("all the data isnt numeric, please change it to numeric"))
  ## setting the best random seed
  set.seed(1337)
  if(any(!stats::complete.cases(original))){
    original_imputed <- missRanger::missRanger(original, .~1, maxiter = m, verbose = 1)
    original_imputed <- apply(original_imputed, 2, as.numeric)

  } else {

    original_imputed <- original
  }

  print("Missing values replaced with imputed values")
  values_replaced(original_imputed, original)

  ## then looking for anomalies
  outliers <- outForest::outForest(original_imputed, verbose = 0)
  print(outForest::outliers(outliers))
  original_imputed_anom <- outliers[["Data"]]

  print("Outliers coerced to more reasonable values")
  ## tell you what values where replaced
  values_replaced(original_imputed_anom, original_imputed)

  good_format <- lapply(original_imputed_anom, stats::as.ts)

  output <- par_time_series_catch(good_format, freq = freq, steps = steps, dlmPoly = dlmPoly, dlmSeas = dlmSeas,  num.cores = num.cores, error = error, xreg = xreg, a.a.args = a.a.args,
                                  ets.args = ets.args, tbats.args = tbats.args,  n.n.args = n.n.args)
  ##collecting model info and pasting it to the bottom
  models <- colnames(output)
  models <- gsub(",.*", "", models)
  colnames(output) <- colnames(original)
  output <- as.data.frame(rbind.data.frame(output, models))
  #changing row name to models
  rownames(output)[nrow(output)] <- "Models"

  return(output)

}

#' mean prediction for data that isnt very good
#'
#' @param df the dataframe you want to predict with the mean
#' @param steps the number of steps you are looking forward
#'
#' @return a data frame of your predictions
#' @export
#'
#' @examples \dontrun{
#' test_model <- mean_prediction(mtcars, steps=3)
#'}
mean_prediction <- function(df, steps=3){
  col_means <- colMeans(df, na.rm = TRUE)
  col_means <- ifelse(col_means=="NaN", 0, col_means)

  df_mean <- rbind.data.frame(df, t(replicate(steps,col_means)))
  df_mean <- rbind.data.frame(df_mean, replicate(ncol(df_mean), "Mean Predicition"))
  rownames(df_mean) <-  seq(1, nrow(df_mean))

  rownames(df_mean)[nrow(df_mean)] <- "Models"


  return(df_mean)
}
ryanbieber/Time-Series-Catch-All documentation built on Oct. 13, 2020, 9:59 a.m.