R/AutoCatBoostCARMA.R

Defines functions AutoCatBoostCARMA

Documented in AutoCatBoostCARMA

# AutoQuant is a package for quickly creating high quality visualizations under a common and easy api.
# Copyright (C) <year>  <name of author>
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU Affero General Public License as
# published by the Free Software Foundation, either version 3 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU Affero General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program.  If not, see <https://www.gnu.org/licenses/>.

#' @title AutoCatBoostCARMA
#'
#' @description AutoCatBoostCARMA Mutlivariate Forecasting with calendar variables, Holiday counts, holiday lags, holiday moving averages, differencing, transformations, interaction-based categorical encoding using target variable and features to generate various time-based aggregated lags, moving averages, moving standard deviations, moving skewness, moving kurtosis, moving quantiles, parallelized interaction-based fourier pairs by grouping variables, and Trend Variables.
#'
#' @author Adrian Antico
#' @family Automated Panel Data Forecasting
#'
#' @param data Supply your full series data set here
#' @param TrainOnFull Set to TRUE to train on full data
#' @param TargetColumnName List the column name of your target variables column. E.g. 'Target'
#' @param NonNegativePred TRUE or FALSE
#' @param RoundPreds Rounding predictions to an integer value. TRUE or FALSE. Defaults to FALSE
#' @param DateColumnName List the column name of your date column. E.g. 'DateTime'
#' @param GroupVariables Defaults to NULL. Use NULL when you have a single series. Add in GroupVariables when you have a series for every level of a group or multiple groups.
#' @param HierarchGroups Vector of hierachy categorical columns.
#' @param TimeWeights Supply a value that will be multiplied by he time trend value
#' @param TimeUnit List the time unit your data is aggregated by. E.g. '1min', '5min', '10min', '15min', '30min', 'hour', 'day', 'week', 'month', 'quarter', 'year'.
#' @param TimeGroups Select time aggregations for adding various time aggregated GDL features.
#' @param FC_Periods Set the number of periods you want to have forecasts for. E.g. 52 for weekly data to forecast a year ahead
#' @param SaveDataPath NULL Or supply a path. Data saved will be called 'ModelID'_data.csv
#' @param EncodingMethod 'binary', 'credibility', 'woe', 'target_encoding', 'poly_encode', 'backward_difference', 'helmert'
#' @param TargetTransformation TRUE or FALSE. If TRUE, select the methods in the Methods arg you want tested. The best one will be applied.
#' @param Methods Choose from 'YeoJohnson', 'BoxCox', 'Asinh', 'Log', 'LogPlus1', 'Sqrt', 'Asin', or 'Logit'. If more than one is selected, the one with the best normalization pearson statistic will be used. Identity is automatically selected and compared.
#' @param XREGS Additional data to use for model development and forecasting. Data needs to be a complete series which means both the historical and forward looking values over the specified forecast window needs to be supplied.
#' @param Timer Set to FALSE to turn off the updating print statements for progress
#' @param DebugMode Defaults to FALSE. Set to TRUE to get a print statement of each high level comment in function
#' @param Lags Select the periods for all lag variables you want to create. E.g. c(1:5,52) or list('day' = c(1:10), 'weeks' = c(1:4))
#' @param MA_Periods Select the periods for all moving average variables you want to create. E.g. c(1:5,52) or list('day' = c(2:10), 'weeks' = c(2:4))
#' @param SD_Periods Select the periods for all moving standard deviation variables you want to create. E.g. c(1:5,52) or list('day' = c(2:10), 'weeks' = c(2:4))
#' @param Skew_Periods Select the periods for all moving skewness variables you want to create. E.g. c(1:5,52) or list('day' = c(2:10), 'weeks' = c(2:4))
#' @param Kurt_Periods Select the periods for all moving kurtosis variables you want to create. E.g. c(1:5,52) or list('day' = c(2:10), 'weeks' = c(2:4))
#' @param Quantile_Periods Select the periods for all moving quantiles variables you want to create. E.g. c(1:5,52) or list('day' = c(2:10), 'weeks' = c(2:4))
#' @param Quantiles_Selected Select from the following 'q5', 'q10', 'q15', 'q20', 'q25', 'q30', 'q35', 'q40', 'q45', 'q50', 'q55', 'q60', 'q65', 'q70', 'q75', 'q80', 'q85', 'q90', 'q95'
#' @param AnomalyDetection NULL for not using the service. Other, provide a list, e.g. AnomalyDetection = list('tstat_high' = 4, 'tstat_low' = -4)
#' @param Difference Puts the I in ARIMA for single series and grouped series.
#' @param FourierTerms Set to the max number of pairs. E.g. 2 means to generate two pairs for by each group level and interations if hierarchy is enabled.
#' @param CalendarVariables NULL, or select from 'minute', 'hour', 'wday', 'mday', 'yday', 'week', 'isoweek', 'month', 'quarter', 'year'
#' @param HolidayVariable NULL, or select from 'USPublicHolidays', 'EasterGroup', 'ChristmasGroup', 'OtherEcclesticalFeasts'
#' @param HolidayLookback Number of days in range to compute number of holidays from a given date in the data. If NULL, the number of days are computed for you.
#' @param HolidayLags Number of lags to build off of the holiday count variable.
#' @param HolidayMovingAverages Number of moving averages to build off of the holiday count variable.
#' @param TimeTrendVariable Set to TRUE to have a time trend variable added to the model. Time trend is numeric variable indicating the numeric value of each record in the time series (by group). Time trend starts at 1 for the earliest point in time and increments by one for each success time point.
#' @param DataTruncate Set to TRUE to remove records with missing values from the lags and moving average features created
#' @param ZeroPadSeries NULL to do nothing. Otherwise, set to 'maxmax', 'minmax', 'maxmin', 'minmin'. See \code{\link{TimeSeriesFill}} for explanations of each type
#' @param PartitionType Select 'random' for random data partitioning 'timeseries' for partitioning by time frames
#' @param SplitRatios E.g c(0.7,0.2,0.1) for train, validation, and test sets
#' @param NumOfParDepPlots Supply a number for the number of partial dependence plots you want returned
#' @param EvalMetric Select from 'RMSE', 'MAE', 'MAPE', 'Poisson', 'Quantile', 'LogLinQuantile', 'Lq', 'NumErrors', 'SMAPE', 'R2', 'MSLE', 'MedianAbsoluteError'
#' @param EvalMetricValue Used when EvalMetric accepts an argument. See \code{\link{AutoCatBoostRegression}}
#' @param LossFunction Used in model training for model fitting. Select from 'RMSE', 'MAE', 'Quantile', 'LogLinQuantile', 'MAPE', 'Poisson', 'PairLogitPairwise', 'Tweedie', 'QueryRMSE'
#' @param LossFunctionValue Used when LossFunction accepts an argument. See \code{\link{AutoCatBoostRegression}}
#' @param TaskType Default is 'GPU' but you can also set it to 'CPU'
#' @param NumGPU Defaults to 1. If CPU is set this argument will be ignored.
#' @param GridTune Set to TRUE to run a grid tune
#' @param PassInGrid Defaults to NULL
#' @param ModelCount Set the number of models to try in the grid tune
#' @param MaxRunsWithoutNewWinner Default is 50
#' @param MaxRunMinutes Default is 60*60
#' @param Langevin Enables the Stochastic Gradient Langevin Boosting mode. If TRUE and TaskType == 'GPU' then TaskType will be converted to 'CPU'
#' @param DiffusionTemperature Default is 10000
#' @param NTrees Select the number of trees you want to have built to train the model
#' @param Depth Depth of catboost model
#' @param L2_Leaf_Reg l2 reg parameter
#' @param LearningRate Defaults to NULL. Catboost will dynamically define this if L2_Leaf_Reg is NULL and RMSE is chosen (otherwise catboost will default it to 0.03). Then you can pull it out of the model object and pass it back in should you wish.
#' @param RandomStrength Default is 1
#' @param BorderCount Default is 254
#' @param RSM CPU only. If TaskType is GPU then RSM will not be used
#' @param BootStrapType If NULL, then if TaskType is GPU then Bayesian will be used. If CPU then MVS will be used. If MVS is selected when TaskType is GPU, then BootStrapType will be switched to Bayesian
#' @param GrowPolicy Default is SymmetricTree. Others include Lossguide and Depthwise
#' @param ModelSizeReg Defaults to 0.5. Set to 0 to allow for bigger models. This is for models with high cardinality categorical features. Valuues greater than 0 will shrink the model and quality will decline but models won't be huge.
#' @param FeatureBorderType Defaults to 'GreedyLogSum'. Other options include: Median, Uniform, UniformAndQuantiles, MaxLogSum, MinEntropy
#' @param SamplingUnit Default is Group. Other option is Object. if GPU is selected, this will be turned off unless the loss_function is YetiRankPairWise
#' @param SubSample Can use if BootStrapType is neither Bayesian nor No. Pass NULL to use Catboost default. Used for bagging.
#' @param ScoreFunction Default is Cosine. CPU options are Cosine and L2. GPU options are Cosine, L2, NewtonL2, and NewtomCosine (not available for Lossguide)
#' @param MinDataInLeaf Defaults to 1. Used if GrowPolicy is not SymmetricTree
#' @param SaveModel Logical. If TRUE, output ArgsList will have a named element 'Model' with the CatBoost model object
#' @param ArgsList ArgsList is for scoring. Must contain named element 'Model' with a catboost model object
#' @param ExpandEncoding = FALSE
#' @param ModelID Something to name your model if you want it saved
#' @param TVT Passthrough
#' @examples
#' \dontrun{
#'
#' # Set up your output file path for saving results as a .csv
#' Path <- 'C:/YourPathHere'
#'
#' # Run on GPU or CPU (some options in the grid tuning force usage of CPU for some runs)
#' TaskType = 'GPU'
#'
#' # Define number of CPU threads to allow data.table to utilize
#' data.table::setDTthreads(percent = max(1L, parallel::detectCores()-2L))
#'
#' # Load data
#' data <- data.table::fread('https://www.dropbox.com/s/2str3ek4f4cheqi/walmart_train.csv?dl=1')
#' data <- Rappture::DM.pgQuery(Host = 'localhost', DataBase = 'AutoQuant', SELECT = NULL, FROM = 'WalmartFull', User = 'postgres', Port = 5432, Password = 'Aa1028#@')
#'
#' # Ensure series have no missing dates (also remove series with more than 25% missing values)
#' data <- AutoQuant::TimeSeriesFill(
#'   data,
#'   DateColumnName = 'Date',
#'   GroupVariables = c('Store','Dept'),
#'   TimeUnit = 'weeks',
#'   FillType = 'maxmax',
#'   MaxMissingPercent = 0.25,
#'   SimpleImpute = TRUE)
#'
#' # Set negative numbers to 0
#' data <- data[, Weekly_Sales := data.table::fifelse(Weekly_Sales < 0, 0, Weekly_Sales)]
#'
#' # Remove IsHoliday column
#' data[, IsHoliday := NULL]
#'
#' # Create xregs (this is the include the categorical variables instead of utilizing only the interaction of them)
#' xregs <- data[, .SD, .SDcols = c('Date', 'Store', 'Dept')]
#'
#' # Change data types
#' data[, ':=' (Store = as.character(Store), Dept = as.character(Dept))]
#' xregs[, ':=' (Store = as.character(Store), Dept = as.character(Dept))]
#'
#' # Subset data so we have an out of time sample
#' data1 <- data.table::copy(data[, ID := 1L:.N, by = c('Store','Dept')][ID <= 125L][, ID := NULL])
#' data[, ID := NULL]
#'
#' # Define values for SplitRatios and FCWindow Args
#' N1 <- data1[, .N, by = c('Store','Dept')][1L, N]
#' N2 <- xregs[, .N, by = c('Store','Dept')][1L, N]
#'
#' # Setup Grid Tuning & Feature Tuning data.table using a cross join of vectors
#' Tuning <- data.table::CJ(
#'   TimeWeights = c('None',0.999),
#'   MaxTimeGroups = c('weeks','months'),
#'   TargetTransformation = c('TRUE','FALSE'),
#'   Difference = c('TRUE','FALSE'),
#'   HoldoutTrain = c(6,18),
#'   Langevin = c('TRUE','FALSE'),
#'   NTrees = c(2500,5000),
#'   Depth = c(6,9),
#'   RandomStrength = c(0.75,1),
#'   L2_Leaf_Reg = c(3.0,4.0),
#'   RSM = c(0.75,'NULL'),
#'   GrowPolicy = c('SymmetricTree','Lossguide','Depthwise'),
#'   BootStrapType = c('Bayesian','MVS','No'))
#'
#' # Remove options that are not compatible with GPU (skip over this otherwise)
#' Tuning <- Tuning[Langevin == 'TRUE' | (Langevin == 'FALSE' & RSM == 'NULL' & BootStrapType %in% c('Bayesian','No'))]
#'
#' # Randomize order of Tuning data.table
#' Tuning <- Tuning[order(runif(.N))]
#'
#' # Load grid results and remove rows that have already been tested
#' if(file.exists(file.path(Path, 'Walmart_CARMA_Metrics.csv'))) {
#'   Metrics <- data.table::fread(file.path(Path, 'Walmart_CARMA_Metrics.csv'))
#'   temp <- data.table::rbindlist(list(Metrics,Tuning), fill = TRUE)
#'   temp <- unique(temp, by = c(4:(ncol(temp)-1)))
#'   Tuning <- temp[is.na(RunTime)][, .SD, .SDcols = names(Tuning)]
#'   rm(Metrics,temp)
#' }
#'
#' # Define the total number of runs
#' TotalRuns <- Tuning[,.N]
#'
#' # Kick off feature + grid tuning
#' for(Run in seq_len(TotalRuns)) {
#'
#'   # print run number
#'   for(zz in seq_len(100)) print(Run)
#'
#'   # Use fresh data for each run
#'   xregs_new <- data.table::copy(xregs)
#'   data_new <- data.table::copy(data1)
#'
#'   # Timer start
#'   StartTime <- Sys.time()
#'
#'   # Run carma system
#'   CatBoostResults <- AutoQuant::AutoCatBoostCARMA(
#'
#'     # data args
#'     data = data_new,
#'     TimeWeights = if(Tuning[Run, TimeWeights] == 'None') NULL else as.numeric(Tuning[Run, TimeWeights]),
#'     TargetColumnName = 'Weekly_Sales',
#'     DateColumnName = 'Date',
#'     HierarchGroups = NULL,
#'     GroupVariables = c('Store','Dept'),
#'     EncodingMethod = 'credibility',
#'     TimeUnit = 'weeks',
#'     TimeGroups = if(Tuning[Run, MaxTimeGroups] == 'weeks') 'weeks' else if(Tuning[Run, MaxTimeGroups] == 'months') c('weeks','months') else c('weeks','months','quarters'),
#'
#'     # Production args
#'     TrainOnFull = TRUE,
#'     SplitRatios = c(1 - Tuning[Run, HoldoutTrain] / N2, Tuning[Run, HoldoutTrain] / N2),
#'     PartitionType = 'random',
#'     FC_Periods = N2-N1,
#'     TaskType = TaskType,
#'     NumGPU = 1,
#'     Timer = TRUE,
#'     DebugMode = TRUE,
#'
#'     # Target variable transformations
#'     TargetTransformation = as.logical(Tuning[Run, TargetTransformation]),
#'     Methods = c('YeoJohnson', 'BoxCox', 'Asinh', 'Log', 'LogPlus1', 'Sqrt', 'Asin', 'Logit'),
#'     Difference = as.logical(Tuning[Run, Difference]),
#'     NonNegativePred = TRUE,
#'     RoundPreds = FALSE,
#'
#'     # Calendar-related features
#'     CalendarVariables = c('week','wom','month','quarter'),
#'     HolidayVariable = c('USPublicHolidays'),
#'     HolidayLookback = NULL,
#'     HolidayLags = c(1,2,3),
#'     HolidayMovingAverages = c(2,3),
#'
#'     # Lags, moving averages, and other rolling stats
#'     Lags = if(Tuning[Run, MaxTimeGroups] == 'weeks') c(1,2,3,4,5,8,9,12,13,51,52,53) else if(Tuning[Run, MaxTimeGroups] == 'months') list('weeks' = c(1,2,3,4,5,8,9,12,13,51,52,53), 'months' = c(1,2,6,12)) else list('weeks' = c(1,2,3,4,5,8,9,12,13,51,52,53), 'months' = c(1,2,6,12), 'quarters' = c(1,2,3,4)),
#'     MA_Periods = if(Tuning[Run, MaxTimeGroups] == 'weeks') c(2,3,4,5,8,9,12,13,51,52,53) else if(Tuning[Run, MaxTimeGroups] == 'months') list('weeks' = c(2,3,4,5,8,9,12,13,51,52,53), 'months' = c(2,6,12)) else list('weeks' = c(2,3,4,5,8,9,12,13,51,52,53), 'months' = c(2,6,12), 'quarters' = c(2,3,4)),
#'     SD_Periods = NULL,
#'     Skew_Periods = NULL,
#'     Kurt_Periods = NULL,
#'     Quantile_Periods = NULL,
#'     Quantiles_Selected = NULL,
#'
#'     # Bonus features
#'     AnomalyDetection = NULL,
#'     XREGS = xregs_new,
#'     FourierTerms = 0,
#'     TimeTrendVariable = TRUE,
#'     ZeroPadSeries = NULL,
#'     DataTruncate = FALSE,
#'
#'     # ML grid tuning args
#'     GridTune = FALSE,
#'     PassInGrid = NULL,
#'     ModelCount = 5,
#'     MaxRunsWithoutNewWinner = 50,
#'     MaxRunMinutes = 60*60,
#'
#'     # ML evaluation output
#'     SaveDataPath = NULL,
#'     NumOfParDepPlots = 0L,
#'
#'     # ML loss functions
#'     EvalMetric = 'RMSE',
#'     EvalMetricValue = 1,
#'     LossFunction = 'RMSE',
#'     LossFunctionValue = 1,
#'
#'     # ML tuning args
#'     NTrees = Tuning[Run, NTrees],
#'     Depth = Tuning[Run, Depth],
#'     L2_Leaf_Reg = Tuning[Run, L2_Leaf_Reg],
#'     LearningRate = 0.03,
#'     Langevin = as.logical(Tuning[Run, Langevin]),
#'     DiffusionTemperature = 10000,
#'     RandomStrength = Tuning[Run, RandomStrength],
#'     BorderCount = 254,
#'     RSM = if(Tuning[Run, RSM] == 'NULL') NULL else as.numeric(Tuning[Run, RSM]),
#'     GrowPolicy = Tuning[Run, GrowPolicy],
#'     BootStrapType = Tuning[Run, BootStrapType],
#'     ModelSizeReg = 0.5,
#'     FeatureBorderType = 'GreedyLogSum',
#'     SamplingUnit = 'Group',
#'     SubSample = NULL,
#'     ScoreFunction = 'Cosine',
#'     MinDataInLeaf = 1)
#'
#'   # Timer End
#'   EndTime <- Sys.time()
#'
#'   # Prepare data for evaluation
#'   Results <- CatBoostResults$Forecast
#'   data.table::setnames(Results, 'Weekly_Sales', 'bla')
#'   Results <- merge(Results, data, by = c('Store','Dept','Date'), all = FALSE)
#'   Results <- Results[is.na(bla)][, bla := NULL]
#'
#'   # Create totals and subtotals
#'   Results <- data.table::groupingsets(
#'     x = Results,
#'     j = list(Predictions = sum(Predictions), Weekly_Sales = sum(Weekly_Sales)),
#'     by = c('Date', 'Store', 'Dept'),
#'     sets = list(c('Date', 'Store', 'Dept'), c('Store', 'Dept'), 'Store', 'Dept', 'Date'))
#'
#'   # Fill NAs with 'Total' for totals and subtotals
#'   for(cols in c('Store','Dept')) Results[, eval(cols) := data.table::fifelse(is.na(get(cols)), 'Total', get(cols))]
#'
#'   # Add error measures
#'   Results[, Weekly_MAE := abs(Weekly_Sales - Predictions)]
#'   Results[, Weekly_MAPE := Weekly_MAE / Weekly_Sales]
#'
#'   # Weekly results
#'   Weekly_MAPE <- Results[, list(Weekly_MAPE = mean(Weekly_MAPE)), by = list(Store,Dept)]
#'
#'   # Monthly results
#'   temp <- data.table::copy(Results)
#'   temp <- temp[, Date := lubridate::floor_date(Date, unit = 'months')]
#'   temp <- temp[, lapply(.SD, sum), by = c('Date','Store','Dept'), .SDcols = c('Predictions', 'Weekly_Sales')]
#'   temp[, Monthly_MAE := abs(Weekly_Sales - Predictions)]
#'   temp[, Monthly_MAPE := Monthly_MAE / Weekly_Sales]
#'   Monthly_MAPE <- temp[, list(Monthly_MAPE = mean(Monthly_MAPE)), by = list(Store,Dept)]
#'
#'   # Collect metrics for Total (feel free to switch to something else or no filter at all)
#'   Metrics <- data.table::data.table(
#'     RunNumber = Run,
#'     Total_Weekly_MAPE = Weekly_MAPE[Store == 'Total' & Dept == 'Total', Weekly_MAPE],
#'     Total_Monthly_MAPE = Monthly_MAPE[Store == 'Total' & Dept == 'Total', Monthly_MAPE],
#'     Tuning[Run],
#'     RunTime = EndTime - StartTime)
#'
#'   # Append to file (not overwrite)
#'   data.table::fwrite(Metrics, file = file.path(Path, 'Walmart_CARMA_Metrics.csv'), append = TRUE)
#'
#'   # Remove objects (clear space before new runs)
#'   rm(CatBoostResults, Results, temp, Weekly_MAE, Weekly_MAPE, Monthly_MAE, Monthly_MAPE)
#'
#'   # Garbage collection because of GPU
#'   gc()
#' }
#' }
#' @return See examples
#' @export
AutoCatBoostCARMA <- function(data,
                              TimeWeights = NULL,
                              NonNegativePred = FALSE,
                              RoundPreds = FALSE,
                              TrainOnFull = FALSE,
                              TargetColumnName = NULL,
                              DateColumnName = NULL,
                              HierarchGroups = NULL,
                              GroupVariables = NULL,
                              FC_Periods = 1,
                              TimeUnit = NULL,
                              TimeGroups = NULL,
                              SaveDataPath = NULL,
                              NumOfParDepPlots = 10L,
                              EncodingMethod = 'target_encoding',
                              TargetTransformation = FALSE,
                              Methods = c('Asinh', 'Log', 'LogPlus1', 'Sqrt'),
                              AnomalyDetection = NULL,
                              XREGS = NULL,
                              Lags = NULL,
                              MA_Periods = NULL,
                              SD_Periods = NULL,
                              Skew_Periods = NULL,
                              Kurt_Periods = NULL,
                              Quantile_Periods = NULL,
                              Quantiles_Selected = c('q5','q95'),
                              Difference = FALSE,
                              FourierTerms = 0L,
                              CalendarVariables = NULL,
                              HolidayVariable = NULL,
                              HolidayLookback = NULL,
                              HolidayLags = NULL,
                              HolidayMovingAverages = NULL,
                              TimeTrendVariable = FALSE,
                              ZeroPadSeries = 'maxmax',
                              DataTruncate = FALSE,
                              SplitRatios = c(0.85, 0.10, 0.05),
                              PartitionType = 'random',
                              TaskType = 'CPU',
                              NumGPU = 1,
                              DebugMode = FALSE,
                              Timer = TRUE,
                              EvalMetric = 'RMSE',
                              EvalMetricValue = 1.2,
                              LossFunction = 'RMSE',
                              LossFunctionValue = 1.2,
                              GridTune = FALSE,
                              PassInGrid = NULL,
                              ModelCount = 30,
                              MaxRunsWithoutNewWinner = 20,
                              MaxRunMinutes = 24L*60L,
                              Langevin = FALSE,
                              DiffusionTemperature = 10000,
                              NTrees = 500,
                              L2_Leaf_Reg = 4,
                              LearningRate = 0.50,
                              RandomStrength = 1,
                              BorderCount = 254,
                              Depth = 6,
                              RSM = 1,
                              BootStrapType = 'No',
                              GrowPolicy = 'SymmetricTree',
                              ModelSizeReg = 1.2,
                              FeatureBorderType = 'GreedyLogSum',
                              SamplingUnit = 'Group',
                              SubSample = 0.70,
                              ScoreFunction = 'Cosine',
                              MinDataInLeaf = 1,
                              ReturnShap = FALSE,
                              SaveModel = FALSE,
                              ArgsList = NULL,
                              ModelID = 'FC001',
                              TVT = NULL) {

  if(DebugMode) print(data)

  # Load catboost ----
  loadNamespace(package = 'catboost')

  # Prepare environment for using existing model
  # if(): length(ArgsList) > 0L
  # If I want to retrain + forecast, I supply ArgsList w/o model to
  #    update the args based on the model configuration but then
  #    train the model anyways
  if(length(ArgsList) > 0L) {
    if(length(ArgsList$Model) > 0L) {
      if(DebugMode) for(i in 1:10) print('ArgsList$Model > 0')
      skip_cols <- c('TrainOnFull','data','FC_Periods','SaveModel','ArgsList','ModelID')
      SaveModel <- FALSE
      TrainOnFull <- TRUE
    } else {
      skip_cols <- c('TrainOnFull','data','FC_Periods','ArgsList','ModelID')
    }
    default_args <- formals(fun = AutoQuant::AutoCatBoostCARMA)
    for(sc in skip_cols) default_args[[sc]] <- NULL
    nar <- names(ArgsList)
    for(arg in names(default_args)) if(length(arg) > 0L && arg %in% nar && length(ArgsList[[arg]]) > 0L) assign(x = arg, value = ArgsList[[arg]])
  }

  # Args checking ----
  if(DebugMode) print('# Purified args: see CARMA HELPER FUNCTIONS----')
  if(length(ModelID) == 0) ModelID <- 'FC001'
  Args <- CARMA_Define_Args(TimeUnit=TimeUnit, TimeGroups=TimeGroups, HierarchGroups=HierarchGroups, GroupVariables=GroupVariables, FC_Periods=FC_Periods, PartitionType=PartitionType, TrainOnFull=TrainOnFull, SplitRatios=SplitRatios, SD_Periods=SD_Periods, Skew_Periods=Skew_Periods, Kurt_Periods=Kurt_Periods, Quantile_Periods=Quantile_Periods, TaskType=TaskType, BootStrapType=BootStrapType, GrowPolicy=GrowPolicy, TimeWeights=TimeWeights, HolidayLookback=HolidayLookback, Difference=Difference, NonNegativePred=NonNegativePred)
  IndepentVariablesPass <- Args$IndepentVariablesPass
  NonNegativePred <- Args$NonNegativePred
  HolidayLookback <- Args$HolidayLookback
  HoldOutPeriods <- Args$HoldOutPeriods
  HierarchGroups <- Args$HierarchGroups
  GroupVariables <- Args$GroupVariables
  BootStrapType <- Args$BootStrapType
  TimeWeights <- Args$TimeWeights
  TimeGroups <- Args$TimeGroups
  FC_Periods <- Args$FC_Periods
  GrowPolicy <- Args$GrowPolicy
  Difference <- Args$Difference
  TimeGroup <- Args$TimeGroupPlaceHolder
  TimeUnit <- Args$TimeUnit
  TaskType <- Args$TaskType; rm(Args)

  # Grab all official parameters and their evaluated arguments
  if(length(ArgsList) == 0L) ArgsList <- c(as.list(environment()))

  if(DebugMode) print(names(ArgsList))

  # Convert data to data.table ----
  if(!data.table::is.data.table(data)) data.table::setDT(data)
  if(!is.null(XREGS) && !data.table::is.data.table(XREGS)) data.table::setDT(XREGS)

  date_class <- tolower(class(data[[DateColumnName]])[1L])
  if(!date_class %in% c("date","posixct")) {
    if(date_class %in% c("idate")) {
      data[, eval(DateColumnName) := as.Date(get(DateColumnName))]
    } else if(date_class %in% c("idatetime")) {
      data[, eval(DateColumnName) := as.POSIXct(get(DateColumnName))]
    } else if(date_class %in% c("character","factor") && TimeUnit %in% c('day', 'week', 'month', 'quarter', 'year')) {
      data[, eval(DateColumnName) := as.Date(get(DateColumnName))]
    } else {
      data[, eval(DateColumnName) := as.POSIXct(get(DateColumnName))]
    }
  }

  # Aggregate data to ensure it's on the proper aggregation level
  data <- data[, list(temp___ = mean(get(TargetColumnName), na.rm = TRUE)), by = c(DateColumnName, GroupVariables)]
  data.table::setnames(data, "temp___", TargetColumnName)

  # Variables for Program: Redefine HoldOutPerids ----
  if(!TrainOnFull) HoldOutPeriods <- round(SplitRatios[2L] * length(unique(data[[eval(DateColumnName)]])), 0L)

  if(DebugMode) print('Fill Start')
  if(DebugMode) print(data)

  # Feature Engineering: Add Zero Padding for missing dates ----
  if(DebugMode) print('Feature Engineering: Add Zero Padding for missing dates----')
  if(length(ZeroPadSeries) > 0L && ZeroPadSeries %in% c("dynamic:meow","dynamic:credibility","dynamic:target_encoding") && length(GroupVariables) > 0) {
    print("Rodeo::TimeSeriesFillRoll 1")
    print(data)
    data <- Rodeo::TimeSeriesFillRoll(
      data = data,
      RollVars = TargetColumnName,
      NonRollVars = NULL,
      RollDirection = "backward",
      DateColumnName = DateColumnName,
      GroupVariables = GroupVariables,
      TimeUnit = TimeUnit,
      SimpleImpute = TRUE)
  } else if(length(ZeroPadSeries) > 0L && length(GroupVariables) > 0L) {
    print("Rodeo::TimeSeriesFillRoll 2")
    print(data)
    print(TargetColumnName)
    print(DateColumnName)
    print(GroupVariables)
    print(TimeUnit)
    print(ZeroPadSeries)
    data <- Rodeo::TimeSeriesFill(data, TargetColumn=TargetColumnName, DateColumnName=eval(DateColumnName), GroupVariables=GroupVariables, TimeUnit=TimeUnit, FillType=ZeroPadSeries, MaxMissingPercent=0.95, SimpleImpute=TRUE)
    print("Rodeo::TimeSeriesFillRoll 2.1")
  } else if(data[, .N] != unique(data)[, .N]) {
    print("Rodeo::TimeSeriesFillRoll 3")
    print(data)
    data <- unique(data); ZeroPadSeries <- 'maxmax'
    data <- Rodeo::TimeSeriesFill(data, TargetColumn=TargetColumnName, DateColumnName=eval(DateColumnName), GroupVariables=GroupVariables, TimeUnit=TimeUnit, FillType=ZeroPadSeries, MaxMissingPercent=0.95, SimpleImpute=TRUE)
  } else if(length(GroupVariables) > 0L) {
    print("Rodeo::TimeSeriesFillRoll 4")
    print(data)
    temp <- Rodeo::TimeSeriesFill(data, TargetColumn=TargetColumnName, DateColumnName=eval(DateColumnName), GroupVariables=GroupVariables, TimeUnit=TimeUnit, FillType='maxmax', MaxMissingPercent=0.95, SimpleImpute=TRUE)
    if(temp[,.N] != data[,.N]) stop('There are missing dates in your series. You can utilize the ZeroPadSeries argument to handle this or manage it before running the function')
  }

  if(DebugMode) print('Fill Done')
  if(DebugMode) print(data)

  # Modify FC_Periods ----
  if(DebugMode) print('# Check lengths of XREGS')
  Output <- CarmaFCHorizon(data.=data, XREGS.=XREGS, TrainOnFull.=TrainOnFull, Difference.= Difference, FC_Periods.=FC_Periods, HoldOutPeriods.=HoldOutPeriods, DateColumnName.=DateColumnName)
  FC_Periods <- Output$FC_Periods
  HoldOutPeriods <- Output$HoldOutPeriods; rm(Output)

  # Merge data and XREG for Training ----
  if(DebugMode) print('xregs management')
  if(!is.null(XREGS)) {
    Output <- CarmaMergeXREGS(data.=data, XREGS.=XREGS, TargetColumnName.=TargetColumnName, GroupVariables.=GroupVariables, DateColumnName.=DateColumnName)
    data <- Output$data; Output$data <- NULL
    XREGS <- Output$XREGS; rm(Output)
  }

  # Set Keys for data.table usage ----
  if(DebugMode) print('# Set Keys for data.table usage ----')
  if(length(GroupVariables) > 0L) {
    data.table::setkeyv(x = data, cols = c(eval(GroupVariables), eval(DateColumnName)))
    if(!is.null(XREGS)) data.table::setkeyv(x = XREGS, cols = c('GroupVar', eval(DateColumnName)))
  } else {
    data.table::setkeyv(x = data, cols = c(eval(DateColumnName)))
    if(!is.null(XREGS)) data.table::setkeyv(x = XREGS, cols = c(eval(DateColumnName)))
  }

  # Data Wrangling: Remove Unnecessary Columns ----
  if(DebugMode) print('Data Wrangling: Remove Unnecessary Columns ----')
  data <- CarmaSubsetColumns(data.=data, XREGS.=XREGS, GroupVariables.=GroupVariables, DateColumnName.=DateColumnName, TargetColumnName.=TargetColumnName)

  # GroupVar creation: feature engineering: Concat Categorical Columns - easier to deal with this way ----
  if(DebugMode) print('GroupVar creation: feature engineering: Concat Categorical Columns - easier to deal with this way ----')
  if(length(GroupVariables) > 0L) {
    data[, GroupVar := do.call(paste, c(.SD, sep = ' ')), .SDcols = GroupVariables]
    MergeGroupVariablesBack <- data[, .N, by = c('GroupVar',GroupVariables)]
    if(length(GroupVariables) > 1L) data[, eval(GroupVariables) := NULL] else if(GroupVariables != 'GroupVar') data[, eval(GroupVariables) := NULL]
  } else {
    MergeGroupVariablesBack <- NULL
  }

  # Variables for Program: Store unique values of GroupVar in GroupVarVector ----
  if(DebugMode) print('Variables for Program: Store unique values of GroupVar in GroupVarVector ----')
  if(length(GroupVariables) > 0L) {
    GroupVarVector <- data.table::as.data.table(x = unique(as.character(data[['GroupVar']])))
    data.table::setnames(GroupVarVector, 'V1', 'GroupVar')
  }

  # Data Wrangling: Standardize column ordering ----
  if(DebugMode) print('Data Wrangling: Standardize column ordering ----')
  if(length(GroupVariables) > 0L) data.table::setcolorder(data, c('GroupVar', eval(DateColumnName), eval(TargetColumnName))) else data.table::setcolorder(data, c(eval(DateColumnName), eval(TargetColumnName)))

  # Data Wrangling: Convert DateColumnName to Date or POSIXct ----
  if(DebugMode) print('Data Wrangling: Convert DateColumnName to Date or POSIXct ----')
  Output <- CarmaDateStandardize(data.=data, XREGS.=XREGS, DateColumnName.=DateColumnName, TimeUnit.=TimeUnit)
  data <- Output$data; Output$data <- NULL
  XREGS <- Output$XREGS; rm(Output)

  # Data Wrangling: Ensure TargetColumnName is Numeric ----
  if(DebugMode) print('Data Wrangling: Ensure TargetColumnName is Numeric ----')
  if(!is.numeric(data[[eval(TargetColumnName)]])) data[, eval(TargetColumnName) := as.numeric(get(TargetColumnName))]

  # Variables for Program: Store number of data partitions in NumSets ----
  if(DebugMode) print('Variables for Program: Store number of data partitions in NumSets ----')
  NumSets <- length(SplitRatios)

  # Variables for Program: Store Maximum Value of TargetColumnName in val ----
  if(DebugMode) print('Variables for Program: Store Maximum Value of TargetColumnName in val ----')
  if(!is.null(Lags)) {
    if(is.list(Lags) && is.list(MA_Periods)) val <- max(unlist(Lags), unlist(MA_Periods)) else val <- max(Lags, MA_Periods)
  }

  # Data Wrangling: Sort data ----
  if(DebugMode) print('Data Wrangling: Sort data by GroupVar then DateColumnName ----')
  if(DebugMode) print(data)
  if(length(GroupVariables) > 0L) data <- data[order(GroupVar, get(DateColumnName))] else data <- data[order(get(DateColumnName))]

  # Feature Engineering: Create Fourier Features ----
  if(DebugMode) print('Feature Engineering: Fourier Features ----')
  if(DebugMode) print(data)
  Output <- CarmaFourier(data.=data, XREGS.=XREGS, FourierTerms.=FourierTerms, TimeUnit.=TimeUnit, TargetColumnName.=TargetColumnName, GroupVariables.=GroupVariables, DateColumnName.=DateColumnName, HierarchGroups.=HierarchGroups)
  FourierTerms <- Output$FourierTerms; Output$FourierTerms <- NULL
  FourierFC <- Output$FourierFC; Output$FourierFC <- NULL
  data <- Output$data; rm(Output)

  # Feature Engineering: Create Calendar Variables ----
  if(DebugMode) print('Feature Engineering: Add Create Calendar Variables ----')
  if(DebugMode) print(data)
  if(!is.null(CalendarVariables)) {
    data <- Rodeo::CreateCalendarVariables(data=data, DateCols=eval(DateColumnName), AsFactor=FALSE, TimeUnits=CalendarVariables)
  }

  # Feature Engineering: Create Holiday Variables ----
  if(DebugMode) print('Feature Engineering: Add Create Holiday Variables ----')
  if(!is.null(HolidayVariable)) {
    data <- Rodeo::CreateHolidayVariables(data, DateCols = eval(DateColumnName), LookbackDays = if(!is.null(HolidayLookback)) HolidayLookback else LB(TimeUnit), HolidayGroups = HolidayVariable, Holidays = NULL, Debug = DebugMode)
    if(!(tolower(TimeUnit) %chin% c('1min','5min','10min','15min','30min','hour'))) {
      data[, eval(DateColumnName) := lubridate::as_date(get(DateColumnName))]
    } else {
      data[, eval(DateColumnName) := as.POSIXct(get(DateColumnName))]
    }
  }

  # Anomaly detection by Group and Calendar Vars ----
  if(DebugMode) print('Anomaly detection by Group and Calendar Vars ----')
  if(DebugMode) print(AnomalyDetection)
  if(!is.null(AnomalyDetection)) {
    data <- GenTSAnomVars(
      data = data, ValueCol = eval(TargetColumnName),
      GroupVars = if(!is.null(CalendarVariables) && length(GroupVariables) > 0L) c('GroupVar', paste0(DateColumnName, '_', CalendarVariables[1])) else if(length(GroupVariables) > 0L) 'GroupVar' else NULL,
      DateVar = eval(DateColumnName), KeepAllCols = TRUE, IsDataScaled = FALSE,
      HighThreshold = AnomalyDetection$tstat_high,
      LowThreshold = AnomalyDetection$tstat_low)
    data[, paste0(eval(TargetColumnName), '_zScaled') := NULL]
    data[, ':=' (RowNumAsc = NULL, CumAnomHigh = NULL, CumAnomLow = NULL, AnomHighRate = NULL, AnomLowRate = NULL)]
  }

  # Feature Engineering: Target Transformation ----
  if(DebugMode) print('Feature Engineering: Add Target Transformation ----')
  if(TargetTransformation) {
    TransformResults <- Rodeo::AutoTransformationCreate(data, ColumnNames=TargetColumnName, Methods=Methods, Path=NULL, TransID='Trans', SaveOutput=FALSE)
    data <- TransformResults$Data; TransformResults$Data <- NULL
    TransformObject <- TransformResults$FinalResults; rm(TransformResults)
  } else if(TargetTransformation) {
    if(length(GroupVariables) > 0L) x <- 'GroupVar' else x <- NULL
    TransformResults <- AutoQuant::Standardize(data, ColNames = TargetColumnName, GroupVars = x, Center = TRUE, Scale = TRUE, ScoreTable = FALSE)
    data <- TransformResults$data
    TransformObject <- TransformResults$ScoreTable
    rm(TransformResults)
  } else {
    TransformObject <- NULL
  }

  # Copy data for non grouping + difference ----
  if(DebugMode) print('Copy data for non grouping + difference ----')
  if(is.null(GroupVariables) && Difference) antidiff <- data.table::copy(data[, .SD, .SDcols = c(eval(TargetColumnName),eval(DateColumnName))])

  # Variables for CARMA function IDcols ----
  if(DebugMode) print('Variables for CARMA function:IDcols----')
  IDcols <- names(data)[which(names(data) %chin% DateColumnName)]
  if(Difference && length(GroupVariables) > 0L) IDcols <- c(IDcols, names(data)[which(names(data) == TargetColumnName)], names(data)[which(names(data) == 'TargetDiffMidStep')])

  # Feature Engineering: Differencing ----
  if(DebugMode) print('Feature Engineering: Add Difference Data ----')
  Output <- CarmaDifferencing(GroupVariables.=GroupVariables, Difference.=Difference, data.=data, TargetColumnName.=TargetColumnName, FC_Periods.=FC_Periods)
  data <- Output$data; Output$data <- NULL
  dataStart <- Output$dataStart; Output$dataStart <- NULL
  FC_Periods <- Output$FC_Periods; Output$FC_Periods <- NULL
  DiffTrainOutput <- Output$DiffTrainOutput
  Train <- Output$Train; rm(Output)
  MaxDate <- data[, max(get(DateColumnName))]
  if(Difference && length(GroupVariables) > 0L) IDcols <- c(IDcols, 'TargetDiffMidStep')

  # Feature Engineering: Lags and Rolling Stats ----
  if(DebugMode) print('Feature Engineering: Lags and Rolling Stats ----')
  Output <- CarmaTimeSeriesFeatures(data.=data, TargetColumnName.=TargetColumnName, DateColumnName.=DateColumnName, GroupVariables.=GroupVariables, HierarchGroups.=HierarchGroups, Difference.=Difference, TimeGroups.=TimeGroups, TimeUnit.=TimeUnit, Lags.=Lags, MA_Periods.=MA_Periods, SD_Periods.=SD_Periods, Skew_Periods.=Skew_Periods, Kurt_Periods.=Kurt_Periods, Quantile_Periods.=Quantile_Periods, Quantiles_Selected.=Quantiles_Selected, HolidayVariable.=HolidayVariable, HolidayLags.=HolidayLags, HolidayMovingAverages.=HolidayMovingAverages, DebugMode.=DebugMode)
  IndependentSupplyValue <- Output$IndependentSupplyValue; Output$IndependentSupplyValue <- NULL
  HierarchSupplyValue <- Output$HierarchSupplyValue; Output$HierarchSupplyValue <- NULL
  GroupVarVector <- Output$GroupVarVector; Output$GroupVarVector <- NULL
  Categoricals <- Output$Categoricals; Output$Categoricals <- NULL
  data <- Output$data; rm(Output)

  # Create GroupVar ----
  if(length(GroupVariables) > 0L && !'GroupVar' %chin% names(data)) data[, GroupVar := do.call(paste, c(.SD, sep = ' ')), .SDcols = c(GroupVariables)]

  # Data Wrangling: Rodeo::ModelDataPrep() to prepare data ----
  if(DebugMode) print('Data Wrangling: Rodeo::ModelDataPrep() to prepare data ----')
  data <- Rodeo::ModelDataPrep(data=data, Impute=TRUE, IntToNumeric=TRUE, DateToChar=FALSE, FactorToChar=FALSE, CharToFactor=TRUE, LogicalToBinary=FALSE, RemoveDates=FALSE, MissFactor='0', MissNum=-1, IgnoreCols=NULL)

  # Data Wrangling: Remove dates with imputed data from the DT_GDL_Feature_Engineering() features ----
  if(DebugMode) print('Data Wrangling: Remove dates with imputed data from the DT_GDL_Feature_Engineering() features ----')
  if(DataTruncate && !is.null(Lags)) data <- CarmaTruncateData(data.=data, DateColumnName.=DateColumnName, TimeUnit.=TimeUnit)

  # Feature Engineering: Add TimeTrend Variable ----
  if(DebugMode) print('Feature Engineering: Add TimeTrend Variable----')
  if(TimeTrendVariable) {
    if(length(GroupVariables) > 0L) data[, TimeTrend := seq_len(.N), by = 'GroupVar'] else data[, TimeTrend := seq_len(.N)]
  }

  # Store Date Info ----
  FutureDateData <- sort(unique(data[, get(DateColumnName)]))

  # Create TimeWeights ----
  if(DebugMode) print('Create TimeWeights ----')
  train <- CarmaTimeWeights(train.=data, TimeWeights.=TimeWeights, GroupVariables.=GroupVariables, DateColumnName.=DateColumnName)

  # Save data to file before Partitioning ----
  if(!is.null(SaveDataPath)) data.table::fwrite(data, file.path(SaveDataPath, 'ModelData.csv'))

  # Data Wrangling: Partition data ----
  if(DebugMode) print('Data Wrangling: Partition data with AutoDataPartition()----')
  if(tolower(PartitionType) == 'timeseries' && is.null(GroupVariables)) PartitionType <- 'time'
  Output <- CarmaPartition(data.=data, SplitRatios.=SplitRatios, TrainOnFull.=TrainOnFull, NumSets.=NumSets, PartitionType.=PartitionType, GroupVariables.=GroupVariables, DateColumnName.=DateColumnName, TVT.=TVT)
  train <- Output$train
  valid <- Output$valid
  data <- Output$data
  test <- Output$test
  ArgsList[['TVT']] <- Output$TVT
  rm(Output)

  # Data Wrangling: copy data or train for later in function since AutoRegression will modify data and train ----
  if(DebugMode) print('Data Wrangling: copy data or train for later in function since AutoRegression will modify data and train ----')
  if(length(GroupVariables) > 0L) data.table::setorderv(x = data, cols = c('GroupVar',eval(DateColumnName)), order = c(1,1)) else data.table::setorderv(x = data, cols = c(eval(DateColumnName)), order = c(1))
  Step1SCore <- data.table::copy(data)

  # Define Target and Features ----
  if(DebugMode) print('Define ML args ----')
  Output <- CarmaFeatures(data.=data, train.=train, XREGS.=XREGS, Difference.=Difference, TargetColumnName.=TargetColumnName, DateColumnName.=DateColumnName, GroupVariables.=GroupVariables)
  ModelFeatures <- Output$ModelFeatures
  TargetVariable <- Output$TargetVariable; rm(Output)

  # Switch up TrainOnFull if SplitRatios is not null ----
  if(length(SplitRatios) > 0 || !TrainOnFull) TOF <- FALSE else TOF <- TRUE

  # Run AutoCatBoostRegression and return list of ml objects ----
  if(!(length(ArgsList) > 0L && length(ArgsList[['Model']]) > 0L)) {
    TestModel <- AutoCatBoostRegression(

      # GPU or CPU and the number of available GPUs
      task_type = TaskType,
      NumGPUs = NumGPU,
      OutputSelection = if(TrainOnFull) NULL else c('Importances', 'EvalMetrics'),
      ReturnShap = ReturnShap,

      # Metadata arguments
      ModelID = 'CatBoost',
      model_path = getwd(),
      metadata_path = NULL,
      SaveModelObjects = FALSE,
      ReturnModelObjects = TRUE,
      SaveInfoToPDF = FALSE,

      # Data arguments
      data = train,
      TrainOnFull = TOF,
      ValidationData = valid,
      TestData = test,
      TargetColumnName = TargetVariable,
      FeatureColNames = ModelFeatures,
      PrimaryDateColumn = eval(DateColumnName),
      WeightsColumnName = if('Weights' %in% names(train)) 'Weights' else NULL,
      IDcols = IDcols,
      EncodeMethod = EncodingMethod,
      TransformNumericColumns = NULL,
      Methods = NULL,

      # Model evaluation
      eval_metric = EvalMetric,
      eval_metric_value = EvalMetricValue,
      loss_function = LossFunction,
      loss_function_value = LossFunctionValue,
      MetricPeriods = 10L,
      NumOfParDepPlots = NumOfParDepPlots,

      # Grid tuning arguments
      PassInGrid = PassInGrid,
      GridTune = GridTune,
      MaxModelsInGrid = ModelCount,
      MaxRunsWithoutNewWinner = MaxRunsWithoutNewWinner,
      MaxRunMinutes = 60*60,
      BaselineComparison = 'default',

      # ML args
      langevin = Langevin,
      diffusion_temperature = DiffusionTemperature,
      Trees = NTrees,
      Depth = Depth,
      LearningRate = LearningRate,
      L2_Leaf_Reg = L2_Leaf_Reg,
      RandomStrength = RandomStrength,
      BorderCount = BorderCount,
      RSM = if(TaskType == 'GPU') NULL else RSM,
      BootStrapType = BootStrapType,
      GrowPolicy = GrowPolicy,

      # New ML args
      model_size_reg = ModelSizeReg,
      feature_border_type = FeatureBorderType,
      sampling_unit = SamplingUnit,
      subsample = SubSample,
      score_function = ScoreFunction,
      min_data_in_leaf = MinDataInLeaf,
      DebugMode = DebugMode)

    # Remove Weights
    ModelFeatures <- ModelFeatures[!ModelFeatures %in% 'Weights']
    ArgsList[['FeatureColNames']] <- ModelFeatures

    # Return model object for when TrainOnFull is FALSE
    # SaveModel == TRUE && TrainOnFull == TRUE --> return after FC
    # TrainOnFull == FALSE --> return early
    if(SaveModel) {

      if(DebugMode) cat(rep('SaveModel == TRUE \n'))

      # Add new items
      ArgsList[['Model']] <- TestModel$Model
      ArgsList[['FactorLevelsList']] <- TestModel$FactorLevelsList

      # Save model
      Model <- ArgsList[['Model']]
      Path <- file.path(SaveDataPath, paste0(ModelID,'.rds'))
      if(length(SaveDataPath) > 0L && dir.exists(SaveDataPath)) saveRDS(object = ArgsList, file = Path)
      if(!TrainOnFull) return(list(ModelInformation = TestModel, ArgsList = ArgsList))
      TestModel$Model <- NULL

    } else if(!TrainOnFull) {

      if(DebugMode) cat(rep('SaveModel == FALSE \n'))
      return(list(TestModel = TestModel, ArgsList = ArgsList))

    } else {
      if(DebugMode) print('Store Model in variable ----')
      Model <- TestModel$Model
    }

  } else {
    for(i in 1L:10L) print('SKIPPING ML TRAINING ')
    Model <- ArgsList[['Model']]
    TestModel <- list()
    TestModel$FactorLevelsList <- ArgsList$FactorLevelsList
  }

  # Variable for interation counts: max number of rows in Step1SCore data.table across all group ----
  if(DebugMode) print('Variable for interation counts: max number of rows in Step1SCore data.table across all group ----')
  N <- CarmaRecordCount(GroupVariables.=GroupVariables,Difference.=Difference, Step1SCore.=Step1SCore)

  # ARMA PROCESS FORECASTING ----
  if(DebugMode) print('ARMA PROCESS FORECASTING ----')
  if(length(Lags) > 0L && all(Lags != 0) || (length(MA_Periods) > 0L && all(MA_Periods != 0))) {

    if(DebugMode) print("ARIMA FORECAST: length(Lags) > 0L")

    # i = 1; # i = 2; # i = 3
    acc <- 0
    for(i in seq_len(FC_Periods+1L)) {

      if(Timer && i != 1) starttime <- Sys.time()

      # Score model ----
      if(DebugMode) print('  - - Score model')
      if(i == 1L) UpdateData <- NULL
      Output <- CarmaScore(Step1SCore.=Step1SCore, Debug = DebugMode, Type = 'catboost', i.=i, N.=N, GroupVariables.=GroupVariables, ModelFeatures.=ModelFeatures, HierarchGroups.=HierarchGroups, DateColumnName.=DateColumnName, Difference.=Difference, TargetColumnName.=TargetColumnName, Model.=Model, FutureDateData.=FutureDateData, NonNegativePred.=NonNegativePred, RoundPreds.=RoundPreds, UpdateData.=UpdateData, FactorList.=TestModel$FactorLevelsList, EncodingMethod.=EncodingMethod, dt = data)
      if(DebugMode) print("CarmaScoring Done")
      UpdateData <- Output$UpdateData; Output$UpdateData <- NULL

      # If DateColumnName shows up twice in UpdateData then remove the second one
      if(sum(names(UpdateData) %in% DateColumnName) == 2L) UpdateData[, eval(which(names(UpdateData) %in% DateColumnName)[2]) := NULL]
      Preds <- Output$Preds; Output$Preds <- NULL
      N <- Output$N; rm(Output)

      # Update data for next prediction ----
      if(DebugMode) print('Update data for next prediction ----')
      if(i != FC_Periods+1L) {

        # Create single future record ----
        if(DebugMode) print('  - - Create single future record')
        CalendarFeatures <- NextTimePeriod(UpdateData.=UpdateData, TimeUnit.=TimeUnit, DateColumnName.=DateColumnName)

        # Update flat feature engineering ----
        if(DebugMode) print('  - - Update feature engineering')
        UpdateData <- UpdateFeatures(RollingVars. = TRUE, UpdateData.=UpdateData, GroupVariables.=GroupVariables, CalendarFeatures.=CalendarFeatures, CalendarVariables.=CalendarVariables, GroupVarVector.=GroupVarVector, DateColumnName.=DateColumnName, XREGS.=XREGS, FourierTerms.=FourierTerms, FourierFC.=FourierFC, TimeGroups.=TimeGroups, TimeTrendVariable.=TimeTrendVariable, N.=N, TargetColumnName.=TargetColumnName, HolidayVariable.=HolidayVariable, HolidayLookback.=HolidayLookback, TimeUnit.=TimeUnit, AnomalyDetection.=AnomalyDetection, i.=i, Debug = DebugMode)

        # Update Lags and MA's ----
        if(DebugMode) print('  - - Update Lags and MAs')
        UpdateData <- CarmaRollingStatsUpdate(ModelType='catboost', DebugMode.=DebugMode, UpdateData.=UpdateData, GroupVariables.=GroupVariables, Difference.=Difference, CalendarVariables.=CalendarVariables, HolidayVariable.=HolidayVariable, IndepVarPassTRUE.=IndepentVariablesPass, data.=data, CalendarFeatures.=CalendarFeatures, XREGS.=XREGS, HierarchGroups.=HierarchGroups, GroupVarVector.=GroupVarVector, TargetColumnName.=TargetColumnName, DateColumnName.=DateColumnName, Preds.=Preds, HierarchSupplyValue.=HierarchSupplyValue, IndependentSupplyValue.=IndependentSupplyValue, TimeUnit.=TimeUnit, TimeGroups.=TimeGroups, Lags.=Lags, MA_Periods.=MA_Periods, SD_Periods.=SD_Periods, Skew_Periods.=Skew_Periods, Kurt_Periods.=Kurt_Periods, Quantile_Periods.=Quantile_Periods, Quantiles_Selected.=Quantiles_Selected, HolidayLags.=HolidayLags, HolidayMovingAverages.=HolidayMovingAverages)

        # print time to complete ----
        if(Timer && i != 1) {endtime <- Sys.time(); rt <- round(endtime - starttime, 4); acc <- acc + rt; print(paste('Forecast step: ', i-1, " :: step runtime --> ", rt, " :: total run time --> ", acc))}
      }
    }

  } else {

    if(DebugMode) print("!ARIMA FORECAST: length(Lags) == 0L")

    # Prepare data
    if(DebugMode) print('# Prepare data')
    if(length(GroupVariables) > 0L) {
      UpdateData <- FutureTimePeriods(UpdateData. = Step1SCore, TimeUnit. = TimeUnit, DateColumnName. = DateColumnName, FC_Periods = FC_Periods, GroupVariables. = GroupVariables, SkipPeriods = NULL)
    } else {
      UpdateData <- FutureTimePeriods(UpdateData. = Step1SCore, TimeUnit. = TimeUnit, DateColumnName. = DateColumnName, FC_Periods = FC_Periods, GroupVariables. = NULL, SkipPeriods = NULL)
    }

    # UpdateFeatures and Score Future
    UpdateData <- UpdateFeatures(RollingVars. = FALSE, UpdateData.=Step1SCore, GroupVariables.=GroupVariables, CalendarFeatures.=UpdateData, CalendarVariables.=CalendarVariables, GroupVarVector.=GroupVarVector, DateColumnName.=DateColumnName, XREGS.=XREGS, FourierTerms.=FourierTerms, FourierFC.=FourierFC, TimeGroups.=TimeGroups, TimeTrendVariable.=TimeTrendVariable, N.=N, TargetColumnName.=TargetColumnName, HolidayVariable.=HolidayVariable, HolidayLookback.=HolidayLookback, TimeUnit.=TimeUnit, AnomalyDetection.=AnomalyDetection, i.=1, Debug = DebugMode)
    UpdateData <- CarmaScore(Type = 'catboost', i. = 0L, N.=N, GroupVariables.=GroupVariables, ModelFeatures.=ModelFeatures, HierarchGroups.=HierarchGroups, DateColumnName.=DateColumnName, Difference.=Difference, TargetColumnName.=TargetColumnName, Step1SCore.=Step1SCore, Model.=Model, FutureDateData.=FutureDateData, NonNegativePred.=NonNegativePred, RoundPreds.=RoundPreds, UpdateData.=UpdateData, FactorList.=TestModel$FactorLevelsList, EncodingMethod.=EncodingMethod, dt = data)
    if(DebugMode) print(UpdateData)
  }

  # Memory support ----
  gc()

  # Return data prep ----
  if(DebugMode) print('Return data prep ----')
  Output <- CarmaReturnDataPrep(MaxDate. = MaxDate, UpdateData.=UpdateData, FutureDateData.=FutureDateData, dataStart.=dataStart, DateColumnName.=DateColumnName, TargetColumnName.=TargetColumnName, GroupVariables.=GroupVariables, Difference.=Difference, TargetTransformation.=TargetTransformation, TransformObject.=TransformObject, NonNegativePred.=NonNegativePred, DiffTrainOutput.=DiffTrainOutput, MergeGroupVariablesBack.=MergeGroupVariablesBack, Debug = DebugMode)
  UpdateData <- Output$UpdateData; Output$UpdateData <- NULL
  TransformObject <- Output$TransformObject; rm(Output)

  # Save model
  if(SaveModel) {
    ArgsList[['Model']] <- Model
    if(length(SaveDataPath) > 0L) Path <- file.path(SaveDataPath, paste0(ModelID,'.rds')) else Path <- NULL
    if(length(Path) > 0L && dir.exists(SaveDataPath)) {
      saveRDS(object = ArgsList, file = Path)
    }
  }

  # Return ----
  return(list(
    Forecast = UpdateData,
    ModelInformation = TestModel,
    TransformationDetail = if(exists('TransformObject') && !is.null(TransformObject)) TransformObject else NULL,
    ArgsList = ArgsList))
}
AdrianAntico/ModelingTools documentation built on Feb. 1, 2024, 7:33 a.m.