# res = FcGradientBoosting(DataVec, SplitAt, Time, ForecastHorizon, Frequency = 'days', FUN = sum, PlotIt = FALSE)
#
# DESCRIPTION
# Gradient Boosting as published by [Chen/Guestrin, 2016] is a machine learning technique for regression and classification problems,
# which produces a prediction model in the form of the ensemble of weak prediction models, typically decision trees.
# The claim is that an ensemble of weak predictors, when combined, should give a strong model with superior accuracy.
#
# INPUT
# DataVec [1:n] numerical vector of time series data
# SplitAt Index of row where the DataVec is divided into test and train data. If not given n is used
# Time [1:n] character vector of Time in the length of data
# Frequency Aggregation level of the data. This function accepts "days" (no aggregation), "weeks" or "months";
# any other value stops with an error. The value is also passed to ConvertNumerical2TSobject. Default is "days"
# ForecastHorizon Scalar defining the timesteps to forecast ahead
# OPTIONAL
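# FUN Aggregation function handed to the TSAT aggregation helpers when Frequency is "weeks" or "months". Default is sum
# PlotIt FALSE (default), do nothing. TRUE: plots the forecast versus the validation set.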
#
# OUTPUT
# Forecast [1:ForecastHorizon] Forecast generated by the model
# Model Model object of class xgbar
# ForecastStats Forecast object returned by forecast::forecast for the fitted model
# TestData [1:(n-SplitAt), 1:2] Data frame of test data and test time
# TrainData [1:SplitAt, 1:2] Data frame of training data and training time
#
# DETAILS
# "Gradient Boosting algorithm is a machine learning technique used for building predictive tree-based models. Boosting is an ensemble
# technique in which new models are added to correct the errors made by existing models. Models are added sequentially until no further
# improvements can be made. The ensemble technique uses the tree ensemble model which is a set of classification and regression trees
# (CART)." [see url]
# Do not confuse with bagging: a simple ensembling technique in which we build many independent predictors/models/learners and combine
# them using some model averaging technique (e.g. weighted average, majority vote or normal average).
# It should be noted that in order to forecast more than one step, the seasonality extracted by either an ARMA model (\pkg{gbm}) or a
# Fourier model (\pkg{forecastxgb}) has to be used.
#
# Author: MCT
FcGradientBoosting <- function(DataVec, SplitAt, Time, ForecastHorizon,
                               Frequency = 'days', FUN = sum, PlotIt = FALSE) {
  # Fits a forecastxgb::xgbar model on the first SplitAt observations
  # (optionally aggregated to weeks or months with FUN) and forecasts
  # ForecastHorizon steps ahead.
  #
  # Returns a list with
  #   Forecast      point forecasts (fc$mean)
  #   Model         the fitted xgbar model
  #   ForecastStats the full object returned by forecast::forecast
  #   TestData      data frame of hold-out time stamps and values
  #   TrainData     data frame of training time stamps and values
  #   Accuracy      result of forecast::accuracy on the part of the forecast
  #                 that overlaps the hold-out data, NULL if there is none
  # If forecastxgb or forecast is reported missing by packageMissingError,
  # a placeholder list (Model, FitStats, Forecast, ForecastTrain, Info) is
  # returned instead.
  #
  # devtools::install_github("ellisp/forecastxgb-r-package/pkg")

  # ---- defaults for omitted arguments ----
  if (missing(SplitAt)) {
    warning('Input for SplitAt was not given. Setting SplitAt to length(DataVec)')
    SplitAt <- length(DataVec)
  }
  if (missing(ForecastHorizon)) {
    warning('Input for ForecastHorizon was not given. Setting ForecastHorizon to 1')
    ForecastHorizon <- 1
  }
  if (missing(Time)) {
    Time <- seq_along(DataVec)
    warning('No input for Time was given, will use 1:length(DataVec) for Time')
  }

  # ---- input validation (delegated to the shared helper) ----
  inputs <- checkInputForecasting(DataVec, Time, SplitAt, ForecastHorizon, PlotIt)
  DataVec <- inputs$DataVec
  Time <- inputs$Time
  SplitAt <- inputs$SplitAt
  ForecastHorizon <- inputs$ForecastHorizon
  PlotIt <- inputs$PlotIt
  if (!is.function(FUN)) {
    stop('FUN has to be a function')
  }

  # ---- optional dependencies: same placeholder result for either package ----
  n <- length(DataVec)
  for (pkg in c("forecastxgb", "forecast")) {
    errorMessage <- packageMissingError(pkg, n, SplitAt)
    if (!isFALSE(errorMessage)) {
      return(
        list(
          Model = NULL,
          FitStats = NULL,
          Forecast = rep(NaN, n - SplitAt),
          ForecastTrain = rep(NaN, SplitAt),
          Info = errorMessage
        )
      )
    }
  }

  # ---- aggregation: every branch yields a data frame with Time and Data ----
  DT <- switch(Frequency,
    days = data.frame(Time = Time, Data = DataVec),
    months = TSAT::aggregateDays2Months(Time, DataVec, FUN = FUN,
                                        Header = c('Time', 'Data')),
    weeks = TSAT::aggregateDays2Weeks(Time, DataVec, FUN = FUN,
                                      Header = c('Time', 'Data')),
    stop('please chose correct frequency')
  )

  # ---- train/test split ----
  N <- nrow(DT)
  if (SplitAt > N) {
    # Aggregation can leave fewer rows than SplitAt. tail() with a negative
    # count would then return training rows as "test" data, so clamp instead.
    warning('SplitAt exceeds the number of available rows. Setting SplitAt to the number of rows')
    SplitAt <- N
  }
  n_test <- N - SplitAt
  Splitted <- list(
    TrainingSet = head(DT$Data, SplitAt),
    TrainingTime = head(DT$Time, SplitAt),
    TestSet = tail(DT$Data, n_test),
    TestTime = tail(DT$Time, n_test)
  )

  # ---- model fit and forecast ----
  TS <- TSAT::ConvertNumerical2TSobject(Time = Splitted$TrainingTime,
                                        NumericVectorOrMatrix = Splitted$TrainingSet,
                                        Frequency = Frequency)
  model <- forecastxgb::xgbar(TS)
  fc <- forecast::forecast(model, h = ForecastHorizon)
  fc_mean <- as.numeric(fc$mean)

  # Number of forecast steps that have a matching hold-out observation, and
  # number of steps that reach beyond the observed data.
  n_overlap <- min(ForecastHorizon, n_test)
  n_extra <- ForecastHorizon - n_overlap

  if (PlotIt) {
    # Show the hold-out data if there is any, otherwise the training data.
    if (n_test > 0) {
      x <- Splitted$TestTime
      y <- Splitted$TestSet
    } else {
      x <- Splitted$TrainingTime
      y <- Splitted$TrainingSet
    }

    # Time stamps of the forecast: hold-out times first, then evenly spaced
    # future times. The future sequence starts one step AFTER the last
    # observed time stamp (the first element of seq() is that stamp itself).
    fc_time <- Splitted$TestTime[seq_len(n_overlap)]
    if (n_extra > 0) {
      time_interval <- if (length(x) > 1) mean(diff(x)) else 1
      future_time <- seq(from = x[length(x)], by = time_interval,
                         length.out = n_extra + 1)[-1]
      fc_time <- if (n_overlap > 0) c(fc_time, future_time) else future_time
    }

    # Widen the axes so forecasts outside the observed range stay visible.
    plot(x, y, type = 'l', col = 'black',
         xlim = range(c(x, fc_time), na.rm = TRUE),
         ylim = range(c(y, fc_mean), na.rm = TRUE))
    lines(fc_time, fc_mean, col = 'red')
  }

  # ---- accuracy on the overlap of forecast and hold-out data ----
  acc <- NULL
  if (n_overlap > 0) {
    idx <- seq_len(n_overlap)
    acc <- forecast::accuracy(fc_mean[idx], Splitted$TestSet[idx])
  }

  # Accuracy is appended last so positional access to the earlier elements
  # keeps working for existing callers.
  return(list(
    Forecast = fc$mean,
    Model = model,
    ForecastStats = fc,
    TestData = data.frame(Time = Splitted$TestTime, TestSet = Splitted$TestSet),
    TrainData = data.frame(Time = Splitted$TrainingTime, TrainingSet = Splitted$TrainingSet),
    Accuracy = acc
  ))
}
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.