Nothing
#' Extreme learning machines for time series forecasting
#'
#' This function fits ELM neural networks for time series forecasting.
#'
#' @param y Input time series. Can be ts or msts object.
#' @param m Frequency of the time series. By default it is picked up from y.
#' @param hd Number of hidden nodes. This can be a vector, where each number represents the number of hidden nodes of a different hidden layer. Use NULL to automatically specify.
#' @param type Estimation type for output layer weights. Can be "lasso" (lasso with CV), "ridge" (ridge regression with CV), "step" (stepwise regression with AIC) or "lm" (linear regression).
#' @param reps Number of networks to train, the result is the ensemble forecast.
#' @param comb Combination operator for forecasts when reps > 1. Can be "median", "mode" (based on KDE estimation) and "mean".
#' @param lags Lags of y to use as inputs. If none provided then 1:frequency(y) is used. Use 0 for no univariate lags.
#' @param keep Logical vector to force lags to stay in the model if sel.lag == TRUE. If NULL then it keep = rep(FALSE,length(lags)).
#' @param difforder Vector including the differencing lags. For example c(1,12) will apply first and seasonal (12) differences. For no differencing use 0. For automatic selection use NULL.
#' @param outplot Provide plot of model fit. Can be TRUE or FALSE.
#' @param sel.lag Automatically select lags. Can be TRUE or FALSE.
#' @param direct Use direct input-output connections to model strictly linear effects. Can be TRUE or FALSE.
#' @param allow.det.season Permit modelling seasonality with deterministic dummies.
#' @param det.type Type of deterministic seasonality dummies to use. This can be "bin" for binary or "trg" for a sine-cosine pair. With "auto" if ony a single seasonality is used and periodicity is up to 12 then "bin" is used, otherwise "trg".
#' @param xreg Exogenous regressors. Each column is a different regressor and the sample size must be at least as long as the target in-sample set, but can be longer.
#' @param xreg.lags This is a list containing the lags for each exogenous variable. Each list is a numeric vector containing lags. If xreg has 3 columns then the xreg.lags list must contain three elements. If NULL then it is automatically specified.
#' @param xreg.keep List of logical vectors to force lags of xreg to stay in the model if sel.lag == TRUE. If NULL then all exogenous lags can be removed.
#' @param barebone Use an alternative elm implementation (written in R) that is faster when the number of inputs is very high. Typically not needed.
#' @param model A previously trained mlp object. If this is provided then the same model is fitted to y, without re-estimating any model parameters.
#' @param retrain If a previous model is provided, retrain the network or not. If the network is retrained the size of the hidden layer is reset.
#'
#' @return Return object of class \code{elm}. If barebone == TRUE then the object inherits a second class "\code{elm.fast}".
#' The function \code{plot} produces a plot the network architecture.
#' \code{elm} contains:
#' \itemize{
#' \item \code{net} - ELM networks. If it is of class "\code{elm.fast}" then this is NULL.
#' \item \code{hd} - Number of hidden nodes. If it is of class "\code{elm.fast}" this is a vector with a different number for each repetition.
#' \item \code{W.in} - NULL unless it is of class "\code{elm.fast}". Contains the input weights.
#' \item \code{W} - Output layer weights for each repetition.
#' \item \code{b} - Contains the output node bias for each training repetition.
#' \item \code{W.dct} - Contains the direct connection weights if argument direct == TRUE. Otherwise is NULL.
#' \item \code{lags} - Input lags used.
#' \item \code{xreg.lags} - \code{xreg} lags used.
#' \item \code{difforder} - Differencing used.
#' \item \code{sdummy} - Use of deterministic seasonality.
#' \item \code{ff} - Seasonal frequencies detected in data (taken from ts or msts object).
#' \item \code{ff.det} - Seasonal frequencies coded using deterministic dummies.
#' \item \code{det.type} - Type of determistic seasonality.
#' \item \code{y} - Input time series.
#' \item \code{minmax} - Scaling structure.
#' \item \code{xreg.minmax} - Scaling structure for xreg variables.
#' \item \code{comb} - Combination operator used.
#' \item \code{type} - Estimation used for output layer weights.
#' \item \code{direct} - Presence of direct input-output connections.
#' \item \code{fitted} - Fitted values.
#' \item \code{MSE} - In-sample Mean Squared Error.
#' }
#'
#' @author Nikolaos Kourentzes, \email{nikolaos@kourentzes.com}
#' @seealso \code{\link{forecast.elm}}, \code{\link{elm.thief}}, \code{\link{mlp}}.
#' @references
#' \itemize{
#' \item For an introduction to neural networks see: Ord K., Fildes R., Kourentzes N. (2017) \href{https://kourentzes.com/forecasting/2017/10/16/new-forecasting-book-principles-of-business-forecasting-2e/}{Principles of Business Forecasting 2e}. \emph{Wessex Press Publishing Co.}, Chapter 10.
#' \item For combination operators see: Kourentzes N., Barrow B.K., Crone S.F. (2014) \href{https://kourentzes.com/forecasting/2014/04/19/neural-network-ensemble-operators-for-time-series-forecasting/}{Neural network ensemble operators for time series forecasting}. \emph{Expert Systems with Applications}, \bold{41}(\bold{9}), 4235-4244.
#' \item For variable selection see: Crone S.F., Kourentzes N. (2010) \href{https://kourentzes.com/forecasting/2010/04/19/feature-selection-for-time-series-prediction-a-combined-filter-and-wrapper-approach-for-neural-networks/}{Feature selection for time series prediction – A combined filter and wrapper approach for neural networks}. \emph{Neurocomputing}, \bold{73}(\bold{10}), 1923-1936.
#' \item For ELMs see: Huang G.B., Zhou H., Ding X. (2006) Extreme learning machine: theory and applications. \emph{Neurocomputing}, \bold{70}(\bold{1}), 489-501.
#' }
#' @keywords mlp thief ts
#' @note To use elm with Temporal Hierarchies (\href{https://cran.r-project.org/package=thief}{thief} package) see \code{\link{elm.thief}}.
#' The elm function by default calls the \code{neuralnet} function. If barebone == TRUE then it uses an alternative implementation (\code{TStools:::elm.fast}) which is more appropriate when the number of inputs is several hundreds.
#' @examples
#' \dontshow{
#' fit <- elm(AirPassengers,reps=1)
#' print(fit)
#' plot(fit)
#' }
#' \dontrun{
#' fit <- elm(AirPassengers)
#' print(fit)
#' plot(fit)
#' frc <- forecast(fit,h=36)
#' plot(frc)
#' }
#'
#' @export elm
elm <- function(y,m=frequency(y),hd=NULL,type=c("lasso","ridge","step","lm"),reps=20,comb=c("median","mean","mode"),
lags=NULL,keep=NULL,difforder=NULL,outplot=c(FALSE,TRUE),sel.lag=c(TRUE,FALSE),direct=c(FALSE,TRUE),
allow.det.season=c(TRUE,FALSE),det.type=c("auto","bin","trg"),
xreg=NULL,xreg.lags=NULL,xreg.keep=NULL,barebone=c(FALSE,TRUE),model=NULL,retrain=c(FALSE,TRUE)){
# Defaults
type <- match.arg(type,c("lasso","ridge","step","lm"))
comb <- match.arg(comb,c("median","mean","mode"))
outplot <- outplot[1]
sel.lag <- sel.lag[1]
direct <- direct[1]
allow.det.season <- allow.det.season[1]
det.type <- det.type[1]
barebone <- barebone[1]
retrain <- retrain[1]
# Check if y input is a time series
if (!(is(y,"ts") | is(y,"msts"))){
stop("Input y must be of class ts or msts.")
}
# Check if a model input is provided
if (!is.null(model)){
if (is(model,"elm")){
oldmodel <- TRUE
if (retrain == FALSE){
hd <- model$hd
}
lags <- model$lags
xreg.lags <- model$xreg.lags
difforder <- model$difforder
comb <- model$comb
det.type <- model$det.type
allow.det.season <- model$sdummy
direct <- model$direct
type <- model$type
reps <- length(model$b)
sel.lag <- FALSE
if (is(model,"elm.fast")){
barebone <- TRUE
}
# Check xreg inputs
x.n <- dim(xreg)[2]
if (is.null(x.n)){
x.n <- 0
}
xm.n <- length(model$xreg.minmax)
if (x.n != xm.n){
stop("Previous model xreg specification and new xreg inputs do not match.")
}
} else {
stop("model must be an mlp object, the output of the mlp() function.")
}
} else {
oldmodel <- FALSE
}
# Check xreg inputs
if (!is.null(xreg)){
x.n <- length(xreg[1,])
if (!is.null(xreg.lags)){
if (length(xreg.lags) != x.n){
stop("Argument xreg.lags must be a list with as many elements as xreg variables (columns).")
}
}
}
# Get frequency
ff.ls <- get.ff(y)
ff <- ff.ls$ff
ff.n <- ff.ls$ff.n
rm("ff.ls")
# Check seasonality of old model
if (oldmodel == TRUE){
if (model$sdummy == TRUE){
if (!all(model$ff.det == ff)){
stop("Seasonality of current data and seasonality of provided model does not match.")
}
}
}
# Default lagvector
xreg.ls <- def.lags(lags,keep,ff,xreg.lags,xreg.keep,xreg)
lags <- xreg.ls$lags
keep <- xreg.ls$keep
xreg.lags <- xreg.ls$xreg.lags
xreg.keep <- xreg.ls$xreg.keep
rm("xreg.ls")
# Pre-process data (same for MLP and ELM)
PP <- preprocess(y,m,lags,keep,difforder,sel.lag,allow.det.season,det.type,ff,ff.n,xreg,xreg.lags,xreg.keep)
Y <- PP$Y
X <- PP$X
sdummy <- PP$sdummy
difforder <- PP$difforder
det.type <- PP$det.type
lags <- PP$lags
xreg.lags <- PP$xreg.lags
sc <- PP$sc
xreg.minmax <- PP$xreg.minmax
d <- PP$d
y.d <- PP$y.d
y.ud <- PP$y.ud
frm <- PP$frm
ff.det <- PP$ff.det
lag.max <- PP$lag.max
rm("PP")
if (is.null(hd)){
hd <- min(100-60*(type=="step" | type=="lm"),max(4,length(Y)-2-as.numeric(direct)*length(X[1,])))
}
# Train ELM
# If single hidden layer switch to fast, unless requested otherwise
if ((length(hd)==1 & barebone == TRUE) | (barebone == TRUE & oldmodel == TRUE)){
if (oldmodel == FALSE | retrain == TRUE){
# Train model
# Switch to elm.fast
f.elm <- elm.fast(Y,X,hd=hd,reps=reps,comb=comb,type=type,direct=direct,linscale=FALSE,output="linear",core=TRUE)
# Post-process output
Yhat <- f.elm$fitted.all
} else {
# Use inputted model
Yhat <- array(NA,c(length(Y),reps))
W.in <- model$W.in
W <- model$W
B <- model$b
W.dct <- model$W.dct
for (r in 1:reps){
Yhat[,r] <- predictElmFastInternal(X,W.in[[r]],W[[r]],B[r],W.dct[[r]],direct)
}
}
for (r in 1:reps){
# Reverse scaling
yhat <- linscale(Yhat[,r],sc$minmax,rev=TRUE)$x
# Undifference - this is 1-step ahead undifferencing
y.ud[[d+1]] <- yhat
if (d>0){
for (i in 1:d){
n.ud <- length(y.ud[[d+2-i]])
n.d <- length(y.d[[d+1-i]])
y.ud[[d+1-i]] <- y.d[[d+1-i]][(n.d-n.ud-difforder[d+1-i]+1):(n.d-difforder[d+1-i])] + y.ud[[d+2-i]]
}
}
Yhat[,r] <- head(y.ud,1)[[1]]
}
} else {
# Rely on neuralnet, very slow when number of inputs is large
if (oldmodel == FALSE | retrain == TRUE){
# Train network
net <- neuralnet::neuralnet(frm,cbind(Y,X),hidden=hd,threshold=10^10,rep=reps,err.fct="sse",linear.output=FALSE)
} else {
net <- model$net
}
# Get weights for each repetition
W <- W.dct <- vector("list",reps)
B <- vector("numeric",reps)
Yhat <- array(NA,c((length(y)-sum(difforder)-lag.max),reps))
x.names <- colnames(X)
for (r in 1:reps){
H <- as.matrix(tail(neuralnet::compute(net,X,r)$neurons,1)[[1]][,2:(tail(hd,1)+1)])
if (direct==TRUE){
Z <- cbind(H,X)
} else {
Z <- H
}
# Train network (last layer)
if (oldmodel == FALSE | retrain == TRUE){
w.out <- elm.train(Y,Z,type,X,direct,hd,output="linear")
B[r] <- w.out[1] # Bias (Constant)
if (direct == TRUE){ # Direct connections
w.dct <- w.out[(1+hd+1):(1+hd+dim(X)[2]),,drop=FALSE]
if (!is.null(x.names)){
rownames(w.dct) <- x.names
}
W.dct[[r]] <- w.dct
}
W[[r]] <- w.out[2:(1+hd),,drop=FALSE] # Hidden layer
} else {
# Take last layer weights from imputted model
B[r] <- model$b[r]
W.dct[[r]] <- model$W.dct[[r]]
W[[r]] <- model$W[[r]]
}
# Produce fit
yhat.sc <- H %*% W[[r]] + B[r] + if(direct!=TRUE){0}else{X %*% W.dct[[r]]}
# Post-process
yhat <- linscale(yhat.sc,sc$minmax,rev=TRUE)$x
# # Check unscaled, but differenced fit
# plot(1:length(tail(y.d,1)[[1]]),tail(y.d,1)[[1]], type="l")
# lines((max(lags)+1):length(tail(y.d,1)[[1]]),yhat,col="red")
# Undifference - this is 1-step ahead undifferencing
y.ud[[d+1]] <- yhat
if (d>0){
for (i in 1:d){
n.ud <- length(y.ud[[d+2-i]])
n.d <- length(y.d[[d+1-i]])
y.ud[[d+1-i]] <- y.d[[d+1-i]][(n.d-n.ud-difforder[d+1-i]+1):(n.d-difforder[d+1-i])] + y.ud[[d+2-i]]
}
}
yout <- head(y.ud,1)[[1]]
# yout <- ts(yout,end=end(y),frequency=frequency(y))
# # Check undifferences and unscaled
# plot(y)
# lines(yout,col="red")
# # plot(1:length(y),y,type="l")
# # lines((max(lags)+1+sum(difforder)):length(y),y.ud[[1]],col="red")
Yhat[,r] <- yout
} # Close reps
} # Close elm.fast if
# Combine forecasts
yout <- frc.comb(Yhat,comb)
# Convert to time series
yout <- ts(yout,end=end(y),frequency=frequency(y))
MSE <- mean((y[(lag.max+1+sum(difforder)):length(y)] - yout)^2)
# Construct plot
if (outplot==TRUE){
plot(y)
if (reps>1){
for (i in 1:reps){
temp <- Yhat[,i]
temp <- ts(temp,frequency=frequency(y),end=end(y))
lines(temp,col="grey")
}
}
lines(yout,col="blue")
}
if (barebone == FALSE){
W.in <- NULL
class.type <- "elm"
} else {
net <- NULL
if (oldmodel == FALSE || retrain == TRUE){
hd <- f.elm$hd
W <- f.elm$W
B <- f.elm$b
W.in <- f.elm$W.in
W.dct <- f.elm$W.dct
}
class.type <- c("elm","elm.fast")
}
return(structure(list("net"=net,"hd"=hd,"W.in"=W.in,"W"=W,"b"=B,"W.dct"=W.dct,
"lags"=lags,"xreg.lags"=xreg.lags,"difforder"=difforder,
"sdummy"=sdummy,"ff.det"=ff.det,"det.type"=det.type,"y"=y,"minmax"=sc$minmax,"xreg.minmax"=xreg.minmax,
"comb"=comb,"type"=type,"direct"=direct,"fitted"=yout,"MSE"=MSE),class=class.type))
}
elm.train <- function(Y,Z,type,X,direct,hd,output="linear"){
# Find output weights for ELM
switch(type,
"lasso" = {
if (output=="logistic"){
fit <- suppressWarnings(glmnet::cv.glmnet(Z,cbind(Y),family="binomial"))
} else {
fit <- suppressWarnings(glmnet::cv.glmnet(Z,cbind(Y)))
}
cf <- as.vector(coef(fit))
},
"ridge" = {
if (output=="logistic"){
fit <- suppressWarnings(glmnet::cv.glmnet(Z,cbind(Y),alpha=0,family="binomial"))
} else {
fit <- suppressWarnings(glmnet::cv.glmnet(Z,cbind(Y),alpha=0))
}
cf <- as.vector(coef(fit))
},
{ # LS and stepwise
reg.data <- as.data.frame(cbind(Y,Z))
colnames(reg.data) <- c("Y",paste0("X",1:(tail(hd,1)+as.numeric(direct)*length(X[1,]))))
# Take care of linear dependency
alias.fit <- alias(as.formula(paste0("Y~",paste0("X",1:(tail(hd,1)+as.numeric(direct)*length(X[1,])),collapse="+"))),data=reg.data)
alias.x <- rownames(alias.fit$Complete)
frm <- as.formula(paste0("Y~",paste0(setdiff(colnames(reg.data)[2:(hd+1+as.numeric(direct)*length(X[1,]))],alias.x),collapse="+")))
fit <- suppressWarnings(lm(frm,reg.data))
if (type == "step"){
fit <- suppressWarnings(MASS::stepAIC(fit,trace=0)) # ,direction="backward",k=log(length(Y)))) # BIC criterion
}
cf.temp <- coef(fit)
loc <- which(colnames(reg.data) %in% names(cf.temp))
cf <- rep(0,(tail(hd,1)+1+direct*length(X[1,])))
cf[1] <- cf.temp[1]
cf[loc] <- cf.temp[2:length(cf.temp)]
})
return(cbind(cf))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.