library(dtw)
library(robustbase)
library(forecast)
# Smooth & deseasonalisation functions
#'
#' @param input observation time series
#' @param ppy frequency of the series
#'
#' @return whether to execute seasonal adjustment
#' @references Assimakopoulos & Nikolopoulos (2000)
#' @export
#'
#' @examples
SeasonalityTest <- function(input, ppy){
if (length(input)<3*ppy){
test_seasonal <- FALSE
}else{
xacf <- acf(input, plot = FALSE)$acf[-1, 1, 1]
clim <- 1.645/sqrt(length(input)) * sqrt(cumsum(c(1, 2 * xacf^2)))
test_seasonal <- ( abs(xacf[ppy]) > clim[ppy] )
if (is.na(test_seasonal)==TRUE){ test_seasonal <- FALSE }
}
return(test_seasonal)
}
Smoothing_ts2 <- function(x, spanw, fh){
ppy <- frequency(x) ; ST <- F ; trend <- 1:length(x)
if (ppy>1){ ST <- SeasonalityTest(x,ppy) }
if (ST==T){
lambda <- BoxCox.lambda(x,lower=0,upper=1)
bc.x <- as.numeric(BoxCox(x, lambda))
seasonal <- stl(ts(bc.x, frequency = ppy), "per")$time.series[, 1]
bc.x <- bc.x - as.numeric(seasonal)
x <- as.numeric(InvBoxCox(bc.x, lambda))+x-x
suppressWarnings(x.loess <- loess(x ~ trend, span = spanw/length(x), degree = 1))
x <- as.numeric(x.loess$fitted)+x-x
SIin <- seasonal
SIout <- head(rep(seasonal[(length(seasonal)-ppy+1):length(seasonal)], fh), fh)
}else{
suppressWarnings(x.loess <- loess(x ~ trend, span = spanw/length(x), degree = 1))
x <- as.numeric(x.loess$fitted)+x-x
SIin <- rep(0, length(x))
SIout <- rep(0, fh)
lambda <- 1
}
output <- list(series=x, seasonalIn=SIin, seasonal=SIout, lambda=lambda)
return(output)
}
# Distance measures related functions
dtw_dist <- function(reference, testing){
reference <- as.numeric(reference)
testing <- as.numeric(testing)
if (length(reference)>length(testing)){
DTW <- NA
}else if (length(reference)<length(testing)){
extract <- tail(testing,length(reference))
DTW<-dtw(x=reference, y=extract)$distance/length(extract)
}else{
DTW <- (dtw(x=reference, y=testing)$distance)/length(testing)
}
return(DTW)
}
Euclidean_dist <- function(reference, testing){
reference <- as.numeric(reference)
testing <- as.numeric(testing)
if (length(reference)>length(testing)){
Euclidean <- NA
}else if (length(reference)<length(testing)){
extract <- tail(testing,length(reference))
Euclidean <- (sum((reference-extract)^2))/length(extract)
}else{
Euclidean <- (sum((reference-testing)^2))/length(testing)
}
return(Euclidean)
}
AD_dist <- function(reference, testing){
reference <- as.numeric(reference)
testing <- as.numeric(testing)
if (length(reference)>length(testing)){
AD <- NA
}else if (length(reference)<length(testing)){
extract <- tail(testing,length(reference))
AD<- (sum(abs(reference-extract)))/length(extract)
}else{
AD <- (sum(abs(reference-testing)))/length(testing)
}
return(AD)
}
# Forecast function
#'
#' @param data_input input data for each data frequency
#' @param fh number of forecasting horizon
#' @param nts number of the most similar series
#' @param Preprocessing remove seasonality, smooth the series and scale the target and the reference
#' series to the same magnitude
#' @param Dist_type number of types of distance measure
#' @param clustering number of sample categories in kmeans
#' @param LoadData
#' @param FullOutput
#'
#' @return A list generated by forecasting with similarity
#' @export
#'
#' @examples
Forecasting <- function(data_input, fh, nts, Preprocessing, Dist_type, path, clustering=0, LoadData=TRUE, FullOutput=FALSE){
if (LoadData){
load(paste0("reference/ref",frequency(data_input),".RData"))
load(paste0("reference/ref",frequency(data_input),"_",Preprocessing,".RData"))
} else {
load(paste0("reference/", path,".RData"))
load(paste0("reference/", path,"_",Preprocessing,".RData"))
}
if (frequency(data_input)==1){
spanw <- 6
} else if (frequency(data_input)==4){
spanw <- 8
} else if (frequency(data_input)==12){
spanw <- 18
}
limit <- min(ref_info$lengths)
if (length(data_input)>limit){
data_input <- tail(data_input, limit)
}
#Keep only ref t-s of enough observations
cd <- length(data_input) + fh
keep <- ref_info[ref_info$lengths>=cd,]$ordnum
smoothed_ref <- lapply(keep, function(x) as.numeric(tail(list_ref[[x]], cd)))
# Smooth target series
if (Preprocessing==1){
out_req <- Smoothing_ts2(data_input, spanw, fh)
smoothed_tar <- out_req$series
SeasIndIn <- out_req$seasonalIn
SeasInd <- out_req$seasonal
lambda <- out_req$lambda
}else{
smoothed_tar <- data_input
SeasIndIn <- rep(0, length(data_input))
SeasInd <- rep(0, fh)
}
# Split reference set and target series into insample and outsample
insample_ref <- lapply(c(1:length(smoothed_ref)), function(x) head(smoothed_ref[[x]], length(smoothed_ref[[x]])-fh))
outsample_ref <- lapply(c(1:length(smoothed_ref)), function(x) tail(smoothed_ref[[x]], fh))
insample_tar <- smoothed_tar
# Normalize reference set and target series
Scale_ref <- lapply(c(1:length(insample_ref)), function(x) as.numeric(tail(insample_ref[[x]],1)) )
Scale_tar <- as.numeric(tail(insample_tar,1))
insample_ref_scale <- lapply(c(1:length(insample_ref)), function(x) insample_ref[[x]]/Scale_ref[[x]])
outsample_ref_scale <- lapply(c(1:length(outsample_ref)), function(x) outsample_ref[[x]]/Scale_ref[[x]])
insample_tar_scale <- insample_tar/Scale_tar
# Compute distances
if (Dist_type==1){
Dist <- unlist(lapply(c(1:length(insample_ref_scale)), function(x) dtw_dist(insample_tar_scale,insample_ref_scale[[x]])))
}else if (Dist_type==2){
Dist <- unlist(lapply(c(1:length(insample_ref_scale)), function(x) Euclidean_dist(insample_tar_scale,insample_ref_scale[[x]])))
}else{
Dist <- unlist(lapply(c(1:length(insample_ref_scale)), function(x) AD_dist(insample_tar_scale,insample_ref_scale[[x]])))
}
# Create forecasts
temp_dist <- Dist
temp_order <- rank(temp_dist)
temp_id <- c(1:length(temp_dist))
temp <- data.frame(temp_id,temp_dist,temp_order)
temp <- temp[order(temp_order),] ; row.names(temp) <- NULL
temp <- na.omit(temp)
if (clustering >= 2){
fit <- kmeans(Dist, clustering)
nts <- length(which(fit$cluster == which.min(aggregate(Dist, by=list(fit$cluster), FUN=mean)[,2])))
}
temp_c_in <- array(NA, c(nts,length(insample_tar_scale)))
for (i in 1:nts){
temp_c_in[i,] <- as.numeric(insample_ref_scale[[temp$temp_id[i]]])
}
if (is.null(nrow(temp_c_in))){
temp_fcs_in <- temp_c_in
}else{
temp_fcs_in <- colMedians(temp_c_in)
}
temp_fcs_in <- temp_fcs_in*Scale_tar
temp_c_in <- temp_c_in*Scale_tar
if (all(SeasIndIn==0)==F){
temp_fcs_in <- InvBoxCox( (BoxCox(temp_fcs_in, lambda) + SeasIndIn) , lambda)
for (i in 1:nts){
temp_c_in[i,] <- InvBoxCox( (BoxCox(temp_c_in[i,], lambda) + SeasIndIn) , lambda)
}
}
if (fh>1){
temp_c <- array(NA, c(nts,fh))
for (i in 1:nts){
temp_c[i,] <- as.numeric(outsample_ref_scale[[temp$temp_id[i]]])
}
if (is.null(nrow(temp_c))){
temp_fcs <- temp_c
}else{
temp_fcs <- colMedians(temp_c)
}
temp_fcs <- temp_fcs*Scale_tar
temp_c <- temp_c*Scale_tar
if (all(SeasInd==0)==F){
temp_fcs <- InvBoxCox( (BoxCox(temp_fcs, lambda) + SeasInd) , lambda)
for (i in 1:nts){
temp_c[i,] <- InvBoxCox( (BoxCox(temp_c[i,], lambda) + SeasInd) , lambda)
}
}
} else {
temp_c <- array(NA, nts)
for (i in 1:nts){
temp_c[i] <- as.numeric(outsample_ref_scale[[temp$temp_id[i]]])
}
if (is.null(nrow(temp_c))){
temp_c <- temp_c
}else{
temp_fcs <- median(temp_c)
}
temp_fcs <- temp_fcs*Scale_tar
temp_c <- temp_c*Scale_tar
if (SeasInd!=0){
temp_fcs <- InvBoxCox( (BoxCox(temp_fcs, lambda) + SeasInd) , lambda)
for (i in 1:nts){
temp_c[i] <- InvBoxCox( (BoxCox(temp_c[i], lambda) + SeasInd) , lambda)
}
}
}
if (FullOutput){
return(list(fcs_in = temp_fcs_in, similarfcs_in = temp_c_in, fcs = temp_fcs, similarfcs = temp_c, similarDistances=temp$temp_dist[1:nts], k=nts))
} else {
return(list(fcs = temp_fcs, similarfcs = temp_c))
}
}
#' Similarity
#'
#' @param y "vector" the observation series
#' @param fh number of forecasting horizon
#' @param nts number of the most similar series
#' @param Preprocessing remove seasonality, smooth the series and scale the target and the reference
#' series to the same magnitude
#' @param Dist_type number of forecasting benchmarks
#' @param gamma calibrated significance level quantiles
#' @param scaling scaling method: using different scaling or not
#' @param clustering number of sample categories in kmeans
#' @param LoadData
#' @param FullOutput
#'
#' @return A list containing calculation of similarity
#' @export
#'
#' @examples
Similarity <- function(y, fh, LoadData, path, nts=100, Preprocessing=1, Dist_type=3, gamma = 0.05, scaling = c('sdiff', 'snaive'),
clustering=0, FullOutput=FALSE){
deltas = seq(0, 1, 0.01)
all_MSIS = rep(0, length(deltas))
y.train = head(y, length(y) - fh)
y.test = as.vector(tail(y, fh))
ytrainresults = Forecasting(y.train, fh=fh, nts=nts, Preprocessing=Preprocessing, Dist_type=Dist_type, path=path,
clustering=clustering, LoadData=LoadData, FullOutput=FullOutput)
if (fh>1){
PIs_ytrain = apply(ytrainresults$similarfcs, 2, quantile, probs=c(gamma/2, 1-gamma/2))
for (i in seq_along(deltas)){
delta = deltas[i]
PIL = PIs_ytrain[1,] * (1 - delta)
PIU = PIs_ytrain[2,] * (1 + delta)
all_MSIS[i] = calculate_PI_MSIS(PIL, PIU, y.train, y.test, gamma = gamma, scaling = scaling)
}
} else {
PIs_ytrain = quantile(ytrainresults$similarfcs, probs=c(gamma/2, 1-gamma/2))
for (i in seq_along(deltas)){
delta = deltas[i]
PIL = PIs_ytrain[1] * (1 - delta)
PIU = PIs_ytrain[2] * (1 + delta)
all_MSIS[i] = calculate_PI_MSIS(PIL, PIU, y.train, y.test, gamma = gamma, scaling = scaling)
}
}
opt.delta = deltas[which.min(all_MSIS)]
yresults = Forecasting(y, fh=fh, nts=nts, Preprocessing=Preprocessing, Dist_type=Dist_type, path=path,
clustering=clustering, LoadData=LoadData, FullOutput=FullOutput)
if (fh>1){
PIs = apply(yresults$similarfcs, 2, quantile, probs=c(gamma/2, 1-gamma/2))
PIL_Similarity = PIs[1,] * (1 - opt.delta)
PIU_Similarity = PIs[2,] * (1 + opt.delta)
} else {
PIs = quantile(yresults$similarfcs, probs=c(gamma/2, 1-gamma/2))
PIL_Similarity = PIs[1] * (1 - opt.delta)
PIU_Similarity = PIs[2] * (1 + opt.delta)
}
if (FullOutput){
return(list(fcs = yresults$fcs,
similarfcs = yresults$similarfcs,
PIL = PIL_Similarity,
PIU = PIU_Similarity,
opt.delta = opt.delta,
fcs_in = yresults$fcs_in,
similarfcs_in = yresults$similarfcs_in,
similarDistances = yresults$similarDistances))
} else {
return(list(fcs = yresults$fcs,
similarfcs = yresults$similarfcs,
PIL = PIL_Similarity,
PIU = PIU_Similarity))
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.