R/farima.R

Defines functions farima AccuMetrics summary.Farima plot.Farima farima.simplified Pred_Interval plot.PredInterval rMSE DirTest plot.rMSE ForCompare

Documented in farima

farima<-function(data=NULL, window="recursive", w_size=NULL, y.index=1L,
                 order=c(0,0,0), seasonal=c(0,0,0), include.mean=TRUE,
                 include.drift=FALSE, include.constant=FALSE, lambda=model$lambda,
                 biasadj=FALSE, method="CSS", model=NULL, frequency=7)
  {
  if(is.null(data)|is.null(w_size)){
    stop("Have to provide values for data and w_size.")
  }
  if(window %in% c("recursive", "rolling", "fixed")==F){
    stop("Unsupported forecasting sheme. Has to be either 'recursive', 'rolling' or 'fixed'.")
  }

  suppressMessages(require(forecast))

  Data<-as.matrix(data)

  w_size = w_size
  n_windows = dim(Data)[1] - w_size
  predicts<-c()
  trues<-c()

  if(dim(Data)[2]>1){
    Y<-matrix(Data[, y.index], ncol=1)
    X<-matrix(Data[, -y.index], ncol=ncol(Data)-1)

    if(window=="recursive"){
      for(i in 1:n_windows){
        xregs <- X[1:(w_size + i - 1), ]
        newxregs <-matrix(X[(w_size + i), ], ncol=ncol(X), nrow=1)

        Data_uni = ts(Y, frequency=frequency)
        Model <-forecast::Arima(Data_uni[1:(w_size + i - 1)], order=order, xreg =xregs,
                         seasonal=seasonal, include.mean=include.mean, include.drift=include.drift,
                         include.constant=include.constant, lambda=lambda, biasadj=biasadj,
                         method=method, model=model)
        predict <- predict(Model, newxreg = newxregs, n.ahead =1)$pred
        y_real =Data_uni[w_size + i]

        predicts[i] <- predict
        trues[i] <- y_real
      }
    }else if(window=="rolling"){
      for(i in 1:n_windows){
        xregs <- X[i:(w_size + i - 1), ]
        newxregs <-matrix(X[(w_size + i), ], ncol=ncol(X), nrow=1)

        Data_uni = ts(Y, frequency=frequency)
        Model<-forecast::Arima(Data_uni[i:(w_size + i - 1)], order=order, xreg =xregs,
                         seasonal=seasonal, include.mean=include.mean, include.drift=include.drift,
                         include.constant=include.constant, lambda=lambda, biasadj=biasadj,
                         method=method, model=model)
        predict <- predict(Model, newxreg = newxregs, n.ahead =1)$pred
        y_real =Data_uni[w_size + i]

        predicts[i] <- predict
        trues[i] <- y_real
      }

    }else{
      xregs<-X[1:w_size, ]
      Data_uni = ts(Y, frequency=frequency)
      Model<-forecast::Arima(Data_uni[1:w_size], order=order, xreg =xregs,
                   seasonal=seasonal, include.mean=include.mean, include.drift=include.drift,
                   include.constant=include.constant, lambda=lambda, biasadj=biasadj,
                   method=method, model=model)
      for(i in 1:n_windows){
        newxregs <-matrix(X[(w_size + i), ], ncol=ncol(X), nrow=1)
        predict <- predict(Model, newxreg = newxregs, n.ahead =1)$pred
        y_real =Data_uni[w_size + i]

        predicts[i] <- predict
        trues[i] <- y_real
      }
    }
  }else{
    Y<-Data

    if(window=="recursive"){
      for(i in 1:n_windows){
        xregs <- NULL
        newxregs <-NULL

        Data_uni = ts(Y, frequency=frequency)
        Model <-forecast::Arima(Data_uni[1:(w_size + i - 1)], order=order, xreg =xregs,
                      seasonal=seasonal, include.mean=include.mean, include.drift=include.drift,
                      include.constant=include.constant, lambda=lambda, biasadj=biasadj,
                      method=method, model=model)
        predict <- predict(Model, newxreg = newxregs, n.ahead =1)$pred
        y_real =Data_uni[w_size + i]

        predicts[i] <- predict
        trues[i] <- y_real
      }
    }else if(window=="rolling"){
      for(i in 1:n_windows){
        xregs <- NULL
        newxregs <-NULL

        Data_uni = ts(Y, frequency=frequency)
        Model<-forecast::Arima(Data_uni[i:(w_size + i - 1)], order=order, xreg =xregs,
                     seasonal=seasonal, include.mean=include.mean, include.drift=include.drift,
                     include.constant=include.constant, lambda=lambda, biasadj=biasadj,
                     method=method, model=model)
        predict <- predict(Model, newxreg = newxregs, n.ahead =1)$pred
        y_real =Data_uni[w_size + i]

        predicts[i] <- predict
        trues[i] <- y_real
      }

    }else{
      xregs<-NULL
      Data_uni = ts(Y, frequency=frequency)
      Model<-forecast::Arima(Data_uni[1:w_size], order=order, xreg =xregs,
                   seasonal=seasonal, include.mean=include.mean, include.drift=include.drift,
                   include.constant=include.constant, lambda=lambda, biasadj=biasadj,
                   method=method, model=model)
      for(i in 1:n_windows){
        newxregs <-NULL
        predict <- predict(Model, newxreg = newxregs, n.ahead =1)$pred
        y_real =Data_uni[w_size + i]

        predicts[i] <- predict
        trues[i] <- y_real
      }
    }
  }
  results<-AccuMetrics(pred=predicts, true=trues)
  results$Model<-list(window=window, order=order, y.index=y.index, w_size=w_size,
                      seasonal=seasonal, include.mean=include.mean,
                      inclyde.drift=include.drift, include.constant=include.constant,
                      lambda=lambda, biasadj=biasadj, method=method, model=model,
                      frequency=frequency)
  results$Data<-data
  class(results)<-"Farima"
  return(results)
}








AccuMetrics<-function(pred=NULL, true=NULL){
  forecasts<-data.frame(Forecasts=pred, Realized=true)
  forecasts$Errors<-forecasts$Realized-forecasts$Forecasts
  forecasts$ForecastDirection<-sign(forecasts$Forecasts)
  forecasts$TrueDirection<-sign(forecasts$Realized)
  forecasts$Success<-ifelse(forecasts$ForecastDirection==forecasts$TrueDirection,
                            yes=1, no=0)
  accuracy<-accuracy(f=forecasts$Forecasts, x=forecasts$Realized)
  mse<-as.numeric(accuracy)[2]^2
  mape<-as.numeric(accuracy)[5]
  me<-as.numeric(accuracy)[1]
  mae<-as.numeric(accuracy)[3]
  mpe<-as.numeric(accuracy)[4]
  SRatio<-mean(na.omit(forecasts$Success))
  accuracy<-data.frame(MSPE=mse, MAPE=mape, ME=me, MAE=mae, MPE=mpe, SRatio=SRatio)

  results<-list(Forecasts=forecasts, Accuracy=accuracy)
  return(results)
}


summary.Farima<-function(.data=NULL, digits=7){
  stopifnot(inherits(.data, "Farima"))
  cat("\t\n",
      sprintf("Forecasting Window: %s\n", .data$Model$window),
      sprintf("Window Size: %s\n", .data$Model$w_size),
      sprintf("MSPE: %s\n", base::round(.data$Accuracy$MSPE, digits=digits)),
      sprintf("Success Ratio: %s\n", base::round(.data$Accuracy$SRatio, digits=digits))
  )
}





plot.Farima<-function(.data=NULL, start=NULL, frequency='month', forecast.lab="Forecasts", true.lab="Realized", x.lab="Time", y.lab="Value", title=NULL,
                      original=NULL, transform="logdiff")
  {
  if(is.null(start)){
    dates=seq(from=1, by=1, length.out=length(.data$Forecasts$Forecasts))
  }else{
    dates=seq(from = as.Date(start), by=frequency, length.out=length(.data$Forecasts$Forecasts))
  }
  suppressMessages(require(reshape2))
  suppressMessages(require(ggplot2))

  if(is.null(original)){
    df <- data.frame(date=dates, predicts=as.numeric(.data$Forecasts$Forecasts),
                     true=as.numeric(.data$Forecasts$Realized))
    colnames(df) <-c("Time", forecast.lab, true.lab)
    df <- reshape2::melt(df, id.vars = "Time", value.name="Value", variable.name="Variable")
    plot<-ggplot2::ggplot(data = df) +
      geom_line(aes(x = Time, y = Value, linetype = Variable, color = Variable))+
      xlab(x.lab)+ylab(y.lab)
    if(!is.null(title)){
      plot<-plot+ggtitle(title)
    }
  }else{
    w_size=.data$Model$w_size
    Date<-index(original)
    Value<-as.numeric(original)
    ForeValue<-vector()

    if(transform=="logdiff"){
      for(i in 1:length((Date))-w_size){
        ForeValue[i]<-exp(.data$Forecasts$Forecasts[i])*Value[(w_size+i)]
      }
    }else if(transform=="diff"){
        for(i in 1:length((Date))-w_size){
          ForeValue[i]<-.data$Forecasts$Forecasts[i]+Value[(w_size+i)]
        }
      }else{
          stop("Unsupported transformation type")
      }


      ForeValue<-zoo(ForeValue, order.by=Date[(w_size+2):length(Date)])
      Values<-merge(original, ForeValue)

      names(Values)<-c("Realized", "Forecasts")

      if(is.null(title)){
        title<-paste("Out-of-Sample Forecasts vs. Original Time Series", sep="")
      }else{
        title<-title
      }

      suppressMessages(plot<-ggplot2::ggplot(Values)+
                         geom_line(aes(x=Date, y=as.numeric(Realized)))+
                         geom_line(aes(x=Date, y=as.numeric(Forecasts)), color="red",
                                   linetype="longdash", na.rm=T)+
                         ylab("Values")+
                         ggtitle(title))
    }

  suppressMessages(return(plot))

}











farima.simplified<-function(data=NULL, window="recursive", w_size=NULL, y.index=1L,
                            order=c(0,0,0), seasonal=c(0,0,0), include.mean=TRUE,
                            include.drift=FALSE, include.constant=FALSE, lambda=model$lambda,
                            biasadj=FALSE, method="CSS", model=NULL, frequency=7){
  results<-farima(data=data, window=window, w_size=w_size,
                  y.index=y.index, order=order, seasonal=seasonal, include.mean=include.mean,
                  include.drift=include.drift, include.constant=include.constant,
                  lambda=lambda, biasadj=biasadj, method=method, model=model, frequency=frequency)$Forecasts$Forecasts
  return(results)
}






Pred_Interval<-function(.data=NULL, boot=100, forecast="model", sim="fixed", l=3){
  if(class(.data)!="Farima"){
    stop("The object passed to the argument .data has to be of class 'Farima'.")
  }
  sim=sim
  endcorr=TRUE
  norm=TRUE
  Data<-as.matrix(.data$Data)
  Model.mega<-.data$Model
  window=Model.mega$window
  w_size=Model.mega$w_size
  y.index=Model.mega$y.index
  order=Model.mega$order
  seasonal=Model.mega$seasonal
  frequency=Model.mega$frequency
  include.mean=Model.mega$include.mean
  include.drift=Model.mega$include.drift
  include.constant=Model.mega$include.constant
  lambda=Model.mega$lambda
  model=Model.mega$model
  biasadj=Model.mega$biasadj
  method=Model.mega$method


  suppressMessages(require(boot))
  suppressMessages(require(ggfortify))
  suppressMessages(require(forecast))


  results<-boot::tsboot(tseries=Data, statistic=farima.simplified, R=boot,
                        sim=sim, n.sim=nrow(Data), norm=norm, l=l,
                        w_size=w_size, window=window, endcorr=endcorr, y.index=y.index,
                        order=order, seasonal=seasonal, frequency=frequency,
                        include.mean=include.mean, include.drift=include.drift,
                        include.constant=include.constant, lambda=lambda,
                        biasadj=biasadj, method=method)$t

  results<-t(results)
  forecasts<-.data$Forecasts$Forecasts
  lower<-apply(results, 1, quantile, prob=0.025, na.rm=T)
  upper<-apply(results, 1, quantile, prob=0.975, na.rm=T)

  if(forecast=="model"){
    forecasts=forecasts
  }else if(forecast=="mean"){
    forecasts=rowMeans(results)
  }

  Intervals<-data.frame(Lower=lower, Forecast=forecasts, Upper=upper)
  colnames(results)<-paste("Bootstrap.", seq(from=1, to=boot, by=1), sep="")
  Results<-list(Interval=Intervals, Model=Model.mega, Data=Data, Bootstrapped=results)
  class(Results)<-"PredInterval"
  return(Results)
}




plot.PredInterval<-function(.data=NULL, out.true=F, original=NULL, transform="logdiff", title=NULL){
  suppressMessages(require(forecast))
  Results<-.data$Interval
  w_size=.data$Model$w_size
  y.index=.data$Model$y.index
  Data<-.data$Data

  if(is.null(original)){

    start=w_size+1
    interval <- structure(list(
      mean = ts(Results$Forecast, start=start),
      lower = ts(Results$Lower, start=start),
      upper = ts(Results$Upper, start=start),
      level=95), class="forecast")

    plot<-ggplot2::autoplot(ts(Data[,y.index][1:(w_size)]))+
      ggtitle(paste(" 95% Out-of-Sample Predictive Interval",  sep="")) +
      xlab("Time") + ylab("Value") +
      autolayer(interval, series="95% Interval")+
      theme(legend.position="none")
    if(out.true){
      true<-data.frame(Index=start:length(Data[,y.index]),
                       Value=Data[,y.index][start:length(Data[,y.index])])
      plot<-plot+
        geom_line(data=true, mapping=aes(x=Index, y=Value),
                  color="blue", alpha=0.6, linetype="longdash")
    }
  }else{
    w_size=.data$Model$w_size
    Date<-index(original)
    Value<-as.numeric(original)
    ForeValue<-vector()
    UpperValue<-vector()
    LowerValue<-vector()

    if(transform=="logdiff"){
      for(i in 1:length((Date))-w_size){
        ForeValue[i]<-exp(Results$Forecast[i])*Value[(w_size+i)]
        UpperValue[i]<-exp(Results$Upper[i])*Value[(w_size+i)]
        LowerValue[i]<-exp(Results$Lower[i])*Value[(w_size+i)]
      }
    }else if(transform=="diff"){
      for(i in 1:length((Date))-w_size){
        ForeValue[i]<-Results$Forecast[i]+Value[(w_size+i)]
        UpperValue[i]<-Results$Upper[i]+Value[(w_size+i)]
        LowerValue[i]<-Results$Lower[i]+Value[(w_size+i)]
      }
    }else{
      stop("Unsupported transformation type")
    }

    ForeValue<-zoo(ForeValue, order.by=Date[(w_size+2):length(Date)])
    UpperValue<-zoo(UpperValue, order.by=Date[(w_size+2):length(Date)])
    LowerValue<-zoo(LowerValue, order.by=Date[(w_size+2):length(Date)])
    Values<-merge(original, ForeValue, UpperValue, LowerValue)

    names(Values)<-c("Realized", "Forecasts", "Upper", "Lower")

    if(is.null(title)){
      title<-paste("Out-of-Sample Forecasts vs. Original Time Series", sep="")
    }else{
      title<-title
    }

    plot<-ggplot2::ggplot(Values)+
      geom_line(aes(x=Date, y=as.numeric(Realized)))+
      geom_line(aes(x=Date, y=as.numeric(Forecasts)), color="blue", na.rm=T)+
      geom_line(aes(x=Date, y=as.numeric(Upper)), color="red", linetype="longdash", na.rm=T)+
      geom_line(aes(x=Date, y=as.numeric(Lower)), color="red", linetype="longdash", na.rm=T)+
      ylab("Values")+
      ggtitle(title)
  }
  return(plot)
}




rMSE<-function(forecast.main=NULL, forecast.benchmark=NULL){
  if(class(forecast.main)!="Farima"|class(forecast.benchmark)!="Farima"){
    stop("Inputs must be of class 'Farima'.")
  }
  if(length(forecast.main$Forecasts$Forecasts)!=length(forecast.benchmark$Forecasts$Forecasts)){
    stop("Lengths of forecasts differ.")
  }
  MSE.main<-vector()
  MSE.benchmark<-vector()
  for(i in 1:length(forecast.main$Forecasts$Forecasts)){
    MSE.main[i]<-mean(na.omit(forecast.main$Forecasts$Errors[1:i])^2)
    MSE.benchmark[i]<-mean(na.omit(forecast.benchmark$Forecasts$Errors[1:i])^2)
  }
  results<-data.frame(MSE.main=MSE.main, MSE.benchmark=MSE.benchmark)
  results$MSE.diff<-MSE.main-MSE.benchmark
  results$MSE.ratio<-MSE.main/MSE.benchmark
  class(results)<-"rMSE"
  return(results)
}








DirTest<-function(forecasts=NULL, weighted=TRUE){
  if(!class(forecasts)%in%c("Farima")){
    stop("Argument forecasts has to be of class 'Farima'.")
  }
  nw <- function(y,qn){
    T <- length(y)
    ybar <- rep(1,T) * ((sum(y))/T)
    dy <- y-ybar
    G0 <- t(dy) %*% dy/T
    for (j in 1:qn){
      gamma <- t(dy[(j+1):T]) %*% dy[1:(T-j)]/T
      G0 <- G0+(gamma+t(gamma))*(1-abs(j/(qn+1)))
    }
    return(as.numeric(G0))
  }

  if(weighted){
    For<-na.omit(forecasts$Forecasts$ForecastDirection)
    True<-na.omit(forecasts$Forecasts$Realized)

    weighted.dir<-as.numeric(For)*as.numeric(True)
    P <- length(weighted.dir)
    varfroll.adj <- nw(y=weighted.dir, qn=1)
    CW <- sqrt(P)*(mean(weighted.dir))/sqrt(varfroll.adj)
    pv <- 1-pnorm(CW,0,1)
    results=list(test.statistic=CW, pvalue=pv)
  }else{
    For<-na.omit(forecasts$Forecasts$ForecastDirection)-mean(na.omit(forecasts$Forecasts$ForecastDirection))
    True<-na.omit(forecasts$Forecasts$TrueDirection)-mean(na.omit(forecasts$Forecasts$TrueDirection))
    unweighted.dir<-as.numeric(For)*as.numeric(True)
    P<-length(unweighted.dir)
    varfroll.adj<-nw(y=unweighted.dir, qn=1)
    CW<-sqrt(P)*(mean(unweighted.dir))/sqrt(varfroll.adj)
    pv<-1-pnorm(CW,0,1)
    results=list(test.statistic=CW, pvalue=pv)
  }
  return(results)
}










plot.rMSE<-function(.data=NULL, type="ratio", title=NULL){
  if(type=="ratio"){
    x<-.data$MSE.ratio
    lab<-"MSE Ratio (Main/Benchmark)"
    intercept=1
  }else if(type=="diff"){
    x<-.data$MSE.diff
    lab<-"MSE Difference (Main-Benchmark)"
    intercept=0
  }else{
    stop("Type not supported. It should either be 'ratio' or 'diff'.")
  }
  if(is.null(title)){
    title<-"Recursive MSE"
  }

  suppressMessages(require(ggplot2))

  x<-data.frame(Index=1:length(x), Value=x)
  plot<-ggplot2::ggplot(x)+
    geom_line(aes(x=Index, y=Value))+
    xlab("Forecast Stopping Point")+
    ylab(lab)+
    ggtitle(title)+
    geom_hline(yintercept=intercept, color="red")
  return(plot)
}




ForCompare<-function(..., benchmark.index=NULL, directional=c("weighted", "binary")){
  suppressMessages(require(matrixStats))

  if(class(list(...)[[1]])=="list"){
    model.list<-list(...)[[1]]
  }else{
    model.list<-list(...)
  }

  cw <- function(e.m1,e.m2,yf.m1,yf.m2){
    nw <- function(y,qn){
      T <- length(y)
      ybar <- rep(1,T) * ((sum(y))/T)
      dy <- y-ybar
      G0 <- t(dy) %*% dy/T
      for (j in 1:qn){
        gamma <- t(dy[(j+1):T]) %*% dy[1:T-j]/(T-1)
        G0 <- G0+(gamma+t(gamma))*(1-abs(j/qn))
      }
      return(as.numeric(G0))
    }
    P <- length(e.m1)
    froll.adj <- e.m1^2-(e.m2^2-(yf.m1-yf.m2)^2)
    varfroll.adj <- nw(froll.adj,1)
    CW <- sqrt(P)*(mean(froll.adj))/sqrt(varfroll.adj)
    pv <- 1-pnorm(CW,0,1)
    results=list(test=CW,pvalue=pv)
    return(results)
  }

  model.list<-list(...)
  for(i in 1:length(model.list)){
    if(!class(model.list[[i]])%in%c("Farima")){
      stop(paste("Object number ", i, " is not of class 'Farima'."))
    }
  }

  MSE<-vector()
  names<-paste("Model ", seq(from=1, to=length(model.list), by=1), sep="")
  if(class(benchmark.index)=="integer"){
    DMW<-vector()
    CW<-vector()
    MSERatio<-vector()
    e1=as.numeric(model.list[[benchmark.index]]$Forecasts$Errors)
    yf1=as.numeric(model.list[[benchmark.index]]$Forecasts$Forecasts)
    MSEbench<-model.list[[benchmark.index]]$Accuracy$MSPE
    for(i in 1:length(model.list)){
      MSE[i]<-model.list[[i]]$Accuracy$MSPE
      MSERatio[i]<-MSE[i]/MSEbench
      if(i==as.numeric(benchmark.index)){
        DMW[i]<-NA
        CW[i]<-NA
      }else{
        DMW[i]<-forecast::dm.test(e1=e1, e2=model.list[[i]]$Forecasts$Errors, "greater", h=1)$p.value
        CW[i]<-cw(e.m1=e1,
                  e.m2=model.list[[i]]$Forecasts$Errors,
                  yf.m1=yf1,
                  yf.m2=model.list[[i]]$Forecasts$Forecasts)$pvalue
      }
    }

    table<-data.frame(Model=names, MSPE=MSE, MSPERatio=MSERatio, DMW=DMW, CW=CW)
  }else if(is.null(benchmark.index)){
    for(i in 1:length(model.list)){
      MSE[i]<-model.list[[i]]$Accuracy$MSPE
    }
    table<-data.frame(Model=names, MSPE=MSE)
  }else{
    stop("The argument benchmark.index should either be omitted or of the classe 'integer'. ")
  }

  if("weighted"%in%directional){
    weighted.p<-vector()
    for(i in 1:length(model.list)){
      weighted.p[i]<-DirTest(forecasts=model.list[[i]], weighted=T)$pvalue
    }
    table$Weighted<-weighted.p
  }
  if("binary"%in%directional){
    unweighted.p<-vector()
    for(i in 1:length(model.list)){
      unweighted.p[i]<-DirTest(forecasts=model.list[[i]], weighted=F)$pvalue
    }
    table$Unweighted<-unweighted.p
  }
  return(table)
}
ZehuaWu/farima documentation built on May 29, 2019, 12:01 a.m.