R/predict_seq.R

Defines functions predict_seq

Documented in predict_seq

#' Get sequentially updated predictions from a GP model
#'
#' Obtains predictions for \code{newdata} given a GP model, but sequentially
#' adds each new observation to the training data and refits the model. This
#' simulates a real-time forecasting application.
#' 
#' In comparison to \code{\link{predict.GP}}, this prediction method is similar
#' to \code{predictmethod="sequential"} (leave-future-out), but in this case,
#' the future time points are not included in the training data. It is also
#' similar to the train/test split using \code{newdata}, but in this case, the
#' training data and model are updated with each timestep.
#' 
#' Using this method requires the use of \code{data} with pre-generated lags
#' (option A1 in \code{\link{fitGP}}), that a \code{time} column is specified,
#' that \code{newdata} has exactly the same columns as \code{data}, and that
#' \code{newdata} contains observed values. 
#' 
#' The original \code{data} data frame is obtained from the global environment,
#' so results will be not be accurate if it is changed. Also, if for whatever
#' reason you want to use \code{update} on the output of \code{predict_seq}, it
#' will probably not work unless you respecify \code{data} and
#' \code{initpars}. 
#'
#' @param object Output from \code{fitGP}.
#' @param newdata Data frame containing the same columns supplied in the
#'   original model.
#' @param restart.initpars Set initpars to pars of the previous model (FALSE, default), or restart 
#'   with the default initspars each time (TRUE). Starting at the last values can reduce the number 
#'   of iterations required, saving time, but might trap you in a local minimum.
#' @param refit.b If a "fisheries model", should the b parameter be refit at each iteration? 
#'   Defaults to FALSE (b fixed to value in original model).
#' @return A list (class GP and GPpred) with the same elements as \code{\link{fitGP}}. 
#'   The model information will be for the final model including all of the original training
#'   data plus \code{newdata}. The \code{outsampresults} and \code{outsampfitstats} will be
#'   for the sequentially updated predictions.
#' @export
#' @keywords functions
predict_seq=function(object,newdata,restart.initpars=F,refit.b=F) { 
  
  if(is.null(object$inputs$time_names)) {
    stop("Model must include `data` and `time` to use this function.")
  }
  if(!is.null(object$inputs$E)) {
    stop("Lags must be pre-generated to use this function. See option A1 in help(fitGP).")
  }
  
  #in fisheries models, should b be updated or not?
  if(refit.b) {
    bfixed=NULL
  } else {
    bfixed=object$b
  }
  
  #get times
  timename=object$inputs$time_names
  newtimes=unique(newdata[,timename])
  
  #get original data from global env
  ogdata=eval(object$call$data)
  
  modelupdated=object
  dataupdated=ogdata
  pred=list()
  
  for(i in seq_along(newtimes)) {
    newdatai=newdata[newdata[,timename]==newtimes[i],]
    #get prediction
    pred[[i]]=predict(modelupdated, newdata=newdatai)$outsampresults
    #append new data
    dataupdated=rbind(dataupdated, newdatai)
    #set initpars
    if(restart.initpars) {
      inits=NULL
    } else {
      inits=modelupdated$pars
    }
    #refit model
    if(!is.null(bfixed)) {
      modelupdated=update(modelupdated, data=dataupdated, initpars=inits, bfixed=bfixed)
    } else {
      modelupdated=update(modelupdated, data=dataupdated, initpars=inits)
    }
  }
  #compile predictions
  outsampresults=do.call(rbind, pred)
  modelupdated$outsampresults=outsampresults

  #return updated model and predictions
  modelupdated$outsampfitstats=c(R2=getR2(outsampresults$obs,outsampresults$predmean), 
                                 rmse=sqrt(mean((outsampresults$obs-outsampresults$predmean)^2,na.rm=T)))
  if(length(unique(outsampresults$pop))>1) { #within site fit stats
    modelupdated$outsampfitstatspop=getR2pop(outsampresults$obs,outsampresults$predmean,outsampresults$pop)
    R2centered=getR2pop(outsampresults$obs,outsampresults$predmean,outsampresults$pop,type = "centered")
    R2scaled=getR2pop(outsampresults$obs,outsampresults$predmean,outsampresults$pop,type = "scaled")
    modelupdated$outsampfitstats=c(modelupdated$outsampfitstats,R2centered=R2centered,R2scaled=R2scaled)
  }

  return(modelupdated)
}
tanyalrogers/GPEDM documentation built on June 1, 2025, 8:10 p.m.