# R/SMG.Latest.Entry.R

#' SMG.Latest.Entry
#'
#' The OnlineSuperLearner uses various \code{SummaryMeasureGenerator} instances
#' to generate new variables that can be used to represent the relevant history
#' of a variable. The \code{SMG.Latest.Entry} class allows a user to include the
#' variables of the current measurement (the contemporaneous variables). This is
#' a very basic SMG.
#'
#' @docType class
#' @importFrom R6 R6Class
#' @section Methods:
#' \describe{
#'   \item{\code{initialize(colnames.to.use) }}{
#'     Initializes a new \code{SMG.Latest.Entry} object.
#'
#'     @param colnames.to.use vector a vector containing the names of the
#'      variables to include in the blocks generated by this SMG.
#'   }
#'
#'   \item{\code{set_minimal_observations(minimal_observations) }}{
#'     A setter for the minimal number of observations needed by this SMG.
#'     This is usually 1 (the current one). An error is raised when the
#'     provided value is not a single, non-missing number of at least 1.
#'
#'     @param minimal_observations integer the minimal number of observations
#'      needed.
#'   }
#'
#'   \item{\code{update(data.current) }}{
#'     For online learning we need to be able to create new data blocks on the
#'     fly (as not all data is available beforehand). For this SMG the next
#'     contemporaneous values cannot be derived from the current ones, so the
#'     returned block only contains \code{NA} values.
#'
#'     @param data.current data.table the current data / the last data used for
#'      training. It is not used by this SMG.
#'
#'     @return data.table a single row with one \code{NA} column for each name
#'      in \code{colnames.to.use}.
#'   }
#'
#'   \item{\code{process(data.current) }}{
#'     Adds the contemporaneous columns for all provided data. It goes through
#'     the data and selects the variables specified on initialization. When
#'     more than one observation is required, the first
#'     \code{minimalObservations - 1} rows are dropped from the result.
#'
#'     @param data.current data.table the currently available data.
#'
#'     @return data.table with the new summary measure columns.
#'   }
#'
#'   \item{\code{exposedVariables}}{
#'     Active method. Returns a list of variables returned by this SMG.
#'
#'     @return vector a vector of strings containing all specified (used)
#'      contemporaneous colnames.
#'   }
#'
#'   \item{\code{minimalObservations}}{
#'     Active method. The minimal number of measurements needed for this SMG to
#'     be able to generate a new block. This is usually just 1.
#'
#'     @return integer the minimal number of measurements needed.
#'   }
#' }
#' @export
SMG.Latest.Entry <- R6Class("SMG.Latest.Entry",
  inherit = SMG.Base,
  public =
    list(
      initialize = function(colnames.to.use) {
        private$colnames.to.use <- colnames.to.use
      },

      set_minimal_observations = function(minimal_observations) {
        # A value below 1 would make process() call tail() with a positive n,
        # which keeps only the last rows instead of dropping the leading ones.
        is_valid <- is.numeric(minimal_observations) &&
          length(minimal_observations) == 1L &&
          !is.na(minimal_observations) &&
          minimal_observations >= 1
        if (!is_valid) {
          stop("minimal_observations must be a single number >= 1",
               call. = FALSE)
        }
        private$theMinimalObservations <- minimal_observations
      },

      update = function(data.current) {
        # An update means getting the next set of variables, based on the
        # current ones. Since these would be the 'new' current ones, this is
        # never possible. Hence we return a single row of NA's, with one named
        # column per configured variable.
        result <- rep(NA, length(private$colnames.to.use))
        names(result) <- private$colnames.to.use
        as.data.table(t(result))
      },

      process = function(data.current) {
        # check_enough_available is not defined in this class; it is expected
        # to be provided by the parent class (SMG.Base).
        self$check_enough_available(data.current)

        selected <- data.current[, private$colnames.to.use, with = FALSE]

        # Number of leading rows to drop so that the result only starts at the
        # first row for which enough observations are available.
        n_skip <- self$minimalObservations - 1
        if (n_skip <= 0) return(selected)
        tail(selected, -n_skip)
      }
    ),
  active =
    list(
      # The column names handed to initialize(), unchanged.
      exposedVariables = function() {
        private$colnames.to.use
      },

      # The value stored through set_minimal_observations() (1 by default).
      minimalObservations = function() {
        private$theMinimalObservations
      }
    ),
  private =
    list(
      colnames.to.use = NULL,
      theMinimalObservations = 1
    )
)
# frbl/OnlineSuperLearner documentation built on Feb. 9, 2020, 9:28 p.m.