# R/bandit.R

#' @importFrom R6 R6Class
#' @export
Bandit <- R6::R6Class(
  # class = FALSE: skip the S3 class attribute (R6 performance option). As a
  # consequence inherits() cannot identify instances, which is why the class
  # name is tracked manually in the class_name field below.
  class    = FALSE,
  public   = list(
    k           = NULL,  # Number of arms (integer, required)
    d           = NULL,  # Dimension of context feature vector (integer, required)
    unique      = NULL,  # Vector of arm indices of unique context features (vector, optional)
    shared      = NULL,  # Vector of arm indices of context features shared between arms (vector, optional)
    class_name  = "Bandit",  # Manual class identifier (class = FALSE disables the S3 attribute)
    initialize  = function() {
      # Is called once, before a Simulator clones the Bandit instance.
      # Subclasses must set self$k and self$d here.
    },
    post_initialization = function() {
      # Is called after a Simulator has cloned the Bandit instance [number_of_simulations] times.
      # Do simulation-level random generation here.
      invisible(self)
    },
    get_context = function(t) {
      # Abstract method: every concrete Bandit must override this.
      stop("Bandit subclass needs to implement bandit$get_context()", call. = FALSE)
      # Unreachable template (nocov) documenting the expected return shape:
      # a list with number of arms self$k, number of feature dimensions self$d and, where
      # applicable, a self$d dimensional context vector or self$d x self$k dimensional context matrix X.
      list(X = context, k = arms, d = features) # nocov
    },
    get_reward = function(t, context, action) {
      # Abstract method: every concrete Bandit must override this.
      stop("Bandit subclass needs to implement bandit$get_reward()", call. = FALSE)
      # Unreachable template (nocov) documenting the expected return shape:
      # a list with the reward of the chosen arm and, if available, optimal arm reward and index.
      list(reward = reward_for_choice_made, optimal_reward = optimal_reward, optimal_arm = optimal_arm) # nocov
    },
    generate_bandit_data = function(n) {
      # Optional hook: pregenerate n contexts and rewards here.
    },
    final = function() {
      # Called on object destruction.
      # NOTE(review): R6's built-in destructor hook is finalize(), not final();
      # presumably the Simulator invokes final() explicitly — confirm with caller.
    }
  )
)

#' Bandit: Superclass
#'
#' Parent or superclass of all \code{\{contextual\}} \code{Bandit} subclasses.
#'
#' In \code{\{contextual\}}, \code{Bandits} are responsible for the generation of (either
#' synthetic or offline) contexts and rewards.
#'
#' On initialisation, a \code{Bandit} subclass has to define the number of arms \code{self$k}
#' and the number of contextual feature dimensions \code{self$d}.
#'
#' For each \emph{t} = \{1, \ldots, T\} a \code{Bandit} then generates a \code{list} containing
#' current context in \code{d x k} dimensional matrix \code{context$X},
#' the number of arms in \code{context$k} and the number of features in \code{context$d}.
#'
#' Note: in context-free scenarios, \code{context$X} can be omitted.
#'
#' ![](3abandit.jpeg "contextual diagram: get context")
#'
#' On receiving the index of a \code{\link{Policy}}-chosen arm through \code{action$choice},
#' \code{Bandit} is expected to return a named \code{list} containing at least \code{reward$reward}
#' and, where computable, \code{reward$optimal}.
#'
#' ![](3cbandit.jpeg "contextual diagram: get reward")
#'
#' @name Bandit
#' @aliases post_initialization get_context generate_bandit_data bandit
#'
#' @section Usage:
#' \preformatted{
#'   bandit <- Bandit$new()
#' }
#'
#' @section Methods:
#'
#' \describe{
#'
#'   \item{\code{new()}}{ generates and instantiates a new \code{Bandit} instance. }
#'
#'   \item{\code{get_context(t)}}{
#'      argument:
#'      \itemize{
#'          \item \code{t}: integer, time step \code{t}.
#'      }
#'      returns a named \code{list}
#'      containing the current \code{d x k} dimensional matrix \code{context$X},
#'      the number of arms \code{context$k} and the number of features \code{context$d}.
#'  }
#'
#'   \item{\code{get_reward(t, context, action)}}{
#'      arguments:
#'      \itemize{
#'          \item \code{t}: integer, time step \code{t}.
#'          \item \code{context}: list, containing the current \code{context$X} (d x k context matrix),
#'          \code{context$k} (number of arms) and \code{context$d} (number of context features)
#'          (as set by \code{bandit}).
#'          \item \code{action}:  list, containing \code{action$choice} (as set by \code{policy}).
#'      }
#'      returns a named \code{list} containing \code{reward$reward} and, where computable,
#'         \code{reward$optimal} (used by "oracle" policies and to calculate regret).
#'  }
#'
#'   \item{\code{post_initialization()}}{
#'      Is called after a Simulator has cloned the Bandit instance \code{number_of_simulations} times.
#'      Do sim level random generation here.
#'   }
#'
#'   \item{\code{generate_bandit_data(n)}}{
#'      Is called after cloning the Bandit instance \code{number_of_simulations} times.
#'      Differentiates itself from \code{post_initialization()} in that it is called after the optional
#'      arm-multiplier option is applied in Simulator, and in that it is possible to set the length of
#'      the to be generated data with the function's \code{n} parameter.
#'   }
#' }
#'
#' @seealso
#'
#' Core contextual classes: \code{\link{Bandit}}, \code{\link{Policy}}, \code{\link{Simulator}},
#' \code{\link{Agent}}, \code{\link{History}}, \code{\link{Plot}}
#'
#' Bandit subclass examples: \code{\link{BasicBernoulliBandit}}, \code{\link{ContextualLogitBandit}},
#' \code{\link{OfflineReplayEvaluatorBandit}}
#'
#' Policy subclass examples: \code{\link{EpsilonGreedyPolicy}}, \code{\link{ContextualLinTSPolicy}}
NULL

# Try the contextual package in your browser
#
# Any scripts or data that you put into this service are public.
#
# contextual documentation built on July 26, 2020, 1:06 a.m.