# Nothing
#' @importFrom R6 R6Class
#' @export
Bandit <- R6::R6Class(
  # class = FALSE: R6 does not attach a class attribute to generated objects.
  class = FALSE,
  public = list(

    # ---- Fields ----------------------------------------------------------

    # Number of arms (integer, required).
    k = NULL,
    # Dimension of the context feature vector (integer, required).
    d = NULL,
    # Arm indices of unique context features (vector, optional).
    unique = NULL,
    # Arm indices of context features shared between arms (vector, optional).
    shared = NULL,
    # Name of this class as a string.
    class_name = "Bandit",

    # ---- Lifecycle hooks -------------------------------------------------

    # Constructor stub; runs before the Policy instance has been cloned.
    # Subclasses set self$d and self$k here.
    initialize = function() {
      NULL
    },

    # Hook that runs once a Simulator has cloned this Bandit
    # [number_of_simulations] times. Simulation-level random generation
    # belongs here. Returns self invisibly.
    post_initialization = function() {
      invisible(self)
    },

    # ---- Abstract interface ----------------------------------------------

    # Must be overridden by subclasses; the base implementation always errors.
    # An override is expected to return
    #   list(X = context, k = arms, d = features)
    # i.e. the number of arms self$k, the number of feature dimensions self$d
    # and, where applicable, a self$d dimensional context vector or a
    # self$d x self$k dimensional context matrix X.
    get_context = function(t) {
      stop("Bandit subclass needs to implement bandit$get_context()", call. = FALSE)
    },

    # Must be overridden by subclasses; the base implementation always errors.
    # An override is expected to return
    #   list(reward = reward_for_choice_made,
    #        optimal_reward = optimal_reward,
    #        optimal_arm = optimal_arm)
    # i.e. the reward of the chosen arm and, if available, the optimal arm's
    # reward and index.
    get_reward = function(t, context, action) {
      stop("Bandit subclass needs to implement bandit$get_reward()", call. = FALSE)
    },

    # ---- Optional hooks --------------------------------------------------

    # Optional hook: pregenerate n contexts and rewards. No-op by default.
    generate_bandit_data = function(n) {
      NULL
    },

    # Optional hook called on object destruction. No-op by default.
    final = function() {
      NULL
    }
  )
)
#' Bandit: Superclass
#'
#' Parent or superclass of all \code{\{contextual\}} \code{Bandit} subclasses.
#'
#' In \code{\{contextual\}}, \code{Bandits} are responsible for the generation of (either
#' synthetic or offline) contexts and rewards.
#'
#' On initialisation, a \code{Bandit} subclass has to define the number of arms \code{self$k}
#' and the number of contextual feature dimensions \code{self$d}.
#'
#' For each \emph{t} = \{1, \ldots, T\} a \code{Bandit} then generates a \code{list} containing
#' the current context in the \code{d x k} dimensional matrix \code{context$X},
#' the number of arms in \code{context$k} and the number of features in \code{context$d}.
#'
#' Note: in context-free scenarios, \code{context$X} can be omitted.
#'
#' 
#'
#' On receiving the index of a \code{\link{Policy}}-chosen arm through \code{action$choice},
#' \code{Bandit} is expected to return a named \code{list} containing at least \code{reward$reward}
#' and, where computable, \code{reward$optimal_reward} and \code{reward$optimal_arm}.
#'
#' 
#'
#' @name Bandit
#' @aliases post_initialization get_context generate_bandit_data bandit
#'
#' @section Usage:
#' \preformatted{
#' bandit <- Bandit$new()
#' }
#'
#' @section Methods:
#'
#' \describe{
#'
#' \item{\code{new()}}{ generates and initializes a new \code{Bandit} instance. }
#'
#' \item{\code{get_context(t)}}{
#' argument:
#' \itemize{
#' \item \code{t}: integer, time step \code{t}.
#' }
#' returns a named \code{list}
#' containing the current \code{d x k} dimensional matrix \code{context$X},
#' the number of arms \code{context$k} and the number of features \code{context$d}.
#' }
#'
#' \item{\code{get_reward(t, context, action)}}{
#' arguments:
#' \itemize{
#' \item \code{t}: integer, time step \code{t}.
#' \item \code{context}: list, containing the current \code{context$X} (d x k context matrix),
#' \code{context$k} (number of arms) and \code{context$d} (number of context features)
#' (as set by \code{bandit}).
#' \item \code{action}: list, containing \code{action$choice} (as set by \code{policy}).
#' }
#' returns a named \code{list} containing \code{reward$reward} and, where computable,
#' \code{reward$optimal_reward} and \code{reward$optimal_arm} (used by "oracle" policies and to
#' calculate regret).
#' }
#'
#' \item{\code{post_initialization()}}{
#' Is called after a Simulator has cloned the Bandit instance \code{number_of_simulations} times.
#' Do sim level random generation here.
#' }
#'
#' \item{\code{generate_bandit_data(n)}}{
#' Is called after cloning the Bandit instance \code{number_of_simulations} times.
#' Differs from \code{post_initialization()} in that it is called after the optional
#' arm-multiplier option is applied in Simulator, and in that the length of the data
#' to be generated can be set through the function's \code{n} parameter.
#' }
#' }
#'
#' @seealso
#'
#' Core contextual classes: \code{\link{Bandit}}, \code{\link{Policy}}, \code{\link{Simulator}},
#' \code{\link{Agent}}, \code{\link{History}}, \code{\link{Plot}}
#'
#' Bandit subclass examples: \code{\link{BasicBernoulliBandit}}, \code{\link{ContextualLogitBandit}},
#' \code{\link{OfflineReplayEvaluatorBandit}}
#'
#' Policy subclass examples: \code{\link{EpsilonGreedyPolicy}}, \code{\link{ContextualLinTSPolicy}}
NULL
# Any scripts or data that you put into this service are public.
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.