# LifPolicy: continuum bandit policy implementing Lock-in Feedback (LiF).
#
# The policy plays x0 + amplitude * cos(omega * t), multiplies every observed
# reward by the same oscillation term, keeps the most recent products in a
# buffer of length `inttime` and, once t > inttime, shifts x0 by
# learnrate * sum(buffer) / inttime.
#
# The class is created with portable = FALSE, so methods can reach members by
# bare name as well as through `self$`. `theta` and `action` are not declared
# in this class; presumably they are provided by the parent `Policy` class --
# verify against its definition.
#' @export
LifPolicy <- R6::R6Class(
  portable = FALSE,
  class = FALSE,
  inherit = Policy,
  public = list(
    first = NULL,      # Not referenced by any method of this class
    inttime = NULL,    # Integration time: length of the reward buffer
    amplitude = NULL,  # Amplitude of the oscillation added to x0
    learnrate = NULL,  # Learn rate: step size of the x0 update
    omega = NULL,      # Omega: angular frequency of the oscillation
    x0_start = NULL,   # Start value of x0
    class_name = "LifPolicy",

    # Store the LiF tuning parameters on the object.
    #
    # inttime   : single finite number >= 1; buffer length and divisor of the
    #             x0 update.
    # amplitude : oscillation amplitude.
    # learnrate : step size applied to the averaged buffer.
    # omega     : oscillation frequency (multiplied by t inside cos()).
    # x0_start  : initial centre value x0.
    #
    # Stops with an error when `inttime` is not a single finite number >= 1.
    initialize = function(inttime, amplitude, learnrate, omega, x0_start) {
      # inttime sizes the buffer and is used as a divisor in set_reward().
      # Values below 1 give an empty (or invalid) buffer; inttime = 0 would
      # even turn x0 into NaN through 0 / 0. Reject such values up front.
      stopifnot(
        is.numeric(inttime),
        length(inttime) == 1L,
        is.finite(inttime),
        inttime >= 1
      )
      super$initialize()
      self$inttime <- inttime
      self$amplitude <- amplitude
      self$learnrate <- learnrate
      self$omega <- omega
      self$x0_start <- x0_start
    },

    # (Re)initialise theta: x0 is set to its start value and Y becomes a
    # buffer of `inttime` missing values. `context_params` is not used.
    set_parameters = function(context_params) {
      self$theta <- list(
        "x0" = self$x0_start,
        "Y" = rep(NA_real_, self$inttime)
      )
    },

    # Return the action for step t: the current x0 plus the oscillation
    # amplitude * cos(omega * t). `context` is not used.
    get_action = function(t, context) {
      # `action` is looked up by bare name (non-portable class); assigning to
      # `action$choice` modifies a local copy, which is what gets returned.
      action$choice <- self$theta$x0 + self$amplitude * cos(self$omega * t)
      action
    },

    # Process the reward observed at step t and return the updated theta.
    # Only `reward$reward` and `t` are used; `context` and `action` are not.
    set_reward = function(t, context, action, reward) {
      observed <- reward$reward
      # Reward multiplied by the same oscillation term used in get_action().
      y <- self$amplitude * cos(self$omega * t) * observed
      # Push the newest value to the front and drop the oldest one, keeping
      # the buffer length fixed.
      self$theta$Y <- c(y, self$theta$Y)[seq_along(self$theta$Y)]
      # Update x0 only after more than `inttime` steps; this assumes t counts
      # set_reward() calls starting at 1, so that the buffer no longer holds
      # its initial NA placeholders -- confirm against the caller.
      if (t > self$inttime) {
        self$theta$x0 <- self$theta$x0 +
          self$learnrate * sum(self$theta$Y) / self$inttime
      }
      self$theta
    }
  )
)
#' Policy: Continuum Bandit Policy with Lock-in Feedback
#'
#' The continuum type Lock-in Feedback (LiF) policy is based on an approach used in physics and engineering,
#' where, if a physical variable y depends on the value of a well controllable physical variable x,
#' the search for argmax x f(x) can be solved via what is nowadays considered as standard electronics.
#' This approach relies on the possibility of making the variable x oscillate at a fixed frequency and to
#' look at the response of the dependent variable y at the very same frequency by means of a lock-in
#' amplifier. The method is particularly suitable when y is immersed in a high noise level,
#' where other more direct methods would fail. Furthermore, should the entire curve
#' shift (or, in other words, if argmax x f(x) changes in time, also known as concept drift), the circuit
#' will automatically adjust to the new situation and quickly reveal the new maximum position.
#' This approach is widely used in a very large number of applications, both in industry and research,
#' and is the basis for the Lock-in Feedback (LiF) method.
#'
#' In this policy, Lock-in Feedback goes through the following steps, again and again:
#'
#' * Oscillate a controllable independent variable X around a set value at a fixed pace.
#' * Apply the lock-in amplifier algorithm to obtain the amplitude of the outcome variable Y at
#' the pace you set at step 1.
#' * Is the amplitude of this variable zero? Congratulations, you have reached lock-in!
#' That is, you have found the optimal value of Y at the current value of X.
#' Still, this optimal value might shift over time, so move to step 1 and repeat the process to make sure we
#' maintain lock-in.
#' * Is the amplitude less than, or greater than zero?
#' Then move the set value around which we are oscillating our independent variable X up or down on the basis
#' of the outcome.
#'
#' Now move to step 1 and repeat.
#'
#' @name LifPolicy
#'
#' @section Usage:
#' \preformatted{b <- LifPolicy$new(inttime,amplitude,learnrate,omega,x0_start)}
#'
#' @references
#' Kaptein, M. C., Van Emden, R., & Iannuzzi, D. (2016). Tracking the decoy: maximizing the decoy effect
#' through sequential experimentation. Palgrave Communications, 2, 16082.
#'
#' @seealso
#'
#' Core contextual classes: \code{\link{Bandit}}, \code{\link{Policy}}, \code{\link{Simulator}},
#' \code{\link{Agent}}, \code{\link{History}}, \code{\link{Plot}}
#'
#' Bandit subclass examples: \code{\link{BasicBernoulliBandit}}, \code{\link{ContextualLogitBandit}},
#' \code{\link{OfflineReplayEvaluatorBandit}}
#'
#' Policy subclass examples: \code{\link{EpsilonGreedyPolicy}}, \code{\link{ContextualLinTSPolicy}}
NULL
# Add the following code to your website.
# For more information on customizing the embed code, read Embedding Snippets.