R/func_epsilon.R
In multiRL: Reinforcement Learning Tools for Multi-Armed Bandit

Documented in func_epsilon

#' @title Function: Exploration or Exploitation
#' @description
#' 
#'  \eqn{\epsilon-first}: 
#'  
#'  \deqn{
#'  P(x) = 
#'  \begin{cases}
#'    i \le \text{threshold}, & x=1  \\
#'    i > \text{threshold}, & x=0 
#'  \end{cases}
#'  }
#'  
#'  \eqn{\epsilon-greedy}: 
#'  
#'  \deqn{
#'  P(x) = 
#'  \begin{cases}
#'    \epsilon, & x=1  \\
#'    1-\epsilon, & x=0 
#'  \end{cases}
#'  }
#'  
#'  \eqn{\epsilon-decreasing}:
#'  
#'  \deqn{
#'  P(x) = 
#'  \begin{cases}
#'    \frac{1}{1+\epsilon \cdot i}, & x=1  \\
#'    \frac{\epsilon \cdot i}{1+\epsilon \cdot i}, & x=0 
#'  \end{cases}
#'  }
#'
#' @param shown
#'  Which options shown in this trial.
#' @param rownum 
#'  The trial number
#' @param params 
#'  Parameters used by the model's internal functions,
#'    see \link[multiRL]{params}
#' @param ... 
#'  It currently contains the following information; additional information 
#'    may be added in future package versions.
#' \itemize{
#'   \item idinfo: 
#'      \itemize{
#'        \item subid
#'        \item block
#'        \item trial
#'      }
#'   \item exinfo: 
#'      contains information whose column names are specified by the user.
#'      \itemize{
#'        \item Frame
#'        \item RT
#'        \item NetWorth
#'        \item ...
#'      }
#'   \item behave: 
#'      includes the following:
#'      \itemize{
#'        \item action: 
#'          the behavior performed by the human in the given trial.
#'        \item latent: 
#'          the object updated by the agent in the given trial.
#'        \item simulation: 
#'          the actual behavior performed by the agent.
#'        \item position:
#'          the position of the stimulus on the screen.
#'      }
#'    \item cue and rsp:
#'      Cues and responses within latent learning rules, 
#'        see \link[multiRL]{behrule} 
#'    \item state:
#'      The state stores the stimuli shown in the current trial—split into 
#'      components by underscores—and the rewards associated with them.
#' }
#'    
#' @return An \code{int}, either 0 or 1, indicating exploration or 
#'    exploitation on the current trial.
#'    
#' @section Body: 
#' \preformatted{func_epsilon <- function(
#'     shown,
#'     rownum,
#'     params,
#'     ...
#' ){
#' 
#'   list2env(list(...), envir = environment())
#'   
#'   # If you need extra information(...)
#'   # Column names may be lost(C++), indexes are recommended
#'   # e.g.
#'   # Trial  <- idinfo[3]
#'   # Frame  <- exinfo[1]
#'   # Action <- behave[1]
#'   
#'   epsilon   <-  params[["epsilon"]]
#'   threshold <-  params[["threshold"]]
#'   
#'   # Determine the model currently in use based on which parameters are free.
#'   if (is.na(epsilon) && threshold > 0) {
#'     model <- "first"
#'   } else if (!(is.na(epsilon)) && threshold == 0) {
#'     model <- "decreasing"
#'   } else if (!(is.na(epsilon)) && threshold == 1) {
#'     model <- "greedy"
#'   } else {
#'     stop("Unknown Model! Plase modify your learning rate function")
#'   }
#'   
#'   set.seed(rownum)
#'   # Epsilon-First: 
#'   if (rownum <= threshold) {
#'     try <- 1
#'   } else if (rownum > threshold && model == "first") {
#'     try <- 0
#'     # Epsilon-Greedy:
#'   } else if (rownum > threshold && model == "greedy"){
#'     try <- as.integer(stats::runif(1) < epsilon)
#'     # Epsilon-Decreasing: 
#'   } else if (rownum > threshold && model == "decreasing") {
#'     prob_explore <- 1 / (1 + epsilon * rownum)
#'     try <- as.integer(stats::runif(1) < prob_explore)
#'   }
#'   
#'   return(try)
#' }
#' }
#' 
func_epsilon <- function(
    shown,
    rownum,
    params,
    ...
){
  
  list2env(list(...), envir = environment())
  
  # If you need extra information(...)
  # Column names may be lost(C++), indexes are recommended
  # e.g.
  # Trial  <- idinfo[3]
  # Frame  <- exinfo[1]
  # Action <- behave[1]
  
  epsilon   <-  params[["epsilon"]]
  threshold <-  params[["threshold"]]
  
  # Determine the model currently in use based on which parameters are free.
  if (is.na(epsilon) && threshold > 0) {
    model <- "first"
  } else if (!(is.na(epsilon)) && threshold == 0) {
    model <- "decreasing"
  } else if (!(is.na(epsilon)) && threshold == 1) {
    model <- "greedy"
  } else {
    stop("Unknown Model! Plase modify your learning rate function")
  }
  
  set.seed(rownum)
  # Epsilon-First: 
  if (rownum <= threshold) {
    try <- 1
  } else if (rownum > threshold && model == "first") {
    try <- 0
  # Epsilon-Greedy:
  } else if (rownum > threshold && model == "greedy"){
    try <- as.integer(stats::runif(1) < epsilon)
  # Epsilon-Decreasing: 
  } else if (rownum > threshold && model == "decreasing") {
    prob_explore <- 1 / (1 + epsilon * rownum)
    try <- as.integer(stats::runif(1) < prob_explore)
  }
  
  return(try)
}