# This is the demo file for the online and offline parameter tuning of Lock-in Feedback
# For policy and bandit specific code, please look at the files sourced below.
# First make sure to install contextual
# (see https://github.com/Nth-iteration-labs/contextual for a how to).
#
# For any questions, please contact the authors.
library(contextual)
library(here)
library(ggplot2)
library(data.table)
source("./bandit_continuum_function_unimodal.R")
source("./bandit_continuum_function_bimodal.R")
source("./bandit_continuum_offon.R")
source("./bandit_continuum_offon_kern.R")
source("./policy_cont_lif_randstart.R")
#############################################################
#                                                           #
#                 Online evaluation for LiF                 #
#                                                           #
#############################################################
### Set seed
set.seed(1)
### Set number of interactions (horizon) and number of repeats (simulations)
### In the paper we used a horizon of 10000 and 10000 simulations;
### fewer simulations are used here so that the demo runs quickly.
horizon <- 10000
simulations <- 10
### Set LiF-specific parameters. The starting point is set within the policy itself.
int_time <- 50
amplitude_list <- seq(0.002, 0.2, length.out = 20)
learn_rate <- .1
omega <- 2*pi/int_time
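### A minimal, self-contained sketch of the lock-in idea (illustrative only; the
### actual policy used below is LifPolicyRandstart in policy_cont_lif_randstart.R
### and may differ in detail). The action oscillates around the current estimate
### x0; multiplying the observed reward by cos(omega * t) and averaging over one
### window of int_time steps gives a signal proportional to the local slope of
### the reward function at x0, which is then used for a gradient-style update.
lif_sketch <- function(f, x0 = 0.5, amp = 0.1, n_windows = 100) {
  for (w in seq_len(n_windows)) {
    t <- seq_len(int_time)
    x <- x0 + amp * cos(omega * t)                 # oscillate around x0
    y <- f(x)                                      # observe (noisy) rewards
    y_omega <- sum(y * cos(omega * t)) / int_time  # lock-in (slope) signal
    x0 <- x0 + learn_rate * y_omega                # move x0 along the slope
  }
  x0
}
# Example (not run as part of the demo): the estimate should move towards 0.6
# lif_sketch(function(x) -(x - 0.6)^2 + rnorm(length(x), 0, 0.01))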
### Set up two different bandits
bandits <- c(ContinuumBanditUnimodal$new(), ContinuumBanditBimodal$new())
### Set up all agents with different amplitudes and run them for each bandit
for (bandit in bandits){
  agents <- list()
  for (i in seq_along(amplitude_list)){
    agents <- append(agents, Agent$new(LifPolicyRandstart$new(int_time, amplitude_list[i], learn_rate, omega), bandit))
  }
  history <- Simulator$new(agents = agents,
                           horizon = horizon,
                           simulations = simulations,
                           do_parallel = TRUE,
                           save_interval = 10)$run()
  ### Post-processing for plotting
  iters <- length(amplitude_list)
  reward_rate <- c()
  confs <- c()
  for(i in 1:iters){
    reward_rate[[i]] <- history$cumulative[[i]]$cum_reward_rate
    confs[[i]] <- history$cumulative[[i]]$cum_reward_rate_sd / sqrt(simulations) * qnorm(0.975)
  }
  df <- data.frame(amp = amplitude_list, reward = reward_rate, ci = confs, delta = "Online")
  g <- ggplot(data = df, aes(x=amp, y=reward)) +
    geom_line(colour = 'red') +
    geom_errorbar(aes(ymin=reward-ci, ymax=reward+ci, color='red'), width=0.01) +
    labs(x = "Amplitude", y = "Average reward per interaction") +
    theme_bw(base_size = 15) +
    theme(legend.position = "none")
  ### Saving data to use later
  if (bandit$class_name == "ContinuumBanditBimodal"){
    online_lif_bimodal <- df
  } else if (bandit$class_name == "ContinuumBanditUnimodal"){
    online_lif_unimodal <- df
  }
  print(g)
  print(amplitude_list[[which.max(reward_rate)]])
}
#############################################################
#                                                           #
#                Offline evaluation for LiF                 #
#                                                           #
#############################################################
### Set seed
set.seed(1)
### Set number of interactions (horizon) and number of repeats (simulations)
### Typically the same as in the online evaluation; reduced here to keep the demo fast.
horizon <- 1000
simulations <- 2
### Set LiF-specific parameters. The starting point is set within the policy itself.
### Typically the same as in the online evaluation.
int_time <- 50
amplitude_list <- seq(0.002, 0.2, length.out = 20)
learn_rate <- .1
omega <- 2*pi/int_time
### Set up the data-generating functions used to create the offline datasets
unimodal_data <- function(x) {
  c1 <- runif(1, 0.25, 0.75)
  c2 <- 1
  return(-(x - c1) ^ 2 + c2 + rnorm(length(x), 0, 0.01))
}
bimodal_data <- function(x){
  mu1 <- runif(1, 0.15, 0.2)
  sd1 <- runif(1, 0.1, 0.15)
  mu2 <- runif(1, 0.7, 0.85)
  sd2 <- runif(1, 0.1, 0.15)
  y1 <- truncnorm::dtruncnorm(x, a=0, b=1, mean=mu1, sd=sd1)
  y2 <- truncnorm::dtruncnorm(x, a=0, b=1, mean=mu2, sd=sd2)
  return(y1 + y2 + rnorm(length(x), 0, 0.01))
}
functions <- list(list("unimodal", unimodal_data), list("bimodal", bimodal_data))
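### Optional, purely illustrative: plot one random draw of each data-generating
### function over the action space [0, 1] to see the unimodal vs bimodal shapes
### (uncomment to run; not needed for the evaluation below).
# xs <- seq(0, 1, length.out = 200)
# plot(xs, unimodal_data(xs), type = "l", xlab = "Action", ylab = "Reward")
# lines(xs, bimodal_data(xs), col = "red")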
### Set up the different deltas for the delta method. If delta = 0, the kernel method is used instead.
deltas <- c(0, 0.01, 0.1, 0.5)
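### How the two offline evaluation methods treat a logged (action, reward) pair,
### in stylized form (illustrative only; the actual logic lives in
### bandit_continuum_offon.R and bandit_continuum_offon_kern.R and may differ):
### - delta method: the logged pair is replayed only if the logged action lies
###   within delta of the action the policy proposes (rejection sampling).
### - kernel method (delta = 0): every logged pair contributes, weighted by a
###   kernel of the distance between logged and proposed action. The Gaussian
###   kernel and the bandwidth below are assumptions made for this sketch.
replay_reward_sketch <- function(proposed, logged_action, logged_reward,
                                 delta = 0, bandwidth = 0.05) {
  if (delta > 0) {
    if (abs(logged_action - proposed) < delta) logged_reward else NA  # reject
  } else {
    dnorm((logged_action - proposed) / bandwidth) * logged_reward     # weight
  }
}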
### Pre-allocation
offline_lif_unimodal_kernel <- data.frame()
offline_lif_unimodal <- data.frame()
offline_lif_bimodal_kernel <- data.frame()
offline_lif_bimodal <- data.frame()
### Set up all agents with different amplitudes and run them for each bandit
### Do this for each specified delta
for (f in functions){
  for (d in deltas){
    if(d == 0){
      bandit <- OnlineOfflineContinuumBanditKernel$new(FUN = f[[2]], horizon = horizon)
    } else {
      bandit <- OnlineOfflineContinuumBandit$new(FUN = f[[2]], delta = d, horizon = horizon)
    }
    agents <- list()
    for (amp in amplitude_list){
      agents <- append(agents, Agent$new(LifPolicyRandstart$new(int_time, amp, learn_rate, omega), bandit))
    }
    history <- Simulator$new(agents = agents,
                             horizon = horizon,
                             simulations = simulations,
                             policy_time_loop = FALSE,
                             save_interval = 20)$run()
    ### Post-processing for plotting: mean reward rate per agent plus a 95% CI
    ### over simulations, taken from each simulation's final time step.
    dt <- history$get_data_table()
    df_split <- split(dt, dt$agent)
    iters <- length(amplitude_list)
    reward_rate <- c()
    confs <- c()
    for(k in 1:iters){
      reward_rate[[k]] <- history$cumulative[[k]]$cum_reward_rate
      # Use the data of the same agent as history$cumulative[[k]], so that each
      # confidence interval matches its own reward rate.
      dd <- as.data.table(df_split[[names(history$cumulative)[k]]])
      maxes <- dd[, .I[which.max(t)], by = sim]$V1
      select <- dd[maxes]$cum_reward_rate
      confs[[k]] <- sd(select) / sqrt(simulations) * qnorm(0.975)
    }
    history$clear_data_table()
    if (d == 0){
      df <- data.frame(amp = amplitude_list, reward = reward_rate, delta = as.factor("Kernel"), ci = confs)
    } else {
      df <- data.frame(amp = amplitude_list, reward = reward_rate, delta = as.factor(d), ci = confs)
    }
    if (f[[1]] == "bimodal"){
      if (d == 0){
        offline_lif_bimodal_kernel <- rbind(offline_lif_bimodal_kernel, df)
      } else {
        offline_lif_bimodal <- rbind(offline_lif_bimodal, df)
      }
    } else if(f[[1]] == "unimodal"){
      if (d == 0){
        offline_lif_unimodal_kernel <- rbind(offline_lif_unimodal_kernel, df)
      } else {
        offline_lif_unimodal <- rbind(offline_lif_unimodal, df)
      }
    }
  }
}
### Plotting both online and offline data together
different_plots <- list(
  list("unimodal", rbind(online_lif_unimodal, offline_lif_unimodal)),
  list("unimodal_kernel", offline_lif_unimodal_kernel),
  list("bimodal", rbind(online_lif_bimodal, offline_lif_bimodal)),
  list("bimodal_kernel", offline_lif_bimodal_kernel)
)
for (dif_plot in different_plots){
  if(dif_plot[[1]] == "unimodal_kernel"){
    g <- ggplot(data = dif_plot[[2]], aes(x=amp, y=reward, label = delta)) +
      geom_line(aes(colour = as.factor(delta))) +
      geom_errorbar(data = dif_plot[[2]], aes(ymin=reward-ci, ymax=reward+ci, color=as.factor(delta)), width=.01) +
      geom_vline(xintercept = 0.115, linetype = "dotted", color = "black", size = 1.5) +
      labs(x = "Amplitude", y = "Average reward per interaction", color="", fill="") +
      theme_bw(base_size = 15) +
      theme(legend.position = "right") # set to "none" to hide the legend
    ggsave(g, file=paste0("offline_lif_function_",dif_plot[[1]],".eps"), device="eps")
    print(g)
  } else if(dif_plot[[1]] == "bimodal_kernel"){
    g <- ggplot(data = dif_plot[[2]], aes(x=amp, y=reward, label = delta)) +
      geom_line(aes(colour = as.factor(delta))) +
      geom_errorbar(data = dif_plot[[2]], aes(ymin=reward-ci, ymax=reward+ci, color=as.factor(delta)), width=.01) +
      geom_vline(xintercept = 0.035, linetype = "dotted", color = "black", size = 1.5) +
      labs(x = "Amplitude", y = "Average reward per interaction", color="", fill="") +
      theme_bw(base_size = 15) +
      theme(legend.position = "right") # set to "none" to hide the legend
    ggsave(g, file=paste0("offline_lif_function_",dif_plot[[1]],".eps"), device="eps")
    print(g)
  } else if(dif_plot[[1]] == "unimodal" || dif_plot[[1]] == "bimodal") {
    g <- ggplot(data = dif_plot[[2]], aes(x=amp, y=reward, label = delta)) +
      geom_line(aes(colour = as.factor(delta))) +
      geom_errorbar(data = dif_plot[[2]], aes(ymin=reward-ci, ymax=reward+ci, color=as.factor(delta)), width=.01) +
      labs(x = "Amplitude", y = "Average reward per interaction", color="", fill="") +
      theme_bw(base_size = 15) +
      theme(legend.position = "right") # set to "none" to hide the legend
    ggsave(g, file=paste0("offline_lif_function_",dif_plot[[1]],"_delta.eps"), device="eps")
    print(g)
  }
}