npbanditC: (Contextual) Multi-Armed Bandit Simulations

Documented in new_bandit sim_rand_bandit

#' Simulate Random Bandit
#' 
#' In this function we assume that the true expectation of each lever
#' can be expressed as linear combination of known basis functions 
#' with known coefficients.
#'
#' @param nsteps The number of steps to make in the trajectory.
#' @param nlevers The number of levers to choose from.
#' @param J_mod The number of basis functions to use in the model (not including)
#' the intercept.
#' @param J_true The number of basis functions to be used in the expression for
#' each lever's true expectation.
#' @param sd A vector of the standard deviations of each lever.
#' @param alpha A scaling parameter which tunes how the prior variance decreases
#' as J_mod increases.
#' @param alpha_true The true scaling parameter that the coefficients are generated from.
#' @param calc_mse True, if you want to output the mse for the beta coefficients.
#' @param b Hyperparameter for the inverse gamma distribution. Directly related to 
#' the rate of exploration.
#'
#' @export
sim_rand_bandit <- function(nsteps,nlevers,J_mod,J_true,sd,alpha = 1, alpha_true = 1,
                            calc_mse = FALSE, b = 2){
  #First we generate the true random betas.
  # set.seed(1)
  covar_true <- (1:J_true)^(-alpha_true)
  covar_true <- diag(c(1,covar_true))
  beta_true <- MASS::mvrnorm(nlevers,rep(0,J_true+1),covar_true)
  #and the prior distributions.
  posteriors <- gen_priors(nlevers,J_mod,bas_type = "poly",alpha,b)

  basis <- legendre_basis(max(c(J_mod,J_true)))

  # RUN THE THOMPSON SIMULATION
  sim <- rand_thompson(nsteps,posteriors,basis,sd,J_mod+1,J_true+1, beta_true, calc_mse)
  traj <- sim$traj ; traj$action <- traj$action + 1
  posteriors <- sim$posteriors

  out <- new_bandit(posteriors,traj,basis,beta_true)

  if (calc_mse) return(list("bandit" = out, "mse" = sim$mse))
  else return(out)
}


#' S4 Class to represent output of Bandit Algorithm
#'
#' @slot posteriors A list containing posterior parameter values: beta, covar, a and b.
#' @slot trajectory A dataframe containing information on the decision trajectory.
#' @slot basis A list of basis functions.
#' @slot beta_true a matrix of the true model coefficients.
#'
#' @export
methods::setClass("bandit",
                  slots = c(
                    posteriors = "list",
                    trajectory = "data.frame",
                    basis = "list",
                    beta_true = "matrix"
                  )
)

#' Create a new object of class bandit
#'
#' @param posteriors A list containing posterior parameter values: beta, covar, a and b.
#' @param trajectory A matrix containing information on the decision trajectory.
#' @param basis A basis of polynomials.
#' @param beta_true a matrix of the true model coefficients.
#'
#' @return An S4 object of class bandit.
#' @export
#'
new_bandit <- function(posteriors,trajectory,basis,beta_true){
  obj <- methods::new("bandit",
                      posteriors = posteriors,
                      trajectory = trajectory,
                      basis = basis,
                      beta_true = beta_true
  )
  return(obj)
}

#CREATE A PLOT METHOD TO COMPARE THE REAL LEVERS AGAINST THE POSTERIOR APPROXIMATIONS
#' @export
#' @importFrom graphics plot lines points legend
methods::setMethod("plot","bandit",
                   function(x){
                     traj <- x@trajectory
                     post <- x@posteriors
                     basis <- x@basis
                     beta_true <- x@beta_true
                     
                     x_vals <- seq(-1,1,length=200)
                     phi <- c()
                     
                     #Construct the full phi matrix
                     for(j in 1:length(basis)){
                       phi <- cbind(phi,basis[[j]](x_vals))
                     }
                     
                     y_true <- beta_true %*% t(phi[,1:ncol(beta_true)])
                     
                     y_post <- c()
                     for (i in 1:length(post)){
                       coeff <- as.matrix(post[[i]]$beta)
                       y_post <- rbind(y_post,
                                       t(coeff) %*% t(phi[,1:length(coeff)])
                       )
                     }
                     
                     ylims <- c(min(c(y_true,y_post)),max(c(y_true,y_post)))
                     colr <- grDevices::rainbow(nrow(y_true)+1)
                     plot(traj$x,traj$reward,col = grDevices::rainbow(nrow(y_true)+1,alpha = 0.1)[nrow(y_true)+1],
                          xlab = "x", ylab = "Reward", main = paste(
                            "Steps = ",nrow(phi)
                          ))
                     for (i in 1:nrow(y_true)){
                       lines(x_vals,y_true[i,],type = "l", col = colr[i])
                     }
                     
                     for (i in 1:nrow(y_post)){
                       lines(x_vals,y_post[i,],type = "l", lty = 2, col = colr[i] )
                     }
                     
                     legend_names <- c()
                     for (i in 1:nrow(y_true)){
                       legend_names <- c(legend_names,paste("Lever",i))
                     }
                     legend("bottom",c(legend_names,"Reward Obs"),lty = c(rep(1,nrow(y_true)),NA),pch = c(rep(NA,nrow(y_true)),1), col = colr,
                            bty = "n", xpd = TRUE, horiz = TRUE, inset = c(0,0))
                   }
)