#' Simulate Random Bandit
#'
#' In this function we assume that the true expectation of each lever
#' can be expressed as linear combination of known basis functions
#' with known coefficients.
#'
#' @param nsteps The number of steps to make in the trajectory.
#' @param nlevers The number of levers to choose from.
#' @param J_mod The number of basis functions to use in the model (not including)
#' the intercept.
#' @param J_true The number of basis functions to be used in the expression for
#' each lever's true expectation.
#' @param sd A vector of the standard deviations of each lever.
#' @param alpha A scaling parameter which tunes how the prior variance decreases
#' as J_mod increases.
#' @param alpha_true The true scaling parameter that the coefficients are generated from.
#' @param calc_mse True, if you want to output the mse for the beta coefficients.
#' @param b Hyperparameter for the inverse gamma distribution. Directly related to
#' the rate of exploration.
#'
#' @export
sim_rand_bandit <- function(nsteps,nlevers,J_mod,J_true,sd,alpha = 1, alpha_true = 1,
calc_mse = FALSE, b = 2){
#First we generate the true random betas.
# set.seed(1)
covar_true <- (1:J_true)^(-alpha_true)
covar_true <- diag(c(1,covar_true))
beta_true <- MASS::mvrnorm(nlevers,rep(0,J_true+1),covar_true)
#and the prior distributions.
posteriors <- gen_priors(nlevers,J_mod,bas_type = "poly",alpha,b)
basis <- legendre_basis(max(c(J_mod,J_true)))
# RUN THE THOMPSON SIMULATION
sim <- rand_thompson(nsteps,posteriors,basis,sd,J_mod+1,J_true+1, beta_true, calc_mse)
traj <- sim$traj ; traj$action <- traj$action + 1
posteriors <- sim$posteriors
out <- new_bandit(posteriors,traj,basis,beta_true)
if (calc_mse) return(list("bandit" = out, "mse" = sim$mse))
else return(out)
}
#' S4 Class to represent output of Bandit Algorithm
#'
#' @slot posteriors A list containing posterior parameter values: beta, covar, a and b.
#' @slot trajectory A dataframe containing information on the decision trajectory.
#' @slot basis A list of basis functions.
#' @slot beta_true a matrix of the true model coefficients.
#'
#' @export
methods::setClass("bandit",
slots = c(
posteriors = "list",
trajectory = "data.frame",
basis = "list",
beta_true = "matrix"
)
)
#' Create a new object of class bandit
#'
#' @param posteriors A list containing posterior parameter values: beta, covar, a and b.
#' @param trajectory A matrix containing information on the decision trajectory.
#' @param basis A basis of polynomials.
#' @param beta_true a matrix of the true model coefficients.
#'
#' @return An S4 object of class bandit.
#' @export
#'
new_bandit <- function(posteriors,trajectory,basis,beta_true){
obj <- methods::new("bandit",
posteriors = posteriors,
trajectory = trajectory,
basis = basis,
beta_true = beta_true
)
return(obj)
}
#CREATE A PLOT METHOD TO COMPARE THE REAL LEVERS AGAINST THE POSTERIOR APPROXIMATIONS
#' @export
#' @importFrom graphics plot lines points legend
methods::setMethod("plot","bandit",
function(x){
traj <- x@trajectory
post <- x@posteriors
basis <- x@basis
beta_true <- x@beta_true
x_vals <- seq(-1,1,length=200)
phi <- c()
#Construct the full phi matrix
for(j in 1:length(basis)){
phi <- cbind(phi,basis[[j]](x_vals))
}
y_true <- beta_true %*% t(phi[,1:ncol(beta_true)])
y_post <- c()
for (i in 1:length(post)){
coeff <- as.matrix(post[[i]]$beta)
y_post <- rbind(y_post,
t(coeff) %*% t(phi[,1:length(coeff)])
)
}
ylims <- c(min(c(y_true,y_post)),max(c(y_true,y_post)))
colr <- grDevices::rainbow(nrow(y_true)+1)
plot(traj$x,traj$reward,col = grDevices::rainbow(nrow(y_true)+1,alpha = 0.1)[nrow(y_true)+1],
xlab = "x", ylab = "Reward", main = paste(
"Steps = ",nrow(phi)
))
for (i in 1:nrow(y_true)){
lines(x_vals,y_true[i,],type = "l", col = colr[i])
}
for (i in 1:nrow(y_post)){
lines(x_vals,y_post[i,],type = "l", lty = 2, col = colr[i] )
}
legend_names <- c()
for (i in 1:nrow(y_true)){
legend_names <- c(legend_names,paste("Lever",i))
}
legend("bottom",c(legend_names,"Reward Obs"),lty = c(rep(1,nrow(y_true)),NA),pch = c(rep(NA,nrow(y_true)),1), col = colr,
bty = "n", xpd = TRUE, horiz = TRUE, inset = c(0,0))
}
)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.