R/process_simulate_list.R
In binaryRL: Reinforcement Learning Tools for Two-Alternative Forced Choice Tasks

Documented in simulate_list

#' Process: Simulating Fake Data
#' 
#' @description
#' This function is responsible for generating synthetic (fake) data
#'  using random numbers. For all parameters except the last one, their
#'  values are drawn from a uniform distribution within their respective
#'  specified ranges.
#'
#' The last parameter, representing the temperature (\code{tau}) in the soft-max
#'  function, is drawn from an exponential distribution. If its \code{upper} bound
#'  is set to 1, it implies \code{tau} is sampled from \code{Exp(1)} (an exponential
#'  distribution with a rate parameter of 1). If its \code{lower} bound is set
#'  to 1, this means that after \code{tau} is randomly generated, it is shifted
#'  to the right by adding 1 (i.e., \code{tau+1}), establishing a minimum value.
#' 
#' @param data [data.frame] 
#' 
#' This data should include the following mandatory columns: 
#'  \itemize{
#'    \item "sub"
#'    \item "time_line" (e.g., "Block", "Trial")
#'    \item "L_choice"
#'    \item "R_choice"
#'    \item "L_reward"
#'    \item "R_reward"
#'    \item "sub_choose"
#'  }
#'  
#' @param id [vector]
#' 
#' Specifies which subject's data to use. In parameter and model recovery
#'  analyses, the specific subject ID is often irrelevant. Although the
#'  experimental trial order might have some randomness for each subject,
#'  the sequence of reward feedback is typically pseudo-random.
#'
#' The default value for this argument is \code{NULL}. When \code{id = NULL},
#'  the program automatically detects existing subject IDs within the
#'  dataset. It then randomly selects one subject as a sample, and the
#'  parameter and model recovery procedures are performed based on this
#'  selected subject's data.
#'  
#' default: \code{id = NULL}
#'  
#' @param obj_func [function]
#' 
#' The objective function that the optimization algorithm package accepts.
#'  This function must strictly take only one argument, \code{params} (a vector
#'  of model parameters). Its output must be a single numeric value
#'  representing the loss function to be minimized. For more detailed
#'  requirements and examples, please refer to the relevant documentation 
#'  (
#'     \code{\link[binaryRL]{TD}}, 
#'     \code{\link[binaryRL]{RSTD}}, 
#'     \code{\link[binaryRL]{Utility}}
#'  ).
#' 
#' @param n_params [integer] 
#' 
#' The number of free parameters in your model. 
#' 
#' @param n_trials [integer] 
#' 
#' The total number of trials in your experiment.
#' 
#' @param lower [vector] 
#' 
#' Lower bounds of free parameters
#' 
#' @param upper [vector] 
#' 
#' Upper bounds of free parameters
#'  
#' @param iteration [integer]
#' 
#' This parameter determines how many simulated datasets are created for 
#'  subsequent model and parameter recovery analyses.
#'  
#' @param seed [integer] 
#' 
#' Random seed. This ensures that the results are 
#'  reproducible and remain the same each time the function is run. 
#'  
#' default: \code{seed = 123}
#'
#' @returns a list with fake data generated by random free parameters
#' 
#' @examples
#' \dontrun{
#' list_simulated <- binaryRL::simulate_list(
#'   data = binaryRL::Mason_2024_G2,
#'   obj_func = binaryRL::RSTD,
#'   n_params = 3,
#'   n_trials = 360,
#'   lower = c(0, 0, 1),
#'   upper = c(1, 1, 1),
#'   iteration = 100
#' )
#'
#' df_recovery <- binaryRL::recovery_data(
#'   list = list_simulated,
#'   fit_model = binaryRL::RSTD,
#'   model_name = "RSTD",
#'   n_params = 3,
#'   n_trials = 360,
#'   lower = c(0, 0, 1),
#'   upper = c(1, 1, 5),
#'   iteration = 100,
#'   nc = 1,
#'   algorithm = "L-BFGS-B"
#' )
#' }
#' 
simulate_list <- function(
  data,
  id = 1,
  obj_func, 
  n_params, 
  n_trials,
  lower, 
  upper,
  iteration = 10,
  seed = 123
) {
  list_simulated <- list()
  # 检测是都用同一个被试的题目, 还是每次都更换题目
  if (length(id) == 1) {
    id <- rep(id, iteration)
  }
  
  for (i in 1:iteration) {
    params <- c()
    
    for (j in 1:n_params) {
      # 确保每次种子不同
      set.seed(seed + n_params * i + j) 
      if (j == n_params) {
        params[j] <- stats::rexp(1, rate = upper[j]) + lower[j]
      } else {
        # 其他参数服从均匀分布
        params[j] <- stats::runif(n = 1, min = lower[j], max = upper[j])
      }
    }
    
    # 创建临时环境
    binaryRL.env <- new.env()
    
    # 给临时环境创建全局变量
    binaryRL.env$mode <- "simulate"
    binaryRL.env$policy <- "on"
    
    binaryRL.env$estimate <- "MLE"
    binaryRL.env$priors <- NULL
    
    binaryRL.env$data <- data
    binaryRL.env$id <- id[i]
    binaryRL.env$n_params <- n_params
    binaryRL.env$n_trials <- n_trials
    
    # 让obj_func的环境绑定在fit_env中
    environment(obj_func) <- binaryRL.env
    
    list_simulated[[i]] <- obj_func(params = params)
    list_simulated[[i]]$input <- params
  }
  
  return(list_simulated)
}