Nothing
#' Simulate choice data
#'
#' @description
#' This function simulates choice data from a probit model.
#'
#' @details
#' See [the vignette on choice data](https://loelschlaeger.de/RprobitB/articles/v02_choice_data.html)
#' for more details.
#'
#' @param covariates \[`list`\]\cr
#' A named list of covariate values. Each element must be a vector of length
#' equal to the number of choice occasions and named according to a covariate.
#' Covariates for which no values are supplied are drawn from a standard normal
#' distribution.
#'
#' @param true_parameter \[`list`\]\cr
#' A named list with true parameter values for \code{alpha},
#' \code{C}, \code{s}, \code{b}, \code{Omega}, \code{Sigma}, \code{Sigma_full},
#' \code{beta}, \code{z}, \code{d} for the simulation.
#'
#' See [the vignette on model definition](https://loelschlaeger.de/RprobitB/articles/v01_model_definition.html)
#' for definitions of these variables.
#'
#' @inheritParams prepare_data
#' @inheritParams RprobitB_data
#' @inheritParams check_form
#'
#' @return
#' An object of class \code{RprobitB_data}.
#'
#' @examples
#' ### simulate data from a binary probit model with two latent classes
#' data <- simulate_choices(
#' form = choice ~ cost | income | time,
#' N = 100,
#' T = 10,
#' J = 2,
#' re = c("cost", "time"),
#' alternatives = c("car", "bus"),
#' true_parameter = list(
#' "alpha" = c(-1, 1),
#' "b" = matrix(c(-1, -1, -0.5, -1.5, 0, -1), ncol = 2),
#' "C" = 2
#' )
#' )
#'
#' ### simulate data from an ordered probit model
#' data <- simulate_choices(
#' form = opinion ~ age + gender,
#' N = 10,
#' T = 1:10,
#' J = 5,
#' alternatives = c("very bad", "bad", "indifferent", "good", "very good"),
#' ordered = TRUE,
#' covariates = list(
#' "gender" = rep(sample(c(0, 1), 10, replace = TRUE), times = 1:10)
#' )
#' )
#'
#' ### simulate data from a ranked probit model
#' data <- simulate_choices(
#' form = product ~ price,
#' N = 10,
#' T = 1:10,
#' J = 3,
#' alternatives = c("A", "B", "C"),
#' ranked = TRUE
#' )
#'
#' @export
#'
#' @seealso
#' \itemize{
#' \item [check_form()] for checking the model formula
#' \item [overview_effects()] for an overview of the model effects
#' \item [create_lagged_cov()] for creating lagged covariates
#' \item [as_cov_names()] for re-labelling alternative-specific covariates
#' \item [prepare_data()] for preparing empirical choice data
#' \item [train_test()] for splitting choice data into a train and test subset
#' }
simulate_choices <- function(
form, N, T = 1, J, re = NULL, alternatives = NULL, ordered = FALSE,
ranked = FALSE, base = NULL, covariates = NULL, true_parameter = list()
) {
### check 'form'
oeli::input_check_response(
check = oeli::check_missing(form),
var_name = "form"
)
check_form_out <- check_form(form = form, re = re, ordered = ordered)
form <- check_form_out$form
choice <- check_form_out$choice
re <- check_form_out$re
vars <- check_form_out$vars
ASC <- check_form_out$ASC
### check other inputs
oeli::input_check_response(
check = oeli::check_missing("N"),
var_name = "N"
)
oeli::input_check_response(
check = checkmate::check_count(N, positive = TRUE),
var_name = "N"
)
if (length(T) == 1) T <- rep(T, N)
oeli::input_check_response(
check = checkmate::check_integerish(T, lower = 0, any.missing = FALSE),
var_name = "T"
)
oeli::input_check_response(
check = checkmate::check_int(J, lower = 2),
var_name = "J"
)
if (is.null(alternatives)) {
if (J > 26) {
stop("Please specify 'alternatives'.", call. = FALSE)
} else {
alternatives <- LETTERS[1:J]
}
}
oeli::input_check_response(
check = checkmate::check_character(alternatives, len = J),
var_name = "alternatives"
)
oeli::input_check_response(
check = checkmate::check_flag(ordered),
var_name = "ordered"
)
oeli::input_check_response(
check = checkmate::check_flag(ranked),
var_name = "ranked"
)
if (isTRUE(ordered) && isTRUE(ranked)) {
stop("'ordered' and 'ranked' cannot both be TRUE.", call. = FALSE)
}
if (isTRUE(ordered) && J <= 2) {
stop("'J' must be >= 3 in the ordered probit model.", call. = FALSE)
}
if (isTRUE(ranked) && J <= 2) {
stop("'J' must be >= 3 in the ranked probit model.", call. = FALSE)
}
if (!is.null(covariates)) {
for (i in 1:length(covariates)) {
if (length(covariates[[i]]) != sum(T)) {
stop(
paste0(
"In 'covariates', there must be ", sum(T), " values for '",
names(covariates)[i], "'."
),
call. = FALSE
)
}
}
}
### draw covariates
choice_data <- data.frame(
"id" = rep(1:N, times = T),
"idc" = unlist(sapply(T, seq_len, simplify = FALSE))
)
if (!ordered) {
### sort alternatives
alternatives <- sort(alternatives)
### determine index of base alternative
if (is.null(base)) {
base <- alternatives[J]
base_index <- J
} else if (any(alternatives == base)) {
base_index <- which(alternatives == base)
} else {
base <- alternatives[J]
warning(
paste0(
"'base' not contained in alternative set.\n",
"Set 'base = ", alternatives[J], "' instead."
),
immediate. = TRUE, call. = FALSE
)
base_index <- J
}
}
for (var in vars[[1]]) {
for (alt in alternatives) {
var_alt <- paste0(var, "_", alt)
if (var_alt %in% names(covariates)) {
cov <- matrix(covariates[[var_alt]], ncol = 1)
covariates[[var_alt]] <- NULL
} else {
cov <- matrix(stats::rnorm(n = sum(T)), ncol = 1)
}
colnames(cov) <- var_alt
choice_data <- cbind(choice_data, cov)
}
}
for (var in vars[[2]]) {
if (var %in% names(covariates)) {
cov <- matrix(covariates[[var]], ncol = 1)
covariates[[var]] <- NULL
} else {
cov <- matrix(stats::rnorm(n = sum(T)), ncol = 1)
}
colnames(cov) <- var
choice_data <- cbind(choice_data, cov)
}
for (var in vars[[3]]) {
for (alt in alternatives) {
var_alt <- paste0(var, "_", alt)
if (var_alt %in% names(covariates)) {
cov <- matrix(covariates[[var_alt]], ncol = 1)
covariates[[var_alt]] <- NULL
} else {
cov <- matrix(stats::rnorm(n = sum(T)), ncol = 1)
}
colnames(cov) <- var_alt
choice_data <- cbind(choice_data, cov)
}
}
### report un-used elements in 'covariates'
if (length(names(covariates)) > 0) {
warning(
paste(
"The column(s)",
paste(paste0("'", names(covariates), "'", collapse = ", ")),
"in 'covariates' are ignored."
),
call. = FALSE, immediate. = TRUE
)
}
### artificially add ASCs
if (ASC) choice_data$ASC <- 1
### determine number and names of linear coefficients
effects <- overview_effects(form, re, alternatives, base, ordered)
P_f <- sum(effects$random == FALSE)
P_r <- sum(effects$random == TRUE)
### check supplied and draw missing model parameters
true_parameter <- do.call(
what = RprobitB_parameter,
args = c(
list(
"P_f" = P_f, "P_r" = P_r, "J" = J, "N" = N, "ordered" = ordered,
"sample" = TRUE
),
true_parameter
)
)
### transform 'choice_data' in list format 'data' and simulate choices
data <- list()
ids <- unique(choice_data[, "id"])
N <- length(ids)
T <- as.numeric(table(choice_data[, "id"]))
### simulate choices
for (n in seq_len(N)) {
data[[n]] <- list()
data[[n]][["X"]] <- list()
data_n <- choice_data[choice_data[, "id"] == ids[n], ]
y_n <- numeric(T[n])
for (t in seq_len(T[n])) {
data_nt <- data_n[t, ]
if (ordered) {
X_nt <- as.matrix(data_nt[, vars[[2]]], nrow = 1)
colnames(X_nt) <- vars[[2]]
} else {
X_nt <- matrix(NA_real_, nrow = J, ncol = 0)
### type-1 covariates
for (var in vars[[1]]) {
old_names <- colnames(X_nt)
col <- numeric(J)
for (j in 1:J) {
col[j] <- data_nt[, paste0(var, "_", alternatives[j])]
}
X_nt <- cbind(X_nt, col)
colnames(X_nt) <- c(old_names, var)
}
### type-2 covariates
for (var in c(vars[[2]], if (ASC) "ASC")) {
old_names <- colnames(X_nt)
mat <- matrix(0, J, J)
for (j in (1:J)[-base_index]) {
mat[j, j] <- data_nt[, var]
}
mat <- mat[, -base_index, drop = FALSE]
X_nt <- cbind(X_nt, mat)
colnames(X_nt) <- c(
old_names, paste0(var, "_", alternatives[(1:J)[-base_index]])
)
}
### type-3 covariates
for (var in vars[[3]]) {
old_names <- colnames(X_nt)
mat <- matrix(0, J, J)
for (j in 1:J) {
mat[j, j] <- data_nt[, paste0(var, "_", alternatives[j])]
}
X_nt <- cbind(X_nt, mat)
colnames(X_nt) <- c(old_names, paste0(var, "_", alternatives))
}
}
### sort covariates
X_nt <- X_nt[, effects$effect, drop = FALSE]
### save in list
data[[n]][["X"]][[t]] <- X_nt
### build coefficient vector
if (P_f > 0 & P_r > 0) {
coef <- c(true_parameter$alpha, true_parameter$beta[, n])
} else if (P_f > 0 & P_r == 0) {
coef <- true_parameter$alpha
} else {
coef <- true_parameter$beta[, n]
}
### compute utility and choice decision
if (ordered) {
eps <- rnorm(n = 1, mean = 0, sd = sqrt(true_parameter$Sigma))
if (P_f == 0 & P_r == 0) {
U_nt <- eps
} else {
V_nt <- as.numeric(X_nt %*% coef)
U_nt <- V_nt + eps
}
gamma <- c(0, cumsum(exp(true_parameter[["d"]])))
y_nt_ind <- cut(
U_nt, breaks = c(-Inf, gamma, Inf), right = TRUE,
include.lowest = TRUE, labels = FALSE
)
y_n[t] <- alternatives[y_nt_ind]
} else {
eps <- oeli::rmvnorm(
n = 1, mean = rep(0, J), Sigma = true_parameter$Sigma_full
)
if (P_f == 0 & P_r == 0) {
U_nt <- eps
} else {
V_nt <- X_nt %*% coef
U_nt <- V_nt + eps
}
if (ranked) {
y_n[t] <- paste(
alternatives[order(as.vector(U_nt), decreasing = TRUE)],
collapse = ","
)
} else {
y_n[t] <- alternatives[which.max(U_nt)]
}
}
}
data[[n]][["y"]] <- y_n
}
### save choices in 'choice_data'
choice_data[choice] <- unlist(lapply(data, function(x) x[["y"]]))
### delete "ASC" from 'choice_data'
if (ASC) choice_data$ASC <- NULL
### save cov names
alt_length <- length(alternatives)
cov_names <- c(
if (length(vars[[1]]) > 0) {
paste(rep(vars[[1]], each = alt_length), alternatives, sep = "_")
},
vars[[2]],
if (length(vars[[3]]) > 0) {
paste(rep(vars[[3]], each = alt_length), alternatives, sep = "_")
}
)
### create output
RprobitB_data(
data = data,
choice_data = choice_data,
N = N,
T = T,
J = J,
P_f = P_f,
P_r = P_r,
alternatives = alternatives,
ordered = ordered,
ranked = ranked,
base = base,
form = form,
re = re,
ASC = ASC,
effects = effects,
standardize = NULL,
simulated = TRUE,
choice_available = TRUE,
true_parameter = true_parameter,
res_var_names = list(
"choice" = choice, "cov" = cov_names, "id" = "id", "idc" = "idc"
)
)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.