Nothing
#' Function for the Simulation of Multistage-Designs
#'
#' This function simulates data according to the specified and submitted multistage design. The persons are drawn from a standard normal distribution if the amount of persons are specified. As an additional argument, a seed can also be set. If requested, it is also possible to submit a vector ore list of person parameters to specify different person distributions.
#'
#' @param mstdesign definition of desired multistage design
#' @param items vector of difficulty parameters for each items
#' @param persons amount of persons per starting module
#' @param preconditions definition of preconditions can optionally be specified. In the case of probabilistic routing preconditions such as a pre-test, which are taken into account in the MST design. For the specification the correlation with the true person parameter have to be specified. The submitted correlation is adjusted in the function according to Demirtas and Yavuz (2015; <doi:10.1080/10543406.2014.920868>) It is also possible to submit your own vector with integers for the preconditions.
#' @param ... further optional arguments like setting a \code{seed}
#'
#' @author Jan Steinfeld
#'
#' @importFrom stats rnorm
#'
#' @return List with following entries
#'
#' \item{data}{Matrix with item responses}
#' \item{data_mst}{Data frame with item responses and additional a vector of used modules per person}
#' \item{persons}{Generated and used person parameters}
#' \item{mstdesign}{Submitted multistage design}
#'
#' @references
#'\itemize{
#' \item Demirtas, H., & Yavuz, Y. (2015). Concurrent Generation of Ordinal and Normal Data. \emph{Journal of Biopharmaceutical Statistics}, 25(4), 635-650. https://doi.org/10.1080/10543406.2014.920868
#' }
#'
#'
#' @example ./R/.example_designsim.R
#'
#' @export
tmt_sim <- function(mstdesign = NULL,
items = NULL, persons = NULL, preconditions = NULL, ...) {
additional_arguments <- list(...)
if (!is.null(additional_arguments$mean)) warning("argument 'mean' is deprecated")
if (!is.null(additional_arguments$sd)) warning("argument 'sd' is deprecated")
if (!is.null(additional_arguments$seed)) {
seed <- additional_arguments$seed
set.seed(seed)
} else {
seed <- NULL
}
# sanitary checks
if (is.null(mstdesign)) stop("mstdesign needs to be specified! \n")
if (is.null(items)) stop("vector of difficulty parameter is required")
input <- tmt_mstdesign(mstdesign = mstdesign, options = c("simulation", "items"))
design <- input$simulation
items_d <- input$items
start_b <- input$simulation$start
# Update 2020-04-28 preconditions
if (!is.null(input$preconditions)) {
precon_start <- input$preconditions$preconditions
}
if (!is.null(preconditions) & is.null(input$preconditions)) {
warning("No preconditions were specified in the mstdesign, but passed as a function argument. The 'preconditions' argument is set to 'NULL'.")
# preconditions has to be set to NULL, because the information has already been created
preconditions <- NULL
}
if (is.null(preconditions) & !is.null(input$preconditions)) {
stop("Please specify a correlation for the preconditions")
}
# generate preconditions vector for generating variables
if (!is.null(preconditions) & (sum(unlist(persons)) != length(sum(unlist(preconditions))))) {
precon_corr <- preconditions
if (length(preconditions) < nrow(input$preconditions$precondition_matrix)) {
warning("The specified correlation in 'preconditions' was used for all starting groups.")
# message("The specified correlation in 'preconditions' was used for all starting groups")
precon_corr <- rep(precon_corr, length.out = nrow(input$preconditions$precondition_matrix))
} else if (length(preconditions) > nrow(input$preconditions$precondition_matrix)) {
warning(paste0("The first ",nrow(input$preconditions$precondition_matrix)," specified correlation in 'preconditions' were used."))
# message("The specified correlation in 'preconditions' was used for all starting groups")
precon_corr <- rep(precon_corr, length.out = nrow(input$preconditions$precondition_matrix))
}
precon_values <- grep("value", colnames(precon_start))
precon_start[, precon_values] <- apply(precon_start[, precon_values, drop = FALSE], 2, function(x) as.numeric(gsub("\\(|\\)", "", x)))
precon_n <- grep("precondition", colnames(precon_start))
preconditions <- list()
for (ii in precon_n) {
preconditions_mat <- matrix(NA, ncol = 3, nrow = 1)
colnames(preconditions_mat) <- c("min", "max", "r")
i <- input$preconditions$precondition_matrix[,"name"] %in% unique(precon_start[, ii])
preconditions_mat[, 1] <- as.numeric(input$preconditions$precondition_matrix[i,"min"])
preconditions_mat[, 2] <- as.numeric(input$preconditions$precondition_matrix[i,"max"])
preconditions_mat[, 3] <- precon_corr[i]
preconditions[[unique(precon_start[, ii])]] <- preconditions_mat
}
}
if (nrow(start_b) != length(persons)) {
if(is.list(persons)) {
warning(paste0(length(persons)," groups of Persons were specified, but only one starting group, the groups were merged"))
persons <- unlist(persons)
} else if (all(persons%%1==0) & length(unique(persons)) == 1) {
persons <- unique(persons)
} else if (!all(persons%%1<1 & persons%%1!=0)) {
stop("Please specify a unique number of desired persons for the data simulation. A vector with different amounts of persons is currently only supported for several start groups.")
}
}
# generate person parameter and if desired also sum score for preconditions
personinfo <- precon_sim(ppar = persons, precon = preconditions)
persons <- personinfo$perspar
precon <- personinfo$preconpar
if (is.null(names(items))) {
if (length(items) == length(items_d)) {
warning("The provided item parameters are submitted without names, this was subsequently made up (in ascending order according to the information in the design)")
names(items) <- items_d
}
}
if (!all(items_d %in% names(items))) stop("vector of 'named' item parameter is required")
if(length(items) != length(items_d)) {
stop("The number of specified items in 'items' does not match the MST design.")
}
if (length(persons) < nrow(start_b)) {
stop("Please specify as many groups of person parameters as start blocks are defined")
}
n <- sum(lengths(persons))
n_n <- lengths(persons)
p_pos_start <- c(1, cumsum(n_n)[-length(n_n)] + 1)
p_pos_end <- cumsum(n_n)
ppar <- persons
preconpar <- precon
if (!is.null(preconditions)) {
mat <- matrix(NA, nrow = n, ncol = length(items) + 1 + length(precon))
inp <- do.call(cbind, precon)
mat[, seq(precon) + 1] <- as.matrix(inp)
colnames(mat) <- c("branching", names(preconditions), items_d)
mat <- data.frame(mat)
} else {
mat <- data.frame(matrix(NA, nrow = n, ncol = length(items) + 1))
colnames(mat) <- c("branching", items_d)
}
mat[, "branching"] <- rep(unique(design[["start"]][, "from"]), times = n_n)
helper_stages <- vector(mode = "character", length = nrow(mat))
for (s in seq_len(length(design))) {
# only once for the start module
if (s == 1) {
for (ss in seq_along(unique(mat[, "branching"]))) {
sb <- unique(mat[, "branching"])[ss]
items_i <- unique(design[["start"]][design[["start"]][, "from"] == sb, "items_to"])
items_i <- unlist(strsplit(items_i, ","))
mat[p_pos_start[ss]:p_pos_end[ss], items_i] <- sim.rm(
theta = ppar[[ss]],
b = items[names(items) %in% items_i]
)
helper_stages[p_pos_start[ss]:p_pos_end[ss]] <- sb
}
ppar <- unlist(ppar, use.names = FALSE)
# 2020-05-13 the preconditions are always added but only taken into account if the design is cumulative.
# If so they appear in the 'from' column in the submitted and processed design information
} else { # start with 2 because in first entry are the starting values
# get information from last stage and store this information
stage_last <- helper_stages
helper_stages <- vector(mode = "character", length = nrow(mat))
for (sl in sort(unique(stage_last))) {
# Items from stage before
persn_sl <- stage_last == sl
items_i <- unique(design[[s]][design[[s]][, "from"] == sl, "items_from"])
items_i <- unlist(strsplit(items_i, ","))
# sums of stage before
probabilities <- design[[s]][design[[s]][, "from"] == sl, "probability"]
probabilities <- as.numeric(unlist(sapply(probabilities, strsplit, ",")))
maxSolved <- as.numeric(design[[s]][design[[s]][, "from"] == sl, "maxSolved"])
minSolved <- as.numeric(design[[s]][design[[s]][, "from"] == sl, "minSolved"])
if (is.null(preconpar) & (any(max(maxSolved) > length(items_i)))) {
stop("Within the design matrix an divergent amount of maxSolved items are specified\n") # nolint
}
rs_sl <- rowSums(mat[persn_sl, items_i])
if (all(probabilities == 1)) {
# 2021-12-19 added, for cases with two or lower amounts of items in one module
if ( any(maxSolved - minSolved == 0) ) {
if(maxSolved[maxSolved - minSolved == 0] == 0){
groups_n <- rep(NA,length(rs_sl))
minequalmax <- maxSolved[maxSolved-minSolved==0]
groups_n[rs_sl %in% minequalmax] <- seq_along(minequalmax)
newcat <- c(0, maxSolved)[!c(0, maxSolved) %in% minequalmax]
groups_n <- factor(groups_n, levels = seq_along(c(newcat,length(minequalmax))))
groups_n[!rs_sl %in% minequalmax] <- cut(rs_sl[!rs_sl %in% minequalmax], sort(c(0,newcat)), include.lowest = TRUE, labels = seq_along(newcat) + length(minequalmax)) # nolint
} else {
groups_n <- cut(rs_sl, sort(c(0, maxSolved)), include.lowest = TRUE, labels = seq_along(maxSolved)) # nolint
}
} else {
groups_n <- cut(rs_sl, sort(c(0, maxSolved)), include.lowest = TRUE, labels = seq_along(maxSolved)) # nolint
}
# groups_n <- cut(rs_sl, sort(c(0, maxSolved)), include.lowest = TRUE, labels = seq_along(maxSolved)) # nolint
for (g in sort(unique(groups_n))) {
item_g <- design[[s]][design[[s]][, "from"] == sl &
design[[s]][, "maxSolved"] == sort(maxSolved)[as.numeric(g)], "items_to"] # nolint
item_g <- as.character(unlist(strsplit(item_g, ",")))
to <- design[[s]][design[[s]][, "from"] == sl & design[[s]][, "maxSolved"] == sort(maxSolved)[as.numeric(g)], "to"] # nolint
mat[persn_sl, "branching"][groups_n %in% g] <- paste0(mat[persn_sl, "branching"][groups_n %in% g], "-", to) # nolint
helper_stages[persn_sl][groups_n %in% g] <- to
# simulate again, based on existing person parameters
mat[persn_sl, item_g][groups_n %in% g, ] <- sim.rm(
theta = ppar[persn_sl][groups_n %in% g], # nolint
b = items[item_g]
)
}
} else { # probability based routing
# 2019-11-15 added feature for probabilistic routing
probabilities <- design[[s]][design[[s]][, "from"] == sl, "probability"] # nolint
probabilities <- do.call(rbind, sapply(probabilities, strsplit, ","))
probabilities <- apply(probabilities, 2, as.numeric)
if (!all(colSums(probabilities)%%1 == 0)) warning("For each raw score, the probabilities do not add up to 1\n") # nolint
groups_n <- rep(NA, length(rs_sl))
seq_gg <- ifelse(min(minSolved) == 0, 1, 0)
for (gg in seq(min(minSolved), max(maxSolved))) {
if(any(rs_sl %in% gg)){
groups_n[rs_sl %in% gg] <- sample(design[[s]][design[[s]][, "from"] == sl, "to"], size = sum(rs_sl %in% gg), prob = probabilities[, gg + seq_gg], replace = TRUE) # nolint
}
}
for (ggg in sort(unique(groups_n))) {
item_g <- design[[s]][design[[s]][, "from"] == sl &
design[[s]][, "to"] == ggg, "items_to"]
# 2024-01-26 auskommentiert if ( length(item_g)>1 & length(unique(item_g)) ) item_g <- item_g[1]
item_g <- as.character(unlist(strsplit(item_g, ",")))
mat[persn_sl, "branching"][groups_n %in% ggg] <- paste0(mat[persn_sl, "branching"][groups_n %in% ggg], "-", ggg) # nolint
helper_stages[persn_sl][groups_n %in% ggg] <- ggg
# simulate again, based on existing person parameters
mat[persn_sl, item_g][groups_n %in% ggg, ] <- sim.rm(
theta = ppar[persn_sl][groups_n %in% ggg], # nolint
b = items[item_g]
) # nolint
}
}
# }
}
} # end else
} # end for (s in 1:length(design)) {
mat_mst <- mat
mat <- apply(mat[, -1], 2, as.numeric)
# cat("\nseed end: ",additional_arguments$seed)
out <- list(
data = mat,
data_mst = mat_mst,
persons = ppar,
mstdesign = mstdesign,
preconditions = preconditions,
preconpar = preconpar
)
class(out) <- append(class(out), "mstdesign")
return(out)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.