#' Recovery simulation of mpt2irt models.
#'
#' This function allows to run a simulation study of mpt2irt models. Data are
#' generated either from the Boeckenholt Model (\code{genModel = "2012"}) or
#' from the Acquiescence Model (\code{genModel = "ext"}). Subsequently, one or
#' both of these models are fit to the generated data using either JAGS or Stan.
#' The results are saved in an RData file in \code{dir}.
#'
#' @param J number of items. Can be a vector for multiple traits (e.g.,
#' J=c(10,10,10)).
#' @param prop.rev number of reversed items. Can be a vector for multiple
#' traits(e.g., prop.rev=c(5,3,5)/10)
#' @param rrr Sequence of integers (e.g., \code{1:100}) of length greater or
#' equal to 1 specifying the number of replications to run.
#' @param genModel Character. The data generating model (either "2012" or "ext").
#' @param fitModel Character. The model for data analysis ("2012", "ext", or both as vector
#' c("2012", "ext")).
#' @param fitMethod Character. Whether to use "stan" or "jags".
# @param trait.sd standard deviation of normally distributed item properties
# for trait. Can be a vector for multiple traits (e.g., trait.sd=c(2, 1.5,
# 2))
# @param RSmin minimum of range for uniformly distributed response styles (can
# be a vector with separate bounds for middle/extreme/(acquiesence))
# @param RSmax maximum of range for response styles (can also be a vector)
#' @param theta_vcov true covariance matrix of response processes (order:
#' middle, extreme, (acquiescence), trait). standard is diag(3) / diag(4). Can be a vector of variances (not SDs).
#' @param betas Optional list. May have entries \code{"beta.mrs"},
#' \code{"beta.ers"}, \code{"beta.trait"}, and/or \code{"beta.ars"}. Each of
#' those may have arguments passed to \code{\link[truncnorm]{rtruncnorm}}.
#' @param df_vcov Numeric. Degrees of freedom for wishart distribution from
#' which the variance-covariance matrix for generating the data is drawn.
#' @param dir Path to directory where results should be stored,
#' @param keep_mcmc Logical indicating wheter to retain, besides a summary of
#' the parameters, the raw mcmc samples.
#' @param savext_mcmc Logical indicating wheter to save the mcmc samples in
#' an external RData file.
#' @param savext_all Logical indicating wheter to save the output from Stan/JAGS
#' in an external RData file.
#' @param beta_ARS_extreme Numeric. Only for \code{genModel="ext"}: probability
#' (on probit scale) of choosing category 5 (vs.4) in case of ARS. Defaults to
#' \code{rtruncnorm(mean = qnorm(.7), sd = sqrt(.1), a = qnorm(.5), b =
#' qnorm(.9))}.
# @param M number of MCMC samples (adaptation + burnin = M/2)
# @param n.chains number of chains
# @param thin thinning of MCMC samples
# @param printProgress whether to print the progress in a text file (either at the location defined by \code{path} or at the working directory)
# @param path where to save progress report (e.g., path="C:/"). disabled by using path=""
# @param saveTemp save temporary results to same location as progress report
# @param rng_seed random number seed
# @param mail an email address used when the simulation is finished (no dash "-" allowed!)
# @param ... further arguments passed to \code{\link[rstan]{sampling}} (for
# Stan) or \code{\link[runjags]{run.jags}} (for JAGS)
#' @inheritParams fit_irtree
#' @inheritParams generate_irtree_ext
#' @inheritParams runjags::run.jags
#' @return Function does not directly return anything but saves an external
#' RData file to \code{dir}. This object is a list containing the generated
#' parameters in \code{sim-results$param.sum$gen}, fitted parameters and other model fit
#' information in \code{sim-results$param.sum$foo}, as well as a summary of the setup.
#' @details Note that a text file "progress.txt" is written (and updated) to \code{dir} informing you about the progress of the simulation.
#' @import coda
# @import runjags
# @import parallel
#' @examples
#' \dontrun{
#' recovery_irtree(rrr = 1:2, N = 20, J = 10, genModel = "ext", fitModel = "ext",
#' fitMethod = "stan", M = 200, n.chains = 2, warmup = 200,
#' dir = "~/")
#'
#' # run multiple simulations in parallel using the 'parallel' package
#' no_cores <- parallel::detectCores() - 1
#' cl <- parallel::makeCluster(no_cores)
#' parallel::clusterApplyLB(cl, x = 11:13, fun = recovery_irtree, cores = 1,
#' N = 20, J = 10, genModel = "ext", fitModel = "ext",
#' fitMethod = "stan", M = 200, n.chains = 2, warmup = 200,
#' dir = "~/")
#' parallel::stopCluster(cl = cl)
#'}
#' @export
recovery_irtree <- function(rrr = NULL,
N = NULL,
J = NULL,
prop.rev = .5,
genModel = c("ext", "2012"),
fitModel = c("ext", "2012", "pcm", "steps", "shift", "ext2"),
fitMethod = c("stan", "jags"),
theta_vcov = NULL,
betas = NULL,
beta_ARS_extreme = NULL,
df = NULL,
V = NULL,
M = 500,
n.chains = 3,
thin = 1,
warmup = 500,
method = "simple",
outFormat = NULL,
startSmall = FALSE,
df_vcov = 50,
dir = NULL,
keep_mcmc = FALSE,
savext_all = FALSE,
savext_mcmc = TRUE,
add2varlist = c("deviance", "pd", "popt", "dic"),
...) {
# It is assumed that parameters such as df and V are the same for all models
# in fitModel. There's no problem in changing this, only remember to change
# the output, e.g., for V from length 1 to length(fitModel).
checkmate::qassert(rrr, "X>0[1,]")
checkmate::qassert(N, "X1")
checkmate::qassert(J, "X>0[1,]")
genModel <- match.arg(genModel)
fitMethod <- match.arg(fitMethod)
if (fitMethod == "stan") {
fitModel <- match.arg(fitModel, several.ok = TRUE)
} else {
# PCM and Steps not yet implemented in JAGS
fitModel <- match.arg(fitModel, choices = c("ext", "2012"),
several.ok = TRUE)
}
checkmate::qassert(df_vcov, "N1[1,]")
checkmate::assert_character(dir, any.missing = FALSE, max.len = 1,
null.ok = TRUE)
checkmate::qassert(keep_mcmc, "B1")
checkmate::qassert(savext_all, "B1")
checkmate::qassert(savext_mcmc, "B1")
args <- c(as.list(environment()), list(...))
### INPUT WRANGLING -----
#### multiple traits
n.trait <- length(J)
traitItem <- rep(1:n.trait, J)
J <- sum(J)
numRS.gen <- ifelse(genModel == "2012", 2, 3)
S.gen <- numRS.gen + n.trait
numRS.fit <- sapply(fitModel, function(x) ifelse(x == "2012", 2, 3))
S.fit <- numRS.fit + n.trait
### DIRECTORIES -----
# if (!is.null(dir)) {
# try(dir <- path.expand(dir))
# if (!dir.exists(dir)) {
# set.seed(Sys.Date())
# tmp1 <- paste0(sample(c(letters, LETTERS, 0:9), 12, T), collapse = "")
# dirx <- paste0(getwd(), "/", tmp1)
# if (!dir.exists(dirx)) {
# dir.create(dirx)
# }
# on.exit(message(paste0("Data saved in: ", dirx)))
# }
# }
#
# if (savext_mcmc == TRUE) {
# if (!dir.exists(paste0(ifelse(dir.exists(dir), dir, dirx), "/", "mcmc"))) {
# dir.create(paste0(ifelse(dir.exists(dir), dir, dirx), "/", "mcmc"))
# }
# }
### LOOP OVER REPLICATIONS -----
for (qqq in seq_along(rrr)) {
time_1 <- Sys.time()
### GENERATE DATA -----
cor2cov <- function(mat = NULL, sd = NULL) {
# is in MBESS package, but MBESS has so many dependencies
checkmate::qassert(mat, "M+[-1,1]")
checkmate::qassert(sd, "N+(0,1]")
return(diag(sd) %*% mat %*% diag(sd))
}
if (is.null(theta_vcov)) {
xx1 <- diag(S.gen)
xx1[1, 2] <- xx1[2, 1] <- -.2
theta_vcov_i <- cov2cor(rWishart(1, df = df_vcov, xx1)[, , 1])
# the default variances are 0.33 for RS and 1.0 for TRAIT
theta_vcov_i <- cor2cov(theta_vcov_i, sqrt(c(rep(.33, S.gen - n.trait), rep(1, n.trait))))
# theta_vcov_i <- diag(S.gen) # middle / extremity / acq / trait(s)
} else if (is.vector(theta_vcov)) {
repeat {
vcov.0 <- cov2cor(rWishart(1, df = df_vcov, diag(theta_vcov))[, , 1])
theta_vcov_i <- cor2cov(vcov.0, sqrt(theta_vcov))
# theta_vcov <- diag(S.gen) * theta_vcov # middle / extremity / trait(s)
if (det(theta_vcov_i) != 0) break
}
} else {
theta_vcov_i <- theta_vcov
}
if (any(dim(theta_vcov_i)!= S.gen) | det(theta_vcov_i) == 0){
warning("Check definition of theta_vcov!")
}
if (genModel == "2012") {
betas_i <- gen_betas(genModel = genModel, J = J, betas = betas)
gen <- generate_irtree_2012(N = N, J = J, betas = betas_i, theta_vcov = theta_vcov_i,
prop.rev = prop.rev, traitItem = traitItem, cat = TRUE)
} else if (genModel == "ext") {
betas_i <- gen_betas(genModel = genModel, J = J, betas = betas)
if (is.null(beta_ARS_extreme)) {
beta_ARS_extreme_i <- truncnorm::rtruncnorm(n = 1, mean = qnorm(.7), sd = sqrt(.1),
a = qnorm(.5), b = qnorm(.9))
} else {
beta_ARS_extreme_i <- beta_ARS_extreme
}
gen <- generate_irtree_ext(N = N, J = J, betas = betas_i, theta_vcov = theta_vcov_i,
prop.rev = prop.rev, traitItem = traitItem, cat = TRUE,
beta_ARS_extreme = beta_ARS_extreme_i, genModel = genModel)
}
genNames <- c(paste0("theta[",apply(expand.grid(1:N, 1:S.gen),1,
paste0, collapse=","),"]"),
paste0("beta[",apply(expand.grid(1:J, 1:(numRS.gen+1)),1,
paste0, collapse=","),"]"),
paste0("Sigma[",apply(expand.grid(1:S.gen, 1:S.gen),1,
paste0, collapse=","),"]"))
if (genModel != "2012") {
genNames <- c(genNames, "beta_ARS_extreme")
}
param.sum <- vector("list", length(S.fit) + 1)
names(param.sum) <- c(fitModel, "gen")
param.sum$gen <- matrix(c(gen$theta, gen$betas, gen$theta_vcov,
switch(genModel, "ext" = gen$beta_ARS_extreme)),
ncol = 1, dimnames = list(genNames, NULL))
returnlist <- list(param.sum = param.sum,
# genModel = genModel,
sim_args = args
# fitModel = fitModel,
# S = S.fit,
# revItem = gen$revItem,
# traitItem = gen$traitItem,
# X = gen$X,
# N = N,
# J = J,
# prop.rev = prop.rev,
# n.trait = n.trait,
# fitMethod = fitMethod,
# df = NULL,
# V = NULL,
# M = M,
# n.chains = n.chains,
# thin = thin,
# warmup = warmup,
# df_vcov = df_vcov,
# startSmall = startSmall,
# jagspath = runjags::runjags.getOption("jagspath")
)
### FIT MODELS -----
for (sss in 1:length(S.fit)) {
### FIT IN JAGS -----
if (fitMethod == "jags") {
fit_jags <- fit_irtree(X = gen$X,
revItem = gen$revItem,
traitItem = gen$traitItem,
fitModel = fitModel[sss], fitMethod = fitMethod, df = df, V = V,
n.chains = n.chains, thin = thin, M = M, warmup = warmup,
outFormat = outFormat, startSmall = startSmall,
method = method,
add2varlist = add2varlist, ...)
# returnlist$sim_args$jagspath <- runjags::runjags.getOption("jagspath")
fit2 <- summarize_irtree_fit(fit_jags)
fit3 <- tidyup_irtree_fit(fit2, plot = FALSE)
returnlist$param.sum[[fitModel[sss]]] <- fit3
tmp0 <- sprintf("sim_%04d_%s_%s_mcmc.RData", rrr[qqq], fitMethod, fitModel[sss])
dirx <- get_dir(dir = dir)
dirmcmc <- paste0(dirx, "/mcmc")
if (any(c(savext_mcmc, savext_all) == TRUE)) {
if (!dir.exists(dirmcmc)) {
dir.create(normalizePath(dirmcmc))
}
on.exit(message(paste0("Data saved in: ", dirx)), add = T)
}
if (savext_mcmc == TRUE) {
tmp11 <- gsub("_mcmc.RData", "", tmp0) %>%
gsub("sim", "mcmc", x = .)
assign(tmp11, fit2$mcmc)
tmp13 <- paste0(dirmcmc, "/", tmp0)
do.call(save, list(tmp11, file = tmp13))
}
if (savext_all == TRUE) {
tmp21 <- gsub("_mcmc.RData", "", tmp0) %>%
gsub("sim", "res", x = .)
tmp22 <- gsub("mcmc", "raw", tmp0)
tmp23 <- paste0(dirmcmc, "/", tmp22)
assign(tmp21, fit_jags)
do.call(save, list(tmp21, file = tmp23))
}
rm(fit_jags, fit2, fit3)
# sum.jags <- runjags::add.summary(fit_jags$samples)
# # sum.jags$mcmc <- NULL
# if (savext_mcmc == TRUE) {
# returnlist_mcmc <- sum.jags$mcmc
# }
# if (keep_mcmc == FALSE) {
# for (iii in seq_along(sum.jags$mcmc)) {
# sum.jags$mcmc[[iii]] <- sum.jags$mcmc[[iii]][, 1, drop = F]
# }
# sum.jags$crosscorr <- NULL
# }
# returnlist$param.sum[[fitModel[sss]]] <- sum.jags
# rm(fit_jags, sum.jags)
} else if (fitMethod == "stan") {
### FIT IN STAN ----
fit_stan <- fit_irtree(X = gen$X,
revItem = gen$revItem,
traitItem = gen$traitItem,
fitModel = fitModel[sss], fitMethod = fitMethod, df = df, V = V,
n.chains = n.chains, thin = thin, M = M, warmup = warmup,
outFormat = outFormat, startSmall = startSmall,
add2varlist = NULL, ...)
fit2 <- summarize_irtree_fit(fit_stan)
fit3 <- tidyup_irtree_fit(fit2, plot = FALSE)
returnlist$param.sum[[fitModel[sss]]] <- fit3
tmp0 <- sprintf("sim_%04d_%s_%s_mcmc.RData", rrr[qqq], fitMethod, fitModel[sss])
dirx <- get_dir(dir = dir)
dirmcmc <- paste0(dirx, "/mcmc")
if (any(c(savext_mcmc, savext_all) == TRUE)) {
if (!dir.exists(dirmcmc)) {
dir.create(normalizePath(dirmcmc))
}
on.exit(message(paste0("Data saved in: ", dirx)), add = T)
}
if (savext_mcmc == TRUE) {
tmp11 <- gsub("_mcmc.RData", "", tmp0) %>%
gsub("sim", "mcmc", x = .)
assign(tmp11, fit2$mcmc)
tmp13 <- paste0(dirmcmc, "/", tmp0)
do.call(save, list(tmp11, file = tmp13))
}
if (savext_all == TRUE) {
tmp21 <- gsub("_mcmc.RData", "", tmp0) %>%
gsub("sim", "res", x = .)
tmp22 <- gsub("mcmc", "raw", tmp0)
tmp23 <- paste0(dirmcmc, "/", tmp22)
assign(tmp21, fit_stan)
do.call(save, list(tmp21, file = tmp23))
}
rm(fit_stan, fit2, fit3)
# returnlist$param.sum[[fitModel[sss]]] <- fit_stan
# returnlist$param.sum[[fitModel[sss]]]$mcmc <- rstan::As.mcmc.list(fit_stan$samples)
# returnlist$param.sum[[fitModel[sss]]]$summary <-
# summary(returnlist$param.sum[[fitModel[sss]]]$mcmc)
# if (savext_mcmc == TRUE) {
# returnlist_mcmc <- returnlist$param.sum[[fitModel[sss]]]$mcmc
# }
# if (keep_mcmc == FALSE) {
# returnlist$param.sum[[fitModel[sss]]]$mcmc <- NULL
# returnlist$param.sum[[fitModel[sss]]]$samples@sim$samples <- vector("list", length = n.chains)
# }
# rm(fit_stan)
}
}
### RETURN -----
time_2 <- Sys.time()
returnlist$date <- difftime(time_2, time_1, units = "hours")
save_file <- paste0("sim-results-", sprintf("%04d", rrr[qqq]), "-", substr(Sys.time(), 1, 10), ".RData")
save_file <- gsub(" CET", "", save_file)
save_file <- gsub(" ", "-", save_file)
save_file <- gsub(":", "-", save_file)
returnName <- sprintf("sim_%04d", rrr[qqq])
assign(returnName, returnlist)
dirx <- get_dir(dir = dir)
do.call(save, list(returnName, file = paste0(dirx, "/", save_file)))
on.exit(message(paste0("Data saved in: ", dirx)), add = T)
# if (savext_mcmc == TRUE) {
# save_file_mcmc <- paste0("mcmc-", sprintf("%04d", rrr[qqq]), ".RData")
# return_name_mcmc <- paste0("mcmc_", sprintf("%04d", rrr[qqq]))
# assign(return_name_mcmc, returnlist_mcmc)
# if (!dir.exists(paste0(ifelse(dir.exists(dir), dir, dirx), "/", "mcmc"))) {
# dir.create(paste0(ifelse(dir.exists(dir), dir, dirx), "/", "mcmc"))
# }
# do.call(save, list(return_name_mcmc, file = paste0(ifelse(dir.exists(dir), dir, dirx), "/mcmc/", save_file_mcmc)))
# }
if (!is.null(dir)) {
if (dir.exists(dir)) {
# cony <- file(paste0(ifelse(dir.exists(dir), dir, dirx), "/progress_", Sys.info()["nodename"], ".txt"), open = "a")
cony <- file(paste0(dir, "/progress.txt"), open = "a")
writeLines(con = cony,
text = paste0(sprintf("%04d", rrr[qqq]),
"\t\tStarted at: ", time_1,
"\t\tEnded at: ", time_2,
"\t\tDifference of: ", format(round(difftime(time_2, time_1, units = "hours"), 3), nsmall = 3, width = 6), # " hours"
"\t\t", Sys.info()["nodename"]
)
)
close(cony)
}
}
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.