#' Model set-up
#' Code is modified from JointAI to allow for additional control to imputation models
#'
#' @param formula a two sided model formula (see \code{\link[stats]{formula}})
#' or a list of such formulas; (more details below).
#' @param fixed a two sided formula describing the fixed-effects part of the
#' model (see \code{\link[stats]{formula}})
#' @param random only for multi-level models:
#' a one-sided formula of the form \code{~x1 + ... + xn | g},
#' where \code{x1 + ... + xn} specifies the model for the random
#' effects and \code{g} the grouping variable
#' @param data a \code{data.frame} containing the original data
#' (more details below)
#' @param family only for \code{glm_imp} and \code{glmm_imp}/\code{glmer_imp}:
#' a description of the distribution and link function to
#' be used in the model. This can be a character string naming a
#' family function, a family function or the result of a call to
#' a family function. (For more details see below and
#' \code{\link[stats]{family}}.)
#' @param rd_vcov character string or list specifying the structure of the
#' random effects variance covariance matrix, see details below.
#' @param monitor_params named list or vector specifying which parameters
#' should be monitored (more details below)
#' @param inits optional; specification of initial values in the form of a list
#' or a function (see \code{\link[rjags]{jags.model}}).
#' If omitted, starting values for the random number generator are
#' created by \strong{JointAI}, initial values are then generated
#' by JAGS.
#' It is an error to supply an initial value for an observed node.
#' @param auxvars optional; one-sided formula of variables that should be used
#' as predictors in the imputation procedure (and will be imputed
#' if necessary) but are not part of the analysis model(s).
#' For more details with regards to the behaviour with
#' non-linear effects see the vignette.
#' @param models optional; named vector specifying the types of models for
#' (incomplete) covariates.
#' This arguments replaces the argument \code{meth} used in
#' earlier versions.
#' If \code{NULL} (default) models will be determined
#' automatically based on the class of the respective columns of
#' \code{data}.
#' @param refcats optional; either one of \code{"first"}, \code{"last"},
#' \code{"largest"} (which sets the category for all categorical
#' variables) or a named list specifying which category should
#' be used as reference category per categorical variable.
#' Options are the category label, the category number,
#' or one of "first" (the first category),
#' "last" (the last category) or "largest" (chooses the category
#' with the most observations).
#' Default is "first". If reference categories are specified for
#' a subset of the categorical variables the default will be
#' used for the remaining variables.
#' (See also \code{\link{set_refcat}})
#' @param shrinkage optional; either a character string naming the shrinkage
#' method to be used for regression coefficients in all models
#' or a named vector specifying the type of shrinkage to be
#' used in the models given as names.
#' @param rev optional character vector; vector of ordinal outcome variable
#' names for which the odds should be reversed, i.e.,
#' \eqn{logit(y\le k)} instead of \eqn{logit(y > k)}.
#' @param nonprop optional named list of one-sided formulas specifying
#' covariates that have non-proportional effects in cumulative
#' logit models. These covariates should also be part of the
#' regular model formula, and the names of the list should be
#' the names of the ordinal response variables.
#' @param ... additional, optional arguments
#' \describe{
#' \item{`trunc`}{named list specifying limits of truncation for the
#' distribution of the named incomplete variables (see vignette)}
#' \item{`hyperpars`}{list of hyper-parameters, as obtained by
#' \code{\link{default_hyperpars}()}}
#' \item{`scale_vars`}{named vector of (continuous) variables that
#' will be centred and scaled (such that mean = 0 and sd = 1)
#' when they enter a linear predictor to improve
#' convergence of the MCMC sampling. Default is that all
#' numeric variables and integer variables with >20 different
#' values will be scaled.
#' If set to \code{FALSE} no scaling will be done.}
#' \item{`custom`}{named list of JAGS model chunks (character strings)
#' that replace the model for the given variable.}
#' \item{`append_data_list`}{list that will be appended to the list
#' containing the data that is passed to **rjags**
#' (`data_list`). This may be necessary if additional data /
#' variables are needed for custom (covariate) models.}
#' \item{`progress.bar`}{character string specifying the type of
#' progress bar. Possible values are "text" (default), "gui",
#' and "none" (see \code{\link[rjags]{update}}). Note: when
#' sampling is performed in parallel it is not possible to
#' display a progress bar.}
#' \item{`quiet`}{logical; if \code{TRUE} then messages generated by
#' \strong{rjags} during compilation as well as the progress bar
#' for the adaptive phase will be suppressed,
#' (see \code{\link[rjags]{jags.model}})}
#' \item{`keep_scaled_mcmc`}{should the "original" MCMC sample (i.e.,
#' the scaled version returned by \code{coda.samples()}) be
#' kept? (The MCMC sample that is re-scaled to the scale of the
#' data is always kept.)}
#' \item{`modelname`}{character string specifying the name of the
#' model file (including the ending, either .R or .txt). If
#' unspecified a random name will be generated.}
#' \item{`modeldir`}{directory containing the model file or directory
#' in which the model file should be written. If unspecified a
#' temporary directory will be created.}
#' \item{`overwrite`}{logical; whether an existing model file with
#' the specified \code{<modeldir>/<modelname>} should be
#' overwritten. If set to \code{FALSE} and a model already
#' exists, that model will be used. If unspecified (\code{NULL})
#' and a file exists, the user is asked for input on how to
#' proceed.}
#' \item{`keep_model`}{logical; whether the created JAGS model file
#' should be saved or removed from (\code{FALSE}; default) when
#' the sampling has finished.}
#' }
#' @name model_imp_custom
#'
#' @details # The modification is to allow user-specified order on the sequence
#' of imputation models.
#'
#' default Sequence of models:
#' Models generated automatically (i.e., not mentioned in `formula` or `fixed`
#' are specified in a sequence based on the level of the outcome of the
#' respective model in the multi-level hierarchy and within each level
#' according to the number of missing values.
#'
#' After the modification, users can specify the order through \code{model_order}.
#'
#' the following scenario gives the sequential imputation models in an order of
#' y0, y1, y3, y2, y4,..., which is based on 'nmis' variable. The expected order
#' would be y0, y1, y2, y3, y4,.... To specify the order, what we need is just set
#' model_order = c('y0', 'y1', 'y2', 'y3', 'y4', 'y5')
#'
#'
# rowID out lvl nmis nlev ordered type L1
# 8 rowID1 FALSE 1 428 3 TRUE clm y5
# 7 rowID1 FALSE 1 426 4 TRUE clm y4
# 5 rowID1 FALSE 1 423 4 TRUE clm y2
# 6 rowID1 FALSE 1 63 4 TRUE clm y3
# 4 rowID1 FALSE 1 11 4 TRUE clm y1
# 3 rowID1 FALSE 1 3 4 TRUE clm y0
#'
#' @noRd
model_imp_custom <- function(formula = NULL, fixed = NULL, data, random = NULL,
trtvar = NULL, family = NULL, df_basehaz = NULL,
rd_vcov = "blockdiag",
n.chains = 3, n.adapt = 100, n.iter = 0, thin = 1,
monitor_params = c(analysis_main = TRUE), auxvars = NULL,
timevar = NULL, refcats = NULL,
models = NULL, no_model = NULL, trunc = NULL,
model_order = NULL, ord_cov_dummy = TRUE,
shrinkage = FALSE, custom = NULL,
nonprop = NULL, rev = NULL,
ppc = TRUE, seed = NULL, inits = NULL,
scale_vars = NULL, hyperpars = NULL,
modelname = NULL, modeldir = NULL,
keep_model = FALSE, overwrite = NULL,
quiet = TRUE, progress.bar = "text",
warn = TRUE, mess = TRUE,
keep_scaled_mcmc = FALSE,
analysis_type, assoc_type = NULL,
append_data_list = NULL, ...) {
modimpcall <- as.list(match.call())[-1L]
# checks & warnings -------------------------------------------------------
if (!is.null(formula) & is.null(fixed) & is.null(random)) {
formula <- check_formula_list(formula)
fixed <- split_formula_list(formula)$fixed
random <- split_formula_list(formula)$random
}
# check if the arguments meth, n.cores or parallel are provided
# (no longer used)
args <- as.list(match.call())
if (!is.null(args$meth))
errormsg("The argument %s has been replaced by the argument %s.",
dQuote("meth"), dQuote("models"))
if (!is.null(args$parallel) | !is.null(args$n.cores)) {
errormsg("The arguments %s and %s are no longer used. To perform the
computation in parallel, specify future::plan().
For an example, see ?model_imp.",
dQuote("parallel"), dQuote("n.cores"))
}
# data pre-processing --------------------------------------------------------
data <- check_data(data, fixed, random, auxvars, timevar, mess)
# * divide matrices ----------------------------------------------------------
Mlist <- divide_matrices_custom(data, fixed, analysis_type = analysis_type,
random = random, models = models, auxvars = auxvars,
timevar = timevar, no_model = no_model, trtvar = trtvar,
model_order = model_order, ord_cov_dummy = ord_cov_dummy,
scale_vars = scale_vars, refcats = refcats,
nonprop = nonprop, rev = rev,
warn = warn, mess = mess, ppc = ppc,
shrinkage = shrinkage, df_basehaz = df_basehaz,
rd_vcov = rd_vcov)
# * model dimensions ---------------------------------------------------------
par_index_main <- get_model_dim(Mlist$lp_cols[names(Mlist$lp_cols) %in%
names(Mlist$fixed)],
Mlist = Mlist)
par_index_other <- get_model_dim(Mlist$lp_cols[!names(Mlist$lp_cols) %in%
names(Mlist$fixed)],
Mlist = Mlist)
# * model info ---------------------------------------------------------------
info_list <- get_model_info(Mlist, par_index_main = par_index_main,
par_index_other = par_index_other,
trunc = trunc, assoc_type = assoc_type, custom =custom )
if (!ord_cov_dummy){
for (k in names(info_list)){
if (info_list[[k]][["modeltype"]] == "clm" & info_list[[k]][["ncat"]] ==0)
info_list[[k]][["ncat"]] = length(levels(Mlist$data[,k]))
}
}
# * data list ----------------------------------------------------------------
data_list <- get_data_list(Mlist, info_list, hyperpars, append_data_list)
# write model ----------------------------------------------------------------
modelfile <- make_filename(modeldir = modeldir, modelname = modelname,
keep_model = keep_model, overwrite = overwrite,
mess = mess)
if (!file.exists(modelfile) || (file.exists(modelfile) &
attr(modelfile, "overwrite") == TRUE)) {
write_model(info_list = info_list, Mlist = Mlist, modelfile = modelfile)
}
# initial values -------------------------------------------------------------
inits <- get_initial_values(inits = inits, seed = seed, n_chains = n.chains,
warn = warn)
# parameters to monitor ------------------------------------------------------
if (any(grepl("^beta\\[", unlist(monitor_params))) &
any(!is.na(unlist(Mlist$scale_pars)))) {
monitor_params <- c(lapply(monitor_params, function(x) {
if (any(grepl("^beta[", x, fixed = TRUE))) {
x[-grep("^beta[", x, fixed = TRUE)]
} else {
x
}
}),
betas = TRUE)
if (mess)
msg("Note: %s was set in %s because re-scaling of the effects of
the regression coefficients in the main model(s) requires all
of them to be monitored.",
dQuote("betas = TRUE"), dQuote("monitor_params"))
}
var_names <- do.call(get_params, c(list(Mlist = Mlist, info_list = info_list,
mess = mess),
monitor_params))
# run JAGS -----------------------------------------------------------------
# Message if no MCMC sample will be produced.
if (n.iter == 0) {
if (mess)
msg("Note: No MCMC sample will be created when n.iter is set to 0.")
}
future_info <- get_future_info()
if (n.iter == 0) run_jags = run_seq
else run_jags <- ifelse(future_info$parallel, run_parallel, run_seq)
t0 <- Sys.time()
jags_res <- run_jags(n_adapt = n.adapt, n_iter = n.iter, n_chains = n.chains,
inits = inits, thin = thin,
n_workers = future_info$workers,
data_list = data_list, var_names = var_names,
modelfile = modelfile, quiet = quiet,
progress_bar = progress.bar, mess = mess, warn = warn)
adapt <- jags_res$adapt
mcmc <- jags_res$mcmc
t1 <- Sys.time()
if (n.iter > 0 & !inherits(mcmc, "mcmc.list"))
warnmsg("There is no mcmc sample. Something went wrong.")
# post processing ------------------------------------------------------------
if (n.iter > 0 & !is.null(mcmc)) {
MCMC <- mcmc
if (any(!vapply(Mlist$scale_pars, is.null, FUN.VALUE = logical(1)),
!is.na(unlist(Mlist$scale_pars)))) {
coefs <- try(get_coef_names(info_list))
for (k in seq_len(length(MCMC))) {
MCMC[[k]] <- coda::as.mcmc(
rescale(MCMC[[k]],
coefs = do.call(rbind, coefs),
scale_pars = do.call(rbind, unname(Mlist$scale_pars)),
info_list = info_list,
data_list = data_list,
groups = Mlist$groups))
attr(MCMC[[k]], "mcpar") <- attr(mcmc[[k]], "mcpar")
}
}
}
# prepare output -------------------------------------------------------------
mcmc_settings <- list(modelfile = modelfile,
n.chains = n.chains,
n.adapt = n.adapt,
n.iter = n.iter,
variable.names = if (exists("var_names")) var_names,
thin = thin,
inits = inits,
seed = seed)
object <- structure(
list(analysis_type = analysis_type,
data = Mlist$data,
models = Mlist$models,
fixed = Mlist$fixed,
random = Mlist$random,
Mlist = Mlist[setdiff(names(Mlist), c("data", "models",
"fixed", "random",
"M"))],
par_index_main = par_index_main,
par_index_other = par_index_other,
jagsmodel = structure(readChar(modelfile,
file.info(modelfile)$size),
class = "modelstring"),
mcmc_settings = mcmc_settings,
monitor_params = c(monitor_params,
if (!"analysis_main" %in% names(monitor_params))
setNames(TRUE, "analysis_main")),
data_list = data_list,
hyperpars = if (is.null(hyperpars)) default_hyperpars() else hyperpars,
info_list = info_list,
coef_list = get_coef_names(info_list),
model = if (n.adapt > 0) adapt,
sample = if (n.iter > 0 & !is.null(mcmc) & keep_scaled_mcmc) mcmc,
MCMC = if (n.iter > 0 & !is.null(mcmc)) coda::as.mcmc.list(MCMC),
comp_info = list(start_time = t0,
duration = t1 - t0,
remiod_version = packageVersion("remiod"),
future = future_info$call),
call = modimpcall$thecall
), class = "remiod")
object$fitted.values <- try(fitted_values(object, mess = FALSE, warn = FALSE),
silent = TRUE)
object$residuals <- try(residuals(object, type = "working", warn = FALSE),
silent = TRUE)
if (inherits(object$fitted.values, "try-error"))
object$fitted.values <- NULL
if (inherits(object$residuals, "try-error"))
object$residuals <- NULL
if (inherits(adapt, "try-error"))
class(object) <- "remiod_errored"
if (!attr(modelfile, "keep_model")) {
file.remove(modelfile)
}
return(object)
}
#'
#'
glm_imp_custom <- function(formula, family, data, trtvar = NULL,
n.chains = 3, n.adapt = 100, n.iter = 0, thin = 1,
monitor_params = c(analysis_main = TRUE), auxvars = NULL,
refcats = NULL,
models = NULL, no_model = NULL, model_order = NULL,
shrinkage = FALSE, ppc = TRUE, seed = NULL, inits = NULL,
warn = TRUE, mess = TRUE, ord_cov_dummy = TRUE,
...) {
if (missing(formula)) errormsg("No model formula specified.")
if (missing(family))
errormsg("The argument %s needs to be specified.", dQuote("family"))
arglist <- prep_arglist(analysis_type = "glm",
family = family,
formals = formals(), call = match.call(),
sframe = sys.frame(sys.nframe()))
do.call(model_imp_custom, arglist)
}
#'
#'
clm_imp_custom <- function(formula, data, trtvar = NULL,
n.chains = 3, n.adapt = 100, n.iter = 0, thin = 1,
monitor_params = c(analysis_main = TRUE), auxvars = NULL,
refcats = NULL, nonprop = NULL, rev = NULL,
models = NULL, no_model = NULL, model_order = NULL,
shrinkage = FALSE, ppc = TRUE, seed = NULL, inits = NULL,
warn = TRUE, mess = TRUE, ord_cov_dummy = TRUE, ...) {
if (missing(formula)) errormsg("No model formula specified.")
arglist <- prep_arglist(analysis_type = "clm",
formals = formals(), call = match.call(),
sframe = sys.frame(sys.nframe()))
do.call(model_imp_custom, arglist)
}
opm_imp_custom <- function(formula, data, trtvar = NULL,
n.chains = 3, n.adapt = 100, n.iter = 0, thin = 1,
monitor_params = c(analysis_main = TRUE), auxvars = NULL,
refcats = NULL, nonprop = NULL, rev = NULL,
models = NULL, no_model = NULL, model_order = NULL,
shrinkage = FALSE, ppc = TRUE, seed = NULL, inits = NULL,
warn = TRUE, mess = TRUE, ord_cov_dummy = TRUE, ...) {
if (missing(formula)) errormsg("No model formula specified.")
arglist <- prep_arglist(analysis_type = "opm",
formals = formals(), call = match.call(),
sframe = sys.frame(sys.nframe()))
do.call(model_imp_custom, arglist)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.