Nothing
#' Wrapper function that runs MICE in parallel
#'
#' This is a wrapper function for \code{\link{mice}}, using multiple cores to
#' execute \code{\link{mice}} in parallel. As a result, the imputation
#' procedure can be sped up, which may be useful in general. By default,
#' \code{\link{futuremice}} distributes the number of imputations \code{m}
#' about equally over the cores.
#'
#' This function relies on package \code{\link[furrr]{furrr}}, which is a
#' package for R versions 3.2.0 and later. We have chosen to use furrr function
#' \code{future_map} to allow the use of \code{futuremice} on Mac, Linux and
#' Windows systems.
#'
#'
#' This wrapper function combines the output of \code{\link[furrr]{future_map}} with
#' function \code{\link{ibind}} from the \code{\link{mice}} package. A
#' \code{mids} object is returned and can be used for further analyses.
#'
#' A seed value can be specified in the global environment, which will yield
#' reproducible results. A seed value can also be specified within the
#' \code{\link{futuremice}} call, through specifying the argument
#' \code{parallelseed}. If \code{parallelseed} is not specified, a seed value is
#' drawn randomly by default, and accessible through \code{$parallelseed} in the
#' output object. Hence, results will always be reproducible, regardless of
#' whether the seed is specified in the global environment, or by setting the
#' same seed within the function (potentially by extracting the seed from the
#' \code{futuremice} output object.
#'
#' @aliases futuremice
#' @param data A data frame or matrix containing the incomplete data. Similar to
#' the first argument of \code{\link{mice}}.
#' @param m The number of desired imputated datasets. By default $m=5$ as with
#' \code{mice}
#' @param parallelseed A scalar to be used to obtain reproducible results over
#' the futures. The default \code{parallelseed = NA} will result in a seed value
#' that is randomly drawn between -999999999 and 999999999.
#' @param n.core A scalar indicating the number of cores that should be used.
#' @param seed A scalar to be used as the seed value for the mice algorithm
#' within each parallel stream. Please note that the imputations will be the
#' same for all streams and, hence, this should be used if and only if
#' \code{n.core = 1} and if it is desired to obtain the same output as under
#' \code{mice}.
#' @param use.logical A logical indicating whether logical (\code{TRUE}) or
#' physical (\code{FALSE}) CPU's on machine should be used.
#' @param future.plan A character indicating how \code{future}s are resolved.
#' The default \code{multisession} resolves futures asynchronously (in parallel)
#' in separate \code{R} sessions running in the background. See
#' \code{\link[future]{plan}} for more information on future plans.
#' @param ... Named arguments that are passed down to function
#' \code{\link{mice}}.
#'
#' @return A mids object as defined by \code{\link{mids-class}}
#'
#' @author Thom Benjamin Volker, Gerko Vink
#' @seealso \code{\link[future]{future}}, \code{\link[furrr]{furrr}}, \code{\link[furrr]{future_map}},
#' \code{\link[future]{plan}}, \code{\link{mice}}, \code{\link{mids-class}}
#' @references
#' Volker, T.B. and Vink, G. (2022). futuremice: The future starts today.
#' \url{https://www.gerkovink.com/miceVignettes/futuremice/Vignette_futuremice.html}
#'
#' #'Van Buuren, S. (2018).
#' \href{https://stefvanbuuren.name/fimd/parallel-computation.html}{\emph{Flexible Imputation of Missing Data. Second Edition.}}
#' Chapman & Hall/CRC. Boca Raton, FL.
#'
#' @examples
#' # 150 imputations in dataset nhanes, performed by 3 cores
#' \dontrun{
#' imp1 <- futuremice(data = nhanes, m = 150, n.core = 3)
#' # Making use of arguments in mice.
#' imp2 <- futuremice(data = nhanes, m = 100, method = "norm.nob")
#' imp2$method
#' fit <- with(imp2, lm(bmi ~ hyp))
#' pool(fit)
#' }
#'
#' @export
futuremice <- function(data, m = 5, parallelseed = NA, n.core = NULL, seed = NA,
use.logical = TRUE, future.plan = "multisession", ...) {
# check if pacakages available
install.on.demand("parallelly", ...)
install.on.demand("furrr", ...)
install.on.demand("future", ...)
# check form of data and m
data <- check.dataform(data)
m <- check.m(m)
# check if data complete
if (sum(is.na(data)) == 0) {
stop("Data has no missing values")
}
# number of available cores
available <- parallelly::availableCores(logical = use.logical)
if (!is.null(n.core)) {
# check if arguments match CPU specifications
if (n.core > available) {
warning(paste("Number of cores specified is greater than the number of logical cores in your CPU, futuremice falls back to", available, "cores."))
n.core <- available
}
if (n.core > m) {
warning(paste("The number of cores exceeds the number of imputations. The number of cores used is set equal to the number of imputations (m =", m, ")."))
n.core <- m
}
} else {
n.core <- available - 1
message(paste("Number of cores not specified. Based on your machine a value of n.core =", n.core, "is chosen; the imputations are distributed about equally over the cores."))
}
if (n.core > 1) {
dist.core <- cut(1:m, n.core, labels = paste0("core", 1:n.core))
} else {
dist.core <- rep("core1", m)
}
n.imp.core <- as.vector(table(dist.core))
if (!is.na(seed)) {
if (n.core > 1) {
if (interactive()) {
msg <- "Be careful; specifying seed rather than parallelseed results in duplicate imputations.\nDo you want to continue?\n"
ask <- askYesNo(msg, prompts = getOption("askYesNo", gettext(c("Yes", "No, ignore seed", "Cancel"))))
if (isTRUE(ask)) {
seed <- seed
warning("Be careful; the imputations will be the same over the cores.")
} else if (isFALSE(ask)) {
seed <- NA
message("Parallelseed is specified for you, and is accessible in the output object under $parallelseed.")
} else if (is.na(ask)) {
stop("You stopped futuremice. To obtain unique, but reproducible imputations, specify parallelseed.")
}
} else {
warning("Be careful; the imputations will be identical over the cores. Perhaps you want to specify parallelseed, for unique, but reproducible results.")
}
}
}
if (!is.na(parallelseed)) {
set.seed(parallelseed)
} else {
parallelseed <- get(".Random.seed",
envir = globalenv(), mode = "integer",
inherits = FALSE
)
}
# start multisession
future::plan(future.plan,
workers = n.core
)
# begin future
imps <- furrr::future_map(n.imp.core, function(x) {
mice(
data = data,
m = x,
printFlag = FALSE,
seed = seed,
...
)
}, .options = furrr::furrr_options(seed = TRUE))
# end multisession
future::plan(future::sequential)
# stitch future into mids
# postprocess clustered imputation into a mids object
imp <- imps[[1]]
if (length(imps) > 1) {
for (i in 2:length(imps)) {
imp <- ibind(imp, imps[[i]])
}
}
# let imputation matrix correspond to grand m
for (i in 1:length(imp$imp)) {
colnames(imp$imp[[i]]) <- 1:imp$m
}
imp$parallelseed <- parallelseed
return(imp)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.