Nothing
#' Get covariance matrix of incomplete data using multiple imputation
#'
#' Compute covariance matrix of incomplete data using multiple imputation.
#' For multiple imputation, *Multivariate Imputation by Chained Equations*
#' (MICE) from the [mice] package is used. The covariance matrices of the
#' imputed data sets are combined using Rubin's rules.
#'
#' The function also computes the variance explained by different numbers of
#' principal components and the corresponding Fieller (parametric) or bootstrap
#' (nonparametric) confidence intervals.
#'
#' @references
#' Nassiri, V., Lovik, A., Molenberghs, G., & Verbeke, G. (2018).
#' On using multiple imputation for exploratory factor analysis of incomplete
#' data. Behavioral Research Methods 50, 501–517.
#' \doi{10.3758/s13428-017-1013-4}
#'
#' @param data A data frame with missing values coded as `NA`.
#' @param cov_vars Variables in `data` for which to calculate the covariance
#' matrix. Supports (tidy selection)[dplyr::select()]. This allows to
#' select variables that are used for the imputations of missing values, but not
#' the calculations of the covariance matrix. This is especially useful when
#' there are categorical predictors that can improve the imputation of the
#' response variables, but for which covariance cannot be calculated.
#' By default, all variables in `data` are used for both, the imputation and
#' the covariance matrix. Note: Variables and rows used for the imputation, as
#' well as the method for imputation can be configured using the `...`.
#' See also [mice::mice()].
#' @param n_pc Integer or integer vector indicating number of principal
#' components (eigenvectors) for which explained variance (eigenvalues) should
#' be obtained and for which confidence intervals should be computed.
#' Defaults to all principal components, i.e., the number of variables in the
#' data.
#' @param conf Confidence level for constructing confidence intervals. The
#' default is `.95` that is, 95% confidence intervals.
#' @param n_boot Number of bootstrap samples to use for bootstrapped confidence
#' intervals. The default is 1000.
#' @param ci A character string indicating which types of confidence intervals
#' should be constructed for the variance explained by the principal
#' components. If `"boot"`, `"fieller"`, or `"both"`, the corresponding
#' intervals are computed. If `FALSE` (the default) no confidence intervals will
#' be computed. The components for which confidence intervals should be computed
#' can be set with `n_pc`. See [mifa_ci_boot()] and [mifa_ci_fieller()] for
#' details about the two methods.
#' @inheritDotParams mice::mice
#'
#' @seealso [mifa_ci_boot()], [mifa_ci_fieller()], [mice::mice()]
#'
#' @return A list:
#' \describe{
#' \item{cov_combined}{The estimated covariance matrix of the
#' incomplete data, based on the combined covariance matrices of imputed
#' data sets.}
#' \item{cov_imputations}{A list containing the estimated covariance matrixes
#' for all imputed data sets.}
#' \item{var_explained}{A data frame containing the estimated proportions of
#' explained variance for each of specified `n_pc` components. Depending o
#' n `ci`, it will also contain the estimated Fieller's (parametric) and/or
#' bootstrap (nonparametric) confidence interval for the proportion of
#' variance explained by the different numbers of principal components defined
#' by `n_pc`.}
#' \item{mids}{Object of type [mice::mids]. This is the results of
#' the multiple imputation step for the covariance matrix. Can be useful for
#' diagnosing the multiple imputations.}
#' }
#' @export
#' @examples
#' \donttest{
#' if(requireNamespace("psych")) {
#' data <- psych::bfi
#' mifa(data, cov_vars = -c(age, education, gender), ci = "fieller", print = FALSE)
#' }
#' }
mifa <- function(data, cov_vars = dplyr::everything(), n_pc, ci = FALSE,
conf = .95, n_boot = 1000, ...) {
# check and clean arguments
checkmate::assert_data_frame(data, min.rows = 1, min.cols = 2)
checkmate::assert_data_frame(dplyr::select(data, {{ cov_vars }}), min.cols = 2)
if (ci != FALSE) {
checkmate::assert_choice(ci, c(FALSE, "both", "boot", "fieller"))
}
checkmate::assert_number(conf, lower = 0, upper = 1)
checkmate::assert_count(n_boot, positive = TRUE)
n_cov_vars <- ncol(dplyr::select(data, {{ cov_vars }}))
n_pc <- clean_n_pc(n_pc, n_cov_vars)
# create imputed datasets, make sure no NAs are left
imp <- stop_constants(mice::mice(data, ...))
data_imps <- mice::complete(imp, "all")
data_imps <- lapply(data_imps, function(x) mice_impute_all_NA(x, ...))
# Select variables for calculating covariance matrix
data_imps <- lapply(data_imps, function(d) dplyr::select(d, {{ cov_vars }}))
# estimate covariance matrix of imputed values and combine estimates
cov_imps <- lapply(data_imps, stats::cov)
cov_comb <- Reduce("+", cov_imps) / length(cov_imps)
# get proportion of explained variance for different number of pr. components
cov_comb_eigen <- eigen(cov_comb)$values
var_expl <- data.frame(
n_pc = n_pc,
var_explained = (cumsum(cov_comb_eigen) / sum(cov_comb_eigen))[n_pc]
)
# add confidence intervals for variance explained
if ("boot" %in% ci || "both" %in% ci) {
ci_boot <- mifa_ci_boot(data, {{ cov_vars }}, n_pc, conf, n_boot, ...)
var_expl <- dplyr::bind_cols(
var_expl,
ci_boot_lower = ci_boot$lower,
ci_boot_upper = ci_boot$upper
)
}
if ("fieller" %in% ci || "both" %in% ci) {
ci_fieller <- mifa_ci_fieller(cov_imps, n_pc, conf, nrow(data))
var_expl <- dplyr::bind_cols(
var_expl,
ci_fieller_lower = ci_fieller$lower,
ci_fieller_upper = ci_fieller$upper
)
}
structure(
list(
cov_combined = cov_comb,
cov_imputations = cov_imps,
var_explained = var_expl,
mids = imp
),
class = "mifa"
)
}
#' @export
summary.mifa <- function(object, ...) {
mi <- object
cat(sprintf(
"Imputed covariance matrix of %i variables\n\n",
ncol(mi$cov_combined)
))
last_to_print <- function(x, max_width = getOption("width")) {
widths <- pmax(nchar(names(x)), nchar(x))
Position(function(x) x > max_width, cumsum(widths), nomatch = length(x))
}
# Details about data
vars_cov <- colnames(mi$cov_combined)
n_missing <- colSums(is.na(mi$mids$data[, vars_cov]))
widths <- pmax(nchar(names(n_missing)), nchar(n_missing))
n_print <- last_to_print(n_missing)
cat("Variable: ", sprintf("%*s", widths[1:n_print], names(n_missing)[1:n_print]), "\n")
cat("N Imputed: ", sprintf("%*i", widths[1:n_print], n_missing[1:n_print]))
if (n_print < length(vars_cov)) {
cat(sprintf(" ...\n...and %i more variables", length(vars_cov) - n_print))
}
# Details on imputation
cat(sprintf("\n\nNumber of MICE imputations: %i", length(mi$cov_imputations)))
vars_imp <- colnames(mi$mids$data)
vars_diff <- setdiff(vars_imp, vars_cov)
if (length(vars_diff) > 0) {
cat(sprintf("\nAdditional variables used for imputations:\n"))
n_print <- last_to_print(vars_diff)
cat(sprintf("%s", vars_diff[1:n_print]))
if (n_print < length(vars_diff)) {
cat(sprintf("... \n...and %i more variables", length(vars_diff) - n_print))
}
}
# variance expplained
if (is.null(mi$var_explained$ci_fieller_lower)) {
ci_fieller <- NULL
} else {
ci_fieller <- sprintf(
"[%.2f, %.2f]",
mi$var_explained$ci_fieller_lower,
mi$var_explained$ci_fieller_upper
)
}
if (is.null(mi$var_explained$ci_boot_lower)) {
ci_boot <- NULL
} else {
ci_boot <- sprintf(
"[%.2f, %.2f]",
mi$var_explained$ci_boot_lower,
mi$var_explained$ci_boot_upper
)
}
k_width <- max(nchar(mi$var_explained$n_pc))
cat("\n\nCumulative proportion of variance explained by n principal components:")
cat(sprintf("\n\n%*s prop ", k_width, "n"))
if (!is.null(ci_fieller)) cat("Fieller CI ")
if (!is.null(ci_boot)) cat("Bootstrap CI")
cat(
"\n",
sprintf(
"%*i %5.2f %s %s\n",
k_width,
mi$var_explained$n_pc,
mi$var_explained$var_explained,
if (is.null(ci_fieller)) "" else ci_fieller,
if (is.null(ci_boot)) "" else ci_boot
),
"\n",
sep = ""
)
}
#' @export
print.mifa <- function(x, ...) {
summary(x)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.