#' Bootstrap missing values in data from fitted multivariate model
#'
#' @param fit Fitting result, as returned by [fit_mvnorm()] or
#' [fit_mvnorm_hier()]. Or, a matrix of samples, as returned by
#' `coda:::as.matrix.mcmc.list(fit)`
#' @param dat Data with partially missing values
#' @param n Number of bootstrap samples (default = 500)
#' @param progress (Logical) If `TRUE` (default), display a progress bar
#' @return If `missing_only`, matrix of bootstrapped missing values (`nmissing x
#' n`). Otherwise, an array of bootstraps of the full imputed dataset, with
#' dimensions `c(nrow(dat), ncol(dat), n)`.
#' @author Alexey Shiklomanov
#' @export
bootstrap_missing <- function(fit, dat, n = 500, progress = TRUE) {
stopifnot(any(is.na(dat)))
if (!is.matrix(dat)) dat <- as.matrix(dat)
if (is.list(fit) && inherits(fit$samples, "mcmc.list")) {
# Method is not correctly exported, so need to call it manually
samp_mat <- coda:::as.matrix.mcmc.list(fit$samples)
} else if (is.matrix(fit)) {
samp_mat <- fit
} else {
stop("Invalid type for input `fit`.")
}
# Grab n samples from the posterior
fit_samp <- samp_mat[sample(nrow(samp_mat), n), ]
fit_mu <- fit_samp[, grep("^mu\\.\\.", colnames(fit_samp))]
fit_sigma_wide <- fit_samp[, grep("^Sigma\\.\\.", colnames(fit_samp))]
setup <- setup_missing(dat)
out <- array(numeric(), c(nrow(dat), ncol(dat), n))
colnames(out) <- colnames(dat)
attr(out, "imissing") <- which(is.na(dat), arr.ind = TRUE)
if (progress) pb <- progress::progress_bar$new(total = n)
for (i in seq_len(n)) {
if (progress) pb$tick()
mu <- fit_mu[i, ]
sigma <- lowerdiag2mat(fit_sigma_wide[i, ], col_names = FALSE, corr = FALSE,
hier = hier)
fill <- mvnorm_fill_missing(dat, mu, sigma, setup = setup)
out[, , i] <- fill
}
out
}
#' Bootstrap missing values in data from fitted multivariate hierarchical model
#'
#' @param groups a vector of groups. If `NULL`
#' (default), assume a simple multivariate fit.
#' @author Alexey Shiklomanov
#' @inheritParams bootstrap_missing
#' @inherit bootstrap_missing return
#' @export
bootstrap_missing_hier <- function(fit, dat, groups, n = 500, progress = TRUE) {
stopifnot(
any(is.na(dat)),
length(groups) == nrow(dat)
)
if (!is.matrix(dat)) dat <- as.matrix(dat)
if (is.list(fit) && inherits(fit$samples, "mcmc.list")) {
# Method is not correctly exported, so need to call it manually
samp_mat <- coda:::as.matrix.mcmc.list(fit$samples)
} else if (is.matrix(fit)) {
samp_mat <- fit
} else {
stop("Invalid type for input `fit`.")
}
ugroup <- unique(groups)
# Grab n samples from the posterior
fit_samp <- samp_mat[sample(nrow(samp_mat), n), ]
fit_mu_l <- lapply(ugroup, get_mu_group, smat = fit_samp)
names(fit_mu_l) <- ugroup
fit_sigma_wide_l <- lapply(ugroup, get_sigma_group, smat = fit_samp)
names(fit_sigma_wide_l) <- ugroup
setup_l <- lapply(
ugroup,
function(x) setup_missing(dat[groups == x, ])
)
names(setup_l) <- ugroup
out <- array(numeric(), c(nrow(dat), ncol(dat), n))
colnames(out) <- colnames(dat)
attr(out, "imissing") <- which(is.na(dat), arr.ind = TRUE)
if (progress) pb <- progress::progress_bar$new(total = n)
for (i in seq_len(n)) {
if (progress) pb$tick()
for (g in ugroup) {
g_mu <- fit_mu_l[[g]][i,]
g_sigma <- lowerdiag2mat(
fit_sigma_wide_l[[g]][i, ],
col_names = FALSE,
corr = FALSE,
hier = hier
)
g_rows <- which(groups == g)
g_dat <- dat[g_rows, ]
fill <- mvnorm_fill_missing(g_dat, g_mu, g_sigma, setup = setup_l[[g]])
out[g_rows, , i] <- fill
}
}
out
}
#' Tidy summary table of bootstrapped data
#'
#' @param boot_data Bootstrapped data, as returned by [bootstrap_missing()] or
#' [bootstrap_missing_hier()]
#' @param quantiles Length-2 vector of quantiles to calculate
#' @return `data.frame` summarizing the imputed missing values
#' @author Alexey Shiklomanov
#' @export
tidy_bootstrap <- function(boot_data, quantiles = c(0.025, 0.975)) {
imissing <- attr(boot_data, "imissing")
data.frame(
irow = imissing[, "row"],
icol = imissing[, "col"],
column = colnames(boot_data)[imissing[, "col"]],
Mean = apply(boot_data, c(1, 2), mean)[imissing],
SD = apply(boot_data, c(1, 2), sd)[imissing],
lo = apply(boot_data, c(1, 2), quantile, probs = quantiles[1])[imissing],
hi = apply(boot_data, c(1, 2), quantile, probs = quantiles[2])[imissing],
stringsAsFactors = FALSE
)
}
get_mu_group <- function(group, smat) {
rx <- paste0("^mu\\.\\.", group, "\\.\\.")
i <- grep(rx, colnames(smat))
smat[, i]
}
get_sigma_group <- function(group, smat) {
rx <- paste0("^Sigma\\.\\.", group, "\\.\\.")
i <- grep(rx, colnames(smat))
smat[, i]
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.