#' Pool analysis results obtained from the imputed datasets
#'
#' @param results an analysis object created by [analyse()].
#'
#' @param conf.level confidence level of the returned confidence interval.
#' Must be a single number between 0 and 1. Default is 0.95.
#'
#' @param alternative a character string specifying the alternative hypothesis,
#' must be one of `"two.sided"` (default), `"greater"` or `"less"`.
#'
#' @param type a character string of either `"percentile"` (default) or
#' `"normal"`. Determines which method is used to calculate the bootstrap
#' confidence intervals. See details.
#' Only used if `method_condmean(type = "bootstrap")` was specified
#' in the original call to [draws()].
#'
#' @param x a `pool` object generated by [pool()].
#'
#' @param ... not used.
#'
#' @details
#'
#' The calculation used to generate the point estimate, standard error and
#' confidence interval depends upon the method specified in the original
#' call to [draws()]. In particular:
#'
#' - `method_approxbayes()` & `method_bayes()` both use Rubin's rules to pool estimates
#' and variances across multiple imputed datasets, and the Barnard-Rubin rule to pool
#' degrees of freedom; see Little & Rubin (2002).
#' - `method_condmean(type = "bootstrap")` uses the percentile or normal approximation
#' method, as determined by the `type` argument;
#' see Efron & Tibshirani (1994). Note that for the percentile bootstrap, no standard error is
#' calculated, i.e. the standard errors will be `NA` in the object / `data.frame`.
#' - `method_condmean(type = "jackknife")` uses the standard jackknife variance formula;
#' see Efron & Tibshirani (1994).
#' - `method_bmlmi` uses the pooling procedure for Bootstrapped Maximum Likelihood MI
#' (BMLMI); see Von Hippel & Bartlett (2021).
#'
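#' A schematic end-to-end sketch of how these methods are invoked (`data`,
#' `data_ice`, `vars`, `references` and `fun` are hypothetical placeholders;
#' see [draws()], [impute()] and [analyse()] for the actual interfaces):
#'
#' ```
#' drws <- draws(data, data_ice, vars, method = method_condmean(type = "bootstrap"))
#' imps <- impute(drws, references)
#' anaobj <- analyse(imps, fun)
#' pool(anaobj, conf.level = 0.95, alternative = "two.sided", type = "percentile")
#' ```
#'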
#' @references
#' Bradley Efron and Robert J Tibshirani. An introduction to the bootstrap. CRC
#' press, 1994. \[Section 11\]
#'
#' Roderick J. A. Little and Donald B. Rubin. Statistical Analysis with Missing
#' Data, Second Edition. John Wiley & Sons, Hoboken, New Jersey, 2002. \[Section 5.4\]
#'
#' Von Hippel, Paul T and Bartlett, Jonathan W.
#' Maximum likelihood multiple imputation: Faster imputations and consistent standard
#' errors without posterior draws. 2021.
#' @export
pool <- function(
results,
conf.level = 0.95,
alternative = c("two.sided", "less", "greater"),
type = c("percentile", "normal")
) {
assert_that(
has_class(results, "analysis")
)
validate(results)
alternative <- match.arg(alternative)
type <- match.arg(type)
assert_that(
is.numeric(conf.level),
length(conf.level) == 1,
0 < conf.level & conf.level < 1,
msg = "`conf.level` must be between 0 and 1"
)
pool_type <- class(results$results)[[1]]
results_transpose <- transpose_results(
results$results,
get_pool_components(pool_type)
)
pars <- lapply(
results_transpose,
function(x, ...) pool_internal(as_class(x, pool_type), ...),
conf.level = conf.level,
alternative = alternative,
type = type,
D = results$method$D
)
if (pool_type == "bootstrap") {
method <- sprintf("%s (%s)", pool_type, type)
} else {
method <- pool_type
}
ret <- list(
pars = pars,
conf.level = conf.level,
alternative = alternative,
N = length(results$results),
method = method
)
class(ret) <- "pool"
return(ret)
}
#' Expected Pool Components
#'
#' Returns the elements expected to be contained in the analysis object,
#' depending on which analysis method was specified.
#'
#' @param x Character name of the analysis method; must be one of
#' `"rubin"`, `"jackknife"`, `"bootstrap"` or `"bmlmi"`.
get_pool_components <- function(x) {
switch(x,
"rubin" = c("est", "df", "se"),
"jackknife" = c("est"),
"bootstrap" = c("est"),
"bmlmi" = c("est")
)
}
#' Internal Pool Methods
#'
#' Dispatches pool methods based upon results object class. See
#' [pool()] for details.
#'
#' @inheritParams pool
#' @param results a list of results, i.e. the `x$results` element of an
#' `analysis` object created by [analyse()].
#' @param D numeric representing the number of imputations performed on each bootstrap sample in the BMLMI method.
#'
#' @name pool_internal
#' @export
pool_internal <- function(results, conf.level, alternative, type, D) {
UseMethod("pool_internal")
}
#' @importFrom stats qnorm pnorm
#' @rdname pool_internal
#' @export
pool_internal.jackknife <- function(results, conf.level, alternative, type, D) {
alpha <- 1 - conf.level
ests <- results$est
est_point <- ests[1]
ests_jack <- ests[-1] # First estimate is full dataset
mean_jack <- mean(ests_jack)
N_jack <- length(ests_jack)
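    # Jackknife standard error: sqrt((n - 1) / n) times the root sum of squared
    # deviations from the jackknife mean; see Efron & Tibshirani (1994), Section 11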
se_jack <- sqrt(((N_jack - 1) / N_jack) * sum((ests_jack - mean_jack)^2))
ret <- parametric_ci(est_point, se_jack, alpha, alternative, qnorm, pnorm)
return(ret)
}
#' @rdname pool_internal
#' @export
pool_internal.bootstrap <- function(
results,
conf.level,
alternative,
type = c("percentile", "normal"),
D
) {
type <- match.arg(type)
bootfun <- switch(
type,
percentile = pool_bootstrap_percentile,
normal = pool_bootstrap_normal
)
ret <- bootfun(results$est, conf.level, alternative)
return(ret)
}
#' @importFrom stats qt pt var
#' @rdname pool_internal
#' @export
pool_internal.bmlmi <- function(
results,
conf.level,
alternative,
type,
D
) {
ests <- results$est
alpha <- 1 - conf.level
pooled_est <- get_ests_bmlmi(ests, D)
ret <- parametric_ci(
point = pooled_est$est_point,
se = sqrt(pooled_est$est_var),
alpha = alpha,
alternative = alternative,
qfun = qt,
pfun = pt,
df = pooled_est$df
)
return(ret)
}
#' @title Von Hippel and Bartlett pooling of BMLMI method
#'
#' @description Compute pooled point estimates, standard error and degrees of freedom
#' according to the Von Hippel and Bartlett formula for Bootstrapped Maximum Likelihood
#' Multiple Imputation (BMLMI).
#'
#' @param ests numeric vector containing estimates from the analysis of the imputed datasets.
#' @param D numeric representing the number of imputations performed on each bootstrap sample in the BMLMI method.
#'
#' @return a list containing point estimate, standard error and degrees of freedom.
#'
#' @details `ests` must be provided in the following order: the first D elements correspond to
#' the analyses of the D random imputations of the first bootstrap sample. The second set of D
#' elements (i.e. from D+1 to 2*D) corresponds to the second bootstrap sample, and so on.
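#'
#' For example, a hypothetical sketch with B = 2 bootstrap samples and D = 3
#' imputations per sample (so `ests` has length 6, ordered sample-by-sample):
#'
#' ```
#' ests <- c(1.1, 1.3, 1.2, 0.9, 1.0, 1.1)
#' get_ests_bmlmi(ests, D = 3)
#' ```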
#'
#' @references
#' Von Hippel, Paul T and Bartlett, Jonathan W.
#' Maximum likelihood multiple imputation: Faster imputations and consistent standard errors
#' without posterior draws. 2021.
get_ests_bmlmi <- function(ests, D) {
l <- length(ests)
assert_that(
l %% D == 0,
msg = "length of `ests` must be a multiple of `D`"
)
assert_that(
D > 1,
msg = "`D` must be a numeric larger than 1"
)
B <- l / D
ests <- matrix(ests, nrow = B, ncol = D, byrow = TRUE)
est_point <- mean(ests)
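    # Within- and between-bootstrap-sample sums of squares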
SSW <- sum((ests - rowMeans(ests))^2)
SSB <- D * sum((rowMeans(ests) - est_point)^2)
MSW <- SSW / (B * (D - 1))
MSB <- SSB / (B - 1)
resVar <- MSW
randIntVar <- (MSB - MSW) / D
est_var <- (1 + 1 / B) * randIntVar + resVar / (B * D)
df <- (est_var^2) / ((((B + 1) / (B * D))^2 * MSB^2 / (B - 1)) + MSW^2 / (B * D^2 * (D - 1)))
df <- max(3, df)
ret_obj <- list(
est_point = est_point,
est_var = est_var,
df = df
)
return(ret_obj)
}
#' @importFrom stats qt pt
#' @rdname pool_internal
#' @export
pool_internal.rubin <- function(results, conf.level, alternative, type, D) {
ests <- results$est
ses <- results$se
dfs <- results$df
alpha <- 1 - conf.level
v_com <- unique(dfs)
assert_that(
length(v_com) == 1,
msg = "Degrees of freedom should be consistent across all samples"
)
res_rubin <- rubin_rules(
ests = ests,
ses = ses,
v_com = v_com
)
ret <- parametric_ci(
point = res_rubin$est_point,
se = sqrt(res_rubin$var_t),
alpha = alpha,
alternative = alternative,
qfun = qt,
pfun = pt,
df = res_rubin$df
)
return(ret)
}
#' @title Barnard and Rubin degrees of freedom adjustment
#'
#' @description Compute degrees of freedom according to the Barnard-Rubin formula.
#'
#' @param v_com Positive number representing the degrees of freedom in the complete-data analysis.
#' @param var_b Between-variance of point estimate across multiply imputed datasets.
#' @param var_t Total-variance of point estimate according to Rubin's rules.
#' @param M Number of imputations.
#'
#' @return Degrees of freedom according to Barnard-Rubin formula. See Barnard-Rubin (1999).
#'
#' @details The computation takes into account limit cases where there is no missing data
#' (i.e. the between-variance `var_b` is zero) or where the complete-data degrees of freedom is
#' set to `Inf`. Moreover, if `v_com` is given as `NA`, the function returns `Inf`.
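#'
#' As a numerical sketch of the non-degenerate case, with hypothetical inputs
#' `M = 10`, `var_b = 0.2`, `var_t = 1` and `v_com = 100`, the computation
#' mirrors the function body:
#'
#' ```
#' lambda <- (1 + 1 / 10) * 0.2 / 1          # fraction of missing information
#' v_old <- (10 - 1) / lambda^2              # classic Rubin degrees of freedom
#' v_obs <- ((100 + 1) / (100 + 3)) * 100 * (1 - lambda)
#' (v_old * v_obs) / (v_old + v_obs)         # Barnard-Rubin adjusted df
#' ```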
#'
#' @references
#' Barnard, J. and Rubin, D.B. (1999).
#' Small sample degrees of freedom with multiple imputation. Biometrika, 86, 948-955.
rubin_df <- function(v_com, var_b, var_t, M) {
if (is.na(v_com) || (is.infinite(v_com) && var_b == 0)) {
df <- Inf
} else {
lambda <- (1 + 1 / M) * var_b / var_t
if (!is.infinite(v_com)) {
v_obs <- ((v_com + 1) / (v_com + 3)) * v_com * (1 - lambda)
}
if (lambda != 0) {
v_old <- (M - 1) / lambda^2
df <- if (is.infinite(v_com)) {
v_old
} else {
(v_old * v_obs) / (v_old + v_obs)
}
} else {
df <- v_obs
}
}
return(df)
}
#' @title Combine estimates using Rubin's rules
#'
#' @description Pool together the results from `M` complete-data analyses according to Rubin's rules. See details.
#'
#' @param ests Numeric vector containing the point estimates from the complete-data analyses.
#' @param ses Numeric vector containing the standard errors from the complete-data analyses.
#' @param v_com Positive number representing the degrees of freedom in the complete-data analysis.
#'
#' @return
#' A list containing:
#'
#' - `est_point`: the pooled point estimate according to Little-Rubin (2002).
#' - `var_t`: total variance according to Little-Rubin (2002).
#' - `df`: degrees of freedom according to Barnard-Rubin (1999).
#'
#' @details `rubin_rules` applies Rubin's rules (Rubin, 1987) for pooling together
#' the results from a multiple imputation procedure. The pooled point estimate `est_point`
#' is the average across the point estimates from the complete-data analyses (given by the input argument `ests`).
#' The total variance `var_t` is the sum of two terms representing the within-variance
#' and the between-variance (see Little-Rubin (2002)). The function
#' also returns `df`, the estimated pooled degrees of freedom according to Barnard-Rubin (1999)
#' that can be used for inference based on the t-distribution.
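#'
#' For example, with hypothetical estimates and standard errors from `M = 3`
#' complete-data analyses (the total variance equals
#' `mean(ses^2) + (1 + 1/M) * var(ests)`):
#'
#' ```
#' rubin_rules(ests = c(1.0, 1.4, 1.2), ses = c(0.30, 0.35, 0.32), v_com = 50)
#' ```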
#'
#' @seealso [rubin_df()] for the degrees of freedom estimation.
#'
#' @references
#' Barnard, J. and Rubin, D.B. (1999).
#' Small sample degrees of freedom with multiple imputation. Biometrika, 86, 948-955.
#'
#' Roderick J. A. Little and Donald B. Rubin. Statistical Analysis with Missing
#' Data, Second Edition. John Wiley & Sons, Hoboken, New Jersey, 2002. \[Section 5.4\]
#' @importFrom stats var
rubin_rules <- function(ests, ses, v_com) {
M <- length(ests)
est_point <- mean(ests)
if (all(is.na(ses))) {
return(
list(
est_point = est_point,
var_t = NA,
df = NA
)
)
}
var_w <- mean(ses^2)
var_b <- var(ests)
var_t <- var_w + var_b + var_b / M
df <- rubin_df(
v_com = v_com,
var_b = var_b,
var_t = var_t,
M = M
)
ret_obj <- list(
est_point = est_point,
var_t = var_t,
df = df
)
return(ret_obj)
}
#' Bootstrap Pooling via Percentiles
#' @description
#' Get point estimate, confidence interval and p-value using
#' percentiles. Note that quantile `type = 6` is used,
#' see [stats::quantile()] for details.
#' @param est a numeric vector of point estimates from each bootstrap sample.
#' @inheritParams pool
#' @details
#' The point estimate is taken to be the first element of `est`. The remaining
#' n-1 values of `est` are then used to generate the confidence intervals.
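#'
#' A usage sketch with hypothetical estimates (the first value comes from the
#' original dataset, the rest from 1000 bootstrap samples):
#'
#' ```
#' set.seed(101)
#' est <- c(1.0, rnorm(1000, mean = 1.0, sd = 0.2))
#' pool_bootstrap_percentile(est, conf.level = 0.95, alternative = "two.sided")
#' ```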
pool_bootstrap_percentile <- function(est, conf.level, alternative) {
est_orig <- est[1]
est <- est[-1]
if (length(est) == 0) { # if n_samples = 0 we cannot estimate pvalue and CIs
ret <- list(
est = est_orig, # First estimate should be original dataset
ci = c(NA, NA),
se = NA,
pvalue = NA
)
return(ret)
}
alpha <- 1 - conf.level
pvals <- pval_percentile(est = est)
names(pvals) <- NULL
index <- switch(
alternative,
two.sided = c(1, 2),
greater = 1,
less = 2
)
quant_2_side <- quantile(
est,
probs = c(alpha / 2, 1 - alpha / 2),
type = 6,
names = FALSE
)
quant_1_side <- quantile(
est,
probs = c(alpha, 1 - alpha),
type = 6,
names = FALSE
)
ci <- switch(
alternative,
two.sided = quant_2_side,
greater = c(-Inf, quant_1_side[2]),
less = c(quant_1_side[1], Inf)
)
ret <- list(
est = est_orig, # First estimate should be original dataset
ci = ci,
se = NA,
pvalue = min(min(pvals[index]) * length(pvals[index]), 1)
)
return(ret)
}
#' @title P-value of percentile bootstrap
#'
#' @description Determines the (not necessarily unique) quantile (`type = 6`) of `est` which
#' gives a value of 0. From this, derives the p-value corresponding to the percentile bootstrap via inversion.
#'
#' @param est a numeric vector of point estimates from each bootstrap sample.
#'
#' @details The p-value for H_0: theta=0 vs H_A: theta>0 is the value `alpha` for which `q_alpha = 0`.
#' If there is at least one estimate equal to zero it returns the largest `alpha` such that `q_alpha = 0`.
#' If all bootstrap estimates are > 0 it returns 0; if all bootstrap estimates are < 0 it returns 1. Analogous
#' reasoning is applied for the p-value for H_0: theta=0 vs H_A: theta<0.
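#'
#' For example, with hypothetical bootstrap estimates straddling zero:
#'
#' ```
#' pval_percentile(c(-0.5, -0.1, 0.2, 0.7, 1.1))
#' ```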
#'
#' @return A named numeric vector of length 2 containing the p-value for H_0: theta=0 vs H_A: theta>0
#' (`"pval_greater"`) and the p-value for H_0: theta=0 vs H_A: theta<0 (`"pval_less"`).
#'
#' @importFrom stats uniroot
pval_percentile <- function(est) {
if (all(est == 0)) {
ret <- c(1, 1)
} else if (min(est) > 0) {
ret <- c(0, 1)
} else if (max(est) < 0) {
ret <- c(1, 0)
} else if (any(est == 0)) {
# see ?quantile "type=6" section for explanation
quant <- rank(est, ties.method = "first") / (length(est) + 1)
ret <- c(
min(quant[est == 0]),
1 - max(quant[est == 0])
)
} else {
x <- uniroot(
function(q) quantile(est, probs = q, type = 6),
lower = 0,
upper = 1
)$root
ret <- c(x, 1 - x)
}
names(ret) <- c("pval_greater", "pval_less")
return(ret)
}
#' Bootstrap Pooling via normal approximation
#' @description
#' Get point estimate, confidence interval and p-value using
#' the normal approximation.
#' @param est a numeric vector of point estimates from each bootstrap sample.
#' @inheritParams pool
#' @details
#' The point estimate is taken to be the first element of `est`. The remaining
#' n-1 values of `est` are then used to generate the confidence intervals.
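#'
#' A usage sketch with hypothetical estimates, mirroring the percentile version:
#'
#' ```
#' set.seed(101)
#' est <- c(1.0, rnorm(1000, mean = 1.0, sd = 0.2))
#' pool_bootstrap_normal(est, conf.level = 0.95, alternative = "two.sided")
#' ```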
#' @importFrom stats sd qnorm pnorm quantile
pool_bootstrap_normal <- function(est, conf.level, alternative) {
est_orig <- est[1] # First estimate should be original dataset
est <- est[-1]
alpha <- 1 - conf.level
se <- sd(est)
ret <- parametric_ci(est_orig, se, alpha, alternative, qnorm, pnorm)
return(ret)
}
#' Calculate parametric confidence intervals
#'
#' @description
#' Calculates confidence intervals based upon a parametric
#' distribution.
#'
#' @param point The point estimate.
#'
#' @param se The standard error of the point estimate. If using a non-"normal"
#' distribution this should be set to 1.
#'
#' @param alpha The type 1 error rate, should be a value between 0 and 1.
#'
#' @param alternative a character string specifying the alternative hypothesis,
#' must be one of "two.sided" (default), "greater" or "less".
#'
#' @param qfun The quantile function for the assumed distribution, e.g. `qnorm`.
#'
#' @param pfun The cumulative distribution function for the assumed distribution,
#' e.g. `pnorm`.
#'
#' @param ... additional arguments passed on to `qfun` and `pfun`, e.g. `df = 102`.
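#'
#' @details
#' For example, a two-sided 95% confidence interval based on the normal
#' distribution (hypothetical point estimate and standard error):
#'
#' ```
#' parametric_ci(
#'     point = 1.5,
#'     se = 0.4,
#'     alpha = 0.05,
#'     alternative = "two.sided",
#'     qfun = qnorm,
#'     pfun = pnorm
#' )
#' ```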
#'
parametric_ci <- function(point, se, alpha, alternative, qfun, pfun, ...) {
ci <- switch(
alternative,
two.sided = c(-1, 1) * qfun(1 - alpha / 2, ...),
greater = c(-Inf, 1) * qfun(1 - alpha, ...),
less = c(-1, Inf) * qfun(1 - alpha, ...)
) * se + point
pvals <- c(
pfun(point / se, lower.tail = TRUE, ...),
pfun(point / se, lower.tail = FALSE, ...)
)
index <- switch(
alternative,
two.sided = c(1, 2),
greater = 2,
less = 1
)
ret <- list(
est = point,
ci = ci,
se = se,
pvalue = min(pvals[index]) * length(pvals[index])
)
return(ret)
}
#' Transpose results object
#'
#' Transposes a Results object (as created by [analyse()]) in order to group
#' the same estimates together into vectors.
#'
#' @param results A list of results.
#' @param components a character vector of components to extract
#' (e.g. `c("est", "se")`).
#'
#' @details
#'
#' Essentially this function takes an object of the format:
#'
#' ```
#' x <- list(
#' list(
#' "trt1" = list(
#' est = 1,
#' se = 2
#' ),
#' "trt2" = list(
#' est = 3,
#' se = 4
#' )
#' ),
#' list(
#' "trt1" = list(
#' est = 5,
#' se = 6
#' ),
#' "trt2" = list(
#' est = 7,
#' se = 8
#' )
#' )
#' )
#' ```
#'
#' and produces:
#'
#' ```
#' list(
#' trt1 = list(
#' est = c(1,5),
#' se = c(2,6)
#' ),
#' trt2 = list(
#' est = c(3,7),
#' se = c(4,8)
#' )
#' )
#' ```
transpose_results <- function(results, components) {
elements <- names(results[[1]])
results_transpose <- list()
for (element in elements) {
results_transpose[[element]] <- list()
for (comp in components) {
results_transpose[[element]][[comp]] <- vapply(
results,
function(x) x[[element]][[comp]],
numeric(1)
)
}
}
return(results_transpose)
}
#' @rdname pool
#' @export
as.data.frame.pool <- function(x, ...) {
data.frame(
parameter = names(x$pars),
est = vapply(x$pars, function(x) x$est, numeric(1)),
se = vapply(x$pars, function(x) x$se, numeric(1)),
lci = vapply(x$pars, function(x) x$ci[[1]], numeric(1)),
uci = vapply(x$pars, function(x) x$ci[[2]], numeric(1)),
pval = vapply(x$pars, function(x) x$pvalue, numeric(1)),
stringsAsFactors = FALSE,
row.names = NULL
)
}
#' @rdname pool
#' @export
print.pool <- function(x, ...) {
n_string <- ife(
x$method %in% c("rubin", "bmlmi"),
as.character(x$N),
sprintf("1 + %s", x$N - 1)
)
string <- c(
"",
"Pool Object",
"-----------",
sprintf("Number of Results Combined: %s", n_string),
sprintf("Method: %s", x$method),
sprintf("Confidence Level: %s", x$conf.level),
sprintf("Alternative: %s", x$alternative),
"",
"Results:",
as_ascii_table(as.data.frame(x), pcol = "pval"),
""
)
cat(string, sep = "\n")
return(invisible(x))
}