#' Within between variance estimation
#'
#' This function implements the within-between variance estimation approach. If the imputations
#' were generated using posterior draws, it implements the approach proposed by Barnard & Rubin (1999).
#' If posterior draws were not used, it implements the WB approach described by von Hippel and
#' Bartlett (2021).
#'
#' @param imps A list of imputed datasets produced by one of the imputation functions
#' in \code{mlmi} or another package.
#' @param analysisFun A function to analyse the imputed datasets that when applied to
#' a dataset returns a list containing a vector \code{est} and covariance matrix \code{var}.
#' @param pd If \code{imps} was not generated by one of the imputation functions in
#' \code{mlmi}, this argument must be specified to indicate whether the imputations
#' were generated using posterior draws (TRUE) or not (FALSE).
#' @param dfComplete The complete data degrees of freedom. If \code{analysisFun} returns a vector
#' of parameter estimates, \code{dfComplete} should be a vector of the same length. If not
#' specified, it is assumed that the complete data degrees of freedom is effectively infinite (1e+05).
#' @param ... Other parameters that are to be passed through to \code{analysisFun}.
#' @return A list containing the overall parameter estimates, its corresponding covariance matrix, and
#' degrees of freedom for each parameter.
#'
#' @references Barnard J, Rubin DB. Miscellanea. Small-sample degrees of freedom with multiple imputation.
#' Biometrika 1999; 86(4): 948-955. \doi{10.1093/biomet/86.4.948}
#'
#' @references von Hippel P.T. and Bartlett J.W. Maximum likelihood multiple imputation: faster,
#' more efficient imputation without posterior draws. Statistical Science 2021; 36(3) 400-420 \doi{10.1214/20-STS793}.
#'
#' @example data-raw/wbExample.r
#'
#'
#' @export
withinBetween <- function(imps, analysisFun, pd=NULL, dfComplete=NULL, ...) {
M <- length(imps)
if ("pd" %in% names(attributes(imps)) == TRUE) {
pd <- as.logical(attributes(imps)['pd'])
} else {
if (is.null(pd)==TRUE) {
stop("Your imputed datasets doesn't have a pd attribute stored, so you must use specify the pd argument when calling wb.")
}
}
#run analysis on first imputation to find out length of parameter vector
result <- analysisFun(imps[[1]],...)
numParms <- length(result$est)
#analyse each imputed datasets
ests <- array(0, dim=c(M,numParms))
vars <- array(0, dim=c(M,numParms,numParms))
for (m in 1:M) {
result <- analysisFun(imps[[m]],...)
ests[m,] <- result$est
vars[m,,] <- result$var
}
thetaHat <- colMeans(ests)
What <- apply(vars, c(2,3), mean)
Bhat <- var(ests)
if (is.null(dfComplete)) {
#assume effectively infinite degrees of freedom for each parameter
dfComplete <- rep(100000,numParms)
}
if (pd==FALSE) {
#implement von Hippel's WB variance
if (numParms==1) {
gammaHatMis <- Bhat/What
gammaTildeMis <- h(gammaHatMis, M-1)
gammaTildeObs <- 1-gammaTildeMis
VTildeML_MLMI_WB <- What / gammaTildeObs
vTildeMLMI_WB <- VTildeML_MLMI_WB + Bhat/M
totalVar <- vTildeMLMI_WB
#degrees of freedom
VTildeML_WB <- VTildeML_MLMI_WB
nuTildeML_WB <- (M-1)*(gammaTildeObs/gammaTildeMis)^2 - 4
nuHatMLMI_WB <- vTildeMLMI_WB^2 / (VTildeML_WB^2/nuTildeML_WB + (Bhat/M)^2 / (M-1))
nuTildeObs <- dfComplete*gammaTildeObs*(dfComplete+3)/(dfComplete+1)
nuTildeMLMI_WB <- max(3,((1/nuHatMLMI_WB) + (1/nuTildeObs))^(-1))
MIdf <- nuTildeMLMI_WB
} else {
gammaHatMis <- solve(What) %*% Bhat
gammaTildeMis <- H(gammaHatMis, M-1)
gammaTildeObs <- diag(numParms)-gammaTildeMis
VTildeML_MLMI_WB <- What %*% solve(gammaTildeObs)
vTildeMLMI_WB <- VTildeML_MLMI_WB + Bhat/M
totalVar <- vTildeMLMI_WB
#degrees of freedom
VTildeML_WB <- diag(VTildeML_MLMI_WB)
Bhat <- diag(Bhat)
gammaTildeMis <- mean(diag(gammaTildeMis))
gammaTildeObs <- mean(diag(gammaTildeObs))
vTildeMLMI_WB <- diag(vTildeMLMI_WB)
nuTildeML_WB <- (M-1)*(gammaTildeObs/gammaTildeMis)^2 - 4
nuHatMLMI_WB <- vTildeMLMI_WB^2 / (VTildeML_WB^2/nuTildeML_WB + (Bhat/M)^2 / (M-1))
nuTildeObs <- dfComplete*gammaTildeObs*(dfComplete+3)/(dfComplete+1)
nuTildeMLMI_WB <- pmax(3,((1/nuHatMLMI_WB) + (1/nuTildeObs))^(-1))
MIdf <- nuTildeMLMI_WB
}
} else {
#Rubin's rules
VhatPDMI_WB <- What + (1+1/M)*Bhat
totalVar <- VhatPDMI_WB
#calculate Barnard and Rubin degrees of freedom, element-wise, as per Stata
MIdf <- rep(0, numParms)
for (i in 1:numParms) {
gammaTildeMis <- (1+1/M)*Bhat[i,i]/totalVar[i,i]
nuHatPDMI_WB <- (M-1)/gammaTildeMis^2
nuTildeObs <- dfComplete*(1-gammaTildeMis)*(dfComplete+1)/(dfComplete+3)
nuTildePDMI_WB <- max(3, (1/nuHatPDMI_WB + 1/nuTildeObs)^(-1))
MIdf[i] <- nuTildePDMI_WB
}
}
list(est=thetaHat, var=totalVar, df=MIdf)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.