Nothing
### Terrence D. Jorgensen
### Last updated: 9 May 2022
### Savalei & Yuan's (2009) model-based bootstrap for missing data
## ----------------------------
## "BootMiss" Class and Methods
## ----------------------------
##' Class For the Results of Bollen-Stine Bootstrap with Incomplete Data
##'
##' This class contains the results of Bollen-Stine bootstrap with missing data.
##'
##'
##' @name BootMiss-class
##' @aliases BootMiss-class show,BootMiss-method summary,BootMiss-method
##' hist,BootMiss-method
##' @docType class
##' @section Objects from the Class: Objects can be created via the
##' \code{\link{bsBootMiss}} function.
##' @slot time A list containing 2 \code{difftime} objects (\code{transform}
##' and \code{fit}), indicating the time elapsed for data transformation and
##' for fitting the model to bootstrap data sets, respectively.
##' @slot transData Transformed data
##' @slot bootDist The vector of \eqn{chi^2} values from bootstrap data sets
##' fitted by the target model
##' @slot origChi The \eqn{chi^2} value from the original data set
##' @slot df The degree of freedom of the model
##' @slot bootP The \emph{p} value comparing the original \eqn{chi^2} with the
##' bootstrap distribution
##' @author Terrence D. Jorgensen (University of Amsterdam;
##' \email{TJorgensen314@@gmail.com})
##' @seealso \code{\link{bsBootMiss}}
##' @examples
##'
##' # See the example from the bsBootMiss function
##'
setClass("BootMiss", representation(time = "list",
transData = "data.frame",
bootDist = "vector",
origChi = "numeric",
df = "numeric",
bootP = "numeric"))
##' @rdname BootMiss-class
##' @aliases show,BootMiss-method
##' @importFrom stats pchisq
##' @export
setMethod("show", "BootMiss",
function(object) {
cat("Chi-Squared = ", object@origChi, "\nDegrees of Freedom = ",
object@df, "\nTheoretical p value = ",
pchisq(object@origChi, object@df, lower.tail = FALSE),
"\n i.e., pchisq(", object@origChi, ", df = ",
object@df, ", lower.tail = FALSE)\n",
"\nBootstrapped p value = ", object@bootP, "\n\n", sep = "")
invisible(object)
})
##' @rdname BootMiss-class
##' @aliases summary,BootMiss-method
##' @importFrom stats var
##' @export
setMethod("summary", "BootMiss",
function(object) {
cat("Time elapsed to transform the data:\n")
print(object@time$transform)
cat("\nTime elapsed to fit the model to", length(object@bootDist),
"bootstrapped samples:\n")
print(object@time$fit)
cat("\nMean of Theoretical Distribution = DF =", object@df,
"\nVariance of Theoretical Distribution = 2*DF =", 2*object@df,
"\n\nMean of Bootstrap Distribution =", mean(object@bootDist),
"\nVariance of Bootstrap Distribution =",
var(object@bootDist), "\n\n")
show(object)
invisible(object)
})
##' @rdname BootMiss-class
##' @aliases hist,BootMiss-method
##' @importFrom stats qchisq dchisq quantile
##' @param object,x object of class \code{BootMiss}
##' @param ... Additional arguments to pass to \code{\link[graphics]{hist}}
##' @param alpha alpha level used to draw confidence limits
##' @param nd number of digits to display
##' @param printLegend \code{logical}. If \code{TRUE} (default), a legend will
##' be printed with the histogram
##' @param legendArgs \code{list} of arguments passed to the
##' \code{\link[graphics]{legend}} function. The default argument is a list
##' placing the legend at the top-left of the figure.
##' @return The \code{hist} method returns a list of \code{length == 2},
##' containing the arguments for the call to \code{hist} and the arguments
##' to the call for \code{legend}, respectively.
##' @export
setMethod("hist", "BootMiss",
function(x, ..., alpha = .05, nd = 2, printLegend = TRUE,
legendArgs = list(x = "topleft")) {
ChiSq <- x@origChi
DF <- x@df
bootDist <- x@bootDist
bCrit <- round(quantile(bootDist, probs = 1 - alpha), nd)
theoDist <- dchisq(seq(.1, max(c(ChiSq, bootDist)), by = .1), df = DF)
Crit <- round(qchisq(p = alpha, df = DF, lower.tail = FALSE), nd)
Lim <- c(0, max(c(ChiSq, bootDist, Crit)))
if (ChiSq > Lim[2]) Lim[2] <- ChiSq
histArgs <- list(...)
histArgs$x <- bootDist
histArgs$freq <- FALSE
if (is.null(histArgs$col)) histArgs$col <- "grey69"
if (is.null(histArgs$xlim)) histArgs$xlim <- Lim
if (is.null(histArgs$main)) histArgs$main <- expression("Model-Based Bootstrap Distribution of" ~ chi^2)
if (is.null(histArgs$ylab)) histArgs$ylab <- "Probability Density"
if (is.null(histArgs$xlab)) histArgs$xlab <- expression(chi^2)
if (printLegend) {
if (nd < length(strsplit(as.character(1 / alpha), "")[[1]]) - 1) {
warning(paste0("The number of digits argument (nd = ", nd ,
") is too low to display your p value at the",
" same precision as your requested alpha level (alpha = ",
alpha, ")"))
}
if (x@bootP < (1 / 10^nd)) {
pVal <- paste(c("< .", rep(0, nd - 1),"1"), collapse = "")
} else {
pVal <- paste("=", round(x@bootP, nd))
}
if (is.null(legendArgs$box.lty)) legendArgs$box.lty <- 0
if (is.null(legendArgs$lty)) legendArgs$lty <- c(1, 2, 2, 1, 0, 0)
if (is.null(legendArgs$lwd)) legendArgs$lwd <- c(2, 2, 2, 3, 0, 0)
#if (is.null(legendArgs$cex)) legendArgs$cex <- c(1.1, 1, 1, 1, 1, 1)
if (is.null(legendArgs$col)) legendArgs$col <- c("black","black","grey69","red","", "")
legendArgs$legend <- c(bquote(chi[.(paste("df =", DF))]^2),
bquote(Critical ~ chi[alpha ~ .(paste(" =", alpha))]^2 == .(Crit)),
bquote(Bootstrap~Critical~chi[alpha ~ .(paste(" =", alpha))]^2 == .(bCrit)),
expression(Observed ~ chi^2),
bquote(.("")),
bquote(Bootstrap ~ italic(p) ~~ .(pVal)))
}
H <- do.call(hist, c(histArgs["x"], plot = FALSE))
histArgs$ylim <- c(0, max(H$density, theoDist))
suppressWarnings({
do.call(hist, histArgs)
lines(x = seq(.1, max(c(ChiSq, bootDist)), by = .1), y = theoDist, lwd = 2)
abline(v = Crit, col = "black", lwd = 2, lty = 2)
abline(v = bCrit, col = "grey69", lwd = 2, lty = 2)
abline(v = ChiSq, col = "red", lwd = 3)
if (printLegend) do.call(legend, legendArgs)
})
## return arguments to create histogram (and optionally, legend)
invisible(list(hist = histArgs, legend = legendArgs))
})
## --------------------
## Constructor Function
## --------------------
##' Bollen-Stine Bootstrap with the Existence of Missing Data
##'
##' Implement the Bollen and Stine's (1992) Bootstrap when missing observations
##' exist. The implemented method is proposed by Savalei and Yuan (2009). This
##' can be used in two ways. The first and easiest option is to fit the model to
##' incomplete data in \code{lavaan} using the FIML estimator, then pass that
##' \code{lavaan} object to \code{bsBootMiss}.
##'
##' The second is designed for users of other software packages (e.g., LISREL,
##' EQS, Amos, or Mplus). Users can import their data, \eqn{\chi^2} value, and
##' model-implied moments from another package, and they have the option of
##' saving (or writing to a file) either the transformed data or bootstrapped
##' samples of that data, which can be analyzed in other programs. In order to
##' analyze the bootstrapped samples and return a \emph{p} value, users of other
##' programs must still specify their model using lavaan syntax.
##'
##'
##' @importFrom lavaan lavInspect parTable
##' @param x A target \code{lavaan} object used in the Bollen-Stine bootstrap
##' @param transformation The transformation methods in Savalei and Yuan (2009).
##' There are three methods in the article, but only the first two are currently
##' implemented here. Use \code{transformation = 1} when there are few missing
##' data patterns, each of which has a large size, such as in a
##' planned-missing-data design. Use \code{transformation = 2} when there are
##' more missing data patterns. The currently unavailable
##' \code{transformation = 3} would be used when several missing data patterns
##' have n = 1.
##' @param nBoot The number of bootstrap samples.
##' @param model Optional. The target model if \code{x} is not provided.
##' @param rawData Optional. The target raw data set if \code{x} is not
##' provided.
##' @param Sigma Optional. The model-implied covariance matrix if \code{x} is
##' not provided.
##' @param Mu Optional. The model-implied mean vector if \code{x} is not
##' provided.
##' @param group Optional character string specifying the name of the grouping
##' variable in \code{rawData} if \code{x} is not provided.
##' @param ChiSquared Optional. The model's \eqn{\chi^2} test statistic if
##' \code{x} is not provided.
##' @param EMcov Optional, if \code{x} is not provided. The EM (or Two-Stage ML)
##' estimated covariance matrix used to speed up Transformation 2 algorithm.
##' @param transDataOnly Logical. If \code{TRUE}, the result will provide the
##' transformed data only.
##' @param writeTransData Logical. If \code{TRUE}, the transformed data set is
##' written to a text file, \code{transDataOnly} is set to \code{TRUE}, and the
##' transformed data is returned invisibly.
##' @param bootSamplesOnly Logical. If \code{TRUE}, the result will provide
##' bootstrap data sets only.
##' @param writeBootData Logical. If \code{TRUE}, the stacked bootstrap data
##' sets are written to a text file, \code{bootSamplesOnly} is set to
##' \code{TRUE}, and the list of bootstrap data sets are returned invisibly.
##' @param writeArgs Optional \code{list}. If \code{writeBootData = TRUE} or
##' \code{writeBootData = TRUE}, user can pass arguments to the
##' \code{\link[utils]{write.table}} function as a list. Some default values
##' are provided: \code{file} = "bootstrappedSamples.dat", \code{row.names} =
##' \code{FALSE}, and \code{na} = "-999", but the user can override all of these
##' by providing other values for those arguments in the \code{writeArgs} list.
##' @param seed The seed number used in randomly drawing bootstrap samples.
##' @param suppressWarn Logical. If \code{TRUE}, warnings from \code{lavaan}
##' function will be suppressed when fitting the model to each bootstrap sample.
##' @param showProgress Logical. Indicating whether to display a progress bar
##' while fitting models to bootstrap samples.
##' @param \dots The additional arguments in the \code{\link[lavaan]{lavaan}}
##' function. See also \code{\link[lavaan]{lavOptions}}
##' @return As a default, this function returns a \code{\linkS4class{BootMiss}}
##' object containing the results of the bootstrap samples. Use \code{show},
##' \code{summary}, or \code{hist} to examine the results. Optionally, the
##' transformed data set is returned if \code{transDataOnly = TRUE}. Optionally,
##' the bootstrap data sets are returned if \code{bootSamplesOnly = TRUE}.
##' @author Terrence D. Jorgensen (University of Amsterdam;
##' \email{TJorgensen314@@gmail.com})
##'
##' Syntax for transformations borrowed from http://www2.psych.ubc.ca/~vsavalei/
##' @seealso \code{\linkS4class{BootMiss}}
##' @references
##'
##' Bollen, K. A., & Stine, R. A. (1992). Bootstrapping goodness-of-fit measures
##' in structural equation models. \emph{Sociological Methods &
##' Research, 21}(2), 205--229. \doi{10.1177/0049124192021002004}
##'
##' Savalei, V., & Yuan, K.-H. (2009). On the model-based bootstrap with missing
##' data: Obtaining a p-value for a test of exact fit. \emph{Multivariate
##' Behavioral Research, 44}(6), 741--763. \doi{10.1080/00273170903333590}
##' @examples
##'
##' \dontrun{
##' dat1 <- HolzingerSwineford1939
##' dat1$x5 <- ifelse(dat1$x1 <= quantile(dat1$x1, .3), NA, dat1$x5)
##' dat1$x9 <- ifelse(is.na(dat1$x5), NA, dat1$x9)
##'
##' targetModel <- "
##' visual =~ x1 + x2 + x3
##' textual =~ x4 + x5 + x6
##' speed =~ x7 + x8 + x9
##' "
##' targetFit <- sem(targetModel, dat1, meanstructure = TRUE, std.lv = TRUE,
##' missing = "fiml", group = "school")
##' summary(targetFit, fit = TRUE, standardized = TRUE)
##'
##' # The number of bootstrap samples should be much higher.
##' temp <- bsBootMiss(targetFit, transformation = 1, nBoot = 10, seed = 31415)
##'
##' temp
##' summary(temp)
##' hist(temp)
##' hist(temp, printLegend = FALSE) # suppress the legend
##' ## user can specify alpha level (default: alpha = 0.05), and the number of
##' ## digits to display (default: nd = 2). Pass other arguments to hist(...),
##' ## or a list of arguments to legend() via "legendArgs"
##' hist(temp, alpha = .01, nd = 3, xlab = "something else", breaks = 25,
##' legendArgs = list("bottomleft", box.lty = 2))
##' }
##'
##' @export
bsBootMiss <- function(x, transformation = 2, nBoot = 500, model, rawData,
Sigma, Mu, group, ChiSquared, EMcov,
writeTransData = FALSE, transDataOnly = FALSE,
writeBootData = FALSE, bootSamplesOnly = FALSE,
writeArgs, seed = NULL, suppressWarn = TRUE,
showProgress = TRUE, ...) {
if(writeTransData) transDataOnly <- TRUE
if(writeBootData) bootSamplesOnly <- TRUE
check.nBoot <- (!is.numeric(nBoot) | nBoot < 1L) & !transDataOnly
if (check.nBoot) stop("The \"nBoot\" argument must be a positive integer.")
## Which transformation?
if (!(transformation %in% 1:2)) stop("User must specify transformation 1 or 2.
Consult Savalei & Yuan (2009) for advice.
Transformation 3 is not currently available.")
if (transformation == 2) SavaleiYuan <- trans2
#if (transformation == 3) SavaleiYuan <- trans3
######################
## Data Preparation ##
######################
## If a lavaan object is supplied, the extracted values for rawData, Sigma, Mu,
## EMcov, and EMmeans will override any user-supplied arguments.
if (hasArg(x)) {
if (!lavInspect(x, "options")$meanstructure)
stop('You must fit the lavaan model with the argument "meanstructure=TRUE".')
nG <- lavInspect(x, "ngroups")
if (nG == 1L) {
rawData <- list(as.data.frame(lavInspect(x, "data")))
} else rawData <- lapply(lavInspect(x, "data"), as.data.frame)
for (g in seq_along(rawData)) {
colnames(rawData[[g]]) <- lavaan::lavNames(x)
checkAllMissing <- apply(rawData[[g]], 1, function(x) all(is.na(x)))
if (any(checkAllMissing)) rawData[[g]] <- rawData[[g]][!checkAllMissing, ]
}
ChiSquared <- lavInspect(x, "fit")[c("chisq", "chisq.scaled")]
ChiSquared <- ifelse(is.na(ChiSquared[2]), ChiSquared[1], ChiSquared[2])
group <- lavInspect(x, "group")
if (length(group) == 0) group <- "group"
group.label <- lavInspect(x, "group.label")
if (length(group.label) == 0) group.label <- 1
Sigma <- lavInspect(x, "cov.ov")
Mu <- lavInspect(x, "mean.ov")
EMcov <- lavInspect(x, "sampstat")$cov
if (nG == 1L) {
Sigma <- list(Sigma)
Mu <- list(Mu)
EMcov <- list(EMcov)
}
} else {
## If no lavaan object is supplied, check that required arguments are.
suppliedData <- c(hasArg(rawData), hasArg(Sigma), hasArg(Mu))
if (!all(suppliedData)) {
stop("Without a lavaan fitted object, user must supply raw data and",
" model-implied moments.")
}
if (!hasArg(model) & !(transDataOnly | bootSamplesOnly)) {
stop("Without model syntax or fitted lavaan object, user can only call",
" this function to save transformed data or bootstrapped samples.")
}
if (!hasArg(ChiSquared) & !(transDataOnly | bootSamplesOnly)) {
stop("Without a fitted lavaan object or ChiSquared argument, user can",
" only call this function to save transformed data, bootstrapped",
" samples, or bootstrapped chi-squared values.")
}
if (!any(c(transDataOnly, bootSamplesOnly))) {
if (!is.numeric(ChiSquared)) stop("The \"ChiSquared\" argument must be numeric.")
}
## If user supplies one-group data & moments, convert to lists.
if ( inherits(rawData, "data.frame")) rawData <- list(rawData)
if (!inherits(rawData, "list")) {
stop("The \"rawData\" argument must be a data.frame or list of data frames.")
} else {
if (!all(sapply(rawData, is.data.frame))) stop("Every element of \"rawData\" must be a data.frame")
}
if (inherits(Sigma, "matrix")) Sigma <- list(Sigma)
if (is.numeric(Mu)) Mu <- list(Mu)
## check whether EMcov was supplied for starting values in Trans2/Trans3
if (!hasArg(EMcov)) {
EMcov <- vector("list", length(Sigma))
} else {
if (inherits(EMcov, "matrix")) EMcov <- list(EMcov)
## check EMcov is symmetric and dimensions match Sigma
for (g in seq_along(EMcov)) {
if (!isSymmetric(EMcov[[g]])) stop("EMcov in group ", g, " not symmetric.")
unequalDim <- !all(dim(EMcov[[g]]) == dim(Sigma[[g]]))
if (unequalDim) stop("Unequal dimensions in Sigma and EMcov.")
}
}
## Check the number of groups by the size of the lists.
unequalGroups <- !all(length(rawData) == c(length(Sigma), length(Mu)))
if (unequalGroups) stop("Unequal number of groups in rawData, Sigma, Mu.
For multiple-group models, rawData must be a list of data frames,
NOT a single data frame with a \"group\" column.")
nG <- length(Sigma)
## In each group, check Sigma is symmetric and dimensions match rawData and Mu.
for (g in seq_along(rawData)) {
if (!isSymmetric(Sigma[[g]])) stop("Sigma in group ", g, " not symmetric.")
unequalDim <- !all(ncol(rawData[[g]]) == c(nrow(Sigma[[g]]), length(Mu[[g]])))
if (unequalDim) stop("Unequal dimensions in rawData, Sigma, Mu.")
}
## Check for names of group levels. If NULL, assign arbitrary ones.
if (!hasArg(group)) group <- "group"
if (!is.character(group)) stop("The \"group\" argument must be a character string.")
if (is.null(names(rawData))) {
group.label <- paste0("g", seq_along(rawData))
} else {
group.label <- names(rawData)
}
}
## save a copy as myTransDat, whose elements will be replaced iteratively by
## group and by missing data pattern within group.
myTransDat <- rawData
names(myTransDat) <- group.label
output <- list()
#########################
## Data Transformation ##
#########################
for (g in seq_along(group.label)) {
if (transformation == 1) {
## get missing data patterns
R <- ifelse(is.na(rawData[[g]]), 1, 0)
rowMissPatt <- apply(R, 1, function(x) paste(x, collapse = ""))
patt <- unique(rowMissPatt)
myRows <- lapply(patt, function(x) which(rowMissPatt == x))
## for each pattern, apply transformation
tStart <- Sys.time()
transDatList <- lapply(patt, trans1, rowMissPatt = rowMissPatt,
dat = rawData[[g]], Sigma = Sigma[[g]], Mu = Mu[[g]])
output$timeTrans <- Sys.time() - tStart
for (i in seq_along(patt)) myTransDat[[g]][myRows[[i]], ] <- transDatList[[i]]
} else {
tStart <- Sys.time()
myTransDat[[g]] <- SavaleiYuan(dat = rawData[[g]], Sigma = Sigma[[g]],
Mu = Mu[[g]], EMcov = EMcov[[g]])
output$timeTrans <- Sys.time() - tStart
}
}
## option to end function here
if (transDataOnly) {
for (g in seq_along(myTransDat)) myTransDat[[g]][ , group] <- group.label[g]
## option to write transformed data to a file
if (writeTransData) {
## Set a few options, if the user didn't.
if (!hasArg(writeArgs)) writeArgs <- list(file = "transformedData.dat",
row.names = FALSE, na = "-999")
if (!exists("file", where = writeArgs)) writeTransArgs$file <- "transformedData.dat"
if (!exists("row.names", where = writeArgs)) writeArgs$row.names <- FALSE
if (!exists("na", where = writeArgs)) writeArgs$na <- "-999"
## add grouping variable and bind together into one data frame
for (g in seq_along(myTransDat)) myTransDat[[g]][ , group] <- group.label[g]
writeArgs$x <- do.call("rbind", myTransDat)
## write to file, print details to screen
do.call("write.table", writeArgs)
cat("Transformed data was written to file \"", writeArgs$file, "\" in:\n\n",
getwd(), "\n\nunless path specified by user in 'file' argument.\n", sep = "")
return(invisible(writeArgs$x))
}
return(do.call("rbind", myTransDat))
}
#############################################
## Bootstrap distribution of fit statistic ##
#############################################
## draw bootstrap samples
if (!is.null(seed)) set.seed(seed)
bootSamples <- lapply(1:nBoot, function(x) getBootSample(myTransDat, group, group.label))
## option to write bootstrapped samples to file(s)
if (writeBootData) {
## Set a few options, if the user didn't.
if (!hasArg(writeArgs)) writeArgs <- list(file = "bootstrappedSamples.dat",
row.names = FALSE, na = "-999")
if (!exists("file", where = writeArgs)) writeTransArgs$file <- "bootstrappedSamples.dat"
if (!exists("row.names", where = writeArgs)) writeArgs$row.names <- FALSE
if (!exists("na", where = writeArgs)) writeArgs$na <- "-999"
## add indicator for bootstrapped sample, bind together into one data frame
for (b in seq_along(bootSamples)) bootSamples[[b]]$bootSample <- b
writeArgs$x <- do.call("rbind", bootSamples)
## write to file, print details to screen
do.call("write.table", writeArgs)
cat("Bootstrapped samples written to file \"", writeArgs$file, "\" in:\n\n",
getwd(), "\n\nunless path specified by user in 'file' argument.\n", sep = "")
return(invisible(bootSamples))
}
## option to end function here
if (bootSamplesOnly) return(bootSamples)
## check for lavaan arguments in (...)
lavaanArgs <- list(...)
lavaanArgs$group <- group
## fit model to bootstrap samples, save distribution of chi-squared test stat
if (hasArg(x)) {
## grab defaults from lavaan object "x"
lavaanArgs$slotParTable <- as.list(parTable(x))
lavaanArgs$slotModel <- x@Model
lavaanArgs$slotOptions <- lavInspect(x, "options")
} else {
lavaanArgs$model <- model
lavaanArgs$missing <- "fiml"
## set defaults that will be necessary for many models to run, that will
## probably not be specified explictly or included in lavaan syntax
lavaanArgs$meanstructure <- TRUE
if (!exists("auto.var", where = lavaanArgs)) lavaanArgs$auto.var <- TRUE
if (!exists("auto.cov.y", where = lavaanArgs)) lavaanArgs$auto.cov.y <- TRUE
if (!exists("auto.cov.lv.x", where = lavaanArgs)) lavaanArgs$auto.cov.lv.x <- TRUE
}
## run bootstrap fits
if (showProgress) {
mypb <- utils::txtProgressBar(min = 1, max = nBoot, initial = 1, char = "=",
width = 50, style = 3, file = "")
bootFits <- numeric()
tStart <- Sys.time()
for (j in 1:nBoot) {
bootFits[j] <- fitBootSample(bootSamples[[j]], args = lavaanArgs,
suppress = suppressWarn)
utils::setTxtProgressBar(mypb, j)
}
close(mypb)
output$timeFit <- Sys.time() - tStart
} else {
tStart <- Sys.time()
bootFits <- sapply(bootSamples, fitBootSample, args = lavaanArgs,
suppress = suppressWarn)
output$timeFit <- Sys.time() - tStart
}
## stack groups, save transformed data and distribution in output object
for (g in seq_along(myTransDat)) myTransDat[[g]][ , group] <- group.label[g]
output$Transformed.Data <- do.call("rbind", myTransDat)
output$Bootstrapped.Distribution <- bootFits
output$Original.ChiSquared <- ChiSquared
if (hasArg(x)) {
output$Degrees.Freedom <- lavInspect(x, "fit")["df"]
} else {
convSamp <- which(!is.na(bootFits))[1]
lavaanArgs$data <- bootSamples[[convSamp]]
lavaanlavaan <- function(...) { lavaan::lavaan(...) }
output$Degrees.Freedom <- lavInspect(do.call(lavaanlavaan, lavaanArgs), "fit")["df"]
}
## calculate bootstrapped p-value
output$Bootstrapped.p.Value <- mean(bootFits >= ChiSquared, na.rm = TRUE)
## print warning if any models didn't converge
if (any(is.na(bootFits))) {
nonConvMessage <- paste("Model did not converge for the following bootstrapped samples",
paste(which(is.na(bootFits)), collapse = "\t"), sep = ":\n")
warning(nonConvMessage)
}
finalResult <- new("BootMiss", time = list(transform = output$timeTrans, fit = output$timeFit), transData = output$Transformed.Data, bootDist = output$Bootstrapped.Distribution, origChi = output$Original.ChiSquared, df = output$Degrees.Freedom, bootP = output$Bootstrapped.p.Value)
finalResult
}
## ----------------
## Hidden Functions
## ----------------
## Function to execute Transformation 1 on a single missing-data pattern
trans1 <- function(MDpattern, rowMissPatt, dat, Sigma, Mu) {
myRows <- which(rowMissPatt == MDpattern)
X <- apply(dat[myRows, ], 2, scale, scale = FALSE)
observed <- !is.na(X[1, ])
Xreduced <- X[ , observed]
Mreduced <- as.numeric(Mu[observed])
SigmaChol <- chol(Sigma[observed, observed])
S <- t(Xreduced) %*% Xreduced / nrow(X)
Areduced <- t(SigmaChol) %*% t(solve(chol(S)))
Yreduced <- t(Areduced %*% t(Xreduced) + Mreduced)
Y <- replace(X, !is.na(X), Yreduced)
Y
}
## Function to execute Transformation 2 on a single group
trans2 <- function(dat, Sigma, Mu, EMcov) {
## Computing Function of A (eq. 12), whose root is desired
eq12 <- function(A) {
ga <- rep(0, pStar)
for (j in 1:J) {
Tj <- Mjs[[j]] %*% A %*% Hjs[[j]] %*% A %*% Mjs[[j]] - Mjs[[j]]
ga <- ga + Njs[j] * Dupinv %*% c(Tj) # same as vech(Tj)
}
ga
}
## Computing Derivative of Function of A (eq. 13)
eq13 <- function(A) {
deriv12 <- matrix(0, nrow = pStar, ncol = pStar)
for (j in 1:J) {
Tj1 <- Mjs[[j]] %*% A %*% Hjs[[j]]
deriv12 <- deriv12 + 2*Njs[j]*Dupinv %*% kronecker(Tj1, Mjs[[j]]) %*% Dup
}
deriv12
}
## get missing data patterns
R <- ifelse(is.na(dat), 1, 0)
rowMissPatt <- apply(R, 1, function(x) paste(x, collapse = ""))
MDpattern <- unique(rowMissPatt)
## sample size within each MD pattern
Njs <- sapply(MDpattern, function(patt) sum(rowMissPatt == patt))
J <- length(MDpattern) # number of MD patterns
p <- ncol(dat) # number of variables in model
pStar <- p*(p + 1) / 2 # number of nonredundant covariance elements
## create empty lists for each MD pattern
Xjs <- vector("list", J)
Hjs <- vector("list", J)
Mjs <- vector("list", J)
## Create Duplication Matrix and its inverse (Magnus & Neudecker, 1999)
Dup <- lavaan::lav_matrix_duplication(p)
Dupinv <- solve(t(Dup) %*% Dup) %*% t(Dup)
## step through each MD pattern, populate Hjs and Mjs
for (j in 1:J) {
Xjs[[j]] <- apply(dat[rowMissPatt == MDpattern[j], ], 2, scale, scale = FALSE)
if (!is.matrix(Xjs[[j]])) Xjs[[j]] <- t(Xjs[[j]])
observed <- !is.na(Xjs[[j]][1, ])
Sj <- t(Xjs[[j]]) %*% Xjs[[j]] / Njs[j]
Hjs[[j]] <- replace(Sj, is.na(Sj), 0)
Mjs[[j]] <- replace(Sj, !is.na(Sj), solve(Sigma[observed, observed]))
Mjs[[j]] <- replace(Mjs[[j]], is.na(Mjs[[j]]), 0)
}
## Compute starting Values for A
if (is.null(EMcov)) {
A <- diag(p)
} else {
EMeig <- eigen(EMcov)
EMrti <- EMeig$vectors %*% diag(1 / sqrt(EMeig$values)) %*% t(EMeig$vectors)
Sigeig <- eigen(Sigma)
Sigrt <- Sigeig$vectors %*% diag(sqrt(Sigeig$values)) %*% t(Sigeig$vectors)
B <- Sigrt %*% EMrti
A <- .5*(B + t(B))
}
## Newton Algorithm for finding root (eq. 14)
crit <- .1
a <- c(A)
fA <- eq12(A)
while (crit > 1e-11) {
dvecF <- eq13(A)
a <- a - Dup %*% solve(dvecF) %*% fA
A <- matrix(a, ncol = p)
fA <- eq12(A)
crit <- max(abs(fA))
}
## Transform dataset X to dataset Y
Yjs <- Xjs
for (j in 1:J) {
observed <- !is.na(Xjs[[j]][1, ])
XjReduced <- Xjs[[j]][ , observed, drop = FALSE]
Aj <- A[observed, observed, drop = FALSE]
Mj <- as.numeric(Mu[observed])
Yj <- t(Aj %*% t(XjReduced) + Mj)
Yjs[[j]] <- replace(Yjs[[j]], !is.na(Yjs[[j]]), Yj)
}
Y <- as.data.frame(do.call("rbind", Yjs))
colnames(Y) <- colnames(dat)
Y
}
## Function to execute Transformation 3 on a single group -- TRANSFORMATION DOES NOT RETURN CH-SQ = 0
trans3 <- function(dat, Sigma, Mu, EMcov) {
# Computing Saturated Means as a Function of A (eq. B1 in Appendix B)
mut <- function(A) {
M <- matrix(0, ncol = 1, nrow = p)
for (j in 1:J) {
M <- M + Njs[[j]] * Mjs[[j]] %*% A %*% Ybarjs[[j]]
}
Mjtoti %*% M
}
# Computing Function of A (eq. 18) whose root is desired
eq18 <- function(A) {
ga <- rep(0, pStar)
mutilda <- mut(A)
for (j in 1:J) {
Tj <- Mjs[[j]] %*% A %*% Hjs[[j]] %*% A %*% Mjs[[j]] - Mjs[[j]]
dif <- A %*% Ybarjs[[j]] - mutilda
middle <- dif %*% t(dif)
Tjnew <- Tj + Mjs[[j]] %*% middle %*% Mjs[[j]]
ga <- ga + Njs[j] * Dupinv %*% c(Tjnew)
}
ga
}
# Computing Derivative of Function eq. 18
deriv18 <- function(A) {
d18 <- matrix(0, nrow = pStar, ncol = pStar)
for (j in 1:J) {
Tj1 <- Mjs[[j]] %*% A %*% Hjs[[j]]
mutilda <- mut(A)
dif <- A %*% Ybarjs[[j]] - mutilda
Tj2 <- Mjs[[j]] %*% dif %*% t(Ybarjs[[j]])
Tj3 <- kronecker(Mjs[[j]] %*% dif, Mjs[[j]]) %*% Mjtoti %*% Tj3add
d18 <- d18 + 2*Njs[j]*Dupinv %*% ((kronecker((Tj1 + Tj2), Mjs[[j]])) - Tj3) %*% Dup
}
d18
}
## get missing data patterns
R <- ifelse(is.na(dat), 1, 0)
rowMissPatt <- apply(R, 1, function(x) paste(x, collapse = ""))
MDpattern <- unique(rowMissPatt)
## sample size within each MD pattern
Njs <- sapply(MDpattern, function(patt) sum(rowMissPatt == patt))
J <- length(MDpattern) # number of MD patterns
p <- ncol(dat) # number of variables in model
pStar <- p*(p + 1) / 2 # number of nonredundant covariance elements
## create empty lists for each MD pattern
Xjs <- vector("list", J)
Ybarjs <- vector("list", J)
Hjs <- vector("list", J)
Mjs <- vector("list", J)
Mjtot <- matrix(0, ncol = p, nrow = p)
Tj3add <- matrix(0, nrow = p, ncol = p * p)
## Create Duplication Matrix and its inverse (Magnus & Neudecker, 1999)
Dup <- lavaan::lav_matrix_duplication(p)
Dupinv <- solve(t(Dup) %*% Dup) %*% t(Dup)
## step through each MD pattern, populate Hjs and Mjs
for (j in 1:J) {
Xjs[[j]] <- apply(dat[rowMissPatt == MDpattern[j], ], 2, scale, scale = FALSE)
if (!is.matrix(Xjs[[j]])) Xjs[[j]] <- t(Xjs[[j]])
observed <- !is.na(Xjs[[j]][1, ])
pj <- p - sum(observed)
means <- colMeans(dat[rowMissPatt == MDpattern[j], ])
Ybarjs[[j]] <- replace(means, is.na(means), 0)
Sj <- t(Xjs[[j]]) %*% Xjs[[j]] / Njs[j]
Hjs[[j]] <- replace(Sj, is.na(Sj), 0)
Mjs[[j]] <- replace(Sj, !is.na(Sj), solve(Sigma[observed, observed]))
Mjs[[j]] <- replace(Mjs[[j]], is.na(Mjs[[j]]), 0)
Mjtot <- Mjtot + Njs[[j]] * Mjs[[j]]
Tj3add <- Tj3add + Njs[[j]] * kronecker(t(Ybarjs[[j]]), Mjs[[j]])
}
Mjtoti <- solve(Mjtot)
## Compute starting Values for A
if (is.null(EMcov)) {
A <- diag(p)
} else {
EMeig <- eigen(EMcov)
EMrti <- EMeig$vectors %*% diag(1 / sqrt(EMeig$values)) %*% t(EMeig$vectors)
Sigeig <- eigen(Sigma)
Sigrt <- Sigeig$vectors %*% diag(sqrt(Sigeig$values)) %*% t(Sigeig$vectors)
B <- Sigrt %*% EMrti
A <- .5*(B + t(B))
}
## Newton Algorithm for finding root (eq. 14)
crit <- .1
a <- c(A)
fA <- eq18(A)
while (crit > 1e-11) {
dvecF <- deriv18(A)
a <- a - Dup %*% solve(dvecF) %*% fA
A <- matrix(a, ncol = p)
fA <- eq18(A)
crit <- max(abs(fA))
}
## Transform dataset X to dataset Y (Z in the paper, eqs. 15-16)
Yjs <- Xjs
for (j in 1:J) {
observed <- !is.na(Xjs[[j]][1, ])
XjReduced <- Xjs[[j]][ , observed, drop = FALSE]
Aj <- A[observed, observed, drop = FALSE]
Mj <- as.numeric((Mu - mut(A))[observed])
Yj <- t(Aj %*% t(XjReduced) + Mj)
Yjs[[j]] <- replace(Yjs[[j]], !is.na(Yjs[[j]]), Yj)
}
Y <- as.data.frame(do.call("rbind", Yjs))
colnames(Y) <- colnames(dat)
Y
}
## Get a single bootstrapped sample from the transformed data. If there are
## multiple groups, bootstrapping occurs independently within each group, and
## a single data frame is returned. A new column is added to indicate group
## membership, which will be ignored in a single-group analysis.
getBootSample <- function(groupDat, group, group.label) {
bootSamp <- list()
for (g in seq_along(groupDat)) {
dat <- groupDat[[g]]
dat[ , group] <- group.label[g]
bootSamp[[g]] <- dat[sample(1:nrow(dat), nrow(dat), replace = TRUE), ]
}
do.call("rbind", bootSamp)
}
## fit the model to a single bootstrapped sample and return chi-squared
##' @importFrom lavaan lavInspect
fitBootSample <- function(dat, args, suppress) {
args$data <- dat
lavaanlavaan <- function(...) { lavaan::lavaan(...) }
if (suppress) {
fit <- suppressWarnings(do.call(lavaanlavaan, args))
} else {
fit <- do.call(lavaanlavaan, args)
}
if (!exists("fit")) return(c(chisq = NA))
if (lavInspect(fit, "converged")) {
chisq <- lavInspect(fit, "fit")[c("chisq", "chisq.scaled")]
} else {
chisq <- NA
}
if (is.na(chisq[2])) return(chisq[1]) else return(chisq[2])
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.