R/lavaan.mi.R
In lavaan.mi: Fit Structural Equation Models to Multiply Imputed Data

Documented in cfa.mi growth.mi lavaan.mi sem.mi

### Terrence D. Jorgensen
### Last updated: 7 March 2025
### function that creates lavaan.mi object, inherits from lavaanList class


##' Fit a lavaan Model to Multiple Imputed Data Sets
##'
##' This function fits a lavaan model to a list of imputed data sets.
##'
##'
##' @aliases lavaan.mi cfa.mi sem.mi growth.mi
##'
##' @param model The analysis model can be specified using lavaan
##'   [lavaan::model.syntax()] or a parameter table (as generated by
##'   [lavaan::lavaanify()] or returned by [lavaan::parTable()]).
##' @param data A a `list` of imputed data sets, or an object class from
##'   which imputed data can be extracted. Recognized classes are
##'   `lavaan.mi` (stored in the `@@DataList` slot),
##'   `amelia` (created by the Amelia package), or
##'   `mids` (created by the mice package).
##' @param \dots additional arguments to pass to [lavaan::lavaan()] or
##'   [lavaan::lavaanList()]. See also [lavaan::lavOptions()].
##'   Note that `lavaanList` provides parallel computing options, as well as
##'   a `FUN=` argument so the user can extract custom output after the model
##'   is fitted to each imputed data set (see **Examples**).  TIP: If a
##'   custom `FUN` is used *and* `parallel = "snow"` is requested,
##'   the user-supplied function should explicitly call `library` or use
##'   \code{\link[base]{::}} for any functions not part of the base distribution.
##'
##' @return A [lavaan.mi-class] object
##'
##' @note This functionality was originally provided via `runMI()` in the
##'   `semTools` package, but there are differences.  See the README file
##'   on the GitHub page for this package (find link in DESCRIPTION).
##'
##' @seealso [poolSat()] for a more efficient method to obtain SEM results
##'  for multiple imputations
##'
##' @author
##'   Terrence D. Jorgensen (University of Amsterdam; \email{TJorgensen314@@gmail.com})
##'
##' @references
##'   Enders, C. K. (2010). *Applied missing data analysis*.
##'   New York, NY: Guilford.
##'
##'   Rubin, D. B. (1987). *Multiple imputation for nonresponse in surveys*.
##'   New York, NY: Wiley. \doi{10.1002/9780470316696}
##'
##' @examples
##' data(HS20imps) # import a list of 20 imputed data sets
##'
##' ## specify CFA model from lavaan's ?cfa help page
##' HS.model <- '
##'   visual  =~ x1 + x2 + x3
##'   textual =~ x4 + x5 + x6
##'   speed   =~ x7 + x8 + x9
##' '
##'
##' ## fit model to imputed data sets
##' fit <- cfa.mi(HS.model, data = HS20imps)
##'
##' \donttest{
##' summary(fit, fit.measures = TRUE, fmi = TRUE)
##' summary(fit, standardized = "std.all", rsquare = TRUE)
##' }
##'
##' ## You can pass other lavaanList() arguments, such as FUN=, which allows
##' ## you to save any custom output from each imputation's fitted model.
##'
##' ## An example with ordered-categorical data:
##' data(binHS5imps) # import a list of 5 imputed data sets
##'
##' ## Define a function to save a list with 2 custom outputs per imputation:
##' ##  - zero-cell tables
##' ##  - the obsolete "WRMR" fit index
##' myCustomFunc <- function(object) {
##'   list(wrmr      = lavaan::fitMeasures(object, "wrmr"),
##'        zeroCells = lavaan::lavInspect(object, "zero.cell.tables"))
##' }
##' ## fit the model
##' catout <- cfa.mi(HS.model, data = binHS5imps, ordered = TRUE,
##'                  FUN = myCustomFunc)
##' ## pooled results
##' \donttest{
##' summary(catout)
##' }
##'
##' ## extract custom output (per imputation)
##' sapply(catout@funList, function(x) x$wrmr) # WRMR for each imputation
##' catout@funList[[1]]$zeroCells # zero-cell tables for first imputation
##' catout@funList[[2]]$zeroCells # zero-cell tables for second imputation ...
##'
##'
##' @importFrom lavaan lavInspect parTable lavParseModelString
##' @importFrom methods as
##' @export
lavaan.mi <- function(model, data, ...) {
  if (!"package:lavaan.mi" %in% search()) {
    warning("Have called the lavaan.mi() function (or a wrapper around it), ",
            "but the 'package:lavaan.mi' was not found in the search() path.\n",
            "In order for the package to be fully functional, attach it using ",
            "library(lavaan.mi).\nDo NOT rely on 'lazy loading' (lavaan.mi::)")
  }

  CALL <- match.call()
  dots <- list(...)
  if (is.null(dots$cmd)) dots$cmd <- "lavaan"

  ## check model
  if (is.character(model)) {
    MOD <- lavParseModelString(model)
  } else if (is.list(model)) {
    ## assume it is a parameter table
    MOD <- model
  }

  ## Don't waste time on EFA, suggest (pre)pooling saturated model
  if (any(MOD$efa != "")) {
    stop("efa()* blocks detected in model= syntax. (Un)rotated factors cannot ",
         "be matched across imputations, threatening validity of pooled results.",
         "\nInstead, EFA/ESEM should be conducted on a single set of (pre)pooled",
         " summary statistics (see the ?poolSat help page).")
  }

  ## check for (Bollen-Stine) bootstrap request
  if (all(!is.null(dots$test),
          tolower(dots$test) %in% c("boot","bootstrap","bollen.stine")) ||
      all(!is.null(dots$se), tolower(dots$se) %in% c("boot","bootstrap"))) {
    stop('Bootstraping unavailable (and not recommended) in combination with ',
         'multiple imputations. For robust confidence intervals of indirect',
         ' effects, see the ?semTools::monteCarloCI help page. To bootstrap ',
         'within each imputation, users can pass a custom function to the ',
         'FUN= argument (see ?lavaanList) to save bootstrap distributions in ',
         'the @funList slot, then manually combine afterward.')
  }

  ## Validate list of imputed data sets
  imputedData <- NULL
  if (missing(data)) {
    #TODO: check for summary statistics
    #TODO: make lavaanList() accept lists of summary stats
    #TODO: Add argument to implement Li Cai's pool-polychorics first, pass
    #      to lavaan for DWLS with pooled WLS.V= and NACOV=, return(lavaan).

  } else if (is.data.frame(data)) {
    stop('lavaan.mi(data=) cannot be a single data.frame')

  } else if (is.list(data)) {

    ## check whether it is a mids or amelia object (both inherit from list)
    if (inherits(data, "mids")) {
      loadNamespace("mice")
      m <- data$m
      imputedData <- vector("list", m)
      for (i in 1:m) {
        imputedData[[i]] <- mice::complete(data, action = i, include = FALSE)
      }
    } else if (inherits(data, "amelia")) {
      imputedData <- data$imputations
      m <- length(imputedData)
      class(imputedData) <- "list" # override "mi" inheritance

    } else {
      ## if not mids or amelia, check it is a list of data.frames
      stopifnot(all(sapply(data, inherits, what = "data.frame")))
      imputedData <- data
      m <- length(imputedData)
      class(imputedData) <- "list" # override inheritance (e.g., "mi" if Amelia)
    }

  } else if (inherits(data, "lavaan.mi")) {
    imputedData <- data@DataList
    m <- length(imputedData)
  } else stop("data= must be a list of imputed data.frames or an object of ",
              "class 'lavaan.mi', 'mids', or 'amelia'")

  ## Function to get custom output for lavaan.mi object
  ## NOTE: Need "lavaan::" to allow for parallel computations
  .getOutput. <- function(obj) {
    converged <- lavaan::lavInspect(obj, "converged")
    if (converged) {
      se <- lavaan::parTable(obj)$se
      se.test <- all(!is.na(se)) & all(se >= 0) & any(se != 0)
      if (lavaan::lavInspect(obj, "ngroups") == 1L && lavaan::lavInspect(obj, "nlevels") == 1L) {
        Heywood.lv <- det(lavaan::lavInspect(obj, "cov.lv")) <= 0
        Heywood.ov <- det(lavaan::lavInspect(obj, "theta")) <= 0
      } else {
        Heywood.lv <- !all(sapply(lavaan::lavInspect(obj, "cov.lv"), det) > 0)
        Heywood.ov <- !all(sapply(lavaan::lavInspect(obj, "theta"), det) > 0)
      }
      suppressWarnings(MIs <- try(lavaan::modindices(obj), silent = TRUE))

    } else {
      se.test <- Heywood.lv <- Heywood.ov <- NA
      MIs <- NULL
    }

    list(sampstat = lavaan::lavInspect(obj, "sampstat"),
         coefMats = lavaan::lavInspect(obj, "est"),
         satPT = data.frame(lavaan::lav_partable_unrestricted(obj),
                            #FIXME: do starting values ALWAYS == estimates?
                            stringsAsFactors = FALSE),
         modindices = MIs,
         cov.lv = lavaan::lavInspect(obj, "cov.lv"), #TODO: calculate from pooled estimates for reliability()
         converged = converged, SE = se.test,
         Heywood.lv = Heywood.lv, Heywood.ov = Heywood.ov)
  }

  ## fit model using lavaanList
  lavListCall <- c(list(quote(lavaan::lavaanList), model = model,
                        dataList = imputedData), dots)
  lavListCall$store.slots <- union(c("partable","vcov","test","h1","baseline"),
                                   lavListCall$store.slots)
  lavListCall$FUN <- if (is.null(dots$FUN)) .getOutput. else function(obj) {
    temp1 <- .getOutput.(obj)
    temp2 <- dots$FUN(obj)
    if (!is.list(temp2)) temp2 <- list(userFUN1 = temp2)
    if (is.null(names(temp2))) names(temp2) <- paste0("userFUN", 1:length(temp2))
    duplicatedNames <- which(sapply(names(temp2), function(x) {
      x %in% c("sampstat","coefMats","satPT","modindices","converged",
               "SE","Heywood.lv","Heywood.ov","cov.lv")
    }))
    for (i in duplicatedNames) names(temp2)[i] <- paste0("userFUN", i)
    c(temp1, temp2)
  }
  fit <- eval(as.call(lavListCall))
  ## Store custom @DataList and @SampleStatsList
  fit@SampleStatsList <- lapply(fit@funList, "[[", i = "sampstat")
  fit@DataList <- imputedData
  ## add parameter table to @h1List
  for (i in 1:m) fit@h1List[[i]] <- c(fit@h1List[[i]],
                                      list(PT = fit@funList[[i]]$satPT))
  ## assign class and add new slots
  fit <- as(fit, "lavaan.mi")
  fit@version <- sapply(c("lavaan","lavaan.mi"),
                        utils::packageDescription, fields = "Version")
  fit@coefList <- lapply(fit@funList, "[[", i = "coefMats")
  fit@miList <- lapply(fit@funList, "[[", i = "modindices")
  fit@phiList <- lapply(fit@funList, "[[", i = "cov.lv")
  fit@call <- CALL
  fit@lavListCall <- lavListCall
  convList <- lapply(fit@funList, "[", i = c("converged","SE",
                                             "Heywood.lv","Heywood.ov"))
  nonConv <- which(sapply(convList, is.null))
  if (length(nonConv)) for (i in nonConv) {
    convList[[i]] <- list(converged = FALSE, SE = NA, Heywood.lv = NA, Heywood.ov = NA)
  }
  fit@convergence <- lapply(convList, function(x) do.call(c, x))
  conv <- which(sapply(fit@convergence, "[", i = "converged"))
  if (!length(conv)) warning('The model did not converge for any imputed data sets.')

  ## keep any remaining funList slots (if allowing users to supply custom FUN)
  funNames <- names(fit@funList[[1]])
  keepIndex <- which(!sapply(funNames, function(x) {
    x %in% c("sampstat","coefMats","satPT","modindices","converged",
             "SE","Heywood.lv","Heywood.ov","cov.lv")
  }))
  if (length(keepIndex)) {
    fit@funList <- lapply(fit@funList, "[", i = keepIndex)
    if (length(keepIndex) > 1L) {
      keepNames <- funNames[keepIndex]
      noNames <- which(keepNames == "")
      for (i in seq_along(noNames)) keepNames[ noNames[i] ] <- paste0("userFUN", i)
      fit@funList <- lapply(fit@funList, "names<-", value = keepNames)
    }
  } else fit@funList <- list()

  NewStartVals <- try(coef_lavaan_mi(fit, type = "user", labels = FALSE),
                           silent = TRUE)
  if (!inherits(NewStartVals, "try-error")) fit@ParTable$start <- NewStartVals
  #FIXME: else do what? warn?

  fit
}

##' @rdname lavaan.mi
##' @export
cfa.mi <- function(model, data, ...) {
  mc <- match.call(expand.dots = TRUE)
  mc$cmd <- "cfa"
  mc[[1L]] <- quote(lavaan.mi::lavaan.mi)
  eval(mc, parent.frame())
}

##' @rdname lavaan.mi
##' @export
sem.mi <- function(model, data, ...) {
  mc <- match.call(expand.dots = TRUE)
  mc$cmd <- "sem"
  mc[[1L]] <- quote(lavaan.mi::lavaan.mi)
  eval(mc, parent.frame())
}

##' @rdname lavaan.mi
##' @export
growth.mi <- function(model, data, ...) {
  mc <- match.call(expand.dots = TRUE)
  mc$cmd <- "growth"
  mc[[1L]] <- quote(lavaan.mi::lavaan.mi)
  eval(mc, parent.frame())
}