R/runLGM.R
In MplusLGM: Automate Latent Growth Mixture Modelling in 'Mplus'

Documented in runLGM

#' @title Run Latent Growth Models (LGM) and replicate the best loglikelihood value (LL)

#' @description Run iterations of an LGM, doubling the number of starting values
#'  until the best LL value has replicated at least twice, both within and between models.

#' @param lgm_object An `mplusObject` with predefined random starting values (`STARTS`) in the ANALYSIS section.
#' @param wd A character string specifying the directory where the results folder will be created for saving the Mplus input, output, and data files.
#' Default is the current working directory.

#' @return A list of class `mplusObject` including results for the replicated model, alongside with :
#' \itemize{
#'   \item - The Mplus input and data files used for the model.
#'   \item - The output files generated by Mplus.
#'   \item - The data results files saved by Mplus.
#' }

#' @details The `runLGM` function run iterations of an LGM in Mplus while gradually increasing the number of random starting values used to optimize the loglikelihood.
#' This approach aims to prevent estimation issues related to local maxima, which can result in selecting the inappropriate model during class enumeration.
#' The function works as follows:
#' \itemize{
#'   \item 1. Estimate the model using the predefined number of random starting values.
#'   \item 2. Rerun the model with double the number of starting values.
#'   \item 3. Continue until the best LL value is successfully replicated both within the model and between 2 consecutive model run,
#'   or the maximum number of allowed starting values is reached. By default the maximum number of allowed starting values
#'    is set 2 times the number of initial starting values raised to the power of 5.
#'   \item 4. Return the `mplusObject` from the replicated model.
#' }
#'
#' This function builds upon the capabilities of the \code{\link[MplusAutomation]{mplusModeler}} function
#' from the MplusAutomation package.

#' @seealso
#' \code{\link[MplusAutomation]{mplusModeler}} for running, and reading an mplusObject.
#' \code{\link{LGMobject}} for creating the mplusObject for a latent growth model.

#' @importFrom purrr pluck
#' @importFrom stringr str_which str_extract str_remove_all str_split str_split_1
#' @importFrom glue glue
#' @importFrom utils head
#' @importFrom MplusAutomation mplusModeler SummaryTable

#' @export
#' @examples
#' \donttest{
#' # Example usage:
#' GBTM_model <-  runLGM(
#'   lgm_object = GBTM_object,
#'   wd = file.path("Results", "Trajectories"))
#' }

# runLGM function ------------------------------------------------------------
runLGM <- function(lgm_object, wd) {

  ## Validate argument ---------------------------------------------------------
  stopifnot(
    class(lgm_object) == c("mplusObject", "list"),
    is.character(wd)
  )

  ## Run Model -----------------------------------------------------------------
  # Run model and attempt to replicate the best Log-likelihood value until the maximum number of starting values is reached.

  model_list <- list()
  model_idx <- 1

  starting_val <- lgm_object %>%
    purrr::pluck("ANALYSIS") %>%
    stringr::str_extract("[:digit:]+[:space:]?[:digit:]*") %>%
    stringr::str_split(pattern = " ", n = 2) %>%
    unlist() %>%
    purrr::discard(~ .x == "") %>%
    as.numeric()

  startval_max <- starting_val * 2 ^ 5

  while (dplyr::first(starting_val) <= dplyr::first(startval_max)) {
    ### Update title (S) -------------------------------------------------------
    lgm_object <- lgm_object %>%
      purrr::modify_in("TITLE",
                       \(title) stringr::str_replace(title, "(?<=S)\\d+", as.character(dplyr::first(starting_val))))

    ### Update analysis (STARTS) -----------------------------------------------
    lgm_object <- lgm_object %>%
      purrr::modify_in("ANALYSIS", \(analysis) {
        stringr::str_replace(
          analysis,
          glue::glue("STARTS = [:digit:]+[:space:]?[:digit:]*"),
          glue::glue("STARTS = {paste(unlist(starting_val), collapse = ' ')}")
        )
      })

    ### Update analysis (K-1STARTS) -----------------------------------------------
    lgm_object <- lgm_object %>%
      purrr::modify_in("ANALYSIS", \(analysis) {
        stringr::str_replace(
          analysis,
          glue::glue("K-1STARTS = [:digit:]+[:space:]?[:digit:]*"),
          glue::glue("K-1STARTS = {paste(unlist(starting_val), collapse = ' ')}")
        )
      })

    ### Update analysis (LRTSTARTS) -----------------------------------------------
    lgm_object <- lgm_object %>%
      purrr::modify_in("ANALYSIS", \(analysis) {
        stringr::str_replace(
          analysis,
          glue::glue("LRTSTARTS = 0 0 [:digit:]+[:space:]?[:digit:]*"),
          glue::glue("LRTSTARTS = 0 0 {paste(unlist(starting_val), collapse = ' ')}")
        )
      })

    ### Update savedata (FILE) -----------------------------------------------
    lgm_object <- lgm_object %>%
      purrr::modify_in("SAVEDATA",
                       \(savedata) stringr::str_replace(savedata, "(?<=S)\\d+", as.character(dplyr::first(starting_val))))

    ### Create model directories -----------------------------------------------
    # Create model directories and paths for Mplus files.

    path_dir <- .createModelDir(lgm_object, wd)

    message(glue::glue("Begin running model: {lgm_object$TITLE}"))

    ### Run model --------------------------------------------------------------
    # Run model from Mplus Object and store it in a list.
      model_list[[model_idx]] <- MplusAutomation::mplusModeler(
        object = lgm_object,
        dataout = path_dir$data,
        modelout = path_dir$input,
        run = 1,
        writeData = "always",
        hashfilename = FALSE,
        quiet = TRUE
      )

    message(glue::glue("Finished running model: {lgm_object$TITLE}"))

    errors <- purrr::pluck(model_list, model_idx, "results", "errors")

    if (!purrr::is_empty(errors)) {

      warning(
        paste(
          unlist(errors),
          collapse = " ")
        )

      return(model_list[[model_idx]])
    }

    ### Extract and Check LL ---------------------------------------------------
    #### Get output from idx model.
    output_file <- purrr::pluck(model_list, model_idx, "results", "output")

    #### Extract best LL within idx model
    line <- output_file %>%
      stringr::str_which(
        "Fit function values|Final stage loglikelihood values" #GCM vs. LGCM
      )

    LL1 <- stringr::str_extract(output_file[line + 2], "-?\\d+\\.?\\d+") # Extract first best LL (regex: sign, digits, decimal, digits)
    LL2 <- stringr::str_extract(output_file[line + 3], "-?\\d+\\.?\\d+") # Extract second best LL

    #### Extract best LL between models
    if (model_idx > 1) {
        error_LL <- tryCatch(
          {
            LL_m1 <- purrr::chuck(model_list, model_idx - 1, "results", "summaries", "LL")
            LL_m2 <- purrr::chuck(model_list, model_idx, "results", "summaries", "LL")
            NULL
          },
          error = function(e) {
            warning("Error: The Model did not provide a log-likelihood value. It likely did not converge. Check output file")
            warning(e$message)
          }
        )

        if (!is.null(error_LL)) {
          return(model_list[[model_idx]])
        }

      #### Check best LL within and between models
        if (LL1 == LL2 & LL_m1 == LL_m2) {
          return(model_list[[model_idx]])
        }
    }

    #### Else if LL are not replicated, double number of starting values and add on to model index
    starting_val <- starting_val %>%
      purrr::map(\(x) as.numeric(x) * 2) %>%
      unlist()

    model_idx <- model_idx + 1
  }
}

# Helper functions -------------------------------------------------------------
## .createModelDir ----------------------------------------------------------
# Creates the specified directory if it does not exist.

.createModelDir <- function(lgm_object, wd) {
  model_title <- lgm_object$TITLE %>%
    stringr::str_remove_all("[;\n]") %>%
    stringr::str_split_1("_") %>%
    stringr::str_to_upper()

  model_dir <- c(wd, utils::head(model_title, -1)) %>%
    purrr::reduce(~ file.path(.x, .y))

  if (!dir.exists(model_dir)) {
    dir.create(model_dir, recursive = TRUE)
  }

  data_file <- file.path(model_dir, glue::glue("{tail(model_title, 1)}.dat"))
  input_file <- file.path(model_dir, glue::glue("{tail(model_title, 1)}.inp"))
  output_file <- file.path(model_dir, glue::glue("{tail(model_title, 1)}.out"))

  return(list(
    dir = model_dir,
    data = data_file,
    input = input_file,
    output = output_file
  ))
}