R/model_set.R
In modelbpp: Model BIC Posterior Probability

Documented in gen_models model_set

#' @title BIC Posterior Probabilities
#' of Neighboring Models
#'
#' @description Identify neighboring
#' models, fit them, and return the
#' BIC posterior probabilities.
#'
#' @details It computes the BIC
#' posterior probabilities of a set
#' of models by the method presented
#' in Wu, Cheung, and Leung (2020).
#'
#' First, a list of model is identified
#' based on user-specified criteria.
#' By default, models differ from a fitted
#' model by one degree of freedom,
#' the 1-df-away *neighboring* models,
#' will be found using [get_add()]
#' and [get_drop].
#'
#' Second, these models will be fitted
#' to the sample dataset, and their
#' BICs will be computed.
#'
#' Third, their BIC posterior
#' probabilities will be computed
#' using their BICs. By default,
#' equal prior
#' probabilities for all the models
#' being fitted will be assumed in
#' the current version. This can be
#' changed by `prior_sem_out`.
#'
#' The results can then be printed,
#' with the models sorted by descending
#' order of BIC posterior
#' probabilities. The results can
#' also be visualized using
#' [model_graph()].
#'
#' @param sem_out It can be the output
#' from an
#' SEM function. Currently it supports
#' [lavaan::lavaan-class] objects only.
#' If it is a named list of
#' [lavaan::lavaan-class] objects, then
#' all arguments for model generation
#' will be ignored, and models will not
#' be refitted. Users need to ensure
#' that the models can be meaningfully
#' compared because they will not be
#' checked.
#'
#' @param partables A `partables`-class
#' object, usually generated by
#' [get_add()] or [get_drop()]. A named
#' list of parameter tables to be fitted
#' along with the original model in
#' `sem_out`.
#' If supplied, all arguments related
#' to identifying models will be ignored.
#' Default is `NULL`.
#'
#' @param model_set_out If set to
#' the output of a previous call
#' to [model_set()] (a `model_set`-class
#' object), the list of stored models
#' will be used. All arguments related
#' to generate neighboring models will
#' be ignored.
#' If supplied, `sem_out` will also be
#' ignored and will be retrieved from
#' `model_set_out`, and `partables`
#' will also be ignored.
#' Default is `NULL`.
#'
#' @param prior_sem_out The prior of the
#' model fitted in `sem_out`. Default
#' is `NULL`, and all models will have
#' equal prior probabilities.
#'
#' @param must_add A character vector
#' of parameters, named in
#' [lavaan::lavaan()] style (e.g.,
#' `"y ~ x"`), that must be added.
#' Default is `NULL``.
#'
#' @param must_not_add A character
#' vector of parameters, named in
#' [lavaan::lavaan()] style (e.g.,
#' `"x1 ~~ x1"`), that must not be
#' added. Default is `NULL`.
#'
#' @param remove_constraints Whether
#' equality constraints will be
#' removed. Default is ``TRUE`.
#'
#' @param exclude_error_cov Exclude
#' error covariances of indicators.
#' Default is `TRUE`.
#'
#' @param exclude_feedback Exclude
#' paths that will result in a feedback
#' loop. For example, if there is
#' path from `x` through `m` to `y`,
#' then the path `x ~ y` will create
#' a feedback loop.  Default has been
#' changed to
#' `TRUE` since Version 0.1.3.5 because
#' feedback loops are usually
#' not included except when theoretically
#' justified. To reproduce results
#' based on previous version, set this
#' argument to `FALSE`.
#'
#' @param exclude_xy_cov Exclude
#' covariance between two variables,
#' in which one has a path to another.
#' For example, if there is
#' path from `x` through `m` to `y`,
#' then the covariance `x ~~ y`,
#' which denotes the covariance between
#' `x` and the error term of `y`, will
#' be excluded if this argument is
#' `TRUE`. Default has been changed to
#' `TRUE` since Version 0.1.3.5 because
#' these covariances rarely are
#' interpretable. To reproduce results
#' based on previous version, set this
#' argument to `FALSE`.
#'
#' @param must_drop A character vector
#' of parameters, named in
#' `lavaan::lavaan()` style (e.g.,
#' `"y ~ x"`), that must be included.
#' Default is `NULL`.
#'
#' @param must_not_drop A character
#' vector of parameters, named in
#' [lavaan::lavaan()] style (e.g.,
#' `"x1 ~~ x1"`), that must not be
#' included. Default is `NULL`.
#'
#' @param df_change_add How many degrees
#' of freedom (*df*) away in the list.
#' All models with *df* change less than
#' or equal to this number will be
#' included, taking into account
#' requirements set by other arguments.
#' Default is 1.
#'
#' @param df_change_drop How many degrees
#' of freedom away in the list. All
#' models with *df* change less than
#' or equal to this number will be
#' included, taking into account
#' requirements set by other arguments.
#' Default is 1.
#'
#' @param remove_duplicated If `TRUE`,
#' the default, duplicated models are
#' removed.
#'
#' @param fit_models If `TRUE`,
#' the
#' models will be fitted to the data,
#' usually stored in `sem_out`.
#' If `FALSE`, the models will be returned
#' as is, in the element `models`
#' of the output. If `model_set_out`
#' is set and models have been fitted,
#' then default is `FALSE`. Otherwise,
#' default is `TRUE`.
#'
#' @param compute_bpp If `TRUE`, then BIC
#' posterior probabilities will be
#' computed. Default
#' is `TRUE`.
#'
#' @param original String. The name of the
#' original (traget) model. Default is
#' `"original"`. Used if `prior_sem_out`
#' is unnamed and only has one value.
#'
#' @param parallel If `TRUE`, parallel
#' processing will be used to fit the
#' models. Default is `FALSE`.
#'
#' @param ncores Numeric. The number of
#' CPU cores to be used if `parallel`
#' is `TRUE`.
#'
#' @param make_cluster_args A list of
#' named arguments to be passed to
#' `parallel::makeCluster()`. Used by
#' advanced users to configure the
#' cluster if `parallel` is `TRUE`.
#' Default is `list()`.
#'
#' @param progress Whether a progress
#' bar will be displayed, implemented
#' by the `pbapply` package or by
#' `utils::txtProgressBar`. Default
#' is `TRUE`.
#'
#' @param verbose Whether additional
#' messages will be displayed, such
#' as the expected processing time.
#' Default is `TRUE`.
#'
#' @param output If `"model_set"`,
#' then the output is a `model_set`-class
#' object. If `"partables"`, the output
#' is a `partables`-class object.
#' Default is `partables`.
#'
#' @param skip_check_sem_out If `TRUE`
#' and `sem_out` is set, check
#' whether `sem_out` is of a supported
#' type (estimator is `"ML"` and
#' the model has only one group). If
#' not, an error will be raised.
#' Can be set to `FALSE` for experimenting
#' the functions on models not officially
#' supported.
#'
#' @param drop_equivalent_models If
#' `TRUE`, the default, equivalent
#' models will be dropped in the final
#' output. This check can only be
#' conducted when no models are fitted
#' in [lavaan::lavaan()] with
#' `fixed.x = TRUE` (which is the
#' default of [lavaan::sem()]).
#'
#' @return The function [model_set()]
#' returns an object of the class
#' `model_set`, a list with the following
#' major elements:
#'
#' * `models`: A named list of parameter
#'    tables. Each represent the models
#'    identified.
#'
#' * `bic`: A numeric vector, of the
#'    same length as `model`. The BIC of
#'    each model.
#'
#' * `postprob`: A numeric vector, of
#'    the same length as `model`. The
#'    BIC posterior probability of each
#'    model.
#'
#' * `fit`: A named list of
#'    [lavaan::lavaan()] output objects
#'    or [update()] for fitting a model
#'    with the added parameters, of the
#'    same length as `model`.
#'
#' * `change`: A numeric vector, of the
#'    same length as `model`. The change
#'    in model df for each fit. A
#'    positive number denotes one less
#'    free parameter. A negative number
#'    denotes one more free parameter or
#'    one less constraint.
#'
#' * `converged`: A named vector of
#'    boolean values, of the same length
#'    as `model`. Indicates whether each
#'    fit converged or not.
#'
#' * `post_check`: A named vector of
#'    boolean values, of the same length
#'    as `model`. Indicates whether the
#'    solution of each fit is admissible
#'    or not. Checked by
#'    [lavaan::lavInspect()].
#'
#' The object returned by [gen_models()]
#' depends on the argument `output`.
#' See the argument `output` for the
#' details
#'
#' @author Shu Fai Cheung <https://orcid.org/0000-0002-9871-9448>
#'
#' @references
#' Wu, H., Cheung, S. F., & Leung, S. O.
#' (2020). Simple use of BIC to assess
#' model selection uncertainty: An
#' illustration using mediation and
#' moderation models.
#' *Multivariate Behavioral Research*,
#' *55*(1), 1--16.
#' \doi{10.1080/00273171.2019.1574546}
#'
#' @seealso [print.model_set()]
#'
#' @examples
#'
#' library(lavaan)
#'
#' dat <- dat_path_model
#'
#' mod <-
#' "
#' x3 ~ a*x1 + b*x2
#' x4 ~ a*x1
#' ab := a*b
#' "
#'
#' fit <- sem(mod, dat_path_model, fixed.x = TRUE)
#'
#' out <- model_set(fit)
#' out
#'
#' @describeIn model_set Compute the BPPs of a list of models.
#'  Can generate the models and/or fit the models. Can also
#'  accept pregenerated models, or just update BPPs.
#' @order 1
#'
#' @export

model_set <- function(sem_out,
                      partables = NULL,
                      model_set_out = NULL,
                      prior_sem_out = NULL,
                      must_add = NULL,
                      must_not_add = NULL,
                      must_drop = NULL,
                      must_not_drop = NULL,
                      remove_constraints = TRUE,
                      exclude_error_cov = TRUE,
                      exclude_feedback = TRUE,
                      exclude_xy_cov = TRUE,
                      df_change_add = 1,
                      df_change_drop = 1,
                      remove_duplicated = TRUE,
                      fit_models = ifelse(!is.null(model_set_out$fit),
                                          FALSE,
                                          TRUE),
                      compute_bpp = TRUE,
                      original = "original",
                      parallel = FALSE,
                      ncores = max(parallel::detectCores(logical = FALSE) - 1, 1),
                      make_cluster_args = list(),
                      progress = TRUE,
                      verbose = TRUE,
                      skip_check_sem_out = FALSE,
                      drop_equivalent_models = TRUE) {
  # if (missing(sem_out)) stop("sem_out is not supplied.")
  user_fits <- FALSE
  if (!missing(sem_out)) {
      if (is.list(sem_out)) {
          all_lavaan <- sapply(sem_out,
                               inherits,
                               what = "lavaan")
          if (!all(all_lavaan)) {
              stop("Not all objects in sem_out are lavaan-class object.")
            }
          if (is.null(names(sem_out))) {
              stop("The list for sem_out needs to be a named list.")
            }
          tmp <- sapply(sem_out, lavaan::lavInspect, what = "converged")
          if (any(!tmp)) {
              stop("One or more supplied models have not converged.")
            }
          user_fits <- TRUE
          model_set_out <- NULL
          remove_duplicated <- FALSE
          fit_models <- FALSE
        } else {
          if (!inherits(sem_out, "lavaan")) {
              stop("sem_out is not a lavaan-class object.")
            }
          if (!skip_check_sem_out) {
              check_sem_out(sem_out)
            }
        }
    }
  if (!is.null(model_set_out)) {
      if (!inherits(model_set_out, "model_set")) {
          stop("model_set_out is not a model_set-class object.")
        }
      sem_out <- model_set_out$sem_out
      mod_to_fit <- model_set_out$models
    } else if (user_fits) {
      mod_to_fit <- do.call(to_partables, sem_out)
    } else {
      if (!is.null(partables)) {
          if (!inherits(partables, "partables")) {
              stop("partables is not a partables-class object.")
            }
          mod_all <- partables
        } else {
          mod_to_add <- get_add(sem_out,
                                must_add = must_add,
                                must_not_add = must_not_add,
                                remove_constraints = remove_constraints,
                                exclude_error_cov = exclude_error_cov,
                                exclude_feedback = exclude_feedback,
                                exclude_xy_cov = exclude_xy_cov,
                                df_change = df_change_add,
                                remove_duplicated = FALSE,
                                progress = progress)
          mod_to_drop <- get_drop(sem_out,
                                  must_drop = must_drop,
                                  must_not_drop = must_not_drop,
                                  df_change = df_change_drop,
                                  remove_duplicated = FALSE,
                                  progress = progress)
          mod_all <- c(mod_to_add, mod_to_drop)
        }
      # NOTE: No need to call unique_models() here.
      # There is a final check below
      # if (remove_duplicated) {
      #     mod_all <- unique_models(mod_all)
      #   }
      class(mod_all) <- setdiff(class(mod_all), "partables")
      pt0 <- lavaan::parameterTable(sem_out)
      mod_to_fit <- c(mod_all, list(`original` = pt0))
    }
  if (remove_duplicated) {
      mod_to_fit <- unique_models(mod_to_fit,
                                  progress = progress)
    }
  if (!inherits(mod_to_fit, "partables")) {
      class(mod_to_fit) <- c("partables", class(mod_to_fit))
    }
  if (fit_models) {
      out <- fit_many(model_list = mod_to_fit,
                      sem_out = sem_out,
                      parallel = parallel,
                      ncores = ncores,
                      make_cluster_args = make_cluster_args,
                      progress = progress,
                      verbose = verbose)
    } else if (user_fits) {
      out <- lavaan_to_sem_outs(sem_out,
                                original = original)
    } else {
      if (!is.null(model_set_out)) {
          out <- model_set_out
          tmp1 <- class(out)
          tmp2 <- which(class(out) == "model_set")
          if (length(tmp2) == 1) {
              tmp1 <- tmp1[-tmp2]
            }
          class(out) <- tmp1
        } else if (user_fits) {
          # Placeholder
        } else {
          # Create an object of the class "sem_outs"
          # Not a good solution but do not have a better one for now
          out <- list(fit = NULL,
                      change = NULL,
                      converged = NULL,
                      post_check = NULL,
                      model_df = NULL,
                      call = NULL)
          class(out) <- c("sem_outs", class(out))
        }
    }
  # Check equivalent clusters
  any_fixedx <- any(sapply(out$fit, lavaan::lavInspect, what = "fixed.x"),
                    na.rm = TRUE)
  # if (drop_equivalent_models && isTRUE(any_fixedx)) {
  #     warning("Cannot check for equivalent models if fixed.x = TRUE.")
  #   }
  if (!is.null(out$fit) && drop_equivalent_models && isFALSE(any_fixedx)) {
      mod_eq <- tryCatch(equivalent_clusters(out$fit,
                                             progress = progress,
                                             name_cluster = TRUE),
                         error = function(e) e)
      if (inherits(mod_eq, "error")) {
          if (grepl("fixed.x = TRUE", mod_eq$message, fixed = TRUE)) {
              warning("Cannot check for equivalent models if fixed.x = TRUE.")
            }
          mod_eq <- NULL
        }
      if (!is.null(mod_eq)) {
          to_drop <- numeric(0)
          m_names <- names(out$fit)
          for (xx in mod_eq) {
              i <- xx[-1]
              to_drop <- c(to_drop, match(i, m_names))
            }
          out$fit <- out$fit[-to_drop]
          if (!is.null(out$change)) {
              out$change <- out$change[-to_drop]
            }
          out$post_check <- out$post_check[-to_drop]
          out$converged <- out$converged[-to_drop]
          if (!is.null(out$model_df)) {
              out$model_df <- out$model_df[-to_drop]
            }
          mod_to_fit_equivalent <- mod_to_fit[to_drop]
          mod_to_fit <- mod_to_fit[-to_drop]
        } else {
          mod_to_fit_equivalent <- NULL
          mod_eq <- NULL
        }
    } else {
      mod_to_fit_equivalent <- NULL
      mod_eq <- NULL
    }
  out$models <- mod_to_fit
  out$models_equivalent <- mod_to_fit_equivalent
  out$equivalent_clusters <- mod_eq
  out$drop_equivalent_models <- drop_equivalent_models
  out$fixed_x <- any_fixedx
  if (compute_bpp && !is.null(out$fit)) {
      bic_list <- sapply(out$fit,
            function(x) {
                out <- tryCatch(as.numeric(lavaan::fitMeasures(x, "bic")),
                                error = function(e) NA)
                out
              })
      out$bic <- bic_list
      if (!is.null(prior_sem_out)) {
          if (sum(prior_sem_out) >= 1) {
              stop("The sum of user prior probabilities must be less than 1.")
            }
          if ((length(prior_sem_out) == 1) && is.null(names(prior_sem_out))) {
              p <- length(out$bic)
              i_original <- which(names(out$models) == original)
              prior_tmp <- rep((1 - prior_sem_out) / (p - 1), p)
              prior_tmp[i_original] <- prior_sem_out
              out$prior <- prior_tmp
            } else {
              p <- length(out$bic)
              q <- length(prior_sem_out)
              i_original <- match(names(prior_sem_out), names(out$models))
              prior_sem_out_tmp <- prior_sem_out[!is.na(i_original)]
              i_original <- i_original[!is.na(i_original)]
              prior_tmp <- rep((1 - sum(prior_sem_out)) / (p - q), p)
              prior_tmp[i_original] <- prior_sem_out_tmp
              out$prior <- prior_tmp
            }
        } else {
          # Assume unbiased priors for all models
          out$prior <- rep(1 / length(out$bic), length(out$bic))
        }
      out$bpp <- bpp(bic = out$bic,
                          prior = out$prior)
    } else {
      out$bic <- NULL
      out$bpp <- NULL
    }
  # TODO: Finalize what to do with NA BIC
  if (any(is.na(out$bic)) && !is.null(out$bic)) {
      out$prior <- rep(NA, length(bic_list))
      out$bpp <- rep(NA, length(bic_list))
    }
  out$model_set_call <- match.call()
  out$sem_out <- sem_out
  # Create Short Names
  out <- short_model_names(out)
  class(out) <- c("model_set", class(out))
  out
}

#' @describeIn model_set Generate a list of models (parameter tables).
#' @order 2
#' @export

gen_models <- function(sem_out,
                       must_add = NULL,
                       must_not_add = NULL,
                       must_drop = NULL,
                       must_not_drop = NULL,
                       remove_constraints = TRUE,
                       exclude_error_cov = TRUE,
                       df_change_add = 1,
                       df_change_drop = 1,
                       remove_duplicated = TRUE,
                       progress = TRUE,
                       output = c("partables", "model_set")) {
    output <- match.arg(output)
    out <- model_set(sem_out = sem_out,
                     must_add = must_add,
                     must_not_add = must_not_add,
                     must_drop = must_drop,
                     must_not_drop = must_not_drop,
                     remove_constraints = remove_duplicated,
                     exclude_error_cov = exclude_error_cov,
                     df_change_add = df_change_add,
                     df_change_drop = df_change_drop,
                     remove_duplicated = remove_duplicated,
                     progress = progress,
                     fit_models = FALSE,
                     compute_bpp = FALSE)
    out2 <- switch(output,
                   model_set = out,
                   partables = out$models)
    out2
  }

#' @title BIC Posterior Probabilities
#' @noRd

bpp <- function(bic,
                prior = NULL) {
    k <- length(bic)
    if (is.null(prior)) {
        prior <- rep(1 / k, k)
      }
    di <- exp(-.5 * (bic - bic[1])) * prior
    out <- di / sum(di)
    out
  }

#' @noRd

short_model_names <- function(object) {
    if (!is.character(names(object$models))) {
        return(object)
      }
    p <- length(object$models)
    short_names <- paste0("M", seq_len(p))
    names(short_names) <- names(object$models)
    object$short_names <- short_names
    object
  }