R/multilevel.cor.R

Defines functions multilevel.cor

Documented in multilevel.cor

#' Within-Group and Between-Group Correlation Matrix
#'
#' This function computes the within-group and between-group correlation matrix
#' by calling the \code{sem} function in the R package \pkg{lavaan} and provides
#' standard errors, z test statistics, and significance values (\emph{p}-values)
#' for testing the hypothesis H0: \eqn{\rho} = 0 for all pairs of variables within
#' and between groups. By default, the function computes the within-group and
#' between-group correlation matrix without standard errors, z test statistics,
#' and significance value.
#'
#' @param data         a data frame.
#' @param ...          an expression indicating the variable names in \code{data},
#'                     e.g., \code{multilevel.cor(dat, x1, x2, x3)}. Note that
#'                     the operators \code{+}, \code{-}, \code{~},
#'                     \code{:}, \code{::}, and \code{!} can also be used to
#'                     select variables, see 'Details' in the \code{\link{df.subset}}
#'                     function.
#' @param cluster      either a character string indicating the variable name of
#'                     the cluster variable in \code{data} or a vector representing
#'                     the nested grouping structure (i.e., group or cluster variable).
#' @param within       a character vector representing variables that are measured
#'                     at the within level and modeled only at the within level.
#'                     Variables not mentioned in \code{within} or \code{between}
#'                     are measured at the within level and will be modeled on both
#'                     the within and between level.
#' @param between      a character vector representing variables that are measured
#'                     at the between level and modeled only at the between level.
#'                     Variables not mentioned in \code{within} or \code{between}
#'                     are measured at the within level and will be modeled on
#'                     both the within and between level.
#' @param estimator    a character string indicating the estimator to be used, i.e.,
#'                     \code{"ML"} for maximum likelihood with conventional
#'                     standard errors and \code{"MLR"} for maximum likelihood with
#'                     Huber-White robust standard errors. The default setting
#'                     depends on the argument \code{sig}, i.e., \code{"ML"} is
#'                     used when specifying \code{sig = FALSE} (default) and
#'                     \code{"MLR"} is used when specifying \code{sig = TRUE}.
#' @param optim.method a character string indicating the optimizer, i.e.,
#'                     \code{"nlminb"} (default) for the unconstrained and
#'                     bounds-constrained quasi-Newton method optimizer and
#'                     \code{"em"} for the Expectation Maximization (EM) algorithm.
#' @param optim.switch logical: if \code{TRUE} (default), model estimation switches
#'                     to Expectation Maximization (EM) algorithm (\code{"em"})
#'                     if the quasi-Newton optimization (\code{"nlminb"} (default))
#'                     does not converge.
#' @param missing      a character string indicating how to deal with missing
#'                     data, i.e., \code{"listwise"} for listwise deletion or
#'                     \code{"fiml"} (default) for full information maximum
#'                     likelihood (FIML) method. Note that it takes longer to
#'                     estimate models while using FIML and using FIML is
#'                     prone to issues with model convergence, these issues might
#'                     be resolved by switching to listwise deletion.
#' @param sig          logical: if \code{TRUE}, statistically significant
#'                     correlation coefficients are shown in boldface on the
#'                     console. Note that standard errors, z test statistics, and
#'                     significance values not provided in the return object when
#'                     \code{sig = FALSE} (default).
#' @param alpha        a numeric value between 0 and 1 indicating the significance
#'                     level at which correlation coefficients are printed
#'                     boldface when \code{sig = TRUE}.
#' @param print        a character string or character vector indicating which
#'                     results to show on the console, i.e. \code{"all"} for all
#'                     results, \code{"cor"} for correlation coefficients,
#'                     \code{"se"} for standard errors, \code{"stat"} for z test
#'                     statistics, and \code{"p"} for \emph{p}-values.
#' @param split        logical: if \code{TRUE}, output table is split in
#'                     within-group and between-group correlation matrix.
#' @param order        logical: if \code{TRUE}, variables in the output table are
#'                     ordered, so that variables specified in the argument
#'                     \code{between} are shown first.
#' @param tri          a character string indicating which triangular of the
#'                     matrix to show on the console when \code{split = TRUE},
#'                     i.e., \code{both} for upper and \code{upper} for the upper
#'                     triangular.
#' @param tri.lower    logical: if \code{TRUE} (default) and \code{split = FALSE}
#'                     (default), within-group correlations are shown in the lower
#'                     triangular and between-group correlation are shown in the
#'                     upper triangular.
#' @param p.adj        a character string indicating an adjustment method for
#'                     multiple testing based on \code{\link{p.adjust}}, i.e.,
#'                     \code{none} (default), \code{bonferroni}, \code{holm},
#'                     \code{hochberg}, \code{hommel}, \code{BH}, \code{BY}, or
#'                     \code{fdr}.
#' @param digits       an integer value indicating the number of decimal places
#'                     to be used for displaying correlation coefficients.
#' @param p.digits     an integer value indicating the number of decimal places
#'                     to be used for displaying \emph{p}-values.
#' @param as.na        a numeric vector indicating user-defined missing values,
#'                     i.e. these values are converted to \code{NA} before
#'                     conducting the analysis. Note that \code{as.na()} function
#'                     is only applied to \code{data} but not to \code{cluster}.
#' @param write        a character string naming a file for writing the output into
#'                     either a text file with file extension \code{".txt"} (e.g.,
#'                     \code{"Output.txt"}) or Excel file with file extension
#'                     \code{".xlsx"}  (e.g., \code{"Output.xlsx"}). If the file
#'                     name does not contain any file extension, an Excel file will
#'                     be written.
#' @param append       logical: if \code{TRUE} (default), output will be appended
#'                     to an existing text file with extension \code{.txt} specified
#'                     in \code{write}, if \code{FALSE} existing text file will be
#'                     overwritten.
#' @param check        logical: if \code{TRUE} (default), argument specification,
#'                     convergence and model identification is checked.
#' @param output       logical: if \code{TRUE} (default), output is shown on the
#'                     console.
#'
#' @details
#' \describe{
#' \item{\strong{Within-Group and Between-Group Variables}}{The specification of
#' the within-group and between-group variables is in line with the syntax in Mplus.
#' That is, the \code{within} argument is used to identify variables in the data
#' frame specified in \code{data} that are measured at the individual level and
#' modeled only at the within level. They are specified to have no variance in
#' the between part of the model. The \code{between} argument is used to identify
#' the variables in the data frame specified in \code{data} that are measured at
#' the cluster level and modeled only at the between level. Variables not mentioned
#' in the arguments \code{within} or \code{between} are measured at the individual
#' level and will be modeled at both the within and between level.}
#' \item{\strong{Estimation Method and Missing Data Handling}}{The default setting
#' for the argument \code{estimator} is depending on the setting of the argument
#' \code{sig}. If \code{sig = FALSE} (default), maximum likelihood estimation
#' (\code{estimator = "ML"}) is used, while maximum likelihood with Huber-White
#' robust standard errors (\code{estimator = "MLR"}) that are robust against
#' non-normality is used when \code{sig = TRUE}. In the presence of missing data,
#' full information maximum likelihood (FIML) method (\code{missing = "fiml"}) is
#' used by default. Note that FIML method cannot deal with within-group variables
#' that have no variance within some clusters. In this
#' cases, the function will switch to listwise deletion. Using FIML method might
#' result in issues with model convergence, which will be resolved by switching
#' to listwise deletion (\code{missing = "listwise"}).}
#' \item{\strong{Optimizer}}{The lavaan package uses a quasi-Newton optimization
#' method (\code{"nlminb"}) by default. If the optimizer does not converge, model
#' estimation switches to the Expectation Maximization (EM) algorithm (\code{"em"})
#' if the argument \code{optim.switch} is specified as \code{TRUE} (default).}
#' \item{\strong{Statistical Significance}}{Statistically significant correlation
#' coefficients can be shown in boldface on the console by specifying \code{sig = TRUE}.
#' However, this option is not supported when using R Markdown, i.e., the argument
#' \code{sig} will switch to \code{FALSE}.}
#' \item{\strong{Adjustment Method for Multiple Testing }}{Adjustment method for
#' multiple testing when specifying the argument \code{p.adj} is applied to
#' the within-group and between-group correlation matrix separately.}
#' }
#'
#' @author
#' Takuya Yanagida \email{takuya.yanagida@@univie.ac.at}
#'
#' @seealso
#' \code{\link{multilevel.descript}}, \code{\link{multilevel.icc}},
#' \code{\link{multilevel.cfa}}, \code{\link{cluster.scores}},
#' \code{\link{write.result}}
#'
#' @references
#' Hox, J., Moerbeek, M., & van de Schoot, R. (2018). \emph{Multilevel analysis:
#' Techniques and applications} (3rd. ed.). Routledge.
#'
#' Snijders, T. A. B., & Bosker, R. J. (2012). \emph{Multilevel analysis: An
#' introduction to basic and advanced multilevel modeling} (2nd ed.). Sage
#' Publishers.
#'
#' @return
#' Returns an object of class \code{misty.object}, which is a list with following
#' entries:
#' \item{\code{call}}{function call}
#' \item{\code{type}}{type of analysis}
#' \item{\code{data}}{data frame specified in \code{data} including the group variable
#'                    specified in \code{cluster}}
#' \item{\code{args}}{specification of function arguments}
#' \item{\code{model.fit}}{fitted lavaan object (\code{mod.fit})}
#' \item{\code{result}}{list with result tables, i.e., \code{summary} for the
#'                      specification of the estimation method and missing data
#'                      handling in lavaan, \code{wb.cor} for the within- and
#'                      between-group correlations, \code{wb.se} for the standard
#'                      error of the within- and between-group correlations,
#'                      \code{wb.stat} for the test statistic of within- and between-group
#'                      correlations, \code{wb.p} for the significance value of
#'                      the within- and between-group correlations, \code{with.cor}
#'                      for the within-group correlations, \code{with.se} for the
#'                      standard error of the within-group correlations, \code{with.stat}
#'                      for the test statistic of within-group correlations, \code{with.p}
#'                      for the significance value of the within-group correlations,
#'                      \code{betw.cor} for the between-group correlations, \code{betw.se}
#'                      for the standard error of the between-group correlations,
#'                      \code{betw.stat} for the test statistic of between-group
#'                      correlations, \code{betw.p} for the significance value of
#'                      the between-group correlations}
#'
#' @note
#' The function uses the functions \code{sem}, \code{lavInspect},
#' \code{lavMatrixRepresentation}, \code{lavTech}, \code{parameterEstimates},
#' and \code{standardizedsolution} provided in the R package \pkg{lavaan} by
#' Yves Rosseel (2012).
#'
#' @export
#'
#' @examples
#' \dontrun{
#' # Load data set "Demo.twolevel" in the lavaan package
#' data("Demo.twolevel", package = "lavaan")
#'
#' #----------------------------------------------------------------------------
#' # Cluster variable specification
#'
#' # Example 1: Specification using the argument '...'
#' multilevel.cor(Demo.twolevel, y1, y2, y3, cluster = "cluster")
#'
#' # Alternative specification with cluster variable 'cluster' in 'data'
#' multilevel.cor(Demo.twolevel[, c("y1", "y2", "y3", "cluster")], cluster = "cluster")
#'
#' # Alternative specification with cluster variable 'cluster' not in 'data'
#' multilevel.cor(Demo.twolevel[, c("y1", "y2", "y3")], cluster = Demo.twolevel$cluster)
#'
#' #----------------------------------------------------------------------------
#' # Example 2: All variables modeled at both the within and between level
#' # Highlight statistically significant result at alpha = 0.05
#' multilevel.cor(Demo.twolevel, y1, y2, y3, sig = TRUE, cluster = "cluster")
#'
#' # Example 3: Split output table in within-group and between-group correlation matrix.
#' multilevel.cor(Demo.twolevel, y1, y2, y3, cluster = "cluster", split = TRUE)
#'
#' # Example 4: Print correlation coefficients, standard errors, z test statistics,
#' # and p-values
#' multilevel.cor(Demo.twolevel, y1, y2, y3, cluster = "cluster", sig = TRUE, print = "all")
#'
#' # Example 5: Print correlation coefficients and p-values
#' # significance values with Bonferroni correction
#' multilevel.cor(Demo.twolevel, y1, y2, y3, cluster = "cluster", sig = TRUE,
#'                print = c("cor", "p"), p.adj = "bonferroni")
#'
#' #----------------------------------------------------------------------------
#' # Example 6: Variables "y1", "y2", and "y2" modeled at both the within and between level
#' # Variables "w1" and "w2" modeled at the cluster level
#' multilevel.cor(Demo.twolevel, y1, y2, y3, w1, w2, cluster = "cluster",
#'                between = c("w1", "w2"))
#'
#' # Example 7: Show variables specified in the argument 'between' first
#' multilevel.cor(Demo.twolevel, y1, y2, y3, w1, w2, cluster = "cluster",
#'                between = c("w1", "w2"), order = TRUE)
#'
#' #----------------------------------------------------------------------------
#' # Example 8: Variables "y1", "y2", and "y2" modeled only at the within level
#' # Variables "w1" and "w2" modeled at the cluster level
#' multilevel.cor(Demo.twolevel, y1, y2, y3, w1, w2, cluster = "cluster",
#'                within = c("y1", "y2", "y3"), between = c("w1", "w2"))
#'
#' #----------------------------------------------------------------------------
#' # Example 9: lavaan model and summary of the multilevel model used to compute the
#' # within-group and between-group correlation matrix
#'
#' mod <- multilevel.cor(Demo.twolevel, y1, y2, y3, cluster = "cluster", output = FALSE)
#'
#' # lavaan model syntax
#' mod$model
#'
#' # Fitted lavaan object
#' lavaan::summary(mod$model.fit, standardized = TRUE)
#'
#' #----------------------------------------------------------------------------
#' # Write Results
#'
#' # Example 10a: Write Results into a text file
#' multilevel.cor(Demo.twolevel, y1, y2, y3, cluster = "cluster",
#'                write = "Multilevel_Correlation.txt")
#'
#' # Example 10b: Write Results into a Excel file
#' multilevel.cor(Demo.twolevel, y1, y2, y3, cluster = "cluster",
#'                write = "Multilevel_Correlation.xlsx")
#' }
multilevel.cor <- function(data, ..., cluster, within = NULL, between = NULL,
                           estimator = c("ML", "MLR"), optim.method = c("nlminb", "em"),
                           optim.switch = TRUE, missing = c("listwise", "fiml"),
                           sig = FALSE, alpha = 0.05, print = c("all", "cor", "se", "stat", "p"),
                           split = FALSE, order = FALSE, tri = c("both", "lower", "upper"),
                           tri.lower = TRUE, p.adj = c("none", "bonferroni", "holm", "hochberg", "hommel", "BH", "BY", "fdr"),
                           digits = 2, p.digits = 3, as.na = NULL, write = NULL,
                           append = TRUE, check = TRUE, output = TRUE) {

  #_____________________________________________________________________________
  #
  # Initial Check --------------------------------------------------------------

  # Check if input 'data' is missing or NULL
  if (isTRUE(missing(data) || is.null(data))) { stop("Please specify a data frame for the argument 'data'", call. = FALSE) }

  # Check input 'cluster'
  if (isTRUE(missing(cluster) ||is.null(cluster))) { stop("Please specify a variable name or vector representing the grouping structure for the argument 'cluster'.", call. = FALSE) }

  #_____________________________________________________________________________
  #
  # Data -----------------------------------------------------------------------

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Data using the argument '...' ####

  if (isTRUE(!missing(...))) {

    # Extract data
    x <- as.data.frame(data[, .var.names(data = data, ..., cluster = cluster), drop = FALSE])

    # Cluster variable
    cluster <- data[, cluster]

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Data without using the argument '...' ####

  } else {

    # Data frame
    x <- as.data.frame(data)

    # Data and cluster
    var.group <- .var.group(data = x, cluster = cluster)

    # Data
    if (isTRUE(!is.null(var.group$data))) { x <- var.group$data }

    # Cluster variable
    if (isTRUE(!is.null(var.group$cluster))) { cluster <- var.group$cluster }

  }

  # Convert 'cluster' as tibble into a vector
  if (!is.null(cluster) && isTRUE("tbl" %in% substr(class(cluster), 1L, 3L))) { cluster <- unname(unlist(cluster)) }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Numeric Variables ####

  x <- (!vapply(as.data.frame(x), is.numeric, FUN.VALUE = logical(1L))) |> (\(y) if (isTRUE(any(y))) {

    warning(paste0("Non-numeric variables were excluded from the analysis: ", paste(names(which(y)), collapse = ", ")), call. = FALSE)

    return(as.data.frame(x)[, -which(y), drop = FALSE])

  } else {

    return(x)

  })()

  if (isTRUE(ncol(x) == 0L)) { stop("No variables left for analysis after excluding non-numeric variables.", call. = FALSE) }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Convert user-missing values into NA ####

  if (isTRUE(!is.null(as.na))) { x <- .as.na(x, na = as.na) }

  #_____________________________________________________________________________
  #
  # Input Check ----------------------------------------------------------------

  # Check inputs
  .check.input(logical = c("optim.switch", "sig", "split", "order", "tri.lower", "append", "output"),
               s.character = list(estimator = c("ML", "MLR"), optim.method = c("nlminb", "em"), missing = c("listwise", "fiml"), tri = c("both", "lower", "upper")),
               m.character = list(print = c("all", "cor", "se", "stat", "p")),
               args = c("alpha", "p.adj", "digits", "p.digits", "write2"),
               package = "lavaan", envir = environment(), input.check = check)

  # Within- and Between-Group Variables Check
  if (isTRUE(check)) {

    # Variables in 'within' and 'between'
    intersect(within, between) |> (\(y) if (isTRUE(length(y) > 0L)) { stop(paste0("Following ", ifelse(length(y) == 1L, "variable is ", "variables are "), "specified in both arguments 'within' and 'between': ", paste(y, collapse = ", ")), call. = FALSE) })()

    #...................
    ### Within-Group Variable ####

    # Within variables in 'data'
    (!within %in% colnames(x)) |> (\(y) if (isTRUE(any(y))) { stop(paste0(ifelse(length(which(y)) == 1L, "Variable ", "Variables "), "specified in the argument 'within' ", ifelse(length(which(y)) == 1L, "was ", "were "), "not found in 'data': ", paste(within[which(y)], collapse = ", ")), call. = FALSE) })()

    # Variance within clusters
    vapply(x[, within, drop = FALSE], function(y) all(tapply(y, cluster, var, na.rm = TRUE) < .Machine$double.eps^0.5), FUN.VALUE = logical(1L)) |> (\(y) if (isTRUE(any(y))) { stop(paste0("Following within-group ", ifelse(length(which(y)) == 1L, "variable has ", "variables have "), "no variance within clusters: ", paste(names(which(y)), collapse = ", ")), call. = FALSE) })()

    #...................
    ### Between-Group Variable ####

    # Between variables in 'data'
    (!between %in% colnames(x)) |> (\(y) if (isTRUE(any(y))) { stop(paste0(ifelse(length(which(y)) == 1L, "Variable ", "Variables "), "specified in the argument 'between' ", ifelse(length(which(y)) == 1L, "was ", "were "), "not found in 'data': ", paste(between[which(y)], collapse = ", ")), call. = FALSE) })()

    # Variance within clusters
    vapply(x[, between, drop = FALSE], function(y) any(tapply(y, cluster, var, na.rm = TRUE) != 0L), FUN.VALUE = logical(1L)) |> (\(y) if (isTRUE(any(y))) { stop(paste0("Following between-group ", ifelse(length(which(y)) == 1L, "variable has ", "variables have "), "variance within clusters: ", paste(names(which(y)), collapse = ", ")), call. = FALSE) })()

    #...................
    ### Within-/Between-Group Variable ####

    # Variance between clusters
    vapply(x[, setdiff(colnames(x), c(within, between)), drop = FALSE], function(y) misty::multilevel.icc(y, cluster = cluster) < .Machine$double.eps^0.5, FUN.VALUE = logical(1L)) |> (\(y) if (isTRUE(any(y))) { stop(paste0("Following ", ifelse(length(which(y)) == 1L, "variable has ", "variables have "), "no variance between clusters and should be specified in the 'within' argument: ", paste(names(which(y)), collapse = ", ")), call. = FALSE) })()

  }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Within-Group and Between Variables ####

  # Within Only and Within/Between Variables
  if (isTRUE(is.null(between))) { var.with <- colnames(x) } else { var.with <- setdiff(colnames(x), between) }

  # At least one within variables
  if (isTRUE(length(var.with) == 0L)) { stop("Please specify at least one within-group variable.", call. = FALSE) }

  # Between Only and Within/Between variables
  if (isTRUE(is.null(within))) { var.betw <- colnames(x) } else { var.betw <- setdiff(colnames(x), within) }

  # At least one between variables
  if (isTRUE(length(var.betw) == 0L)) { stop("Please specify at least one between-group variable.", call. = FALSE) }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Data frame with Cluster Variable ####

  x <- data.frame(x[, unique(c(var.with, var.betw))], .cluster = cluster)

  n.total <- nrow(x)

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Missing Data on the Cluster Variable ####

  if (isTRUE(any(is.na(cluster)))) {

    warning(paste0("Data contains missing values on the cluster variable, number of cases removed from the analysis: ", sum(is.na(cluster))), call. = FALSE)

    x <- x[!is.na(cluster), ]

  }

  #_____________________________________________________________________________
  #
  # Arguments ------------------------------------------------------------------

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Estimator ####

  # Default setting: "MLR" if sig = TRUE, "ML" if sig = FALSE
  estimator <- if (isTRUE(sig)) { ifelse(all(c("ML", "MLR") %in% estimator), "MLR", estimator) } else { "ML" }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Standard Errors ####

  # Default setting: "robust.huber.white" if sig = TRUE, "none" if sig = FALSE
  se <- if (isTRUE(sig)) { ifelse(estimator == "MLR", "robust.huber.white", "standard") } else { "none" }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Optimizer ####

  if (isTRUE(all(c("nlminb", "em") %in% optim.method))) { optim.method <- "nlminb" }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Missing data ####

  #...................
  ### Missing values present ####
  if (isTRUE(any(is.na(x)))) {

    complete <- FALSE

    if (isTRUE(all(c("listwise", "fiml") %in% missing))) { missing <- "fiml" }

  #...................
  ### No missing values ####
  } else {

    complete <- TRUE
    missing <- "listwise"

  }

  # Cases with missing on all variables
  if (isTRUE(missing == "fiml")) {

    x <- misty::na.prop(x[, -which(colnames(x) %in% c(".cluster", between)), drop = FALSE], append = FALSE) |>
      (\(y) if (any(y == 1L)) {

        warning(paste0("Data contains cases with missing values on all variables measured at the within level, number of cases removed from the analysis: ", sum(y == 1L)), call. = FALSE)

        # Remove cases with missing on all variables
        return(x[which(y < 1L), ])

      } else {

        return(x)

      })()

  }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Print correlation, sample size or significance values ####

  if (isTRUE(all(c("all", "cor", "se", "stat", "p") %in% print))) { print <- "cor" }

  if (isTRUE(length(print) == 1L && "all" %in% print)) { print <- c("cor", "se", "stat", "p") }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Print triangular ####

  tri <- ifelse(all(c("both", "lower", "upper") %in% tri), "lower", tri)

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Adjustment method for multiple testing ####

  p.adj <- ifelse(all(c("none", "bonferroni", "holm", "hochberg", "hommel", "BH", "BY", "fdr") %in% p.adj), "none", p.adj)

  #_____________________________________________________________________________
  #
  # Main Function --------------------------------------------------------------

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Model specification ####

  # At least two within- and two between-group variables
  if (isTRUE(length(var.with) >= 2L && length(var.betw) >= 2L)) {

    mod <- paste(" level: 1\n  ",
                 # Within model
                 paste(apply(combn(length(var.with), 2L), 2L, function(y) paste(var.with[y[1L]], var.with[y[2L]], sep = " ~~ " )), collapse = "\n   "),
                 "\n level: 2\n  ",
                 # Between model
                 paste(apply(combn(length(var.betw), 2L), 2L, function(y) paste(var.betw[y[1L]], var.betw[y[2L]], sep = " ~~ " )), collapse = "\n   "))

  # At least two within-group variable, but only one between-group variable
  } else if (isTRUE(length(var.with) >= 2L && length(var.betw) == 1L)){

    mod <- paste(" level: 1\n  ",
                 # Within model
                 paste(apply(combn(length(var.with), 2L), 2L, function(y) paste(var.with[y[1L]], var.with[y[2L]], sep = " ~~ " )), collapse = "\n   "),
                 "\n level: 2\n  ",
                 # Between model
                 paste(var.betw, var.betw, sep = " ~~ "), collapse = "\n   ")

  # At least two between-group variable, but only one within-group variable
  } else if (isTRUE(length(var.with) == 1L && length(var.betw) >= 2L)){

    mod <- paste(" level: 1\n  ",
                 # Within model
                 paste(var.with, var.with, sep = " ~~ " ), collapse = "\n   ",
                 "\n level: 2\n  ",
                 # Between model
                 paste(apply(combn(length(var.betw), 2L), 2L, function(y) paste(var.betw[y[1L]], var.betw[y[2L]], sep = " ~~ " )), collapse = "\n   "))

  }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Model estimation ####

  model.fit <- tryCatch(suppressWarnings(lavaan::sem(mod, data = x, cluster = ".cluster", estimator = estimator,
                                                     missing = missing, optim.method = optim.method, se = se,
                                                     check.gradient = FALSE, check.post = FALSE, check.vcov = FALSE)),
                        error = function(y) {

                          if (isTRUE(missing == "fiml")) {

                            stop("There was an estimation problem in lavaan, switching to missing = \"listwise\" might solve the problem.", call. = FALSE)

                          } else if (isTRUE(estimator == "MLR")){

                            stop("There was an estimation problem in lavaan, switching to estimator = \"ML\" might solve the problem.", call. = FALSE)

                          } else {

                            stop("There was an estimation problem in lavaan, correlation matrix could not be computed.", call. = FALSE)

                          }})

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Convergence check ####

  if (isTRUE(!lavaan::lavInspect(model.fit, what = "converged"))) {

    #...................
    ### Quasi-Newton method optimizer ####

    if (isTRUE(optim.method == "nlminb")) {

      if (isTRUE(optim.switch)) {

        message("Quasi-Newton optimizer did not converge, switched to the EM algorithm.")

        # Model estimation with EM algorithm
        model.fit <- suppressWarnings(lavaan::sem(mod, data = x, cluster = ".cluster", estimator = estimator,
                                                  missing = missing, optim.method = "em",
                                                  se = ifelse(estimator == "MLR", "robust.huber.white", "standard"),
                                                  check.gradient = FALSE, check.post = FALSE, check.vcov = FALSE))

        # Model not converged
        if (isTRUE(!lavaan::lavInspect(model.fit, what = "converged"))) {

          if (isTRUE(missing == "listwise")) {

            stop("EM algorithm did not converge.", call. = FALSE)

          } else {

            stop("EM algorithm did not converge. Switching to missing = \"listwise\" might solve the estimation problem.", call. = FALSE)

          }

        }

      }

    #...................
    ### Expectation Maximization (EM) algorithm ####

    } else if(isTRUE(optim.method == "em")) {

      if (isTRUE(missing == "listwise")) {

        stop("EM algorithm did not converge.", call. = FALSE)

      } else {

        stop("EM algorithm did not converge. Switching to missing = \"listwise\" might solve the estimation problem.", call. = FALSE)

      }

    }

  }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Convergence and model identification checks ####

  if (isTRUE(check)) {

    check.vcov <- check.theta.w <- check.theta.b <- check.cov.lv.w <- check.cov.lv.b <- TRUE

    if (isTRUE(se != "none")) {

      #...................
      ### Standard error ####

      if (isTRUE(any(is.na(unlist(lavaan::lavInspect(model.fit, what = "se")))))) { stop("Standard errors could not be computed.", call. = FALSE) }

      #...................
      ### Variance-covariance matrix of the estimated parameters ####

      eigvals <- eigen(lavaan::lavInspect(model.fit, what = "vcov"), symmetric = TRUE, only.values = TRUE)$values

      # Model contains equality constraints
      model.fit.par <- lavaan::parameterTable(model.fit)$op == "=="

      if (isTRUE(any(model.fit.par))) { eigvals <- rev(eigvals)[-seq_len(sum(model.fit.par))] }

      if (isTRUE(min(eigvals) < .Machine$double.eps^(3L/4L))) {

        warning("The variance-covariance matrix of the estimated parameters is not positive definite. This may be a symptom that the model is not identified.", call. = FALSE)

        check.vcov <- FALSE

      }

    }

    #...................
    ### Negative variance of observed variables ####

    #### Within Level
    if (isTRUE(any(diag(lavaan::lavInspect(model.fit, what = "theta")$within) < 0L))) {

      warning("Some estimated variances of the observed variables at the Within level are negative.", call. = FALSE)

      check.theta.w <- FALSE

    } else if (isTRUE(any(eigen(lavaan::lavTech(model.fit, what = "theta")$within, symmetric = TRUE, only.values = TRUE)$values < (-1L * .Machine$double.eps^(3/4))))) {

      warning("The model-implied variance-covariance matrix of the residuals of the observed variables is not positive definite indicating an absolute residual correlations greater one.", call. = FALSE)

      check.theta.w <- FALSE

    }

    #### Between Level
    if (isTRUE(any(diag(lavaan::lavInspect(model.fit, what = "theta")$.cluster) < 0L))) {

      warning("Some estimated variances of the observed variables at the Between level are negative.", call. = FALSE)

      check.theta.b <- FALSE

    } else if (isTRUE(any(eigen(lavaan::lavTech(model.fit, what = "theta")$.cluster, symmetric = TRUE, only.values = TRUE)$values < (-1L * .Machine$double.eps^(3/4))))) {

      warning("The model-implied variance-covariance matrix of the residuals of the observed variables at the Between level is not positive definite indicating an absolute residual correlations greater one.", call. = FALSE)

      check.theta.b <- FALSE

    }

    #...................
    ### Negative variance of latent variables ####

    #### Within Level
    if (isTRUE(!is.null(lavaan::lavTech(model.fit, what = "cov.lv")$within))) {

      if (isTRUE(any(diag(lavaan::lavTech(model.fit, what = "cov.lv")$within) < 0L))) {

        warning("Some estimated variances of the latent variables at the Within level are negative.", call. = FALSE)

        check.cov.lv.w <- FALSE

      }

    # Model-implied variance-covariance matrix of the latent variables
    } else if (any(dim(lavaan::lavTech(model.fit, what = "cov.lv")$within) != 0L)) {

      if (isTRUE(any(eigen(lavaan::lavTech(model.fit, what = "cov.lv")$within, symmetric = TRUE, only.values = TRUE)$values < (-1L * .Machine$double.eps^(3/4))))) {

        warning("The model-implied variance-covariance matrix of the latent variables at the Within level is not positive definite indicating an absolute correlation greater one.", call. = FALSE)

        check.cov.lv.w <- FALSE

      }

    }

    #### Between Level
    if (isTRUE(!is.null(lavaan::lavTech(model.fit, what = "cov.lv")$cluster))) {

      if (isTRUE(any(diag(lavaan::lavTech(model.fit, what = "cov.lv")$.cluster) < 0L))) {

        warning("Some estimated variances of the latent variables at the Between level are negative.", call. = FALSE)

        check.cov.lv.b <- FALSE

      }

    # Model-implied variance-covariance matrix of the latent variables
    } else if (any(dim(lavaan::lavTech(model.fit, what = "cov.lv")$.cluster) != 0L)) {

      if (isTRUE(any(eigen(lavaan::lavTech(model.fit, what = "cov.lv")$.cluster, symmetric = TRUE, only.values = TRUE)$values < (-1L * .Machine$double.eps^(3/4))))) {

        warning("The model-implied variance-covariance matrix of the latent variables at the Between level is not positive definite indicating an absolute correlation greater one.", call. = FALSE)

        check.cov.lv.b <- FALSE

      }

    }

  } else {

    check.vcov <- check.theta.w <- check.theta.b <- check.cov.lv.w <- check.cov.lv.b <- NULL

  }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Extract Results ####

  # Standardized solution
  stand <- lavaan::lavMatrixRepresentation(lavaan::standardizedSolution(model.fit))

  # Visible binding for global variable
  mat <- level <- NULL

  #...................
  ### Within-Group Results ####

  # Theta
  with.stand.theta <- subset(stand[ unlist(subset(lavaan::lavMatrixRepresentation(lavaan::parameterestimates(model.fit)), level == 1L, select = "id")), ], mat == "theta")

  # Parameter estimate, z and significance value matrix
  with.p <- with.stat <- with.se <- with.cor <- matrix(NA, ncol = max(stand[, "col"]), nrow = max(stand[, "row"]))

  # Model estimation with SE
  if (isTRUE(se != "none")) {

    for (i in seq_len(nrow(with.stand.theta))) {

      with.cor[with.stand.theta[i, "row"], with.stand.theta[i, "col"]] <- with.stand.theta[i, "est.std"]
      with.se[with.stand.theta[i, "row"], with.stand.theta[i, "col"]] <- with.stand.theta[i, "se"]
      with.stat[with.stand.theta[i, "row"], with.stand.theta[i, "col"]] <- with.stand.theta[i, "z"]
      with.p[with.stand.theta[i, "row"], with.stand.theta[i, "col"]] <- with.stand.theta[i, "pvalue"]

    }

  # Model estimation without SE
  } else {

    for (i in seq_len(nrow(with.stand.theta))) {

      with.cor[with.stand.theta[i, "row"], with.stand.theta[i, "col"]] <- with.stand.theta[i, "est.std"]

    }

  }

  with.cor[lower.tri(with.cor)] <- t(with.cor)[lower.tri(with.cor)]
  with.se[lower.tri(with.se)] <- t(with.se)[lower.tri(with.se)]
  with.stat[lower.tri(with.stat)] <- t(with.stat)[lower.tri(with.stat)]
  with.p[lower.tri(with.p)] <- t(with.p)[lower.tri(with.p)]

  colnames(with.cor) <- colnames(with.se) <- colnames(with.stat) <- colnames(with.p) <-
  rownames(with.cor) <- rownames(with.se) <- rownames(with.stat) <- rownames(with.p) <- sapply(seq_len(max(stand[, "row"])), function(y) unique(stand[which(y == stand$row), "lhs"]))

  #...................
  ### Between-Group Results ####

  # Standardized solution
  betw.stand.theta <- subset(stand[ unlist(subset(lavaan::lavMatrixRepresentation(lavaan::parameterestimates(model.fit)), level == 2L, select = "id")), ], mat == "theta")

  # Parameter estimate, z and significance value matrix
  betw.p <- betw.stat <- betw.se <- betw.cor <- matrix(NA, ncol = max(stand[, "col"]), nrow = max(stand[, "row"]))

  # Model estimation with SE
  if (isTRUE(se != "none")) {

    for (i in seq_len(nrow(betw.stand.theta))) {

      betw.cor[betw.stand.theta[i, "row"], betw.stand.theta[i, "col"]] <- betw.stand.theta[i, "est.std"]
      betw.se[betw.stand.theta[i, "row"], betw.stand.theta[i, "col"]] <- betw.stand.theta[i, "se"]
      betw.stat[betw.stand.theta[i, "row"], betw.stand.theta[i, "col"]] <- betw.stand.theta[i, "z"]
      betw.p[betw.stand.theta[i, "row"], betw.stand.theta[i, "col"]] <- betw.stand.theta[i, "pvalue"]

    }

  # Model estimation without SE
  } else {

    for (i in seq_len(nrow(betw.stand.theta))) {

      betw.cor[betw.stand.theta[i, "row"], betw.stand.theta[i, "col"]] <- betw.stand.theta[i, "est.std"]

    }

  }

  betw.cor[lower.tri(betw.cor)] <- t(betw.cor)[lower.tri(betw.cor)]
  betw.se[lower.tri(betw.se)] <- t(betw.se)[lower.tri(betw.se)]
  betw.stat[lower.tri(betw.stat)] <- t(betw.stat)[lower.tri(betw.stat)]
  betw.p[lower.tri(betw.p)] <- t(betw.p)[lower.tri(betw.p)]

  colnames(betw.cor) <- colnames(betw.se) <- colnames(betw.stat) <- colnames(betw.p) <-
  rownames(betw.cor) <- rownames(betw.se) <- rownames(betw.stat) <- rownames(betw.p) <- sapply(seq_len(max(stand[, "row"])), function(y) unique(stand[which(y == stand$row), "lhs"]))

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Order Between Variables ####

  if (isTRUE(order && !is.null(between))) {

    pos.betw <- which(colnames(betw.cor) %in% between)
    pos.with <- which(!colnames(betw.cor) %in% between)

    with.cor <- with.cor[c(pos.betw, pos.with), c(pos.betw, pos.with)]
    with.se <- with.se[c(pos.betw, pos.with), c(pos.betw, pos.with)]
    with.stat <- with.stat[c(pos.betw, pos.with), c(pos.betw, pos.with)]
    with.p <- with.p[c(pos.betw, pos.with), c(pos.betw, pos.with)]

    betw.cor <- betw.cor[c(pos.betw, pos.with), c(pos.betw, pos.with)]
    betw.se <- betw.se[c(pos.betw, pos.with), c(pos.betw, pos.with)]
    betw.stat <- betw.stat[c(pos.betw, pos.with), c(pos.betw, pos.with)]
    betw.p <- betw.p[c(pos.betw, pos.with), c(pos.betw, pos.with)]

  }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Combine Within-Group and Between-Group Results ####

  #...................
  ### Within-group correlations in the lower triangular ####
  if (isTRUE(tri.lower)) {

    # Within-group results
    wb.cor <- with.cor
    wb.se <- with.se
    wb.stat <- with.stat
    wb.p <- with.p

    # Between-group results
    wb.cor[upper.tri(wb.cor)] <- betw.cor[upper.tri(wb.cor)]
    wb.se[upper.tri(wb.se)] <- betw.se[upper.tri(wb.se)]
    wb.stat[upper.tri(wb.stat)] <- betw.stat[upper.tri(wb.stat)]
    wb.p[upper.tri(wb.p)] <- betw.p[upper.tri(wb.p)]

  #...................
  ### Within-group correlations in the upper triangular ####
  } else {

    # Within-group results
    wb.cor <- betw.cor
    wb.se <- betw.se
    wb.stat <- betw.stat
    wb.p <- betw.p

    # Between-group results
    wb.cor[upper.tri(wb.cor)] <- with.cor[upper.tri(wb.cor)]
    wb.se[upper.tri(wb.se)] <- with.se[upper.tri(wb.se)]
    wb.stat[upper.tri(wb.stat)] <- with.stat[upper.tri(wb.stat)]
    wb.p[upper.tri(wb.p)] <- with.p[upper.tri(wb.p)]

  }

  #...................
  ### Adjust p-values for multiple comparison ####
  if (isTRUE(p.adj != "none")) {

    wb.p[lower.tri(wb.p)] <- p.adjust(wb.p[lower.tri(wb.p)], method = p.adj)
    wb.p[upper.tri(wb.p)] <- p.adjust(wb.p[upper.tri(wb.p)], method = p.adj)

  }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Split Within-Group and Between-Group Results ####

  # Model estimation with SE
  if (isTRUE(se != "none")) {

    # Within-group results
    with.cor <- with.cor[which(apply(with.cor, 1L, function(y) !all(is.na(y)))), which(apply(with.cor, 2L, function(y) !all(is.na(y))))]
    with.se <- with.se[which(apply(with.se, 1L, function(y) !all(is.na(y)))), which(apply(with.se, 2L, function(y) !all(is.na(y))))]
    with.stat <- with.stat[which(apply(with.stat, 1L, function(y) !all(is.na(y)))), which(apply(with.stat, 2L, function(y) !all(is.na(y))))]
    with.p <- with.p[which(apply(with.p, 1L, function(y) !all(is.na(y)))), which(apply(with.p, 2L, function(y) !all(is.na(y))))]

    # Between-group results
    betw.cor <- betw.cor[which(apply(betw.cor, 1L, function(y) !all(is.na(y)))), which(apply(betw.cor, 2L, function(y) !all(is.na(y))))]
    betw.se <- betw.se[which(apply(betw.se, 1L, function(y) !all(is.na(y)))), which(apply(betw.se, 2L, function(y) !all(is.na(y))))]
    betw.stat <- betw.stat[which(apply(betw.stat, 1L, function(y) !all(is.na(y)))), which(apply(betw.stat, 2L, function(y) !all(is.na(y))))]
    betw.p <- betw.p[which(apply(betw.p, 1L, function(y) !all(is.na(y)))), which(apply(betw.p, 2L, function(y) !all(is.na(y))))]

  # Model estimation without SE
  } else {

    # Within-group results
    with.cor <- with.cor[which(apply(with.cor, 1L, function(y) !all(is.na(y)))), which(apply(with.cor, 2L, function(y) !all(is.na(y))))]

    # Between-group results
    betw.cor <- betw.cor[which(apply(betw.cor, 1L, function(y) !all(is.na(y)))), which(apply(betw.cor, 2L, function(y) !all(is.na(y))))]

  }

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Adjust p-values for multiple comparison ####

  if (isTRUE(p.adj != "none")) {

    with.p[lower.tri(with.p)] <- p.adjust(with.p[lower.tri(with.p)], method = p.adj)
    with.p[upper.tri(with.p)] <- p.adjust(with.p[upper.tri(with.p)], method = p.adj)

    betw.p[lower.tri(betw.p)] <- p.adjust(betw.p[lower.tri(betw.p)], method = p.adj)
    betw.p[upper.tri(betw.p)] <- p.adjust(betw.p[upper.tri(betw.p)], method = p.adj)

  }

  #_____________________________________________________________________________
  #
  # Return object --------------------------------------------------------------

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## lavaan summary ####

  lavaan.summary <- data.frame(c(paste("lavaan", lavaan::lavInspect(model.fit, what = "version")), "", "Estimator", "Standard Errors", "Missing Data", "", "",
                                 "Number of Observations", "Number of Clusters"),
                               c("", "",
                                 # Estimator
                                 estimator,
                                 # Standard errors
                                 switch(lavaan::lavTech(model.fit, what = "options")$se,
                                        "none" = "None",
                                        "standard" = "Conventional",
                                        "robust.huber.white" = "Huber-White"),
                                 # Missing data
                                 ifelse(isTRUE(complete), "None",
                                        switch(missing,
                                               "listwise" = "Listwise Deletion",
                                               "fiml" = "FIML")), "", "Used",
                                 # Number of observations
                                 lavaan::lavInspect(model.fit, what = "nobs"),
                                 # Number of clusters
                                 lavaan::lavInspect(model.fit, what = "nclusters")),
                               c(rep("", times = 6L), "Total", n.total, ""),
                               fix.empty.names = FALSE)

  #~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
  ## Object ####

  object <- list(call = match.call(),
                 type = "multilevel.cor",
                 data = x,
                 args = list(within = within, between = between, estimator = estimator, optim.method = optim.method, se = se, optim.switch = optim.switch, missing = missing, sig = sig, alpha = alpha, print = print,
                             split = split, order = order, tri = tri, tri.lower = tri.lower, p.adj = p.adj, digits = digits, p.digits = p.digits, as.na = as.na, write = write, append = append, check = check, output = output),
                 model = mod,
                 model.fit = model.fit,
                 check = list(vcov = check.vcov, theta.w = check.theta.w, theta.b = check.theta.b,
                              cov.lv.w = check.cov.lv.w, cov.lv.b = check.cov.lv.b),
                 result = list(summary = lavaan.summary,
                               wb.cor = wb.cor, wb.se = wb.se, wb.stat = wb.stat, wb.p = wb.p,
                               with.cor = with.cor, with.se = with.se, with.stat = with.stat, with.p = with.p,
                               betw.cor = betw.cor, betw.se = betw.se, betw.stat = betw.stat, betw.p = betw.p))

  class(object) <- "misty.object"

  #_____________________________________________________________________________
  #
  # Write Results --------------------------------------------------------------

  if (isTRUE(!is.null(write))) { .write.result(object = object, write = write, append = append) }

  #_____________________________________________________________________________
  #
  # Output ---------------------------------------------------------------------

  if (isTRUE(output)) { print(object, check = FALSE) }

  return(invisible(object))

}

#_______________________________________________________________________________

Try the misty package in your browser

Any scripts or data that you put into this service are public.

misty documentation built on Sept. 9, 2025, 5:23 p.m.