R/main.R

Defines functions str_affix get_model_names get_factor_score_validity_se get_factor_score_validity get_factor_score_coefficients get_model_implied_correlations check_matrix2lavaan lav2ram matrix2lavaan add_composite_scores add_factor_scores model_complete fixed2free sim_standardized sim_standardized_matrices

Documented in add_composite_scores add_factor_scores check_matrix2lavaan fixed2free get_factor_score_coefficients get_factor_score_validity get_factor_score_validity_se get_model_implied_correlations get_model_names lav2ram matrix2lavaan model_complete sim_standardized sim_standardized_matrices

#' Return model characteristics
#'
#' Function that takes a lavaan model with standardized parameters
#' and returns a list with model characteristics
#'
#' This function supports the `~` operator for regressions,
#' the `~~` for covariances (but not variances), and the `=~` latent
#' variable loadings. It does not support intercepts (e.g,. `y ~ 1`),
#' thresholds, scaling factors, formative factors, or equality constraints.
#' @export
#' @param m Structural model represented by lavaan syntax
#' @param max_iterations Maximum number of iterations before the algorithm fails
#' @param composite_threshold Loadings with absolute values less
#' than this threshold will not be counted as composite indicators
#' @return list of path and covariance coefficients
#' @examples
#' library(simstandard)
#' # lavaan model
#' m = "Latent_1 =~ 0.8 * Ob_1 + 0.7 * Ob_2 + 0.4 * Ob_3"
#'
#' sim_standardized_matrices(m)
sim_standardized_matrices <- function(m,
                                      max_iterations = 100,
                                      composite_threshold = NULL) {

  # Parameter Table
  pt <- lavaan::lavParTable(m, fixed.x = FALSE)

  # Checks----

  # Check for formative variables
  if (any(pt$op == "<~")) stop(
    "Formative variables (defined with <~) are not allowed for this function.")

  # Check for user-set variances
  if (any((pt$user != 0) & (pt$lhs == pt$rhs) & (pt$op == "~~"))) {
    pt_manual_var <- pt[(pt$user != 0) & (pt$lhs == pt$rhs) & (pt$op == "~~"), ]
    pt_manual_rows <- paste0(pt_manual_var$lhs,
      " ", pt_manual_var$op,
      " ", ifelse(is.na(pt_manual_var$ustart),
        "",
        paste0(pt_manual_var$ustart, " * ")
      ),
      pt_manual_var$rhs,
      collapse = "\n"
    )
    stop(paste0(
      "All variances are set automatically to create standardized data.",
      " You may not set variances manually. ",
      ifelse(nrow(pt_manual_var) > 1,
        "Remove the following parameters:\n",
        "Remove the following parameter:\n"
      ),
      pt_manual_rows
    ))
  }


  # Check for unset paths and covariances
  if (any(pt$free == 1, na.rm = TRUE)) {
    pt_unset <- pt[pt$free == 1, ]
    pt_unset_rows <- paste0(pt_unset$lhs,
      " ", pt_unset$op,
      " ", pt_unset$rhs,
      collapse = "\n"
    )
    warning(paste0(
      ifelse(nrow(pt_unset) > 1,
        paste0("Because the following relations were not set, ",
               "they are assumed to be 0:\n"),
        paste0("Because the following relationship was not set, ",
               "it is assumed to be 0:\n")
      ),
      pt_unset_rows
    ))
  }

  # Check for paths greater than 1
  if (any(abs(pt$ustart) > 1, na.rm = TRUE)) {
    pt_greater <- pt[abs(pt$ustart) > 1, ]
    pt_greater_rows <- paste0(pt_greater$lhs,
      " ", pt_greater$op,
      " ", pt_greater$ustart,
      " * ", pt_greater$rhs,
      collapse = "\n"
    )
    warningmessage <- paste0(
      "Although it is sometimes possible to set standardized parameters ",
      "greater than 1 or less than -1, it is rare to do so. More often ",
      "than not, it causes model convergence problems. Check to make sure ",
      "you set such a value on purpose. ",
      ifelse(
       nrow(pt_greater) > 1,
       "The following paths were set to values outside the range of -1 to 1:\n",
       "The following path was set to a value outside the range of -1 to 1:\n"
      ),
      pt_greater_rows
    )

    warning(warningmessage)
  }


  # Variable Names----
  v_all <- unique(c(pt$lhs, pt$rhs))
  v_latent <- unique(pt$lhs[pt$op == "=~"])
  v_observed <- v_all[!(v_all %in% v_latent)]
  v_indicator <- unique(pt$rhs[pt$op == "=~"])
  v_y <- unique(pt$lhs[pt$op == "~"])
  v_latent_endogenous <- v_latent[
    (v_latent %in% v_y) | (v_latent %in% v_indicator)]
  v_latent_exogenous <- v_latent[!(v_latent %in% v_latent_endogenous)]

  v_observed_endogenous <- v_observed[
    v_observed %in% v_y | v_observed %in% v_indicator]
  v_observed_exogenous <- v_observed[!v_observed %in% v_observed_endogenous]
  v_observed_indicator <- v_observed[v_observed %in% v_indicator]
  v_latent_indicator <- v_latent[v_latent %in% v_indicator]
  v_observed_y <- v_observed_endogenous[!(v_observed_endogenous %in%
                                            v_observed_indicator)]
  v_order <- c(v_observed, v_latent)
  v_error_y <- str_affix(v_observed_y, prefix = "e_")
  v_disturbance <- str_affix(v_latent_endogenous, prefix = "d_")
  v_error <- str_affix(v_observed_endogenous, prefix = "e_")
  v_exogenous <- c(v_latent_exogenous, v_observed_exogenous)
  v_endogenous <- c(v_latent_endogenous, v_observed_endogenous)
  v_residual <- c(v_disturbance, v_error)
  v_ellipse <- c(v_latent, v_residual)
  v_source <- c(v_exogenous, v_residual)
  v_factor_score <- v_ellipse[!(v_ellipse %in% v_error_y)]
  v_FS <- str_affix(v_factor_score, suffix = "_FS")

  # Set unspecified parameters to 0
  pt[is.na(pt[, "ustart"]), "ustart"] <- 0

  # Make RAM matrices----

  # Names for A, S and new S matrices
  vS <- vA <- c(v_endogenous, v_exogenous)

  # Number of Variables
  k <- length(vA)

  # Initialize A matrix and exogenous correlation matrix
  exo_cor <- A <- matrix(0, k, k, dimnames = list(vA, vA))



  # Assign loadings to A
  for (i in pt[pt[, "op"] == "=~", "id"]) {
    A[pt$rhs[i], pt$lhs[i]] <- pt$ustart[i]
  }

  # Assign regressions to A
  for (i in pt[pt[, "op"] == "~", "id"]) {
    A[pt$lhs[i], pt$rhs[i]] <- pt$ustart[i]
  }

  # Assign correlations to exo_cor
  diag(exo_cor) <- 1
  for (i in pt[(pt[, "op"] == "~~") & pt$lhs != pt$rhs, "id"]) {
    exo_cor[pt$lhs[i], pt$rhs[i]] <- pt$ustart[i]
    exo_cor[pt$rhs[i], pt$lhs[i]] <- pt$ustart[i]
  }

  # Solving for error variances and correlation matrix----

  # Column of k ones
  v1 <- matrix(1, k)

  # Initial estimate of error variances
  varS <- as.vector(v1 - (A * A) %*% v1)
  S <- diag(varS) %*% exo_cor %*% diag(varS)

  # Initial estimate of the correlation matrix
  iA <- solve(diag(k) - A)
  R <- iA %*% S %*% t(iA)

  # Set interaction count at 0
  iterations <- 0

  # Find values for S matrix
  while ((round(sum(diag(R)), 10) != k) * (iterations < max_iterations)) {
    R <- iA %*% S %*% t(iA)
    sdS <- diag(diag(S) ^ 0.5)
    S <- diag(diag(diag(k) - R)) + (sdS %*% exo_cor %*% sdS)
    diag(S)[diag(S) < 0] <- 0.00000001
    iterations <- iterations + 1
  }
  if (iterations >= max_iterations) {
    stop(paste0(
      "Model did not converge after ",
      max_iterations,
      " iterations because at least one variable had a negative variance: ",
      paste0(colnames(A)[(diag(S) == 0.00000001)], collapse = ", ")
    ))
  }

  dimnames(S) <- dimnames(A)

  # Filter Matrix
  filter_matrix <- diag((vA %in% v_observed) * 1)
  dimnames(filter_matrix) <- dimnames(A)

  # Big Matrices----

  A_residual_diag <- sqrt(diag(S[v_endogenous, v_endogenous, drop = FALSE]))
  if (length(A_residual_diag) > 1) {
    A_residual <- diag(A_residual_diag)
  } else {
    if (length(A_residual_diag) == 1) {
      A_residual <- matrix(A_residual_diag, nrow = 1, ncol = 1)
    } else {
      A_residual <- matrix(nrow = 0, ncol = 0)
    }
  }

  A_big <- rbind(
    cbind(A[v_endogenous, , drop = FALSE], A_residual),
    matrix(0, nrow = nrow(A), ncol = nrow(A) + length(v_residual))
  )
  v_big <- c(vA, v_residual)
  dimnames(A_big) <- list(v_big, v_big)

  # Initialize S_big
  S_big <- matrix(0,
    nrow = length(v_big),
    ncol = length(v_big),
    dimnames = dimnames(A_big)
  )

  # Insert off-diagonal values of S into S_big
  S_big[
    v_source,
    v_source
  ] <- exo_cor[c(v_exogenous, v_endogenous),
    c(v_exogenous, v_endogenous),
    drop = FALSE
  ]
  # Insert diagonal values of S_big
  diag(S_big) <- c(
    rep(0, length(v_endogenous)),
    rep(1, length(v_exogenous) + length(v_residual))
  )


  # Compute big correlation matrix of all variables
  iA_big <- solve(diag(nrow(A_big)) - A_big)
  R_big <- iA_big %*% S_big %*% t(iA_big)

  R_xx <- R_big[v_observed_indicator, v_observed_indicator, drop = FALSE]
  R_xy <- R_big[v_observed_indicator, v_factor_score, drop = FALSE]


  # Factor and composite scores ----

  if (length(v_observed_indicator) > 0) {
    i_Rxx <- solve(R_xx)

    A_factor_score <- i_Rxx %*% R_xy

    colnames(A_factor_score) <- v_factor_score

    if (length(v_latent) > 0) {
      v_composite_score <- paste0(v_latent, "_Composite")
    } else {
      v_composite_score <- character(0)
    }


    # Matrix of which observed variables are summed to create composite scores for each latent variable
    A_composite <- matrix(0,
                          nrow =  length(v_observed_indicator),
                          ncol = length(v_latent),
                          dimnames = list(v_observed_indicator, v_latent))





    find_observed_indicators <- function(v) {
      indicators <- pt[pt[,"op"] == "=~" & pt[,"lhs"] == v, "rhs"]
      observed_indicators <- indicators[indicators %in% v_observed_indicator]
      latent_indicators <- indicators[indicators %in% v_latent]
      if (length(latent_indicators > 0)) {
        for (li in latent_indicators) {
          observed_indicators <- c(observed_indicators,find_observed_indicators(li))
        }
      }
      unique(observed_indicators)
    }

    for (v in v_latent) {
      which_indicators <- find_observed_indicators(v)
      A_composite[which_indicators, v] <- sign(R_big[which_indicators, v])
    }


    CM_composite <- t(A_composite) %*%
      R[v_observed_indicator,
        v_observed_indicator,
        drop = FALSE] %*%
      A_composite

    A_composite_w <- A_composite %*%
      diag(diag(CM_composite) ^ -0.5, nrow = nrow(CM_composite))

    colnames(A_composite_w) <- v_composite_score

    colnames(A_factor_score) <- v_FS

    # Factor weights
    fw <- cbind(A_factor_score, A_composite_w)
    W <- matrix(0,
      nrow = nrow(A_big),
      ncol = nrow(A_big) + ncol(fw),
      dimnames = list(
        rownames(A_big),
        c(rownames(A_big), colnames(fw))
      )
    )

    diag(W) <- 1

    W[rownames(fw), colnames(fw)] <- fw

    # Grand correlation matrix of all variables
    R_all <- stats::cov2cor(t(W) %*% R_big %*% W)


    v_order_all <- c(v_order, v_residual, v_FS, v_composite_score)

    R_all <- R_all[v_order_all, v_order_all]
  } else {
    R_all <- R_big
    A_factor_score <- NULL
    A_composite_w <- NULL
    v_FS <- character(0)
    v_composite_score <- character(0)
  }

  # Make complete lavaan model syntax
  lavaan_variances <- paste0(
    v_order,
    " ~~ ",
    diag(S[v_order, v_order]),
    " * ",
    v_order,
    collapse = "\n")

  # Factor Score Validity
  factor_score_validity <- diag(R_all[v_FS,
                                      v_factor_score,
                                      drop = FALSE])
  names(factor_score_validity) <- v_FS
  factor_score_se <- sqrt(1 - factor_score_validity ^ 2)

  # Composite Score Validity
  composite_score_validity <- diag(R_all[v_latent,
                                         v_composite_score,
                                         drop = FALSE])
  names(composite_score_validity) <- v_composite_score


  # Return list ----

  l_names <- list(
    v_observed = v_observed,
    v_latent = v_latent,
    v_latent_exogenous = v_latent_exogenous,
    v_latent_endogenous = v_latent_endogenous,
    v_observed_exogenous = v_observed_exogenous,
    v_observed_endogenous = v_observed_endogenous,
    v_observed_indicator = v_observed_indicator,
    v_disturbance = v_disturbance,
    v_error = v_error,
    v_residual = v_residual,
    v_factor_score = str_affix(v_latent, suffix = "_FS"),
    v_factor_score_disturbance = str_affix(v_disturbance, suffix = "_FS"),
    v_factor_score_error = str_affix(v_error, suffix = "_FS"),
    v_composite_score = v_composite_score
  )

  l <- list(
    RAM_matrices = list(
      A = A[v_order, v_order],
      S = S[v_order, v_order],
      filter_matrix = filter_matrix[v_order, v_order],
      iA = iA[v_order, v_order]
    ),
    Correlations = list(
      R = R[v_order, v_order],
      R_all = R_all
    ),
    Coefficients = list(
      factor_score = A_factor_score,
      factor_score_validity = factor_score_validity,
      factor_score_se = factor_score_se,
      composite_score = A_composite_w,
      composite_score_validity = composite_score_validity
    ),
    lavaan_models = list(
      model_without_variances = m,
      model_with_variances = paste0(m, "\n# Variances\n", lavaan_variances),
      model_free = fixed2free(m)
    ),
    v_names = l_names,
    iterations = iterations
  )

  class(l) <- c("simstandard", class(l))
  l
}


#' Generates simulated data with standardized parameters.
#'
#' This function takes a lavaan model with standardized parameters
#' and simulates latent scores, errors, disturbances, and observed scores.
#'
#' This function supports the `~` operator for regressions, the `~~` for
#' covariances (but not variances), and the `=~` latent variable loadings.
#' It does not support intercepts (e.g,. `y ~ 1`), thresholds,
#' scaling factors, formative factors, or equality constraints.
#'
#' @export
#' @param m Structural model represented by lavaan syntax
#' @param n Number of simulated cases
#' @param observed Include observed variables
#' @param latent Include latent variables
#' @param errors Include observed error and latent disturbances variables
#' @param factor_scores Include factor score variables
#' @param composites Include composite variables
#' @param matrices Include matrices as attribute of tibble
#' @param ... Arguments passed to `simstandardized_matrices`
#' @return tibble with standardized data
#' @examples
#' library(simstandard)
#' # Lavaan model
#' m = "Latent_1 =~ 0.8 * Ob_1 + 0.7 * Ob_2 + 0.4 * Ob_3"
#'
#' # simulate 10 cases
#' sim_standardized(m, n = 10)
sim_standardized <- function(
  m,
  n = 1000,
  observed = TRUE,
  latent = TRUE,
  errors = TRUE,
  factor_scores = FALSE,
  composites = FALSE,
  matrices = FALSE,
  ...) {

  # Get main object from sim_standardized_matrices
  o <- sim_standardized_matrices(m, ...)

  # Names of variables in S (Symmetric) matrix

  S_names <- c(
    o$v_names$v_observed_exogenous,
    o$v_names$v_observed_endogenous,
    o$v_names$v_latent_exogenous,
    o$v_names$v_latent_endogenous
  )

  # Simulate exogenous variables in S matricx
  u <- mvtnorm::rmvnorm(
    n = n,
    sigma = o$RAM_matrices$S[S_names, S_names, drop = FALSE])
  colnames(u) <- c(
    o$v_names$v_observed_exogenous,
    o$v_names$v_error,
    o$v_names$v_latent_exogenous,
    o$v_names$v_disturbance
  )

  # Create all variables from exogenous variables
  v <- u %*% t(o$RAM_matrices$iA[S_names, S_names, drop = FALSE])

  # Make blank matrix with n rows
  d_blank <- matrix(nrow = n, ncol = 0)

  # Extract observed indicators of latent varibles
  d_observed_indicators <- v[, o$v_names$v_observed_indicator, drop = FALSE]

  # Calculate estimated factor scores
  if (length(o$v_names$v_observed_indicator) > 0) {
    d_factor_scores <- d_observed_indicators %*% o$Coefficients$factor_score
  } else {
    d_factor_scores <- d_blank
  }

  # Calculate composite scores
  if (length(o$v_names$v_observed_indicator) > 0) {
    d_composite_scores <- d_observed_indicators %*%
      o$Coefficients$composite_score
  } else {
    d_composite_scores <- d_blank
  }

  # Make data to be returned
  d <- tibble::as_tibble(
    cbind(
      v[, c(o$v_names$v_observed, o$v_names$v_latent), drop = FALSE],
      u[, c(o$v_names$v_disturbance, o$v_names$v_error), drop = FALSE],
      d_factor_scores,
      d_composite_scores
    )
  )



  # Decide which variables to return
  v_include <- character(0)
  if (observed) v_include <- c(v_include, o$v_names$v_observed)
  if (latent) v_include <- c(v_include, o$v_names$v_latent)
  if (errors) v_include <- c(v_include, o$v_names$v_error)
  if (errors) v_include <- c(v_include, o$v_names$v_disturbance)
  if (factor_scores) v_include <- c(v_include, o$v_names$v_factor_score)
  if (composites) v_include <- c(v_include, o$v_names$v_composite_score)

  d <- d[, v_include]

  # Attach metadata as attribute
  if (matrices) attr(d, "matrices") <- o

  # Return tibble
  return(d)
}

#' Remove fixed parameters from a lavaan model
#'
#' @export
#' @param m Structural model represented by lavaan syntax
#' @return character string representing lavaan model
#' @importFrom rlang .data
#' @examples
#' library(simstandard)
#' # lavaan model with fixed parameters
#' m = "
#' Latent_1 =~ 0.9 * Ob_11 + 0.8 * Ob_12 + 0.7 * Ob_13
#' Latent_2 =~ 0.9 * Ob_21 + 0.6 * Ob_22 + 0.4 * Ob_23
#' "
#' # Same model, but with fixed parameters removed.
#' m_free <- fixed2free(m)
#' cat(m_free)
fixed2free <- function(m) {
  m %>%
    lavaan::lavaanify(fixed.x = FALSE) %>%
    dplyr::filter(.data$lhs != .data$rhs) %>%
    dplyr::group_by(.data$lhs, .data$op) %>%
    dplyr::summarise(rhs = paste(.data$rhs, collapse = " + ")) %>%
    dplyr::arrange(dplyr::desc(.data$op)) %>%
    tidyr::unite("l", .data$lhs, .data$op, .data$rhs, sep = " ") %>%
    dplyr::pull(.data$l) %>%
    paste(collapse = "\n")

}


#' Function that takes a lavaan model with standardized paths and
#' loadings and returns a complete lavaan model syntax with
#' standardized variances
#'
#' @export
#' @param m Structural model represented by lavaan syntax
#' @return character string representing lavaan model
#' @examples
#' library(simstandard)
#' # lavaan model
#' m = "
#' Latent_1 =~ 0.9 * Ob_11 + 0.8 * Ob_12 + 0.7 * Ob_13
#' Latent_2 =~ 0.9 * Ob_21 + 0.6 * Ob_22 + 0.4 * Ob_23
#' Latent_2 ~ 0.6 * Latent_1
#' "
#' # Same lavaan syntax, but with standardized variances
#' m_complete <- model_complete(m)
#' cat(m_complete)
model_complete <- function(m) {
  sim_standardized_matrices(m)$lavaan_models$model_with_variances
}

#' For each latent variable in a structural model, add an estimated
#' factor score to observed data.
#'
#' @export
#' @param d A data.frame with observed data in standardized form (i.e, z-scores)
#' @param m A character string with lavaan model
#' @param mu Population mean of the observed scores. Factor scores
#' will also have this mean. Defaults to 0.
#' @param sigma Population standard deviation of the observed scores.
#' Factor scores will also have this standard deviation. Defaults to 1.
#' @param CI Add confidence intervals? Defaults to `FALSE`. If `TRUE`,
#' for each factor score, a lower and upper bound of the confidence
#' interval is created. For example, the lower bound of factor
#' score `X` is `X_LB`, and the upper bound is `X_UB`.
#' @param p confidence interval proportion. Defaults to 0.95
#' @param names_suffix A character string added to each factor score name
#' @param keep_observed_scores The observed scores are returned along
#' with the factor scores.
#' @param ... parameters passed to simstandardized_matrices
#' @return data.frame with observed data and estimated factor scores
#' @examples
#' library(simstandard)
#' # lavaan model
#' m = "
#' X =~ 0.9 * X1 + 0.8 * X2 + 0.7 * X3
#' "
#'
#' # Make data.frame for two cases
#' d <- data.frame(
#'   X1 = c(1.2, -1.2),
#'   X2 = c(1.5, -1.8),
#'   X3 = c(1.8, -1.1))
#'
#' # Compute factor scores for two cases
#' add_factor_scores(d, m)
add_factor_scores <- function(d,
                              m,
                              mu = 0,
                              sigma = 1,
                              CI = FALSE,
                              p = 0.95,
                              names_suffix = "_FS",
                              keep_observed_scores = TRUE,
                              ...) {
  sm <- sim_standardized_matrices(m, ...)

  # Coefficients for estimated factor scores
  v_FS <- paste0(sm$v_names$v_latent, "_FS")
  latent_factor_score <- sm$Coefficients$factor_score[, v_FS, drop = FALSE]

  # Get observed score names
  v_observed <- rownames(sm$Coefficients$factor_score)

  if (!all(v_observed %in% colnames(d))) stop(
    "Some observed variables specified in the model are missing from the data."
    )

  # Get observed data
  d_observed <- (as.matrix(d[, v_observed, drop = FALSE]) - mu) / sigma


  # Make factor scores
  d_factor_score <- (d_observed %*% latent_factor_score) * sigma + mu
  colnames(d_factor_score) <- sub(pattern = "_FS$",
                            replacement = names_suffix,
                            x = colnames(d_factor_score))

  # Bind factor scores to observed data
  if (keep_observed_scores) {
    d_all <- cbind(as.data.frame(d), as.data.frame(d_factor_score))
  } else {
    d_all <- as.data.frame(d_factor_score)
  }


  if (CI) {
    # Make CI
    z <- -1 * stats::qnorm((1 - p) / 2)
    FS_se <- sm$Coefficients$factor_score_se[v_FS]
    d_lower_bound <- d_factor_score - z * FS_se * sigma
    colnames(d_lower_bound) <- paste0(colnames(d_factor_score), "_LB")
    d_upper_bound <- d_factor_score + z * FS_se * sigma
    colnames(d_upper_bound) <- paste0(colnames(d_factor_score), "_UB")
    d_all <- cbind(d_all, d_lower_bound, d_upper_bound)
  }
  d_all
}

#' For each latent variable in a structural model, add a
#' composite score to observed data.
#'
#' @export
#' @param d A data.frame with observed data in standardized form (i.e, z-scores)
#' @param m A character string with lavaan model
#' @param mu Score means. Composite scores will also have this mean.
#' Defaults to 0.
#' @param sigma Score standard deviations. Composite scores will also have
#' this standard deviation. Defaults to 1.
#' @param names_suffix A character string added to each composite score name
#' @param keep_observed_scores The observed scores are returned along
#' with the composite scores.
#' @param ... parameters passed to simstandardized_matrices
#' @return data.frame with observed data and estimated factor scores
#' @examples
#' library(simstandard)
#' # lavaan model
#' m = "
#' X =~ 0.9 * X1 + 0.8 * X2 + 0.7 * X3
#' "
#'
#' # Make data.frame for two cases
#' d <- data.frame(
#'   X1 = c(1.2, -1.2),
#'   X2 = c(1.5, -1.8),
#'   X3 = c(1.8, -1.1))
#'
#' # Compute composite scores for two cases
#' add_composite_scores(d, m)
add_composite_scores <- function(d,
                                 m,
                                 mu = 0,
                                 sigma = 1,
                                 names_suffix = "_Composite",
                                 keep_observed_scores = TRUE,
                                 ...) {
  sm <- sim_standardized_matrices(m, ...)

  # Coefficients for composite scores
  l_composite_score <- sm$Coefficients$composite_score

  # Get observed score names
  v_observed <- rownames(sm$Coefficients$composite_score)

  # Get data column names
  d_names <- colnames(d)

  if (!all(v_observed %in% d_names)) stop(
    "Some observed variables specified in the model are missing from the data."
    )

  # Get observed data
  d_observed <- (as.matrix(d[, v_observed, drop = FALSE]) - mu) / sigma

  # Make composite scores
  d_composite_score <- (d_observed %*% l_composite_score) * sigma + mu
  colnames(d_composite_score) <- sub(pattern = "_Composite$",
                            replacement = names_suffix,
                            x = colnames(d_composite_score))

  # Bind composite scores to observed data
  if (keep_observed_scores) {
    d_all <- cbind(as.data.frame(d), as.data.frame(d_composite_score))
  } else {
    d_all <- as.data.frame(d_composite_score)
  }

  d_all
}

#' Create lavaan model syntax from matrix coefficients
#'
#' @export
#' @param measurement_model A matrix or data.frame with measurement model
#' loadings. Column names are latent variables. Row names or the first
#' column of a data.frame are indicator variables.
#' @param structural_model A matrix or data.frame with structural model
#' coefficients (i.e., regressions). Column names are "causal" variables.
#' Row names or the first column of a data.frame are "effect" variables.
#' @param covariances A matrix or data.frame with model covariances.
#' Column names must match the row names. If a data.frame, row variable
#' names can be specified in the first column.
#' @return a character string with lavaan syntax
#' @examples
#' library(simstandard)
#' # Specifying the measurement model:
#' # For a data.frame, the column names are latent variables,
#' # and the indicators can be specified as rownames.
#' m <- data.frame(X = c(0.7,0.8,0,0),
#'                 Y = c(0,0,0.8,0.9))
#' rownames(m) <- c("A", "B", "C", "D")
#' # Indicator variables can also be specified
#' # as the first column variable
#' # with subsequent column names as latent variables
#' m <- data.frame(Indicators = c("A", "B", "C", "D"),
#'                 X = c(0.7,0.8,0,0),
#'                 Y = c(0,0,0.8,0.9))
#' # Alternately, a matrix can be used:
#' m <- matrix(c(0.7,0.8,0,0,
#'               0,0,0.8,0.9),
#'               ncol = 2,
#'               dimnames = list(c("A", "B", "C", "D"),
#'                               c("X", "Y")))
#' # Specifying the structural coefficients:
#' # The regression coefficients of the structural model can be
#' # specified as either a data.frame or a matrix. Column names
#' # are the predictors and row names are the criterion variables.
#' # With a data.frame, criterion variables can alternataly be
#' # specified with as the first column.
#' s <- matrix(0.5, nrow = 1, ncol = 1, dimnames = list("Y", "X"))
#' # The covariance matrix must be symmetric. Can also be specified
#' # as a data. frame.
#' Sigma <- matrix(c(1, 0.3,
#'                   0.3, 1),
#'                 nrow = 2,
#'                 ncol = 2,
#'                 dimnames = list(c("B","C"),
#'                                 c("B","C")) )
#' model <- matrix2lavaan(measurement_model = m,
#'                        structural_model = s,
#'                        covariances = Sigma)
#' cat(model)
matrix2lavaan <- function(
  measurement_model = NULL,
  structural_model = NULL,
  covariances = NULL) {
  lav_m <- character(0)
  lav_s <- character(0)
  lav_c <- character(0)

  # Measurement model ----
  if (!is.null(measurement_model)) {


    measurement_model <- check_matrix2lavaan(
      m = measurement_model,
      mname = "Measurement model")

    testcol_m <- colnames(measurement_model)[1]
    lav_m <- measurement_model %>%
      dplyr::rename(Test = !!testcol_m) %>%
      tidyr::gather(key = "Construct",
                    value = "Loading",
                    -1,
                    factor_key = TRUE) %>%
      dplyr::filter(.data$Loading != 0) %>%
      dplyr::mutate(
        model = paste0(.data$Loading, " * ", .data$Test)) %>%
      dplyr::group_by(.data$Construct) %>%
      dplyr::summarise(
        model = paste0(
          .data$model,
          collapse = " + ")) %>%
      dplyr::summarise(
        model = paste0(
          .data$Construct,
          " =~ ",
          .data$model,
          collapse = "\n")) %>%
      dplyr::pull(.data$model)

  }

  # Structural model ----
  if (!is.null(structural_model)) {

    structural_model <- check_matrix2lavaan(
      m = structural_model,
      mname = "Structural model")

    testcol_s <- colnames(structural_model)[1]
    lav_s <- structural_model %>%
      dplyr::rename(Criterion = !!testcol_s) %>%
      tidyr::gather(key = "Predictor",
                    value = "Coefficient",
                    -1,
                    factor_key = TRUE) %>%
      dplyr::filter(.data$Coefficient != 0) %>%
      dplyr::mutate(
        model = paste0(.data$Coefficient, " * ", .data$Predictor)) %>%
      dplyr::group_by(.data$Criterion) %>%
      dplyr::summarise(
        model = paste0(
          .data$model,
          collapse = " + ")) %>%
      dplyr::summarise(
        model = paste0(
          .data$Criterion,
          " ~ ",
          .data$model,
          collapse = "\n")) %>%
      dplyr::pull(.data$model)

  }

  # Covariances ----
  if (!is.null(covariances)) {


    covariances <- check_matrix2lavaan(
      m = covariances,
      mname = "Covariances")

    mcovariances <- as.matrix(covariances[, -1, drop = FALSE])
    rownames(mcovariances) <- colnames(mcovariances)
    if (!isSymmetric(mcovariances)) stop("covariances must be symmetric.")

    for (j in seq(2, ncol(covariances))) {
      for (i in seq_len(nrow(covariances))) {
        if (i + 1 < j) covariances[i, j] <- NA
        if (i + 1 == j & covariances[i, j] == 1) covariances[i, j] <- NA
      }
    }

    testcol_covariances <- colnames(covariances)[1]
    lav_c <- covariances %>%
      dplyr::rename(Test = !!testcol_covariances) %>%
      tidyr::gather(key = "Construct",
                    value = "Coefficient",
                    -1,
                    factor_key = TRUE) %>%
      dplyr::filter(!is.na(.data$Coefficient)) %>%
      dplyr::filter(.data$Coefficient != 0) %>%
      dplyr::mutate(
        model = paste0(.data$Coefficient, " * ", .data$Test)) %>%
      dplyr::group_by(.data$Construct) %>%
      dplyr::summarise(
        model = paste0(
          .data$model,
          collapse = " + ")) %>%
      dplyr::summarise(
        model = paste0(
          .data$Construct,
          " ~~ ",
          .data$model,
          collapse = "\n")) %>%
      dplyr::pull(.data$model)
  }

  paste(lav_m, lav_s, lav_c, sep = "\n")

}

#' Extract standardized RAM matrices from a lavaan object
#'
#' @export
#' @param fit An object of class lavaan
#' @return list of RAM matrices A (asymmetric paths),
#' S (symmetric paths), and F (filter matrix)
lav2ram <- function(fit) {
  pt <- lavaan::standardizedSolution(fit)
  pt$id <- seq_len(nrow(pt))
  v_all <- unique(c(pt$lhs, pt$rhs))
  v_latent <- unique(pt$lhs[pt$op == "=~"])
  v_observed <- v_all[!(v_all %in% v_latent)]

  k <- length(v_all)
  A <- matrix(0, nrow = k, ncol = k, dimnames = list(v_all, v_all))
  S <- matrix(0, nrow = k, ncol = k, dimnames = list(v_all, v_all))
  F_matrix <- A
  diag(F_matrix) <- 1
  F_matrix <- F_matrix[v_observed, ]

  # Assign loadings to A
  for (i in pt[pt[, "op"] == "=~", "id"]) {
    A[pt$rhs[i], pt$lhs[i]] <- pt$est.std[i]
  }

  # Assign regressions to A
  for (i in pt[pt[, "op"] == "~", "id"]) {
    A[pt$lhs[i], pt$rhs[i]] <- pt$est.std[i]
  }

  # Assign correlations to exo_cor

  for (i in pt[(pt[, "op"] == "~~"), "id"]) {
    S[pt$lhs[i], pt$rhs[i]] <- pt$est.std[i]
    S[pt$rhs[i], pt$lhs[i]] <- pt$est.std[i]
  }

  list(A = A, S = S, F = F_matrix)

}

#' Checks matrices for matrix2lavaan function
#'
#' @param m matrix, data.frame or tibble
#' @param mname Name of m
#' @keywords internal
#' @usage NULL
check_matrix2lavaan <- function(m, mname) {

  if ("matrix" %in% class(m)) {
    if (is.null(rownames(m))) stop(paste(mname, "must have row names."))
    if (is.null(colnames(m))) stop(paste(mname, "must have column names."))
    m <- as.data.frame(m) %>%
      tibble::rownames_to_column(var = "Test")
  }

  if (!("data.frame" %in% class(m))) {
    stop(
      paste(mname, "must be a data.frame, tibble, or matrix."))
  }

  if (!(purrr::map_chr(m[, 1, drop = FALSE],
                       class) %in% c("character", "factor"))) {
    if (any(rownames(m) == as.character(seq_len(length(
      rownames(m)
    ))))) {
      stop(
        paste(mname,
              paste0("must either have indicator variable names ",
                     "in the first column or as rownames of the data.frame."))
      )
    } else
      m <-
        tibble::rownames_to_column(m, var = "Test")

  }

  m
}


#' Return model-implied correlation matrix
#'
#' Function that takes a lavaan model with standardized parameters and
#' returns a model-implied correlation matrix
#' @export
#' @param m Structural model represented by lavaan syntax or output
#' of sim_standardized_matrices function.
#' @param observed Include observed variables
#' @param latent Include latent variables
#' @param errors Include observed error and latent disturbances variables
#' @param factor_scores Include factor score variables
#' @param composites Include composite variables
#' @param ... parameters passed to the `sim_standardized_matrices` function
#' @return A correlation matrix
#' @examples
#' library(simstandard)
#' # lavaan model
#' m = "Latent_1 =~ 0.8 * Ob_1 + 0.7 * Ob_2 + 0.4 * Ob_3"
#'
#' get_model_implied_correlations(m)
get_model_implied_correlations <- function(m,
                                           observed = TRUE,
                                           latent = FALSE,
                                           errors = FALSE,
                                           factor_scores = FALSE,
                                           composites = FALSE,
                                           ...) {
  if ("simstandard" %in% class(m)) {
    fit <- m
  } else {
      fit <- sim_standardized_matrices(m, ...)
      }

  # Variable names
  v_names <- character(0)

  # Observed Variable Names
  if (observed) v_names <- c(v_names, fit$v_names$v_observed)

  # Latent Variable Names
  if (latent) v_names <- c(v_names, fit$v_names$v_latent)

  # Error Variable Names
  if (errors) v_names <- c(v_names, fit$v_names$v_residual)

  # Factor-Score Variable Names
  if (factor_scores) v_names <- c(v_names, paste0(fit$v_names$v_latent, "_FS"))

  # Composite Variable Names
  if (composites) v_names <- c(v_names, fit$v_names$v_composite_score)

  # Return correlation matrix
  fit$Correlations$R_all[v_names, v_names]
}

#' Return factor score coefficients
#'
#' @param m Structural model represented by lavaan syntax or output
#' of sim_standardized_matrices function.
#' @param latent Include latent variables.
#' @param errors Include observed error and latent disturbances variables.
#' @param ... parameters passed to the `sim_standardized_matrices` function
#'
#' @return A matrix of factor score coefficients
#' @export
#'
#' @examples
#' m <- "
#' A =~ 0.5 * A1 + 0.8 * A2 + 0.8 * A3
#' B =~ 0.5 * B1 + 0.8 * B2 + 0.8 * B3
#' B ~ 0.5 * A
#' "
#' get_factor_score_coefficients(m)
get_factor_score_coefficients <- function(m,
                                          latent = TRUE,
                                          errors = FALSE,
                                          ...) {
  if ("simstandard" %in% class(m)) {
    fit <- m
  } else {
    fit <- sim_standardized_matrices(m, ...)
    }

  # Variable names
  v_names <- character(0)

  # Latent Variable Names
  if (latent) v_names <- c(v_names, fit$v_names$v_latent)

  # Error Variable Names
  if (errors)
    v_names <- c(v_names, fit$v_names$v_residual)

  v_names <- paste0(v_names, "_FS")

  fit$Coefficients$factor_score[, v_names, drop = FALSE]

}


#' Return factor score validity coefficients
#'
#' @param m Structural model represented by lavaan syntax or output
#' of sim_standardized_matrices function.
#' @param latent Include latent variables.
#' @param errors Include observed error and latent disturbances variables.
#' @param ... parameters passed to the `sim_standardized_matrices` function
#'
#' @return A matrix of validity coefficients
#' @export
#'
#' @examples
#' m <- "
#' A =~ 0.5 * A1 + 0.8 * A2 + 0.8 * A3
#' B =~ 0.5 * B1 + 0.8 * B2 + 0.8 * B3
#' B ~ 0.5 * A
#' "
#' get_factor_score_validity(m)
get_factor_score_validity <- function(m,
                                      latent = TRUE,
                                      errors = FALSE,
                                      ...) {
  if ("simstandard" %in% class(m)) {
    fit <- m
  } else {
    fit <- sim_standardized_matrices(m, ...)
    }

  # Variable names
  v_names <- character(0)

  # Latent Variable Names
  if (latent) v_names <- c(v_names, fit$v_names$v_latent)

  # Error Variable Names
  if (errors) v_names <- c(v_names, fit$v_names$v_residual)

  v_names <- paste0(v_names, "_FS")

  fit$Coefficients$factor_score_validity[v_names]
}

#' Return factor score validity coefficient standard errors
#'
#' @param m Structural model represented by lavaan syntax or output
#' of sim_standardized_matrices function.
#' @param latent Include latent variables.
#' @param errors Include observed error and latent disturbances variables.
#' @param ... parameters passed to the `sim_standardized_matrices` function
#'
#' @return A matrix of factor score standard errors
#' @export
#'
#' @examples
#' m <- "
#' A =~ 0.5 * A1 + 0.8 * A2 + 0.8 * A3
#' B =~ 0.5 * B1 + 0.8 * B2 + 0.8 * B3
#' B ~ 0.5 * A
#' "
#' get_factor_score_validity_se(m)
get_factor_score_validity_se <- function(m,
                                      latent = TRUE,
                                      errors = FALSE,
                                      ...) {
  if ("simstandard" %in% class(m)) {
    fit <- m
  } else {
    fit <- sim_standardized_matrices(m, ...)
    }

  # Variable names
  v_names <- character(0)

  # Latent Variable Names
  if (latent) v_names <- c(v_names, fit$v_names$v_latent)

  # Error Variable Names
  if (errors) v_names <- c(v_names, fit$v_names$v_residual)

  v_names <- paste0(v_names, "_FS")

  fit$Coefficients$factor_score_se[v_names]
}


#' Return model names
#'
#' @param m Structural model represented by lavaan syntax or
#' output of sim_standardized_matrices function.
#' @param ... parameters passed to the `sim_standardized_matrices` function
#'
#' @return A list of variable names
#' @export
#'
#' @examples
#' m <- "
#' A =~ 0.5 * A1 + 0.8 * A2 + 0.8 * A3
#' B =~ 0.5 * B1 + 0.8 * B2 + 0.8 * B3
#' B ~ 0.5 * A
#' "
#' get_model_names(m)
get_model_names <- function(m,
                            ...) {
  if ("simstandard" %in% class(m)) {
    fit <- m
  } else {
    fit <- sim_standardized_matrices(m, ...)
    }

  fit$v_names
}

#' Affix a prefix and/or suffix to a vector, but not if the vector is empty
#'
#' @param prefix a vector
#' @param suffix a vector
#' @param ... arguments passed on to paste0
#' @noRd
str_affix <- function(x, prefix = "", suffix = "", ...) {
  # when ready to use R >= 4.0.1, use paste0 with recycle0 = TRUE
  if (length(x) == 0) return(character(0))
  paste0(prefix, x, suffix, ...)
}
wjschne/simstandard documentation built on April 13, 2024, 4:44 p.m.