#' Exploratory Graph Model
#'
#' @description Function to fit the Exploratory Graph Model
#'
#' @param data Matrix or data frame.
#' Should consist only of the variables to be used in the analysis.
#' Can be raw data or a correlation matrix (if a correlation matrix is
#' provided, then the sample size must be passed using the argument \code{n})
#'
#' @param EGM.model Character vector (length = 1).
#' Sets the procedure to conduct \code{EGM}.
#' Available options:
#'
#' \itemize{
#'
#' \item \code{"EGA"} (default) --- Applies \code{\link[EGAnet]{EGA}} to obtain the
#' (sparse) regularized network structure, communities, and memberships
#'
#' \item \code{"standard"} --- Applies the standard EGM model which
#' estimates communities based on the non-regularized empirical partial
#' correlation matrix and sparsity is set using \code{p.in} and \code{p.out}
#'
#' }
#'
#' @param communities Numeric vector (length = 1).
#' Number of communities to use for the \code{"standard"} type of EGM.
#' Defaults to \code{NULL}.
#' When \code{NULL}, the communities and memberships are obtained from the
#' Walktrap algorithm (\code{\link[igraph]{cluster_walktrap}}) applied to
#' the empirical non-regularized partial correlation matrix
#'
#' @param structure Numeric or character vector (length = \code{ncol(data)}).
#' Can be theoretical factors or the structure detected by \code{\link[EGAnet]{EGA}}.
#' Defaults to \code{NULL}
#'
#' @param search Boolean (length = 1).
#' Whether a search over parameters should be conducted.
#' Defaults to \code{FALSE}.
#' Set to \code{TRUE} to search over a variety of parameter settings and
#' select the model that optimizes the \code{opt} objective
#'
#' @param p.in Numeric vector (length = 1).
#' Probability that a node is randomly linked to other nodes in the same community.
#' Within-community edges are set to zero based on \code{quantile(x, prob = 1 - p.in)},
#' ensuring that the lowest edge values are set to zero (i.e., most probable to \emph{not}
#' be randomly connected).
#' Only used for \code{EGM.model = "standard"}.
#' Defaults to \code{NULL} but must be set
#'
#' @param p.out Numeric vector (length = 1).
#' Probability that a node is randomly linked to other nodes \emph{not} in the same community.
#' Between-community edges are set to zero based on \code{quantile(x, prob = 1 - p.out)},
#' ensuring that the lowest edge values are set to zero (i.e., most probable to \emph{not}
#' be randomly connected).
#' Only used for \code{EGM.model = "standard"}.
#' Defaults to \code{NULL} but must be set
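#'
#' For both \code{p.in} and \code{p.out}, the thresholding rule works as in
#' this sketch (\code{x} stands in for a block of absolute edge weights and
#' \code{0.75} for the chosen probability; values are illustrative only):
#'
#' \preformatted{
#' x <- c(0.05, 0.10, 0.20, 0.40)
#' x[x < quantile(x, probs = 1 - 0.75)] <- 0
#' # x is now 0.00 0.10 0.20 0.40 (the lowest edges are removed)
#' }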
#'
#' @param opt Character vector (length = 1).
#' Fit index used to select among models when searching
#' (only applies when \code{search = TRUE}).
#' Available options include:
#'
#' \itemize{
#'
#' \item \code{"AIC"}
#'
#' \item \code{"BIC"}
#'
#' \item \code{"CFI"}
#'
#' \item \code{"chisq"}
#'
#' \item \code{"logLik"}
#'
#' \item \code{"RMSEA"}
#'
#' \item \code{"SRMR"}
#'
#' \item \code{"TEFI"}
#'
#' \item \code{"TEFI.adj"}
#'
#' \item \code{"TLI"}
#'
#' }
#'
#' Defaults to \code{"SRMR"}
#'
#' @param constrained Boolean (length = 1).
#' Whether memberships of the communities should
#' be added as a constraint when optimizing the network loadings.
#' Defaults to \code{TRUE}, which ensures that each variable's assigned loading
#' is never smaller than any of its cross-loadings.
#' Set to \code{FALSE} to freely estimate each loading, similar to exploratory factor analysis
#'
#' @param verbose Boolean (length = 1).
#' Should progress be displayed?
#' Defaults to \code{TRUE}.
#' Set to \code{FALSE} to not display progress
#'
#' @param ... Additional arguments to be passed on to
#' \code{\link[EGAnet]{auto.correlate}},
#' \code{\link[EGAnet]{network.estimation}},
#' \code{\link[EGAnet]{community.detection}},
#' \code{\link[EGAnet]{community.consensus}},
#' \code{\link[EGAnet]{community.unidimensional}},
#' \code{\link[EGAnet]{EGA}}, and
#' \code{\link[EGAnet]{net.loads}}
#'
#' @examples
#' # Get depression data
#' data <- depression[,24:44]
#'
#' # Estimate EGM (using EGA)
#' egm_ega <- EGM(data)
#'
#' # Estimate EGM (using EGA) specifying communities
#' egm_ega_communities <- EGM(data, communities = 3)
#'
#' # Estimate EGM (using EGA) specifying structure
#' egm_ega_structure <- EGM(
#' data, structure = c(
#' 1, 1, 1, 2, 1, 1, 1,
#' 1, 1, 1, 3, 2, 2, 2,
#' 2, 3, 3, 3, 3, 3, 2
#' )
#' )
#'
#' # Estimate EGM (using standard)
#' egm_standard <- EGM(
#' data, EGM.model = "standard",
#' communities = 3, # specify number of communities
#' p.in = 0.95, # probability of edges *in* each community
#' p.out = 0.80 # probability of edges *between* each community
#' )
#'
#' \dontrun{
#' # Estimate EGM (using EGA search)
#' egm_ega_search <- EGM(
#' data, EGM.model = "EGA", search = TRUE
#' )
#'
#' # Estimate EGM (using EGA search and AIC criterion)
#' egm_ega_search_AIC <- EGM(
#' data, EGM.model = "EGA", search = TRUE, opt = "AIC"
#' )
#'
#' # Estimate EGM (using search)
#' egm_search <- EGM(
#' data, EGM.model = "standard", search = TRUE,
#' communities = 3, # need communities or structure
#' p.in = 0.95 # only need 'p.in'
#' )}
#'
#' @author Hudson F. Golino <hfg9s at virginia.edu> and Alexander P. Christensen <alexpaulchristensen@gmail.com>
#'
#' @export
#'
# Estimate EGM ----
# Updated 06.11.2024
EGM <- function(
data, EGM.model = c("standard", "EGA"),
communities = NULL, structure = NULL, search = FALSE,
p.in = NULL, p.out = NULL,
opt = c(
"AIC", "BIC", "CFI",
"chisq", "logLik", "RMSEA",
"SRMR", "TEFI", "TEFI.adj", "TLI"
), constrained = TRUE, verbose = TRUE, ...
)
{
# Set default
EGM.model <- set_default(EGM.model, "ega", EGM)
opt <- set_default(opt, "srmr", EGM)
# Set up EGM type internally
EGM.type <- switch(
EGM.model,
"ega" = swiftelse(search, "ega.search", "ega"),
"standard" = swiftelse(search, "search", "standard")
)
# Detect TEFI adjusted fit
if(opt == "tefi.adj"){
experimental("EGM")
}
# Check data and structure
data <- EGM_errors(
data, EGM.type, communities, structure,
p.in, p.out, constrained, verbose, ...
)
# Switch and return results based on type
return(
switch(
EGM.type,
"standard" = EGM.standard(data, communities, structure, p.in, p.out, opt, constrained, ...),
"search" = EGM.search(data, communities, structure, p.in, opt, constrained, verbose, ...),
"ega" = EGM.EGA(data, structure, opt, constrained, ...),
"ega.search" = EGM.EGA.search(data, communities, structure, opt, constrained, verbose, ...)
)
)
}
#' @noRd
# EGM Errors ----
# Updated 05.11.2024
EGM_errors <- function(
data, EGM.type, communities, structure,
p.in, p.out, constrained, verbose, ...
)
{
# 'data' errors
object_error(data, c("matrix", "data.frame", "tibble"), "EGM")
# Check for tibble
if(get_object_type(data) == "tibble"){
data <- as.data.frame(data)
}
# Check for NULL communities
if(!is.null(communities)){
# If not NULL, check 'communities' errors
typeof_error(communities, "numeric", "EGM")
object_error(communities, "vector", "EGM")
length_error(communities, 1, "EGM")
}
# Check for NULL structure
if(!is.null(structure)){
# If not NULL, check 'structure' errors
typeof_error(structure, c("character", "numeric"), "EGM")
object_error(structure, "vector", "EGM")
length_error(structure, dim(data)[2], "EGM")
}
# Check first for parameters involved in both search and standard
if(!grepl("ega", EGM.type)){
# Check for NULL in 'p.in'
if(is.null(p.in)){
.handleSimpleError(
h = stop,
msg = paste0(
"Input for 'p.in' is `NULL`. A value between 0 and 1 for 'p.in' must be provided."
),
call = "EGM"
)
}
# Check 'p.in' errors
typeof_error(p.in, "numeric", "EGM")
range_error(p.in, c(0, 1), "EGM")
length_error(p.in, c(1, communities), "EGM")
}
# 'constrained' errors
length_error(constrained, 1, "EGM")
typeof_error(constrained, "logical", "EGM")
# Check for EGM type
if(EGM.type == "search"){
# 'verbose' errors
length_error(verbose, 1, "EGM")
typeof_error(verbose, "logical", "EGM")
}else if(EGM.type == "standard"){
# Check for NULL in 'p.out'
if(is.null(p.out)){
.handleSimpleError(
h = stop,
msg = paste0(
"Input for 'p.out' is `NULL`. A value between 0 and 1 for 'p.out' must be provided."
),
call = "EGM"
)
}
# Check 'p.out' errors
typeof_error(p.out, "numeric", "EGM")
range_error(p.out, c(0, 1), "EGM")
length_error(p.out, c(1, communities), "EGM")
}
# Check for usable data
if(needs_usable(list(...))){
data <- usable_data(data, FALSE)
}
# Return data in case of tibble
return(data)
}
#' @exportS3Method
# S3 Print Method ----
# Updated 09.11.2024
print.EGM <- function(x, digits = 3, ...)
{
# Return EGA results
print(x$EGA)
# Add break
cat("\n\n----\n\n")
# Send title
cat("Fit based on Optimized Loadings\n\n")
# Add fit metrics
print(format_decimal(x$model$optimized$fit, digits), quote = FALSE)
}
#' @exportS3Method
# S3 Summary Method ----
# Updated 09.11.2024
summary.EGM <- function(object, digits = 3, ...)
{
print(object) # same as print
}
#' @exportS3Method
# S3 Plot Method ----
# Updated 22.10.2024
plot.EGM <- function(x, ...)
{
# Return EGA plot
plot(x$EGA)
}
# LOADINGS ----
#' @noRd
# Network loadings to partial correlations ----
# Updated 14.10.2024
nload2pcor <- function(loadings)
{
# Compute cross-product of loadings (implied common covariance)
P <- tcrossprod(loadings)
# Obtain interdependence
interdependence <- sqrt(rowSums(loadings^2))
# Set diagonal to interdependence
diag(P) <- interdependence
# Compute matrix I
I <- diag(sqrt(1 / interdependence))
# Compute zero-order correlations
R <- I %*% P %*% I
# Obtain inverse covariance matrix
INV <- solve(R)
# Compute matrix D
D <- diag(sqrt(1 / diag(INV)))
# Compute partial correlations
P <- -D %*% INV %*% D
# Set diagonal to zero
diag(P) <- 0
# Return partial correlation
return(P)
}
#' @noRd
# Network loadings to correlations ----
# Updated 14.10.2024
nload2cor <- function(loadings)
{
# Compute cross-product of loadings (implied common covariance)
P <- tcrossprod(loadings)
# Obtain interdependence
interdependence <- sqrt(rowSums(loadings^2))
# Set diagonal to interdependence
diag(P) <- interdependence
# Compute matrix I
I <- diag(sqrt(1 / interdependence))
# Return correlation
return(I %*% P %*% I)
}
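# A minimal sketch (assuming the internal `cor2pcor` helper is available)
# checking that `nload2cor` and `nload2pcor` agree: the partial correlations
# implied by a loading matrix should match `cor2pcor` applied to the implied
# zero-order correlations; the loading values below are illustrative only
if(FALSE){
toy_loadings <- matrix(
c(0.60, 0.55, 0.50, 0.10, 0.05, 0.00,
0.00, 0.05, 0.10, 0.50, 0.55, 0.60),
ncol = 2
)
implied_R <- nload2cor(toy_loadings)
# Compare lower triangles only (diagonal conventions may differ)
max(abs(
nload2pcor(toy_loadings) - cor2pcor(implied_R)
)[lower.tri(implied_R)]) # effectively zero
}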
# OPTIMIZATION ----
#' @noRd
# Estimated loadings cost (based on SRMR) ----
# Updated 06.11.2024
srmr_N_cost <- function(
loadings_vector, zeros, R,
loading_structure, rows,
constrained, lower_triangle, ...
)
{
# Check for constraint
if(constrained){
# Assemble loading matrix
loadings_matrix <- matrix(loadings_vector * zeros, nrow = rows, byrow = TRUE)
# Obtain assigned loadings
assign_loadings <- loadings_matrix[loading_structure]
# Transpose loadings matrix
loadings_matrix <- t(loadings_matrix)
# Obtain differences
differences <- abs(loadings_matrix) - assign_loadings
# Obtain difference values
difference_values <- (differences * (differences > 0))^2
# Convert loadings to implied correlations following `nload2cor`
# Uses raw code to avoid overhead of additional function calls
# Compute partial correlation (decrease loadings based on constraints)
P <- tcrossprod(loadings_matrix)
# Obtain interdependence
interdependence <- sqrt(rowSums(loadings_matrix^2))
# Set diagonal to interdependence
diag(P) <- interdependence
# Compute matrix I
I <- diag(sqrt(1 / interdependence))
# Get implied R
implied_R <- I %*% P %*% I
# Check for positive definite
return(
swiftelse(
!anyNA(implied_R) && is_positive_definite(implied_R), # return SRMR
srmr(R, implied_R) + sqrt(mean(difference_values)), Inf
)
)
}else{ # Without constraints, send it
# Assemble loading matrix
loadings_matrix <- matrix(loadings_vector * zeros, ncol = rows)
# Compute partial correlation
P <- tcrossprod(loadings_matrix)
# Obtain interdependence
interdependence <- sqrt(rowSums(loadings_matrix^2))
# Set diagonal to interdependence
diag(P) <- interdependence
# Compute matrix I
I <- diag(sqrt(1 / interdependence))
# Get implied R
implied_R <- I %*% P %*% I
# Check for positive definite
return(
swiftelse(
!anyNA(implied_R) && is_positive_definite(implied_R), # return SRMR
srmr(R, implied_R), Inf
)
)
}
}
#' @noRd
# Estimated loadings gradient (based on SRMR) ----
# Updated 06.11.2024
srmr_N_gradient <- function(
loadings_vector, zeros, R,
loading_structure, rows,
constrained, lower_triangle, ...
)
{
# Check for constraint
if(constrained){
# Assemble loading matrix
loadings_matrix <- matrix(loadings_vector * zeros, nrow = rows, byrow = TRUE)
# Obtain assigned loadings
assign_loadings <- loadings_matrix[loading_structure]
# Transpose loadings matrix
loadings_matrix <- t(loadings_matrix)
# Obtain differences
differences <- abs(loadings_matrix) - assign_loadings
# Obtain difference values
difference_values <- (differences * (differences > 0))
# Convert loadings to implied correlations following `nload2cor`
# Uses raw code to avoid overhead of additional function calls
# Compute partial correlation
implied_P <- tcrossprod(loadings_matrix)
# Obtain interdependence
interdependence <- sqrt(rowSums(loadings_matrix^2))
# Set diagonal to interdependence
diag(implied_P) <- interdependence
# Compute matrix I
I <- diag(sqrt(1 / interdependence))
# Compute error
error <- (I %*% implied_P %*% I - R)[lower_triangle]
# Derivative of error with respect to P (covariance)
dError <- matrix(0, nrow = ncol(R), ncol = ncol(R))
dError[lower_triangle] <- 2 * error / length(error)
dError <- dError + t(dError)
# Return gradient
return(
as.vector( # (2x leads to fewer iterations)
t(crossprod(2 * loadings_matrix, I %*% tcrossprod(dError, I))) +
difference_values
) * zeros
)
}else{ # Without constraints, send it
# Assemble loading matrix
loadings_matrix <- matrix(loadings_vector * zeros, ncol = rows)
# Compute partial covariance
implied_P <- tcrossprod(loadings_matrix)
# Obtain interdependence
interdependence <- sqrt(rowSums(loadings_matrix^2))
# Set diagonal to interdependence
diag(implied_P) <- interdependence
# Compute matrix I
I <- diag(sqrt(1 / interdependence))
# Compute error
error <- (I %*% implied_P %*% I - R)[lower_triangle]
# Derivative of error with respect to P (covariance)
dError <- matrix(0, nrow = ncol(R), ncol = ncol(R))
dError[lower_triangle] <- 2 * error / length(error)
dError <- dError + t(dError)
# Return gradient (2x leads to fewer iterations)
return(as.vector(t(crossprod(2 * loadings_matrix, I %*% tcrossprod(dError, I)))) * zeros)
}
}
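# A minimal sketch (assuming the internal helpers `srmr`,
# `is_positive_definite`, and `swiftelse` are available) of evaluating the
# cost and gradient once at a feasible starting point, as `nlminb` does
# internally during optimization; all values below are illustrative only
if(FALSE){
set.seed(1)
R <- cor(matrix(rnorm(300 * 6), ncol = 6))
membership <- c(1, 1, 1, 2, 2, 2)
# Transposed loading structure (communities x variables), as in `EGM.standard`
loading_structure <- matrix(FALSE, nrow = 2, ncol = 6)
for(i in seq_along(membership)){
loading_structure[membership[i], i] <- TRUE
}
start <- rep(0.4, 12) # 2 communities x 6 variables
srmr_N_cost(
start, zeros = rep(TRUE, 12), R = R,
loading_structure = loading_structure, rows = 2,
constrained = TRUE, lower_triangle = lower.tri(R)
)
srmr_N_gradient(
start, zeros = rep(TRUE, 12), R = R,
loading_structure = loading_structure, rows = 2,
constrained = TRUE, lower_triangle = lower.tri(R)
)
}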
# LIKELIHOOD ----
#' @noRd
# Log-likelihood only ----
# Updated 06.10.2024
log_likelihood <- function(n, p, R, S, type = c("partial", "zero"))
{
# Set default to zero-order
if(missing(type)){
type <- "zero"
}else{
type <- match.arg(type)
}
# Return
return(
swiftelse(
type == "zero",
-(n / 2) * (p * log(2 * pi) + log(det(R)) + sum(diag(S %*% solve(R)))),
(n / 2) * (log(det(R)) - sum(diag((S %*% R)))) - (n * p / 2) * log(2 * pi)
)
)
}
#' @noRd
# Compute log-likelihood metrics ----
# Updated 06.10.2024
likelihood <- function(n, p, R, S, loadings, type)
{
# Get number of communities
m <- dim(loadings)[2]
# Log-likelihood
loglik <- log_likelihood(n, p, R, S, type)
# Total number of parameters
parameters <- (p * m) + p + ((m * (m - 1)) / 2)
# Model parameters
model_parameters <- parameters - sum(loadings == 0)
# Return log-likelihood
return(
c(
logLik = loglik,
AIC = -2 * loglik + 2 * model_parameters, # -2L + 2k
BIC = -2 * loglik + model_parameters * log(n) # -2L + klog(n)
# EBIC = -2 * loglik + model_parameters * log(n) + 2 * gamma * log(
# choose(parameters, model_parameters)
# ), # -2L + klog(n) + 2 gamma log(binom(pk))
# GFI = 1 - sum((R - S)^2) / sum(S^2)
)
)
}
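# A minimal sketch of the fit metrics for a toy two-community model; the
# loading values and sample size below are illustrative only
if(FALSE){
set.seed(1)
S <- cor(matrix(rnorm(500 * 4), ncol = 4))
toy_loadings <- matrix(c(0.6, 0.5, 0, 0, 0, 0, 0.7, 0.4), ncol = 2)
R <- nload2cor(toy_loadings)
likelihood(
n = 500, p = 4, R = R, S = S,
loadings = toy_loadings, type = "zero"
) # returns a named vector with logLik, AIC, and BIC
}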
#' @noRd
# Compute model parameters for TEFI adjustment ----
# Updated 22.10.2024
compute_tefi_adjustment <- function(loadings, correlations)
{
# Obtain dimensions
dimensions <- dim(loadings)
# Total number of parameters
parameters <- (dimensions[1] * dimensions[2]) + dimensions[1] +
((dimensions[2] * (dimensions[2] - 1)) / 2)
# Obtain model parameters
model_parameters <- parameters - sum(loadings == 0)
# Return adjustment
return(-2 * log(model_parameters) + mean(abs(correlations)))
}
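# A minimal sketch with illustrative values: a 4 x 2 loading matrix with
# four zeros (9 of 13 total parameters retained) and a 2 x 2 community
# correlation matrix
if(FALSE){
toy_loadings <- matrix(c(0.6, 0.5, 0, 0, 0, 0, 0.7, 0.4), ncol = 2)
toy_correlations <- matrix(c(1, 0.3, 0.3, 1), ncol = 2)
compute_tefi_adjustment(toy_loadings, toy_correlations)
# Equals -2 * log(9) + mean(abs(toy_correlations))
}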
# EGM MODELS ----
#' @noRd
# EGM | Standard ----
# Updated 06.11.2024
EGM.standard <- function(data, communities, structure, p.in, p.out, opt, constrained, ...)
{
# Get ellipse
ellipse <- list(...)
# Get dimensions
data_dimensions <- dim(data)
dimension_names <- dimnames(data)
# Determine whether a correlation matrix was provided (square input)
if(data_dimensions[1] == data_dimensions[2]){
# Check if sample size was provided
if("n" %in% names(ellipse)){
# Set sample size
data_dimensions[1] <- ellipse$n
# Set empirical zero-order correlations
empirical_R <- data
}else{
# Stop and send error
.handleSimpleError(
h = stop,
msg = paste0(
"A symmetric (", data_dimensions[1], " x ",
data_dimensions[2], ") matrix was input but sample size was not provided.\n\n",
"If you'd like to use a correlation matrix, please set the argument 'n' to your sample size"
),
call = "EGM"
)
}
}else{
# Estimate zero-order correlations
empirical_R <- auto.correlate(data, ...)
}
# Obtain partial correlations
empirical_P <- cor2pcor(empirical_R)
# Check for whether structure is provided
if(is.null(structure)){
# Obtain structure if not provided
walktrap <- igraph::cluster_walktrap(convert2igraph(abs(empirical_P)))
# Obtain memberships based on number of communities
structure <- swiftelse(
is.null(communities) || unique_length(walktrap$membership) == communities,
walktrap$membership, cutree(as.hclust(walktrap), k = communities)
)
}
# Set number of communities
communities <- unique_length(structure)
# Set up community variables
community_variables <- lapply(
seq_len(communities), function(community){structure == community}
)
# Obtain community structure
community_P <- create_community_structure(
P = empirical_P, total_variables = data_dimensions[2],
communities = communities,
community_variables = community_variables,
p.in = p.in, p.out = p.out
)
# Update loadings
output <- silent_call(net.loads(community_P, structure, ...))
output$std <- output$std[dimension_names[[2]],, drop = FALSE]
# Obtain dimension names
loading_names <- dimnames(output$std)
# Compute network scores
standard_scores <- compute_scores(output, data, "network", "simple")$std.scores
# Compute community correlations
standard_correlations <- cor(standard_scores, use = "pairwise")
# Compute model-implied correlations
standard_R <- nload2cor(output$std)
standard_P <- cor2pcor(standard_R)
# Obtain loadings vector and get bounds
loadings_vector <- as.vector(output$std)
zeros <- loadings_vector != 0
loadings_length <- length(loadings_vector)
# Set up loading structure
# Uses transpose for 2x speed up in optimization
loading_structure <- matrix(FALSE, nrow = communities, ncol = data_dimensions[2])
# Fill structure
for(i in seq_along(structure)){
loading_structure[structure[i], i] <- TRUE
}
# Optimize over loadings
result <- silent_call(
nlminb(
start = loadings_vector, objective = srmr_N_cost,
gradient = srmr_N_gradient,
zeros = zeros, R = empirical_R,
loading_structure = loading_structure,
rows = communities, n = data_dimensions[1],
constrained = constrained,
lower_triangle = lower.tri(empirical_R),
lower = rep(-1, loadings_length),
upper = rep(1, loadings_length),
control = list(eval.max = 10000, iter.max = 10000)
)
)
# Set estimate
result$estimate <- result$par
# Extract optimized loadings
optimized_loadings <- matrix(
result$estimate, nrow = data_dimensions[2],
dimnames = loading_names
)
# Keep standard loadings before overwriting with optimized loadings
standard_loadings <- output$std
# Replace loadings output with optimized loadings
output$std <- optimized_loadings
# Compute network scores
optimized_scores <- compute_scores(output, data, "network", "simple")$std.scores
# Compute community correlations
optimized_correlations <- cor(optimized_scores, use = "pairwise")
# Compute model-implied correlations
optimized_R <- nload2cor(optimized_loadings)
optimized_P <- cor2pcor(optimized_R)
# Set up EGA
ega_list <- list(
dim.variables = data.frame(
items = dimension_names[[2]],
dimension = structure
),
network = community_P, wc = structure,
n.dim = unique_length(structure), correlation = empirical_R,
n = data_dimensions[1], TEFI = tefi(empirical_R, structure)$VN.Entropy.Fit
); class(ega_list) <- "EGA"
# Attach methods to network
attr(ega_list$network, which = "methods") <- list(
model = "egm", communities = communities, p.in = p.in, p.out = p.out
)
# Attach class to memberships
class(ega_list$wc) <- "EGA.community"
# Attach methods to memberships
names(ega_list$wc) <- dimension_names[[2]]
attr(ega_list$wc, which = "methods") <- list(
algorithm = swiftelse(is.null(structure), NULL, "Walktrap"),
objective_function = NULL
)
# Set up results
results <- list(
EGA = ega_list, structure = structure,
model = list( # general list
standard = list( # using standard parameters
loadings = standard_loadings,
scores = standard_scores,
correlations = standard_correlations,
fit = fit(
n = data_dimensions[1], p = data_dimensions[2],
R = standard_R, S = empirical_R,
loadings = standard_loadings, correlations = standard_correlations,
structure = structure, ci = 0.95
),
implied = list(R = standard_R, P = standard_P)
),
optimized = list( # using optimized parameters
loadings = optimized_loadings,
scores = optimized_scores,
correlations = optimized_correlations,
fit = fit(
n = data_dimensions[1], p = data_dimensions[2],
R = optimized_R, S = empirical_R,
loadings = optimized_loadings, correlations = optimized_correlations,
structure = structure, ci = 0.95
),
implied = list(R = optimized_R, P = optimized_P)
)
)
)
# Set class
class(results) <- "EGM"
# Return results
return(results)
}
#' @noRd
# EGM | Search ----
# Updated 06.11.2024
EGM.search <- function(data, communities, structure, p.in, opt, constrained, verbose, ...)
{
# Perform search based on 'p.in'
p_grid <- expand.grid(
p_in = seq(p.in, 1, 0.05), p_out = seq(0.00, p.in, 0.05)
)
# Get number of search
p_number <- dim(p_grid)[1]
# Fit name based on fit
fit_name <- switch(
opt,
"aic" = "AIC", "bic" = "BIC",
"cfi" = "CFI", "chisq" = "chisq",
"loglik" = "logLik", "rmsea" = "RMSEA",
"srmr" = "SRMR", "tefi" = "TEFI",
"tefi.adj" = "TEFI.adj", "tli" = "TLI"
)
# Loop over grid search
grid_search <- parallel_process(
iterations = p_number, datalist = seq_len(p_number),
FUN = function(i, data, p_grid, communities, structure, opt, constrained, ...){
# First, try
output <- silent_call(
try(
EGM.standard(
data = data, communities = communities,
structure = structure, p.in = p_grid$p_in[i],
p.out = p_grid$p_out[i], opt = opt,
constrained = constrained, ...
), silent = TRUE
)
)
# Return result
return(swiftelse(is(output, "try-error"), NULL, output))
},
# `EGM.standard` arguments
data = data, p_grid = p_grid,
communities = communities, structure = structure,
opt = opt, constrained = constrained, ...,
# `parallel_process` arguments
ncores = 1, progress = verbose
)
# Obtain fits
optimized_fits <- nvapply(
grid_search, function(x){
swiftelse(is.null(x), NA, x$model$optimized$fit[[fit_name]])
}
)
# Check for all NA
if(all(is.na(optimized_fits))){
# Set error if no solution is found
.handleSimpleError(
h = stop,
msg = paste0(
"Search could not converge to any solutions. \n\n",
"Try:\n",
" + increasing 'p.in'\n",
" + changing 'communities'\n",
" + changing 'structure'"
),
call = "EGM"
)
}
# Index function based on fit
index_FUN <- switch(
opt,
"aic" = which.min, "bic" = which.min,
"cfi" = which.max, "chisq" = which.min,
"loglik" = which.max, "rmsea" = which.min,
"srmr" = which.min, "tefi" = which.min,
"tefi.adj" = which.min, "tli" = which.max
)
# Obtain index
opt_index <- index_FUN(optimized_fits)
# Set up final model
results <- grid_search[[opt_index]]
# Add 'p.in' and 'p.out' parameters
results$search <- c(
p.in = p_grid[opt_index, "p_in"],
p.out = p_grid[opt_index, "p_out"]
)
# Overwrite class
class(results) <- "EGM"
# Return results
return(results)
}
#' @noRd
# EGM | EGA ----
# Updated 06.11.2024
EGM.EGA <- function(data, structure, opt, constrained, ...)
{
# Obtain data dimensions
data_dimensions <- dim(data)
# Estimate EGA
ega <- EGA(data, plot.EGA = FALSE, ...)
empirical_P <- cor2pcor(ega$correlation)
# Update number of rows based on EGA
data_dimensions[1] <- ega$n
# Obtain variable names from the network
variable_names <- dimnames(ega$network)[[2]]
# Set memberships based on structure
if(!is.null(structure)){
# Update EGA features
ega$wc[] <- structure
ega$dim.variables$dimension <- structure
}else{
structure <- ega$wc
}
# Set communities
ega$n.dim <- communities <- unique_length(structure)
# Obtain standard network loadings
output <- silent_call(net.loads(A = ega$network, wc = ega$wc, ...))
# Obtain standard loadings
standard_loadings <- output$std[variable_names,, drop = FALSE]
# Compute network scores
standard_scores <- compute_scores(output, data, "network", "simple")$std.scores
# Compute community correlations
standard_correlations <- cor(standard_scores, use = "pairwise")
# Obtain model-implied correlations
standard_R <- nload2cor(standard_loadings)
standard_P <- cor2pcor(standard_R)
# Get loading dimensions
dimensions <- dim(standard_loadings)
dimension_names <- dimnames(standard_loadings)
# Obtain loadings vector and get bounds
loadings_vector <- as.vector(standard_loadings)
zeros <- loadings_vector != 0
loadings_length <- length(loadings_vector)
# Set up loading structure
# Uses transpose for 2x speed up in optimization
loading_structure <- matrix(
FALSE, nrow = dimensions[2],
ncol = dimensions[1],
dimnames = list(dimension_names[[2]], dimension_names[[1]])
)
# Fill structure
for(i in seq_along(structure)){
loading_structure[structure[i], i] <- TRUE
}
# Optimize over loadings
result <- silent_call(
nlminb(
start = loadings_vector, objective = srmr_N_cost,
gradient = srmr_N_gradient,
zeros = zeros, R = ega$correlation,
loading_structure = loading_structure,
rows = communities, n = data_dimensions[1],
constrained = constrained,
lower_triangle = lower.tri(ega$correlation),
lower = rep(-1, loadings_length),
upper = rep(1, loadings_length),
control = list(eval.max = 10000, iter.max = 10000)
)
)
# Set estimate
result$estimate <- result$par
# Extract optimized loadings
optimized_loadings <- matrix(
result$estimate, nrow = dimensions[1],
dimnames = dimension_names
)
# Replace loadings output with optimized loadings
output$std <- optimized_loadings
# Compute network scores
optimized_scores <- compute_scores(output, data, "network", "simple")$std.scores
# Compute community correlations
optimized_correlations <- cor(optimized_scores, use = "pairwise")
# Obtain model-implied correlations
optimized_R <- nload2cor(optimized_loadings)
optimized_P <- cor2pcor(optimized_R)
# Set up results
results <- list(
EGA = ega, structure = structure,
model = list( # general list
standard = list( # using standard parameters
loadings = standard_loadings,
scores = standard_scores,
correlations = standard_correlations,
fit = fit(
n = data_dimensions[1], p = data_dimensions[2],
R = standard_R, S = ega$correlation,
loadings = standard_loadings, correlations = standard_correlations,
structure = ega$wc, ci = 0.95
),
implied = list(R = standard_R, P = standard_P)
),
optimized = list( # using optimized parameters
loadings = optimized_loadings,
scores = optimized_scores,
correlations = optimized_correlations,
fit = fit(
n = data_dimensions[1], p = data_dimensions[2],
R = optimized_R, S = ega$correlation,
loadings = optimized_loadings,
correlations = optimized_correlations,
structure = ega$wc, ci = 0.95
),
implied = list(R = optimized_R, P = optimized_P)
)
)
)
# Set class
class(results) <- "EGM"
# Return results
return(results)
}
#' @noRd
# EGM | EGA search ----
# Updated 06.11.2024
EGM.EGA.search <- function(data, communities, structure, opt, constrained, verbose, ...)
{
# Fit name based on fit
fit_name <- switch(
opt,
"aic" = "AIC", "bic" = "BIC",
"cfi" = "CFI", "chisq" = "chisq",
"loglik" = "logLik", "rmsea" = "RMSEA",
"srmr" = "SRMR", "tefi" = "TEFI",
"tefi.adj" = "TEFI.adj", "tli" = "TLI"
)
# Get number of dimensions
data_dimensions <- dim(data)
# Put data through `EBICglasso.qgraph`
glasso_output <- network.estimation(data, network.only = FALSE, ...)
# Obtain attributes
glasso_attr <- attributes(glasso_output$estimated_network)
# Obtain fits
fits <- lapply(
seq_len(glasso_attr$methods$nlambda), function(i){
# Convert to partial correlations
P <- wi2net(glasso_output$output$results$wi[,,i])
dimnames(P) <- glasso_attr$dimnames
# Obtain organized output
output <- try(
silent_call(
glasso_fit(
data = data, S = glasso_output$output$S,
glasso_attr = glasso_attr, P = P,
data_dimensions = data_dimensions,
constrained = constrained, opt = opt,
...
)
), silent = TRUE
)
# Return output
return(swiftelse(is(output, "try-error"), NULL, output))
}
)
# Add names to fits
names(fits) <- glasso_output$output$lambda
# Obtain fit index
fit_index <- nvapply(fits[!lvapply(fits, is.null)], function(x){
return(x$model$optimized$fit[[fit_name]])
})
# Index function based on fit
index_FUN <- switch(
opt,
"aic" = min, "bic" = min,
"cfi" = max, "chisq" = min,
"loglik" = max, "rmsea" = min,
"srmr" = min, "tefi" = min,
"tefi.adj" = min, "tli" = max
)
# Obtain optimum value
opt_value <- index_FUN(fit_index)
# Obtain index
opt_index <- max(which(fit_index == opt_value))
# Obtain lambda (as character)
lambda <- names(fit_index)[[opt_index]]
# Obtain results
results <- fits[[lambda]]
# Set up 'dim.variables' for EGA
dim.variables <- fast.data.frame(
c(dimnames(results$EGA$network)[[2]], results$structure),
nrow = data_dimensions[2], ncol = 2,
colnames = c("items", "dimension")
)
# Add to EGA results
results$EGA$dim.variables <- dim.variables[order(dim.variables$dimension),]
# Compute TEFI for EGA
results$EGA$TEFI <- tefi(glasso_output$output$S, results$EGA$wc)$VN.Entropy.Fit
# Set up network methods
attributes(results$EGA$network) <- glasso_attr
attributes(results$EGA$network)$methods[
c("model.selection", "lambda", "criterion")
] <- list(
model.selection = fit_name, lambda = as.numeric(lambda), criterion = opt_value
)
# Overwrite class
class(results) <- "EGM"
# Return results
return(results)
}
# EGM UTILITIES ----
#' @noRd
# Creates community structure ----
# Updated 08.10.2024
create_community_structure <- function(
P, total_variables, communities, community_variables, p.in, p.out
)
{
# Set vectors of 'p.in' and 'p.out'
p.in <- swiftelse(length(p.in) == 1, rep(p.in, communities), p.in)
p.out <- swiftelse(length(p.out) == 1, rep(p.out, communities), p.out)
# Set diagonal to missing
diag(P) <- NA
# Set community blocks
for(i in seq_len(communities)){
# Extract within-community block
indices <- P[community_variables[[i]], community_variables[[i]]]
# Check for current density
if(compute_density(indices) > p.in[i]){
# Get lower triangle
lower_triangle <- lower.tri(indices)
# Set lowest absolute edges to zero
indices[
abs(indices) < quantile(
abs(indices[lower_triangle]), probs = 1 - p.in[i], na.rm = TRUE
)
] <- 0
# Set back into block
P[community_variables[[i]], community_variables[[i]]] <- indices
}
# Extract between-community block (variables not in community i)
indices <- P[community_variables[[i]], !community_variables[[i]]]
# Check for current density
if(compute_density(indices) > p.out[i]){
# Get threshold value
threshold_value <- quantile(abs(indices), probs = 1 - p.out[i], na.rm = TRUE)
# Set lowest absolute edges to zero
indices[abs(indices) < threshold_value] <- 0
# Set back into block
P[community_variables[[i]], !community_variables[[i]]] <- indices
# Mirror the threshold on the transposed block to keep the matrix symmetric
indices <- P[!community_variables[[i]], community_variables[[i]]]
indices[abs(indices) < threshold_value] <- 0
# Set back into block
P[!community_variables[[i]], community_variables[[i]]] <- indices
}
}
# Set diagonal to zero
diag(P) <- 0
# Return community network
return(P)
}
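# A minimal sketch (assuming the internal `cor2pcor` helper is available) of
# sparsifying a toy partial correlation matrix with two three-variable
# communities; the probabilities below are illustrative only
if(FALSE){
set.seed(1)
P <- cor2pcor(cor(matrix(rnorm(200 * 6), ncol = 6)))
membership <- c(1, 1, 1, 2, 2, 2)
sparse_P <- create_community_structure(
P = P, total_variables = 6, communities = 2,
community_variables = lapply(1:2, function(k){membership == k}),
p.in = 0.95, p.out = 0.50
)
}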
#' @noRd
# Computes density ----
# Updated 08.10.2024
compute_density <- function(network)
{
# Get dimensions
dimensions <- dim(network)
# Obtain total number of edges
edges <- prod(dimensions)
# Check for square (within-community) block
if(dimensions[1] == dimensions[2]){
# Total possible edges (excluding the diagonal)
total_possible <- (edges - dimensions[2]) / 2
# Compute density
return((total_possible - (sum(network == 0, na.rm = TRUE) / 2)) / total_possible)
}else{ # Otherwise, treat as off-diagonal (between-community) block
# Compute density
return((edges - sum(network == 0, na.rm = TRUE)) / edges)
}
}
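# A minimal sketch: a three-node block in which 2 of 3 possible edges are
# nonzero; the diagonal is masked with NA first, mirroring how
# `create_community_structure` calls this function
if(FALSE){
A <- matrix(0, 3, 3)
A[1, 2] <- A[2, 1] <- 0.3
A[2, 3] <- A[3, 2] <- 0.2
diag(A) <- NA
compute_density(A) # 2 / 3
}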
#' @noRd
# GLASSO fit ----
# Updated 06.11.2024
glasso_fit <- function(data, S, glasso_attr, P, data_dimensions, constrained, opt, ...)
{
# Obtain structure based on community detection
structure <- community.detection(P, ...)
# Obtain communities
communities <- unique_length(structure)
# Initialize loadings
output <- silent_call(net.loads(A = P, wc = structure, ...))
standard_loadings <- output$std[
glasso_attr$dimnames[[2]], seq_len(communities), drop = FALSE
]
# Obtain scores
standard_scores <- compute_scores(output, data, "network", "simple")$std.scores
# Extract standard correlations
standard_correlations <- cor(standard_scores, use = "pairwise")
# Obtain model-implied correlations
standard_R <- nload2cor(standard_loadings)
standard_P <- cor2pcor(standard_R)
# Get dimension names
dimensions <- dim(standard_loadings)
dimension_names <- dimnames(standard_loadings)
# Obtain loadings vector and get bounds
loadings_vector <- as.vector(standard_loadings)
zeros <- loadings_vector != 0
loadings_length <- length(loadings_vector)
# Set up loading structure
# Uses transpose for 2x speed up in optimization
loading_structure <- matrix(
FALSE, nrow = dimensions[2],
ncol = dimensions[1],
dimnames = list(dimension_names[[2]], dimension_names[[1]])
)
# Fill structure
for(i in seq_along(structure)){
loading_structure[structure[i], i] <- TRUE
}
# Optimize over loadings
result <- try(
nlminb(
start = loadings_vector, objective = srmr_N_cost,
gradient = srmr_N_gradient,
zeros = zeros, R = S,
loading_structure = loading_structure,
rows = communities, n = data_dimensions[1],
constrained = constrained, lower_triangle = lower.tri(S),
lower = rep(-1, loadings_length),
upper = rep(1, loadings_length),
control = list(eval.max = 10000, iter.max = 10000)
), silent = TRUE
)
# Check for error
if(is(result, "try-error")){
stop("Loading optimization did not converge for this lambda value")
}
# Set estimate
result$estimate <- result$par
# Extract optimized loadings
optimized_loadings <- matrix(
result$estimate, nrow = dimensions[1],
dimnames = dimension_names
)
# Update output
output$std <- optimized_loadings
# Extract optimized scores
optimized_scores <- compute_scores(output, data, "network", "simple")$std.scores
# Extract optimized correlations
optimized_correlations <- cor(optimized_scores, use = "pairwise")
# Obtain model-implied correlations
optimized_R <- nload2cor(optimized_loadings)
optimized_P <- cor2pcor(optimized_R)
# Set up EGA object
ega <- list(
network = P, wc = structure, n.dim = communities,
correlation = S, n = data_dimensions[1]
); class(ega) <- "EGA"
# Set up results
return(
list(
EGA = ega, structure = structure,
model = list( # general list
standard = list( # using standard parameters
loadings = standard_loadings,
scores = standard_scores,
correlations = standard_correlations,
fit = fit(
n = data_dimensions[1], p = data_dimensions[2],
R = standard_R, S = ega$correlation,
loadings = standard_loadings, correlations = standard_correlations,
structure = ega$wc, ci = 0.95
),
implied = list(R = standard_R, P = standard_P)
),
optimized = list( # using optimized parameters
loadings = optimized_loadings,
scores = optimized_scores,
correlations = optimized_correlations,
fit = fit(
n = data_dimensions[1], p = data_dimensions[2],
R = optimized_R, S = ega$correlation,
loadings = optimized_loadings,
correlations = optimized_correlations,
structure = ega$wc, ci = 0.95
),
implied = list(R = optimized_R, P = optimized_P)
)
)
)
)
}