Nothing
# Cluster Metrics for Network Analysis
# Summary measures for macro/per-cluster networks and multilayer networks
# ==============================================================================
# 1. Edge Weight Aggregation
# ==============================================================================
#' Aggregate Edge Weights
#'
#' Aggregates a vector of edge weights using various methods.
#' Compatible with igraph's edge.attr.comb parameter.
#'
#' @param w Numeric vector of edge weights
#' @param method Aggregation method: "sum", "mean", "median", "max", "min",
#' "prod", "density", "geomean"
#' @param n_possible Number of possible edges (for density calculation)
#' @return Single aggregated value
#' @export
#' @examples
#' w <- c(0.5, 0.8, 0.3, 0.9)
#' aggregate_weights(w, "sum") # 2.5
#' aggregate_weights(w, "mean") # 0.625
#' aggregate_weights(w, "max") # 0.9
aggregate_weights <- function(w, method = "sum", n_possible = NULL) {
# Remove NA and zero weights
w <- w[!is.na(w) & w != 0]
if (length(w) == 0) return(0)
switch(method,
"sum" = sum(w),
"mean" = mean(w),
"median" = stats::median(w),
"max" = max(w),
"min" = min(w),
"prod" = prod(w),
"density" = if (!is.null(n_possible) && n_possible > 0) {
sum(w) / n_possible
} else {
sum(w) / length(w)
},
"geomean" = {
pos_w <- w[w > 0]
if (length(pos_w) == 0) 0 else exp(mean(log(pos_w)))
},
stop("Unknown method: ", method, call. = FALSE)
)
}
#' @rdname aggregate_weights
#' @export
wagg <- aggregate_weights
# ==============================================================================
# 2. Cluster Summary (Macro/Cluster Aggregates)
# ==============================================================================
#' Cluster Summary Statistics
#'
#' Aggregates node-level network weights to cluster-level summaries. Computes
#' both macro (cluster-to-cluster) transitions and per-cluster transitions
#' (how nodes connect inside each cluster).
#'
#' This is the core function for Multi-Cluster Multi-Level (MCML) analysis.
#' Use \code{\link{as_tna}} to convert results to tna objects for further
#' analysis with the tna package.
#'
#' @param x Network input. Accepts multiple formats:
#' \describe{
#' \item{matrix}{Numeric adjacency/weight matrix. Row and column names are
#' used as node labels. Values represent edge weights (e.g., transition
#' counts, co-occurrence frequencies, or probabilities).}
#' \item{cograph_network}{A cograph network object. The function extracts
#' the weight matrix from \code{x$weights} or converts via
#' \code{to_matrix()}. Clusters can be auto-detected from node attributes.}
#' \item{tna}{A tna object from the tna package. Extracts \code{x$weights}.}
#' \item{cluster_summary}{If already a cluster_summary, returns unchanged.}
#' }
#'
#' @param clusters Cluster/group assignments for nodes. Accepts multiple formats:
#' \describe{
#' \item{NULL}{(default) Auto-detect from cograph_network. Looks for columns
#' named 'clusters', 'cluster', 'groups', or 'group' in \code{x$nodes}.
#' Throws an error if no cluster column is found.
#' This option only works when \code{x} is a cograph_network.}
#' \item{vector}{Cluster membership for each node, in the same order as the
#' matrix rows/columns. Can be numeric (1, 2, 3) or character ("A", "B").
#' Cluster names will be derived from unique values.
#' Example: \code{c(1, 1, 2, 2, 3, 3)} assigns first two nodes to cluster 1.}
#' \item{data.frame}{A data frame where the first column contains node names
#' and the second column contains group/cluster names.
#' Example: \code{data.frame(node = c("A", "B", "C"), group = c("G1", "G1", "G2"))}}
#' \item{named list}{Explicit mapping of cluster names to node labels.
#' List names become cluster names, values are character vectors of node
#' labels that must match matrix row/column names.
#' Example: \code{list(Alpha = c("A", "B"), Beta = c("C", "D"))}}
#' }
#'
#' @param method Aggregation method for combining edge weights within/between
#' clusters. Controls how multiple node-to-node edges are summarized:
#' \describe{
#' \item{"sum"}{(default) Sum of all edge weights. Best for count data
#' (e.g., transition frequencies). Preserves total flow.}
#' \item{"mean"}{Average edge weight. Best when cluster sizes differ and
#' you want to control for size. Note: when input is already a transition
#' matrix (rows sum to 1), "mean" avoids size bias.
#' Example: cluster with 5 nodes won't have 5x the weight of cluster with 1 node.}
#' \item{"median"}{Median edge weight. Robust to outliers.}
#' \item{"max"}{Maximum edge weight. Captures strongest connection.}
#' \item{"min"}{Minimum edge weight. Captures weakest connection.}
#' \item{"density"}{Sum divided by number of possible edges. Normalizes
#' by cluster size combinations.}
#' \item{"geomean"}{Geometric mean of positive weights. Useful for
#' multiplicative processes.}
#' }
#'
#' @param type Post-processing applied to aggregated weights. Determines the
#' interpretation of the resulting matrices:
#' \describe{
#' \item{"tna"}{(default) Row-normalize so each row sums to 1. Creates
#' transition probabilities suitable for Markov chain analysis.
#' Interpretation: "Given I'm in cluster A, what's the probability
#' of transitioning to cluster B?"
#' Required for use with tna package functions.
#' Diagonal is zero; per-cluster data is in \code{$clusters}.}
#' \item{"raw"}{No normalization. Returns aggregated counts/weights as-is.
#' Use for frequency analysis or when you need raw counts.
#' Compatible with igraph's contract + simplify output.}
#' \item{"cooccurrence"}{Symmetrize the matrix: (A + t(A)) / 2.
#' For undirected co-occurrence analysis.}
#' \item{"semi_markov"}{Row-normalize with duration weighting.
#' For semi-Markov process analysis.}
#' }
#'
#' @param directed Logical. If \code{TRUE} (default), treat network as directed.
#' A->B and B->A are separate edges. If \code{FALSE}, edges are undirected
#' and the matrix is symmetrized before processing.
#'
#' @param compute_within Logical. If \code{TRUE} (default), compute per-cluster
#' transition matrices for each cluster. Each cluster gets its own n_i x n_i
#' matrix showing internal node-to-node transitions.
#' Set to \code{FALSE} to skip this computation for better performance when
#' only the macro (cluster-level) summary is needed.
#'
#' @return A \code{cluster_summary} object (S3 class) containing:
#' \describe{
#' \item{macro}{A tna object representing the macro (cluster-level) network:
#' \describe{
#' \item{weights}{k x k matrix of cluster-to-cluster weights, where k is
#' the number of clusters. Row i, column j contains the aggregated
#' weight from cluster i to cluster j. Diagonal contains aggregated
#' intra-cluster weight (retention / self-loops). Processing depends on \code{type}.}
#' \item{inits}{Numeric vector of length k. Initial state distribution
#' across clusters, computed from column sums of the original matrix.
#' Represents the proportion of incoming edges to each cluster.}
#' }
#' }
#' \item{clusters}{Named list with one element per cluster. Each element is
#' a tna object containing:
#' \describe{
#' \item{weights}{n_i x n_i matrix for nodes inside that cluster.
#' Shows internal transitions between nodes in the same cluster.}
#' \item{inits}{Initial distribution for the cluster.}
#' }
#' NULL if \code{compute_within = FALSE}.}
#' \item{cluster_members}{Named list mapping cluster names to their member node labels.
#' Example: \code{list(A = c("n1", "n2"), B = c("n3", "n4", "n5"))}}
#' \item{meta}{List of metadata:
#' \describe{
#' \item{type}{The \code{type} argument used ("tna", "raw", etc.)}
#' \item{method}{The \code{method} argument used ("sum", "mean", etc.)}
#' \item{directed}{Logical, whether network was treated as directed}
#' \item{n_nodes}{Total number of nodes in original network}
#' \item{n_clusters}{Number of clusters}
#' \item{cluster_sizes}{Named vector of cluster sizes}
#' }
#' }
#' }
#'
#' @details
#' ## Workflow
#'
#' Typical MCML analysis workflow:
#' \preformatted{
#' # 1. Create network
#' net <- cograph(edges, nodes = nodes)
#' net$nodes$clusters <- group_assignments
#'
#' # 2. Compute cluster summary
#' cs <- cluster_summary(net, type = "tna")
#'
#' # 3. Convert to tna models
#' tna_models <- as_tna(cs)
#'
#' # 4. Analyze/visualize
#' plot(tna_models$macro)
#' tna::centralities(tna_models$macro)
#' }
#'
#' ## Between-Cluster Matrix Structure
#'
#' The \code{macro$weights} matrix has clusters as both rows and columns:
#' \itemize{
#' \item Off-diagonal (row i, col j): Aggregated weight from cluster i to cluster j
#' \item Diagonal (row i, col i): Per-cluster total (sum of internal edges in cluster i)
#' }
#'
#' When \code{type = "tna"}, rows sum to 1 and diagonal values represent
#' "retention rate" - the probability of staying inside the same cluster.
#'
#' ## Choosing method and type
#'
#' \tabular{lll}{
#' \strong{Input data} \tab \strong{Recommended} \tab \strong{Reason} \cr
#' Edge counts \tab method="sum", type="tna" \tab Preserves total flow, normalizes to probabilities \cr
#' Transition matrix \tab method="mean", type="tna" \tab Avoids cluster size bias \cr
#' Frequencies \tab method="sum", type="raw" \tab Keep raw counts for analysis \cr
#' Correlation matrix \tab method="mean", type="raw" \tab Average correlations \cr
#' }
#'
#' @export
#' @seealso
#' \code{\link{as_tna}} to convert results to tna objects,
#' \code{\link{plot_mcml}} for two-layer visualization,
#' \code{\link{plot_mtna}} for flat cluster visualization
#'
#' @examples
#' # -----------------------------------------------------
#' # Basic usage with matrix and cluster vector
#' # -----------------------------------------------------
#' mat <- matrix(runif(100), 10, 10)
#' diag(mat) <- 0
#' rownames(mat) <- colnames(mat) <- LETTERS[1:10]
#'
#' clusters <- c(1, 1, 1, 2, 2, 2, 3, 3, 3, 3)
#' cs <- cluster_summary(mat, clusters)
#'
#' # Access results
#' cs$macro$weights # 3x3 cluster transition matrix
#' cs$macro$inits # Initial distribution
#' cs$clusters$`1`$weights # Per-cluster 1 transitions
#' cs$meta # Metadata
#'
#' # -----------------------------------------------------
#' # Named list clusters (more readable)
#' # -----------------------------------------------------
#' clusters <- list(
#' Alpha = c("A", "B", "C"),
#' Beta = c("D", "E", "F"),
#' Gamma = c("G", "H", "I", "J")
#' )
#' cs <- cluster_summary(mat, clusters, type = "tna")
#' cs$macro$weights # Rows/cols named Alpha, Beta, Gamma
#' cs$clusters$Alpha # Per-cluster Alpha network
#'
#' # -----------------------------------------------------
#' # Auto-detect clusters from cograph_network
#' # -----------------------------------------------------
#' net <- as_cograph(mat)
#' net$nodes$clusters <- c(1, 1, 1, 2, 2, 2, 3, 3, 3, 3)
#' cs <- cluster_summary(net) # No clusters argument needed
#'
#' # -----------------------------------------------------
#' # Different aggregation methods
#' # -----------------------------------------------------
#' cs_sum <- cluster_summary(mat, clusters, method = "sum") # Total flow
#' cs_mean <- cluster_summary(mat, clusters, method = "mean") # Average
#' cs_max <- cluster_summary(mat, clusters, method = "max") # Strongest
#'
#' # -----------------------------------------------------
#' # Raw counts vs TNA probabilities
#' # -----------------------------------------------------
#' cs_raw <- cluster_summary(mat, clusters, type = "raw")
#' cs_tna <- cluster_summary(mat, clusters, type = "tna")
#'
#' rowSums(cs_raw$macro$weights) # Various sums
#' rowSums(cs_tna$macro$weights) # All equal to 1
#'
#' # -----------------------------------------------------
#' # Skip within-cluster computation for speed
#' # -----------------------------------------------------
#' cs_fast <- cluster_summary(mat, clusters, compute_within = FALSE)
#' cs_fast$clusters # NULL
#'
#' # -----------------------------------------------------
#' # Convert to tna objects for tna package
#' # -----------------------------------------------------
#' cs <- cluster_summary(mat, clusters, type = "tna")
#' tna_models <- as_tna(cs)
#' # tna_models$macro # tna object
#' # tna_models$Alpha # tna object (cluster network)
cluster_summary <- function(x,
clusters = NULL,
method = c("sum", "mean", "median", "max",
"min", "density", "geomean"),
type = c("tna", "cooccurrence", "semi_markov", "raw"),
directed = TRUE,
compute_within = TRUE) {
# If already a cluster_summary, return as-is
if (inherits(x, "cluster_summary")) {
return(x)
}
type <- match.arg(type)
method <- match.arg(method)
# Store original for cluster extraction
x_orig <- x
# Extract matrix from various input types
if (inherits(x, "cograph_network")) {
# Use stored weights matrix if available, else convert
mat <- if (!is.null(x$weights)) x$weights else to_matrix(x)
} else if (inherits(x, "tna")) {
mat <- x$weights
} else {
mat <- x
}
# Auto-detect clusters from cograph_network if not provided
if (is.null(clusters) && inherits(x_orig, "cograph_network")) {
nodes <- x_orig$nodes
if (!is.null(nodes)) {
# Look for cluster column (priority order)
cluster_cols <- c("clusters", "cluster", "groups", "group")
for (col in cluster_cols) {
if (col %in% names(nodes)) {
clusters <- nodes[[col]]
break
}
}
}
# Also check node_groups
if (is.null(clusters) && !is.null(x_orig$node_groups)) {
ng <- x_orig$node_groups
cluster_col <- intersect(c("cluster", "group", "layer"), names(ng))
if (length(cluster_col) > 0) {
clusters <- ng[[cluster_col[1]]]
}
}
if (is.null(clusters)) {
stop("No clusters found in cograph_network. ",
"Add a 'clusters' column to nodes or provide clusters argument.",
call. = FALSE)
}
} else if (is.null(clusters)) {
stop("clusters argument is required for matrix input", call. = FALSE)
}
# Validate input matrix
if (!is.matrix(mat) || !is.numeric(mat)) {
stop("x must be a cograph_network, tna object, or numeric matrix", call. = FALSE)
}
if (nrow(mat) != ncol(mat)) {
stop("x must be a square matrix", call. = FALSE)
}
n <- nrow(mat)
node_names <- rownames(mat)
if (is.null(node_names)) node_names <- as.character(seq_len(n))
# Convert clusters to list format
cluster_list <- .normalize_clusters(clusters, node_names)
n_clusters <- length(cluster_list)
cluster_names <- names(cluster_list)
if (is.null(cluster_names)) cluster_names <- as.character(seq_len(n_clusters))
names(cluster_list) <- cluster_names
# Get node indices for each cluster
cluster_indices <- lapply(cluster_list, function(nodes_vec) {
match(nodes_vec, node_names)
})
# ============================================================================
# Macro (cluster-level) computation (always computed)
# ============================================================================
# Aggregate macro (cluster-to-cluster) weights
between_raw <- matrix(0, n_clusters, n_clusters,
dimnames = list(cluster_names, cluster_names))
for (i in seq_len(n_clusters)) {
idx_i <- cluster_indices[[i]]
n_i <- length(idx_i)
for (j in seq_len(n_clusters)) {
idx_j <- cluster_indices[[j]]
n_j <- length(idx_j)
if (i == j) {
# Diagonal: aggregated intra-cluster weight (retention)
# Includes node self-loops (A->A) -- these are valid intra-cluster flow
w_ii <- mat[idx_i, idx_i, drop = FALSE]
n_possible <- n_i * n_i
between_raw[i, j] <- aggregate_weights(as.vector(w_ii), method,
n_possible)
} else {
# Off-diagonal: inter-cluster transitions
w_ij <- mat[idx_i, idx_j]
n_possible <- n_i * n_j
between_raw[i, j] <- aggregate_weights(as.vector(w_ij), method, n_possible)
}
}
}
# Process based on type
between_weights <- .process_weights(between_raw, type, directed)
# Compute inits from column sums
col_sums <- colSums(between_raw)
total <- sum(col_sums)
if (total > 0) {
between_inits <- col_sums / total
} else {
between_inits <- rep(1 / n_clusters, n_clusters)
}
names(between_inits) <- cluster_names
# Preserve original sequence data from tna input (no transformation)
orig_data <- if (inherits(x_orig, "tna")) x_orig$data
# Build $macro
between <- structure(
list(
weights = between_weights,
inits = between_inits,
labels = cluster_names,
data = orig_data
),
type = if (type == "tna") "relative" else "frequency",
scaling = character(0),
class = "tna"
)
# ============================================================================
# Per-cluster computation (optional)
# ============================================================================
cl_data <- NULL
if (isTRUE(compute_within)) {
cl_data <- lapply(seq_len(n_clusters), function(i) {
idx_i <- cluster_indices[[i]]
n_i <- length(idx_i)
cl_nodes <- cluster_list[[i]]
cl_name <- cluster_names[[i]]
if (n_i <= 1) {
# Single node: self-loop value preserved
cl_raw <- mat[idx_i, idx_i, drop = FALSE]
dimnames(cl_raw) <- list(cl_nodes, cl_nodes)
cl_weights_i <- .process_weights(cl_raw, type, directed)
cl_inits_i <- setNames(1, cl_nodes)
} else {
# Extract intra-cluster raw weights (self-loops preserved)
cl_raw <- mat[idx_i, idx_i]
dimnames(cl_raw) <- list(cl_nodes, cl_nodes)
# Process based on type
cl_weights_i <- .process_weights(cl_raw, type, directed)
# Per-cluster inits (handle NAs)
col_sums_w <- colSums(cl_raw, na.rm = TRUE)
total_w <- sum(col_sums_w, na.rm = TRUE)
cl_inits_i <- if (!is.na(total_w) && total_w > 0) {
col_sums_w / total_w
} else {
rep(1 / n_i, n_i)
}
names(cl_inits_i) <- cl_nodes
}
structure(
list(
weights = cl_weights_i,
inits = cl_inits_i,
labels = cl_nodes,
data = orig_data
),
type = if (type == "tna") "relative" else "frequency",
scaling = character(0),
class = "tna"
)
})
names(cl_data) <- cluster_names
class(cl_data) <- "group_tna"
}
# ============================================================================
# Build result
# ============================================================================
result <- structure(
list(
macro = between,
clusters = cl_data,
cluster_members = cluster_list,
meta = list(
type = type,
method = method,
directed = directed,
n_nodes = n,
n_clusters = n_clusters,
cluster_sizes = vapply(cluster_list, length, integer(1))
)
),
class = "cluster_summary"
)
result
}
#' @rdname cluster_summary
#' @return See \code{\link{cluster_summary}}.
#' @export
#' @examples
#' mat <- matrix(c(0.5, 0.2, 0.3, 0.1, 0.6, 0.3, 0.4, 0.1, 0.5), 3, 3,
#' byrow = TRUE,
#' dimnames = list(c("A", "B", "C"), c("A", "B", "C")))
#' csum(mat, list(G1 = c("A", "B"), G2 = c("C")))
csum <- cluster_summary
# ==============================================================================
# 2b. Build MCML from Raw Transition Data
# ==============================================================================
#' Build MCML from Raw Transition Data
#'
#' Builds a Multi-Cluster Multi-Level (MCML) model from raw transition data
#' (edge lists or sequences) by recoding node labels to cluster labels and
#' counting actual transitions. Unlike \code{\link{cluster_summary}} which
#' aggregates a pre-computed weight matrix, this function works from the
#' original transition data to produce the TRUE Markov chain over cluster states.
#'
#' @param x Input data. Accepts multiple formats:
#' \describe{
#' \item{data.frame with from/to columns}{Edge list. Columns named
#' from/source/src/v1/node1/i and to/target/tgt/v2/node2/j are
#' auto-detected. Optional weight column (weight/w/value/strength).}
#' \item{data.frame without from/to columns}{Sequence data. Each row is a
#' sequence, columns are time steps. Consecutive pairs (t, t+1) become
#' transitions.}
#' \item{tna object}{If \code{x$data} is non-NULL, uses sequence path on
#' the raw data. Otherwise falls back to \code{\link{cluster_summary}}.}
#' \item{cograph_network}{If \code{x$data} is non-NULL, detects edge list
#' vs sequence data. Otherwise falls back to \code{\link{cluster_summary}}.}
#' \item{cluster_summary}{Returns as-is.}
#' \item{square numeric matrix}{Falls back to \code{\link{cluster_summary}}.}
#' \item{non-square or character matrix}{Treated as sequence data.}
#' }
#'
#' @param clusters Cluster/group assignments. Accepts:
#' \describe{
#' \item{named list}{Direct mapping. List names = cluster names, values =
#' character vectors of node labels.
#' Example: \code{list(A = c("N1","N2"), B = c("N3","N4"))}}
#' \item{data.frame}{A data frame where the first column contains node names
#' and the second column contains group/cluster names.
#' Example: \code{data.frame(node = c("N1","N2","N3"), group = c("A","A","B"))}}
#' \item{membership vector}{Character or numeric vector. Node names are
#' extracted from the data.
#' Example: \code{c("A","A","B","B")}}
#' \item{column name string}{For edge list data.frames, the name of a
#' column containing cluster labels. The mapping is built from unique
#' (node, group) pairs in both from and to columns.}
#' \item{NULL}{Auto-detect from \code{cograph_network$nodes} or
#' \code{$node_groups} (same logic as \code{\link{cluster_summary}}).}
#' }
#'
#' @param method Aggregation method for combining edge weights: "sum", "mean",
#' "median", "max", "min", "density", "geomean". Default "sum".
#' @param type Post-processing: "tna" (row-normalize), "cooccurrence"
#' (symmetrize), "semi_markov", or "raw". Default "tna".
#' @param directed Logical. Treat as directed network? Default TRUE.
#' @param compute_within Logical. Compute within-cluster matrices? Default TRUE.
#'
#' @return A \code{cluster_summary} object with \code{meta$source = "transitions"},
#' fully compatible with \code{\link{plot_mcml}}, \code{\link{as_tna}}, and
#' \code{\link{splot}}.
#'
#' @export
#' @seealso \code{\link{cluster_summary}} for matrix-based aggregation,
#' \code{\link{as_tna}} to convert to tna objects,
#' \code{\link{plot_mcml}} for visualization
#'
#' @examples
#' # Edge list with clusters
#' edges <- data.frame(
#' from = c("A", "A", "B", "C", "C", "D"),
#' to = c("B", "C", "A", "D", "D", "A"),
#' weight = c(1, 2, 1, 3, 1, 2)
#' )
#' clusters <- list(G1 = c("A", "B"), G2 = c("C", "D"))
#' cs <- build_mcml(edges, clusters)
#' cs$macro$weights
#'
#' # Sequence data with clusters
#' seqs <- data.frame(
#' T1 = c("A", "C", "B"),
#' T2 = c("B", "D", "A"),
#' T3 = c("C", "C", "D"),
#' T4 = c("D", "A", "C")
#' )
#' cs <- build_mcml(seqs, clusters, type = "raw")
#' cs$macro$weights
build_mcml <- function(x,
clusters = NULL,
method = c("sum", "mean", "median", "max",
"min", "density", "geomean"),
type = c("tna", "frequency", "cooccurrence",
"semi_markov", "raw"),
directed = TRUE,
compute_within = TRUE) {
# If already an mcml or cluster_summary, return as-is
if (inherits(x, c("mcml", "cluster_summary"))) {
return(x)
}
type <- match.arg(type)
method <- match.arg(method)
input_type <- .detect_mcml_input(x)
switch(input_type,
"group_tna" = .group_tna_to_mcml(x, clusters, method, type,
directed, compute_within),
"edgelist" = .build_mcml_edgelist(x, clusters, method, type,
directed, compute_within),
"sequence" = .build_mcml_sequence(x, clusters, method, type,
directed, compute_within),
"tna_data" = .build_mcml_sequence(.decode_tna_data(x$data), clusters,
method, type, directed, compute_within),
"tna_matrix" = .as_mcml(cluster_summary(x, clusters, method = method,
type = type, directed = directed,
compute_within = compute_within)),
"cograph_data" = {
data <- x$data
# Auto-detect clusters from network if not provided
if (is.null(clusters)) {
clusters <- .auto_detect_clusters(x)
}
sub_type <- .detect_mcml_input(data)
if (sub_type == "edgelist") {
.build_mcml_edgelist(data, clusters, method, type,
directed, compute_within)
} else {
.build_mcml_sequence(data, clusters, method, type,
directed, compute_within)
}
},
"cograph_matrix" = {
if (is.null(clusters)) {
clusters <- .auto_detect_clusters(x)
}
.as_mcml(cluster_summary(x, clusters, method = method, type = type,
directed = directed,
compute_within = compute_within))
},
"matrix" = .as_mcml(cluster_summary(x, clusters, method = method,
type = type, directed = directed,
compute_within = compute_within)),
stop("Cannot build MCML from input of class '", class(x)[1], "'",
call. = FALSE)
)
}
#' Convert a cluster_summary to mcml (strip tna classes)
#' @keywords internal
.as_mcml <- function(cs) {
# Strip tna class from macro
m <- unclass(cs$macro)
attributes(m) <- NULL
names(m) <- c("weights", "inits", "labels", "data")
if (length(m) >= 4) {
names(m) <- c("weights", "inits", "labels", "data")[seq_along(m)]
}
# Strip tna/group_tna from clusters
cl <- NULL
if (!is.null(cs$clusters)) {
cl <- lapply(cs$clusters, function(obj) {
o <- unclass(obj)
attributes(o) <- NULL
names(o) <- c("weights", "inits", "labels", "data")[seq_along(o)]
o
})
names(cl) <- names(cs$clusters)
}
structure(
list(
macro = m,
clusters = cl,
cluster_members = cs$cluster_members,
meta = cs$meta
),
class = "mcml"
)
}
#' Decode numeric tna_seq_data back to character labels
#' @keywords internal
.decode_tna_data <- function(data) {
if (is.null(data)) return(NULL)
tna_labels <- attr(data, "labels")
if (is.null(tna_labels) || !is.numeric(data)) return(data)
decoded <- as.data.frame(
matrix(tna_labels[data], nrow = nrow(data)),
stringsAsFactors = FALSE
)
if (!is.null(colnames(data))) colnames(decoded) <- colnames(data)
decoded
}
#' Convert a group_tna to mcml
#'
#' Two modes depending on whether \code{clusters} is provided:
#' \describe{
#' \item{With clusters (row-level)}{For group_tna from
#' \code{tna::group_model(cluster_data(...))}. \code{clusters} is the
#' row-to-group assignments. Per-cluster tnas are taken as-is.
#' Macro data is the assignments vector.}
#' \item{Without clusters (node-level)}{For group_tna from
#' \code{as_tna(cluster_summary(...))}. Cluster membership inferred
#' from each tna's labels. Macro rebuilt from original data.}
#' }
#' @keywords internal
.group_tna_to_mcml <- function(x, clusters = NULL, method = "sum",
type = "tna", directed = TRUE,
compute_within = TRUE) {
nms <- names(x)
# ------------------------------------------------------------------
# Case 1: clusters provided -> row-level grouping (from group_model)
# ------------------------------------------------------------------
if (!is.null(clusters)) {
cluster_nms <- names(x)
# Strip tna classes, preserve data
cl_data <- lapply(cluster_nms, function(nm) {
obj <- x[[nm]]
list(
weights = obj$weights,
inits = obj$inits,
labels = obj$labels,
data = obj$data
)
})
names(cl_data) <- cluster_nms
# Macro data = the assignments
macro_data <- clusters
# All groups share the same labels (states)
all_labels <- x[[1]]$labels
n_groups <- length(cluster_nms)
return(structure(
list(
macro = list(
weights = NULL,
inits = NULL,
labels = cluster_nms,
data = macro_data
),
clusters = cl_data,
cluster_members = NULL,
meta = list(
type = type,
method = method,
directed = directed,
n_nodes = length(all_labels),
n_clusters = n_groups,
cluster_sizes = vapply(cl_data, function(cl) {
nrow_data <- if (!is.null(cl$data)) nrow(cl$data) else 0L
nrow_data
}, integer(1)),
source = "group_tna"
)
),
class = "mcml"
))
}
# ------------------------------------------------------------------
# Case 2: no clusters -> node-level grouping (from as_tna)
# ------------------------------------------------------------------
cluster_nms <- setdiff(nms, "macro")
if (length(cluster_nms) == 0) cluster_nms <- nms # nocov
# Check if labels differ across groups (node-level) or are same (row-level)
label_sets <- lapply(cluster_nms, function(nm) sort(x[[nm]]$labels))
all_same <- length(unique(label_sets)) == 1
if (all_same && length(cluster_nms) > 1) {
stop("All groups have the same labels -- this is a row-level group_tna. ",
"Provide clusters argument with row-to-group assignments.",
call. = FALSE)
}
# Node-level: membership from each tna's labels
cluster_members <- lapply(cluster_nms, function(nm) x[[nm]]$labels)
names(cluster_members) <- cluster_nms
# Recover dropped clusters from macro labels
if ("macro" %in% nms) {
missing <- setdiff(x[["macro"]]$labels, cluster_nms)
if (length(missing) > 0) { # nocov start
assigned <- unlist(cluster_members, use.names = FALSE)
orig <- NULL
for (nm in nms) {
if (!is.null(x[[nm]]$data)) { orig <- x[[nm]]$data; break }
}
if (!is.null(orig)) {
decoded <- .decode_tna_data(orig)
if (is.data.frame(decoded)) {
all_nodes <- sort(unique(unlist(decoded, use.names = FALSE)))
all_nodes <- all_nodes[!is.na(all_nodes)]
unassigned <- setdiff(all_nodes, assigned)
if (length(missing) == 1 && length(unassigned) > 0) {
cluster_members[[missing]] <- unassigned
}
}
}
} # nocov end
}
# Rebuild from data if available
orig_data <- NULL
for (nm in nms) {
if (!is.null(x[[nm]]$data)) { orig_data <- x[[nm]]$data; break } # nocov
}
if (!is.null(orig_data)) { # nocov start
.build_mcml_sequence(.decode_tna_data(orig_data), cluster_members,
method, type, directed, compute_within) # nocov end
} else {
# No data: reconstruct from weight matrices
all_nodes <- unlist(cluster_members, use.names = FALSE)
n <- length(all_nodes)
mat <- matrix(0, n, n, dimnames = list(all_nodes, all_nodes))
lapply(cluster_nms, function(nm) {
w <- x[[nm]]$weights
labs <- x[[nm]]$labels
mat[labs, labs] <<- w
})
.as_mcml(cluster_summary(mat, cluster_members, method = method,
type = type, directed = directed,
compute_within = compute_within))
}
}
#' Detect input type for build_mcml
#' @keywords internal
.detect_mcml_input <- function(x) {
if (inherits(x, "group_tna")) return("group_tna")
if (inherits(x, "tna")) {
if (!is.null(x$data)) return("tna_data")
return("tna_matrix")
}
if (inherits(x, "cograph_network")) {
if (!is.null(x$data)) return("cograph_data")
return("cograph_matrix")
}
if (is.data.frame(x)) {
col_names <- tolower(names(x))
from_cols <- c("from", "source", "src", "v1", "node1", "i")
to_cols <- c("to", "target", "tgt", "v2", "node2", "j")
has_from <- any(from_cols %in% col_names)
has_to <- any(to_cols %in% col_names)
if (has_from && has_to) return("edgelist")
return("sequence")
}
if (is.matrix(x)) {
if (is.numeric(x) && nrow(x) == ncol(x)) return("matrix")
return("sequence")
}
"unknown"
}
#' Auto-detect clusters from cograph_network
#' @keywords internal
.auto_detect_clusters <- function(x) {
clusters <- NULL
if (!is.null(x$nodes)) {
cluster_cols <- c("clusters", "cluster", "groups", "group")
for (col in cluster_cols) {
if (col %in% names(x$nodes)) {
clusters <- x$nodes[[col]]
break
}
}
}
if (is.null(clusters) && !is.null(x$node_groups)) {
ng <- x$node_groups
cluster_col <- intersect(c("cluster", "group", "layer"), names(ng))
if (length(cluster_col) > 0) {
clusters <- ng[[cluster_col[1]]]
}
}
if (is.null(clusters)) {
stop("No clusters found in cograph_network. ",
"Add a 'clusters' column to nodes or provide clusters argument.",
call. = FALSE)
}
clusters
}
#' Build node-to-cluster lookup from cluster specification
#' @keywords internal
.build_cluster_lookup <- function(clusters, all_nodes) {
if (is.data.frame(clusters)) { # nocov start
# Defensive: .normalize_clusters converts df to list before this is called
stopifnot(ncol(clusters) >= 2)
nodes <- as.character(clusters[[1]])
groups <- as.character(clusters[[2]])
clusters <- split(nodes, groups)
} # nocov end
if (is.list(clusters) && !is.data.frame(clusters)) {
# Named list: cluster_name -> node vector
lookup <- character(0)
for (cl_name in names(clusters)) {
nodes <- clusters[[cl_name]]
lookup[nodes] <- cl_name
}
# Verify all nodes are mapped
unmapped <- setdiff(all_nodes, names(lookup))
if (length(unmapped) > 0) {
stop("Unmapped nodes: ",
paste(utils::head(unmapped, 5), collapse = ", "),
call. = FALSE)
}
return(lookup)
}
if (is.character(clusters) || is.factor(clusters)) {
clusters <- as.character(clusters)
if (length(clusters) != length(all_nodes)) {
stop("Membership vector length (", length(clusters),
") must equal number of unique nodes (", length(all_nodes), ")",
call. = FALSE)
}
lookup <- setNames(clusters, all_nodes)
return(lookup)
}
if (is.numeric(clusters) || is.integer(clusters)) {
if (length(clusters) != length(all_nodes)) {
stop("Membership vector length (", length(clusters),
") must equal number of unique nodes (", length(all_nodes), ")",
call. = FALSE)
}
lookup <- setNames(as.character(clusters), all_nodes)
return(lookup)
}
stop("clusters must be a named list, character/numeric vector, or column name",
call. = FALSE)
}
#' Build cluster_summary from transition vectors
#' @keywords internal
.build_from_transitions <- function(from_nodes, to_nodes, weights,
cluster_lookup, cluster_list,
method, type, directed,
compute_within, data = NULL) {
# Sort clusters alphabetically (TNA convention)
cluster_list <- cluster_list[order(names(cluster_list))]
cluster_names <- names(cluster_list)
n_clusters <- length(cluster_names)
# Recode to cluster labels
from_clusters <- cluster_lookup[from_nodes]
to_clusters <- cluster_lookup[to_nodes]
# ---- Macro (cluster-level) matrix (includes diagonal = per-cluster loops) ----
between_raw <- matrix(0, n_clusters, n_clusters,
dimnames = list(cluster_names, cluster_names))
# Include ALL transitions -- node-level self-loops (A->A) are valid
# cluster-level self-loops (e.g. discuss->discuss = Social->Social)
b_from <- from_clusters
b_to <- to_clusters
b_w <- weights
if (length(b_from) > 0) {
# Build pair keys and aggregate
pair_keys <- paste(b_from, b_to, sep = "\t")
names(b_w) <- pair_keys
agg_vals <- tapply(b_w, pair_keys, function(w) {
n_possible <- NULL
if (method == "density") {
parts <- strsplit(names(w)[1], "\t")[[1]]
n_i <- length(cluster_list[[parts[1]]])
n_j <- length(cluster_list[[parts[2]]])
n_possible <- n_i * n_j
}
aggregate_weights(w, method, n_possible)
})
for (key in names(agg_vals)) {
parts <- strsplit(key, "\t")[[1]]
between_raw[parts[1], parts[2]] <- agg_vals[[key]]
}
}
# Process macro weights
between_weights <- .process_weights(between_raw, type, directed)
# Compute inits from column sums
col_sums <- colSums(between_raw)
total <- sum(col_sums)
if (total > 0) {
between_inits <- col_sums / total
} else {
between_inits <- rep(1 / n_clusters, n_clusters)
}
names(between_inits) <- cluster_names
# Preserve original data as-is (no recoding or filtering)
between <- list(
weights = between_weights,
inits = between_inits,
labels = cluster_names,
data = data
)
# ---- Per-cluster matrices ----
cl_data <- NULL
if (isTRUE(compute_within)) {
# Filter intra-cluster transitions (same cluster, including self-loops)
is_intra <- from_clusters == to_clusters
w_from <- from_nodes[is_intra]
w_to <- to_nodes[is_intra]
w_w <- weights[is_intra]
cl_data <- lapply(seq_len(n_clusters), function(i) {
cl_name <- cluster_names[i]
cl_nodes <- cluster_list[[cl_name]]
n_i <- length(cl_nodes)
if (n_i <= 1) {
# Single node: build matrix from self-loop transitions
in_cluster <- w_from %in% cl_nodes & w_to %in% cl_nodes
self_w <- w_w[in_cluster]
self_val <- if (length(self_w) > 0) {
aggregate_weights(self_w, method)
} else {
0
}
cl_raw <- matrix(self_val, 1, 1,
dimnames = list(cl_nodes, cl_nodes))
cl_weights_i <- .process_weights(cl_raw, type, directed)
cl_inits_i <- setNames(1, cl_nodes)
} else {
# Filter transitions for this cluster (self-loops preserved)
in_cluster <- w_from %in% cl_nodes & w_to %in% cl_nodes
keep <- in_cluster
cf <- w_from[keep]
ct <- w_to[keep]
cw <- w_w[keep]
cl_raw <- matrix(0, n_i, n_i,
dimnames = list(cl_nodes, cl_nodes))
if (length(cf) > 0) {
pair_keys <- paste(cf, ct, sep = "\t")
agg_vals <- tapply(cw, pair_keys, function(w) {
aggregate_weights(w, method)
})
for (key in names(agg_vals)) {
parts <- strsplit(key, "\t")[[1]]
cl_raw[parts[1], parts[2]] <- agg_vals[[key]]
}
}
cl_weights_i <- .process_weights(cl_raw, type, directed)
col_sums_w <- colSums(cl_raw, na.rm = TRUE)
total_w <- sum(col_sums_w, na.rm = TRUE)
cl_inits_i <- if (!is.na(total_w) && total_w > 0) {
col_sums_w / total_w
} else {
rep(1 / n_i, n_i)
}
names(cl_inits_i) <- cl_nodes
}
list(
weights = cl_weights_i,
inits = cl_inits_i,
labels = cl_nodes,
data = data
)
})
names(cl_data) <- cluster_names
}
# ---- Edges data.frame ----
edge_type <- ifelse(from_clusters == to_clusters, "within", "between")
edges <- data.frame(
from = from_nodes,
to = to_nodes,
weight = weights,
cluster_from = unname(from_clusters),
cluster_to = unname(to_clusters),
type = edge_type,
stringsAsFactors = FALSE
)
# ---- Assemble result ----
all_nodes <- sort(unique(c(from_nodes, to_nodes)))
n_nodes <- length(all_nodes)
structure(
list(
macro = between,
clusters = cl_data,
edges = edges,
data = data,
cluster_members = cluster_list,
meta = list(
type = type,
method = method,
directed = directed,
n_nodes = n_nodes,
n_clusters = n_clusters,
cluster_sizes = vapply(cluster_list, length, integer(1)),
source = "transitions"
)
),
class = "mcml"
)
}
#' Build MCML from edge list data.frame
#' @keywords internal
.build_mcml_edgelist <- function(df, clusters, method, type,
directed, compute_within) {
col_names <- tolower(names(df))
# Detect from/to columns
from_col <- which(col_names %in% c("from", "source", "src",
"v1", "node1", "i"))[1]
if (is.na(from_col)) from_col <- 1L
to_col <- which(col_names %in% c("to", "target", "tgt",
"v2", "node2", "j"))[1]
if (is.na(to_col)) to_col <- 2L
# Detect weight column
weight_col <- which(col_names %in% c("weight", "w", "value", "strength"))[1]
has_weight <- !is.na(weight_col)
from_vals <- as.character(df[[from_col]])
to_vals <- as.character(df[[to_col]])
weights <- if (has_weight) as.numeric(df[[weight_col]]) else rep(1, nrow(df))
# Remove rows with NA in from/to
valid <- !is.na(from_vals) & !is.na(to_vals)
from_vals <- from_vals[valid]
to_vals <- to_vals[valid]
weights <- weights[valid]
all_nodes <- sort(unique(c(from_vals, to_vals)))
# Handle clusters parameter
if (is.character(clusters) && length(clusters) == 1 &&
clusters %in% names(df)) {
# Column name: build lookup from both from+group and to+group
group_col <- df[[clusters]]
group_col <- as.character(group_col[valid])
# Build mapping from from-side
from_map <- setNames(group_col, from_vals)
# Build mapping from to-side
to_map <- setNames(group_col, to_vals)
# Merge (from takes priority if conflicting, but shouldn't)
full_map <- c(to_map, from_map)
# Keep unique node -> cluster mapping
full_map <- full_map[!duplicated(names(full_map))]
# Build cluster_list
cluster_list <- split(names(full_map), unname(full_map))
cluster_list <- lapply(cluster_list, sort)
cluster_lookup <- full_map
# Re-derive all_nodes from the lookup
all_nodes <- sort(names(cluster_lookup))
} else {
# List or vector clusters
if (is.null(clusters)) {
stop("clusters argument is required for data.frame input", call. = FALSE)
}
if (is.list(clusters) && !is.data.frame(clusters)) {
cluster_list <- clusters
} else {
# Membership vector
cluster_list <- .normalize_clusters(clusters, all_nodes)
}
cluster_lookup <- .build_cluster_lookup(cluster_list, all_nodes)
}
.build_from_transitions(from_vals, to_vals, weights,
cluster_lookup, cluster_list,
method, type, directed, compute_within,
data = df)
}
#' Build MCML from sequence data.frame
#' @keywords internal
.build_mcml_sequence <- function(df, clusters, method, type,
directed, compute_within) {
if (is.matrix(df)) df <- as.data.frame(df, stringsAsFactors = FALSE)
stopifnot(is.data.frame(df))
nc <- ncol(df)
if (nc < 2) {
stop("Sequence data must have at least 2 columns (time steps)",
call. = FALSE)
}
# Extract consecutive pairs: (col[t], col[t+1]) for all rows
pairs <- lapply(seq_len(nc - 1), function(t) {
from_t <- as.character(df[[t]])
to_t <- as.character(df[[t + 1]])
data.frame(from = from_t, to = to_t, stringsAsFactors = FALSE)
})
pairs <- do.call(rbind, pairs)
# Remove NA pairs
valid <- !is.na(pairs$from) & !is.na(pairs$to)
from_vals <- pairs$from[valid]
to_vals <- pairs$to[valid]
weights <- rep(1, length(from_vals))
all_nodes <- sort(unique(c(from_vals, to_vals)))
if (is.null(clusters)) {
stop("clusters argument is required for sequence data", call. = FALSE)
}
if (is.list(clusters) && !is.data.frame(clusters)) {
cluster_list <- clusters
} else {
cluster_list <- .normalize_clusters(clusters, all_nodes)
}
cluster_lookup <- .build_cluster_lookup(cluster_list, all_nodes)
.build_from_transitions(from_vals, to_vals, weights,
cluster_lookup, cluster_list,
method, type, directed, compute_within,
data = df)
}
#' Process weights based on type
#' @keywords internal
.process_weights <- function(raw_weights, type, directed = TRUE) {
if (type == "raw" || type == "frequency") {
return(raw_weights)
}
if (type == "cooccurrence") {
# Symmetrize
return((raw_weights + t(raw_weights)) / 2)
}
if (type == "tna" || type == "semi_markov") {
# Row-normalize so rows sum to 1
rs <- rowSums(raw_weights, na.rm = TRUE)
processed <- raw_weights / ifelse(rs == 0 | is.na(rs), 1, rs)
processed[is.na(processed)] <- 0
return(processed)
}
# Default: return as-is
raw_weights # nocov
}
#' Convert cluster_summary to tna Objects
#'
#' Converts a \code{cluster_summary} object to proper tna objects that can be
#' used with all functions from the tna package. Creates a macro (cluster-level)
#' tna model and per-cluster tna models (internal transitions within each
#' cluster), returned as a flat \code{group_tna} object.
#'
#' This is the final step in the MCML workflow, enabling full integration with
#' the tna package for centrality analysis, bootstrap validation, permutation
#' tests, and visualization.
#'
#' @param x A \code{cluster_summary} object created by \code{\link{cluster_summary}}.
#' The cluster_summary should typically be created with \code{type = "tna"} to
#' ensure row-normalized transition probabilities. If created with
#' \code{type = "raw"}, the raw counts will be passed to \code{tna::tna()}
#' which will normalize them.
#'
#' @return A \code{group_tna} object (S3 class) -- a flat named list of tna
#' objects. The first element is named \code{"macro"} and represents the
#' cluster-level transitions. Subsequent elements are named by cluster name
#' and represent internal transitions within each cluster.
#' \describe{
#' \item{macro}{A tna object representing cluster-level transitions.
#' Contains \code{$weights} (k x k transition matrix), \code{$inits}
#' (initial distribution), and \code{$labels} (cluster names).
#' Use this for analyzing how learners/entities move between high-level
#' groups or phases.}
#' \item{<cluster_name>}{Per-cluster tna objects, one per cluster. Each tna
#' object represents internal transitions within that cluster. Contains
#' \code{$weights} (n_i x n_i matrix), \code{$inits} (initial distribution),
#' and \code{$labels} (node labels). Clusters with single nodes or zero-row
#' nodes are excluded (tna requires positive row sums).}
#' }
#'
#' @details
#' ## Requirements
#'
#' The tna package must be installed. If not available, the function throws
#' an error with installation instructions.
#'
#' ## Workflow
#'
#' \preformatted{
#' # Full MCML workflow
#' net <- cograph(edges, nodes = nodes)
#' net$nodes$clusters <- group_assignments
#' cs <- cluster_summary(net, type = "tna")
#' tna_models <- as_tna(cs)
#'
#' # Now use tna package functions
#' plot(tna_models$macro)
#' tna::centralities(tna_models$macro)
#' tna::bootstrap(tna_models$macro, iter = 1000)
#'
#' # Analyze per-cluster patterns
#' plot(tna_models$ClusterA)
#' tna::centralities(tna_models$ClusterA)
#' }
#'
#' ## Excluded Clusters
#'
#' A per-cluster tna cannot be created when:
#' \itemize{
#' \item The cluster has only 1 node (no internal transitions possible)
#' \item Some nodes in the cluster have no outgoing edges (row sums to 0)
#' }
#'
#' These clusters are silently excluded. The macro (cluster-level)
#' model still includes all clusters.
#'
#' @export
#' @seealso
#' \code{\link{cluster_summary}} to create the input object,
#' \code{\link{plot_mcml}} for visualization without conversion,
#' \code{tna::tna} for the underlying tna constructor
#'
#' @examples
#' # -----------------------------------------------------
#' # Basic usage
#' # -----------------------------------------------------
#' mat <- matrix(runif(36), 6, 6)
#' diag(mat) <- 0
#' rownames(mat) <- colnames(mat) <- LETTERS[1:6]
#'
#' clusters <- list(
#' G1 = c("A", "B"),
#' G2 = c("C", "D"),
#' G3 = c("E", "F")
#' )
#'
#' cs <- cluster_summary(mat, clusters, type = "tna")
#' tna_models <- as_tna(cs)
#'
#' # Print summary
#' tna_models
#'
#' # -----------------------------------------------------
#' # Access components
#' # -----------------------------------------------------
#' # Macro (cluster-level) tna
#' tna_models$macro
#' tna_models$macro$weights # 3x3 transition matrix
#' tna_models$macro$inits # Initial distribution
#' tna_models$macro$labels # c("G1", "G2", "G3")
#'
#' # Per-cluster tnas
#' names(tna_models) # "macro", "G1", "G2", "G3"
#' tna_models$G1 # tna for cluster G1
#' tna_models$G1$weights # 2x2 matrix (A, B)
#'
#' # -----------------------------------------------------
#' # Use with tna package (requires tna)
#' # -----------------------------------------------------
#' if (requireNamespace("tna", quietly = TRUE)) {
#' # Plot
#' plot(tna_models$macro)
#' plot(tna_models$G1)
#'
#' # Centrality analysis
#' tna::centralities(tna_models$macro)
#' tna::centralities(tna_models$G1)
#' tna::centralities(tna_models$G2)
#' }
#'
#' \dontrun{
#' # Bootstrap requires a tna object built from raw sequence data (has $data)
#' # as_tna() returns weight-matrix-based tnas which don't satisfy that requirement
#' if (requireNamespace("tna", quietly = TRUE)) {
#' boot <- tna::bootstrap(tna_models$macro, iter = 1000)
#' summary(boot)
#' }
#' }
#'
#' # -----------------------------------------------------
#' # Check which within-cluster models were created
#' # -----------------------------------------------------
#' cs <- cluster_summary(mat, clusters, type = "tna")
#' tna_models <- as_tna(cs)
#'
#' # All cluster names
#' names(cs$cluster_members)
#'
#' # Clusters with valid per-cluster models
#' setdiff(names(tna_models), "macro")
#'
#' # Clusters excluded (single node or zero rows)
#' setdiff(names(cs$cluster_members), names(tna_models))
as_tna <- function(x) {
UseMethod("as_tna")
}
#' @rdname as_tna
#' @return A \code{group_tna} object (flat list of tna objects: macro + per-cluster).
#' @export
as_tna.cluster_summary <- function(x) {
if (!requireNamespace("tna", quietly = TRUE)) {
stop("Package 'tna' is required for as_tna()", call. = FALSE) # nocov
}
# Macro (cluster-level) tna
between_tna <- tna::tna(x$macro$weights, inits = x$macro$inits)
between_tna$data <- x$macro$data
# Per-cluster tnas
within_tnas <- lapply(names(x$clusters), function(cl) {
w <- x$clusters[[cl]]$weights
inits <- x$clusters[[cl]]$inits
# Skip if matrix has rows that sum to 0 (tna requires positive rows)
if (any(rowSums(w) == 0)) {
return(NULL)
}
obj <- tna::tna(w, inits = inits)
obj$data <- x$clusters[[cl]]$data
obj
})
names(within_tnas) <- names(x$clusters)
# Remove NULL entries
within_tnas <- within_tnas[!vapply(within_tnas, is.null, logical(1))]
# Combine macro + all cluster tnas into flat group_tna
all_tnas <- c(list(macro = between_tna), within_tnas)
class(all_tnas) <- "group_tna"
all_tnas
}
#' @rdname as_tna
#' @return A \code{group_tna} object (flat list of tna objects: macro + per-cluster).
#' @export
as_tna.mcml <- function(x) {
if (!requireNamespace("tna", quietly = TRUE)) {
stop("Package 'tna' is required for as_tna()", call. = FALSE) # nocov
}
# Macro (cluster-level) tna
between_tna <- tna::tna(x$macro$weights, inits = x$macro$inits)
between_tna$data <- x$macro$data
# Per-cluster tnas
within_tnas <- list()
if (!is.null(x$clusters)) {
within_tnas <- lapply(names(x$clusters), function(cl) {
w <- x$clusters[[cl]]$weights
inits <- x$clusters[[cl]]$inits
# Skip if matrix has rows that sum to 0 (tna requires positive rows)
if (any(rowSums(w) == 0)) { # nocov start
return(NULL)
} # nocov end
obj <- tna::tna(w, inits = inits)
obj$data <- x$clusters[[cl]]$data
obj
})
names(within_tnas) <- names(x$clusters)
within_tnas <- within_tnas[!vapply(within_tnas, is.null, logical(1))]
}
all_tnas <- c(list(macro = between_tna), within_tnas)
class(all_tnas) <- "group_tna"
all_tnas
}
#' @rdname as_tna
#' @return A \code{tna} object constructed from the input.
#' @export
as_tna.default <- function(x) {
if (inherits(x, "tna")) {
return(x)
}
stop("Cannot convert object of class '", class(x)[1], "' to tna", call. = FALSE)
}
# print.cluster_tna removed -- as_tna() now returns group_tna directly,
# which has its own print method via the tna package.
# ==============================================================================
# 8. as_mcml -- Convert to mcml
# ==============================================================================
#' Convert to mcml
#'
#' Convert various objects to the \code{mcml} class -- a clean, tna-independent
#' representation of a multilayer cluster network.
#'
#' @param x Object to convert.
#' @param ... Additional arguments passed to methods.
#' @return An \code{mcml} object with components \code{macro}, \code{clusters},
#' \code{cluster_members}, and \code{meta}.
#' @seealso \code{\link{build_mcml}}, \code{\link{as_tna}}
#' @export
#'
#' @examples
#' # From cluster_summary
#' mat <- matrix(c(0.5, 0.2, 0.3,
#' 0.1, 0.6, 0.3,
#' 0.4, 0.1, 0.5), 3, 3, byrow = TRUE,
#' dimnames = list(c("A", "B", "C"), c("A", "B", "C")))
#' clusters <- list(G1 = c("A", "B"), G2 = c("C"))
#' cs <- cluster_summary(mat, clusters, type = "tna")
#' m <- as_mcml(cs)
#' m$macro$weights
#'
#' \dontrun{
#' # From group_tna with cluster assignments (requires tna + Nestimate)
#' seqs <- data.frame(T1 = c("A", "B", "A"), T2 = c("B", "A", "B"))
#' mod <- tna::tna(seqs)
#' cl <- Nestimate::cluster_data(mod, k = 2)
#' gt <- tna::group_model(cl)
#' m <- as_mcml(gt, clusters = cl$assignments)
#' }
as_mcml <- function(x, ...) {
UseMethod("as_mcml")
}
#' @rdname as_mcml
#' @return An \code{mcml} object.
#' @export
as_mcml.cluster_summary <- function(x, ...) {
.as_mcml(x)
}
#' @rdname as_mcml
#' @param clusters Integer or character vector of row-to-group assignments.
#' Required when the \code{group_tna} has the same labels across all groups
#' (row-level clustering from \code{tna::group_model(cluster_data(...))}).
#' @param method Aggregation method for macro weights (default \code{"sum"}).
#' @param type Transition type (default \code{"tna"}).
#' @param directed Logical; whether the network is directed (default \code{TRUE}).
#' @return An \code{mcml} object. When \code{clusters} is provided,
#' \code{macro$data} contains the cluster assignments and \code{macro$weights}
#' is \code{NULL} (the macro is the sequence of clusters, not a summary).
#' @export
as_mcml.group_tna <- function(x, clusters = NULL, method = "sum",
type = "tna", directed = TRUE, ...) {
.group_tna_to_mcml(x, clusters = clusters, method = method,
type = type, directed = directed,
compute_within = TRUE)
}
#' @rdname as_mcml
#' @return The input \code{mcml} object unchanged.
#' @export
as_mcml.mcml <- function(x, ...) {
x
}
#' @rdname as_mcml
#' @export
as_mcml.default <- function(x, ...) {
if (inherits(x, "mcml")) return(x) # nocov
stop("Cannot convert object of class '", class(x)[1], "' to mcml. ",
"Use build_mcml() for raw data inputs.", call. = FALSE)
}
#' Normalize cluster specification to list format
#' @keywords internal
.normalize_clusters <- function(clusters, node_names) {
if (is.data.frame(clusters)) {
# Data frame with node and group columns
stopifnot(ncol(clusters) >= 2)
nodes <- as.character(clusters[[1]])
groups <- as.character(clusters[[2]])
clusters <- split(nodes, groups)
}
if (is.list(clusters)) {
# Already a list - validate node names
all_nodes <- unlist(clusters)
if (!all(all_nodes %in% node_names)) {
missing <- setdiff(all_nodes, node_names)
stop("Unknown nodes in clusters: ",
paste(utils::head(missing, 5), collapse = ", "), call. = FALSE)
}
return(clusters)
}
if (is.vector(clusters) && (is.numeric(clusters) || is.integer(clusters))) {
# Membership vector
if (length(clusters) != length(node_names)) {
stop("Membership vector length (", length(clusters),
") must equal number of nodes (", length(node_names), ")",
call. = FALSE)
}
# Convert to list
unique_clusters <- sort(unique(clusters))
cluster_list <- lapply(unique_clusters, function(k) {
node_names[clusters == k]
})
names(cluster_list) <- as.character(unique_clusters)
return(cluster_list)
}
if (is.factor(clusters) || is.character(clusters)) {
# Named membership
if (length(clusters) != length(node_names)) {
stop("Membership vector length must equal number of nodes", call. = FALSE)
}
clusters <- as.character(clusters)
unique_clusters <- unique(clusters)
cluster_list <- lapply(unique_clusters, function(k) {
node_names[clusters == k]
})
names(cluster_list) <- unique_clusters
return(cluster_list)
}
stop("clusters must be a list, numeric vector, or factor", call. = FALSE)
}
# ==============================================================================
# 3. Cluster Quality Metrics
# ==============================================================================
#' Cluster Quality Metrics
#'
#' Computes per-cluster and global quality metrics for network partitioning.
#' Supports both binary and weighted networks.
#'
#' @param x Adjacency matrix
#' @param clusters Cluster specification (list or membership vector)
#' @param weighted Logical; if TRUE, use edge weights; if FALSE, binarize
#' @param directed Logical; if TRUE, treat as directed network
#' @return A `cluster_quality` object with:
#' \item{per_cluster}{Data frame with per-cluster metrics}
#' \item{global}{List of global metrics (modularity, coverage)}
#' @export
#' @examples
#' mat <- matrix(runif(100), 10, 10)
#' diag(mat) <- 0
#' clusters <- c(1,1,1,2,2,2,3,3,3,3)
#'
#' q <- cluster_quality(mat, clusters)
#' q$per_cluster # Per-cluster metrics
#' q$global # Modularity, coverage
cluster_quality <- function(x,
clusters,
weighted = TRUE,
directed = TRUE) {
# Validate and prepare
if (!is.matrix(x) || !is.numeric(x)) {
stop("x must be a numeric matrix", call. = FALSE)
}
n <- nrow(x)
node_names <- rownames(x)
if (is.null(node_names)) node_names <- as.character(seq_len(n))
# Normalize clusters
cluster_list <- .normalize_clusters(clusters, node_names)
n_clusters <- length(cluster_list)
# Create membership vector for global metrics
membership <- integer(n)
for (k in seq_along(cluster_list)) {
idx <- match(cluster_list[[k]], node_names)
membership[idx] <- k
}
# Work with weighted or binarized matrix
if (weighted) {
A <- x
} else {
A <- (x > 0) * 1
}
# Total edges/weights
m_total <- sum(A)
if (!directed) m_total <- m_total / 2
# Compute per-cluster metrics
metrics_list <- lapply(seq_along(cluster_list), function(k) {
S <- match(cluster_list[[k]], node_names)
n_S <- length(S)
if (n_S == 0) { # nocov start
return(data.frame(
cluster = k,
n_nodes = 0,
internal_edges = 0,
cut_edges = 0,
internal_density = NA_real_,
avg_internal_degree = NA_real_,
expansion = NA_real_,
cut_ratio = NA_real_,
conductance = NA_real_
))
} # nocov end
# Internal edges/weights (within cluster)
m_S <- sum(A[S, S])
if (!directed) m_S <- m_S / 2
# Cut edges/weights (crossing cluster boundary)
not_S <- setdiff(seq_len(n), S)
if (directed) {
c_S <- sum(A[S, not_S]) + sum(A[not_S, S])
} else {
c_S <- sum(A[S, not_S])
}
# Metrics
max_internal <- n_S * (n_S - 1)
if (!directed) max_internal <- max_internal / 2
internal_density <- if (max_internal > 0) m_S / max_internal else NA_real_
avg_internal_degree <- if (n_S > 0) 2 * m_S / n_S else NA_real_
expansion <- if (n_S > 0) c_S / n_S else NA_real_
max_cut <- n_S * (n - n_S)
cut_ratio <- if (max_cut > 0) c_S / max_cut else NA_real_
vol_S <- 2 * m_S + c_S
conductance <- if (vol_S > 0) c_S / vol_S else NA_real_
data.frame(
cluster = k,
cluster_name = names(cluster_list)[k],
n_nodes = n_S,
internal_edges = m_S,
cut_edges = c_S,
internal_density = internal_density,
avg_internal_degree = avg_internal_degree,
expansion = expansion,
cut_ratio = cut_ratio,
conductance = conductance
)
})
per_cluster <- do.call(rbind, metrics_list)
rownames(per_cluster) <- NULL
# Global metrics
total_internal <- sum(per_cluster$internal_edges)
coverage <- if (m_total > 0) total_internal / m_total else NA_real_
modularity <- .compute_modularity(A, membership, directed)
structure(
list(
per_cluster = per_cluster,
global = list(
modularity = modularity,
coverage = coverage,
n_clusters = n_clusters
)
),
class = "cluster_quality"
)
}
#' @rdname cluster_quality
#' @return See \code{\link{cluster_quality}}.
#' @export
#' @examples
#' mat <- matrix(runif(100), 10, 10)
#' diag(mat) <- 0
#' cqual(mat, c(1,1,1,2,2,2,3,3,3,3))
cqual <- cluster_quality
#' Compute modularity
#' @keywords internal
.compute_modularity <- function(A, membership, directed = TRUE) {
n <- nrow(A)
m <- sum(A)
if (!directed) m <- m / 2
if (m == 0) return(NA_real_)
if (directed) {
k_out <- rowSums(A)
k_in <- colSums(A)
} else {
k <- rowSums(A)
k_out <- k_in <- k
}
Q <- 0
for (i in seq_len(n)) {
for (j in seq_len(n)) {
if (membership[i] == membership[j]) {
expected <- k_out[i] * k_in[j] / m
Q <- Q + (A[i, j] - expected)
}
}
}
Q / m
}
# ==============================================================================
# 3b. Cluster Significance Testing
# ==============================================================================
#' Test Significance of Community Structure
#'
#' Compares observed modularity against a null model distribution to assess
#' whether the detected community structure is statistically significant.
#'
#' @importFrom stats sd pnorm
#'
#' @param x Network input: adjacency matrix, igraph object, or cograph_network.
#' @param communities A communities object (from \code{\link{communities}} or
#' igraph) or a membership vector (integer vector where \code{communities[i]}
#' is the community of node i).
#' @param n_random Number of random networks to generate for the null
#' distribution. Default 100.
#' @param method Null model type:
#' \describe{
#' \item{"configuration"}{Preserves degree sequence (default). More
#' stringent test.}
#' \item{"gnm"}{Erdos-Renyi model with same number of edges. Tests against
#' random baseline.}
#' }
#' @param seed Random seed for reproducibility. Default NULL.
#'
#' @return A \code{cograph_cluster_significance} object with:
#' \describe{
#' \item{observed_modularity}{Modularity of the input communities}
#' \item{null_mean}{Mean modularity of random networks}
#' \item{null_sd}{Standard deviation of null modularity}
#' \item{z_score}{Standardized score: (observed - null_mean) / null_sd}
#' \item{p_value}{One-sided p-value (probability of observing equal or
#' higher modularity by chance)}
#' \item{null_values}{Vector of modularity values from null distribution}
#' \item{method}{Null model method used}
#' \item{n_random}{Number of random networks generated}
#' }
#'
#' @details
#' The test works by:
#' \enumerate{
#' \item Computing the modularity of the provided community structure
#' \item Generating \code{n_random} random networks using the specified null model
#' \item For each random network, detecting communities with Louvain and
#' computing modularity
#' \item Comparing the observed modularity to this null distribution
#' }
#'
#' A significant result (low p-value) indicates that the community structure
#' is stronger than expected by chance for networks with similar properties.
#'
#' @references
#' Reichardt, J., & Bornholdt, S. (2006).
#' Statistical mechanics of community detection.
#' \emph{Physical Review E}, 74, 016110.
#'
#' @export
#' @seealso \code{\link{communities}}, \code{\link{cluster_quality}}
#'
#' @examples
#' \dontrun{
#' if (requireNamespace("igraph", quietly = TRUE)) {
#' g <- igraph::make_graph("Zachary")
#' comm <- community_louvain(g)
#'
#' # Test significance
#' sig <- cluster_significance(g, comm, n_random = 100, seed = 123)
#' print(sig)
#'
#' # Configuration model (stricter test)
#' sig2 <- cluster_significance(g, comm, method = "configuration")
#' }
#' }
cluster_significance <- function(x,
communities,
n_random = 100,
method = c("configuration", "gnm"),
seed = NULL) {
method <- match.arg(method)
if (!is.null(seed)) {
saved_rng <- .save_rng()
on.exit(.restore_rng(saved_rng), add = TRUE)
set.seed(seed)
}
# Convert to igraph
if (inherits(x, "igraph")) {
g <- x
} else if (is.matrix(x)) {
g <- igraph::graph_from_adjacency_matrix(x, weighted = TRUE, mode = "directed")
} else if (inherits(x, "cograph_network")) {
g <- to_igraph(x)
} else {
g <- to_igraph(x)
}
# Get membership vector
if (inherits(communities, "communities") ||
inherits(communities, "cograph_communities")) {
mem <- igraph::membership(communities)
} else if (is.numeric(communities) || is.integer(communities)) {
mem <- as.integer(communities)
} else {
stop("communities must be a communities object or membership vector",
call. = FALSE)
}
# Observed modularity
obs_mod <- igraph::modularity(g, mem)
# Generate null distribution
null_mods <- numeric(n_random)
n_nodes <- igraph::vcount(g)
n_edges <- igraph::ecount(g)
for (i in seq_len(n_random)) {
if (method == "configuration") {
# Configuration model - preserve degree sequence
deg <- igraph::degree(g)
g_null <- tryCatch(
igraph::sample_degseq(deg, method = "configuration"),
error = function(e) { # nocov start
# Fallback to configuration.simple if configuration fails
igraph::sample_degseq(deg, method = "configuration.simple")
} # nocov end
)
} else {
# G(n,m) model - same number of nodes and edges
g_null <- igraph::sample_gnm(n_nodes, n_edges, directed = igraph::is_directed(g))
}
# Detect communities on null graph (use Louvain for speed)
comm_null <- tryCatch(
igraph::cluster_louvain(g_null),
error = function(e) { # nocov start
# If Louvain fails, use fast_greedy
igraph::cluster_fast_greedy(igraph::as_undirected(g_null))
} # nocov end
)
null_mods[i] <- igraph::modularity(comm_null)
}
# Statistics
null_mean <- mean(null_mods)
null_sd <- sd(null_mods)
z_score <- if (null_sd > 0) (obs_mod - null_mean) / null_sd else NA_real_
p_value <- if (!is.na(z_score)) pnorm(z_score, lower.tail = FALSE) else NA_real_
result <- list(
observed_modularity = obs_mod,
null_mean = null_mean,
null_sd = null_sd,
z_score = z_score,
p_value = p_value,
null_values = null_mods,
method = method,
n_random = n_random
)
class(result) <- "cograph_cluster_significance"
result
}
#' @rdname cluster_significance
#' @return See \code{\link{cluster_significance}}.
#' @export
#' @examples
#' if (requireNamespace("igraph", quietly = TRUE)) {
#' g <- igraph::make_graph("Zachary")
#' comm <- community_louvain(g)
#' csig(g, comm, n_random = 20, seed = 1)
#' }
csig <- cluster_significance
#' @noRd
#' @export
print.cograph_cluster_significance <- function(x, ...) {
cat("Cluster Significance Test\n")
cat("=========================\n\n")
cat(" Null model: ", x$method, "(n =", x$n_random, ")\n")
cat(" Observed modularity: ", round(x$observed_modularity, 4), "\n")
cat(" Null mean: ", round(x$null_mean, 4), "\n")
cat(" Null SD: ", round(x$null_sd, 4), "\n")
cat(" Z-score: ", round(x$z_score, 2), "\n")
cat(" P-value: ", format.pval(x$p_value), "\n\n")
if (!is.na(x$p_value)) {
if (x$p_value < 0.001) {
cat(" Conclusion: Highly significant community structure (p < 0.001)\n")
} else if (x$p_value < 0.01) {
cat(" Conclusion: Very significant community structure (p < 0.01)\n")
} else if (x$p_value < 0.05) {
cat(" Conclusion: Significant community structure (p < 0.05)\n")
} else {
cat(" Conclusion: No significant community structure (p >= 0.05)\n")
}
}
invisible(x)
}
#' Plot Cluster Significance
#'
#' Creates a histogram of the null distribution with the observed value marked.
#'
#' @param x A \code{cograph_cluster_significance} object
#' @param ... Additional arguments passed to \code{hist}
#' @return Invisibly returns x
#' @examples
#' \dontrun{
#' if (requireNamespace("igraph", quietly = TRUE)) {
#' g <- igraph::make_graph("Zachary")
#' comm <- community_louvain(g)
#' sig <- cluster_significance(g, comm, n_random = 20, seed = 42)
#' plot(sig)
#' }
#' }
#' @export
plot.cograph_cluster_significance <- function(x, ...) {
# Create histogram of null distribution
h <- graphics::hist(
x$null_values,
main = paste0("Cluster Significance Test (", x$method, ")"),
xlab = "Modularity",
col = "lightgray",
border = "white",
...
)
# Add observed value line
graphics::abline(v = x$observed_modularity, col = "#C62828", lwd = 2, lty = 2)
# Add legend
graphics::legend(
"topright",
legend = c(
paste0("Observed (Q = ", round(x$observed_modularity, 3), ")"),
paste0("Null mean (", round(x$null_mean, 3), ")")
),
col = c("#C62828", "black"),
lwd = c(2, 1),
lty = c(2, 1),
bty = "n"
)
# Add null mean line
graphics::abline(v = x$null_mean, col = "black", lwd = 1)
# Add p-value text
graphics::mtext(
paste0("p = ", format.pval(x$p_value)),
side = 3,
adj = 1,
cex = 0.9
)
invisible(x)
}
# ==============================================================================
# 4. Layer Similarity Metrics
# ==============================================================================
#' Layer Similarity
#'
#' Computes similarity between two network layers.
#'
#' @param A1 First adjacency matrix
#' @param A2 Second adjacency matrix
#' @param method Similarity method: "jaccard", "overlap", "hamming", "cosine",
#' "pearson"
#' @return Numeric similarity value
#' @export
#' @examples
#' A1 <- matrix(c(0,1,1,0, 1,0,0,1, 1,0,0,1, 0,1,1,0), 4, 4)
#' A2 <- matrix(c(0,1,0,0, 1,0,1,0, 0,1,0,1, 0,0,1,0), 4, 4)
#'
#' layer_similarity(A1, A2, "jaccard") # Edge overlap
#' layer_similarity(A1, A2, "cosine") # Weight similarity
layer_similarity <- function(A1, A2,
method = c("jaccard", "overlap", "hamming",
"cosine", "pearson")) {
method <- match.arg(method)
if (!identical(dim(A1), dim(A2))) {
stop("Matrices must have identical dimensions", call. = FALSE)
}
E1 <- A1 > 0
E2 <- A2 > 0
switch(method,
"jaccard" = {
intersection <- sum(E1 & E2)
union <- sum(E1 | E2)
if (union == 0) NA_real_ else intersection / union
},
"overlap" = {
intersection <- sum(E1 & E2)
min_size <- min(sum(E1), sum(E2))
if (min_size == 0) NA_real_ else intersection / min_size
},
"hamming" = {
sum(xor(E1, E2))
},
"cosine" = {
dot_product <- sum(A1 * A2)
norm1 <- sqrt(sum(A1^2))
norm2 <- sqrt(sum(A2^2))
if (norm1 == 0 || norm2 == 0) NA_real_ else dot_product / (norm1 * norm2)
},
"pearson" = {
stats::cor(as.vector(A1), as.vector(A2))
}
)
}
#' @rdname layer_similarity
#' @export
lsim <- layer_similarity
#' Pairwise Layer Similarities
#'
#' Computes similarity matrix for all pairs of layers.
#'
#' @param layers List of adjacency matrices (one per layer)
#' @param method Similarity method
#' @return Symmetric matrix of pairwise similarities
#' @export
#' @examples
#' # layers <- list(T1 = mat1, T2 = mat2, T3 = mat3)
#' # layer_similarity_matrix(layers, "cosine")
layer_similarity_matrix <- function(layers,
method = c("jaccard", "overlap", "cosine",
"pearson")) {
method <- match.arg(method)
L <- length(layers)
if (L < 2) {
stop("Need at least 2 layers for comparison", call. = FALSE)
}
layer_names <- names(layers)
if (is.null(layer_names)) layer_names <- paste0("Layer", seq_len(L))
sim_matrix <- matrix(NA_real_, L, L,
dimnames = list(layer_names, layer_names))
for (i in seq_len(L)) {
sim_matrix[i, i] <- 1
for (j in seq_len(i - 1)) {
sim <- layer_similarity(layers[[i]], layers[[j]], method)
sim_matrix[i, j] <- sim
sim_matrix[j, i] <- sim
}
}
sim_matrix
}
#' @rdname layer_similarity_matrix
#' @export
lsim_matrix <- layer_similarity_matrix
#' Degree Correlation Between Layers
#'
#' Measures hub consistency across layers via degree correlation.
#'
#' @param layers List of adjacency matrices
#' @param mode Degree type: "total", "in", "out"
#' @return Correlation matrix between layer degree sequences
#' @examples
#' mat1 <- matrix(c(0, 1, 0, 1, 0, 1, 0, 1, 0), 3, 3)
#' mat2 <- matrix(c(0, 0, 1, 1, 0, 0, 0, 1, 0), 3, 3)
#' layers <- list(L1 = mat1, L2 = mat2)
#' layer_degree_correlation(layers, mode = "total")
#' @export
layer_degree_correlation <- function(layers, mode = c("total", "in", "out")) {
mode <- match.arg(mode)
L <- length(layers)
degrees <- lapply(layers, function(A) {
switch(mode,
"total" = rowSums(A) + colSums(A),
"in" = colSums(A),
"out" = rowSums(A)
)
})
degree_matrix <- do.call(cbind, degrees)
layer_names <- names(layers)
if (is.null(layer_names)) layer_names <- paste0("Layer", seq_len(L))
colnames(degree_matrix) <- layer_names
stats::cor(degree_matrix)
}
#' @rdname layer_degree_correlation
#' @export
ldegcor <- layer_degree_correlation
# ==============================================================================
# 5. Supra-Adjacency Matrix Construction
# ==============================================================================
#' Supra-Adjacency Matrix
#'
#' Builds the supra-adjacency matrix for multilayer networks.
#' Diagonal blocks = intra-layer, off-diagonal = inter-layer.
#'
#' @param layers List of adjacency matrices (same dimensions)
#' @param omega Inter-layer coupling coefficient (scalar or L x L matrix)
#' @param coupling Coupling type: "diagonal", "full", or "custom"
#' @param interlayer_matrices For coupling="custom", list of inter-layer matrices
#' @return Supra-adjacency matrix of dimension (N*L) x (N*L)
#' @export
#' @examples
#' # layers <- list(L1 = mat1, L2 = mat2)
#' # S <- supra_adjacency(layers, omega = 0.5)
#' # dim(S) # (2*n) x (2*n)
supra_adjacency <- function(layers,
omega = 1,
coupling = c("diagonal", "full", "custom"),
interlayer_matrices = NULL) {
coupling <- match.arg(coupling)
L <- length(layers)
if (L < 1) stop("Need at least 1 layer", call. = FALSE)
dims <- vapply(layers, function(A) c(nrow(A), ncol(A)), integer(2))
if (!all(dims[1, ] == dims[1, 1]) || !all(dims[2, ] == dims[2, 1])) {
stop("All layers must have identical dimensions", call. = FALSE)
}
n <- nrow(layers[[1]])
N <- n * L
A_supra <- matrix(0, N, N)
node_names <- rownames(layers[[1]])
if (is.null(node_names)) node_names <- as.character(seq_len(n))
layer_names <- names(layers)
if (is.null(layer_names)) layer_names <- paste0("L", seq_len(L))
supra_names <- paste0(rep(layer_names, each = n), "_", rep(node_names, L))
dimnames(A_supra) <- list(supra_names, supra_names)
# Fill diagonal blocks (intra-layer)
for (a in seq_len(L)) {
idx <- ((a - 1) * n + 1):(a * n)
A_supra[idx, idx] <- layers[[a]]
}
# Fill off-diagonal blocks (inter-layer)
if (L > 1) {
I <- diag(n)
omega_matrix <- if (is.matrix(omega)) {
if (!identical(dim(omega), c(L, L))) {
stop("omega matrix must be L x L", call. = FALSE)
}
omega
} else {
matrix(omega, L, L)
}
for (a in seq_len(L - 1)) {
for (b in (a + 1):L) {
idx_a <- ((a - 1) * n + 1):(a * n)
idx_b <- ((b - 1) * n + 1):(b * n)
interlayer <- switch(coupling,
"diagonal" = omega_matrix[a, b] * I,
"full" = matrix(omega_matrix[a, b], n, n),
"custom" = {
if (is.null(interlayer_matrices)) {
stop("interlayer_matrices required for custom coupling",
call. = FALSE)
}
idx_pair <- which(a == seq_len(L - 1) & b == (a + 1))
if (length(idx_pair) == 1) {
interlayer_matrices[[idx_pair]]
} else {
omega_matrix[a, b] * I
}
}
)
A_supra[idx_a, idx_b] <- interlayer
A_supra[idx_b, idx_a] <- t(interlayer)
}
}
}
structure(
A_supra,
n_nodes = n,
n_layers = L,
node_names = node_names,
layer_names = layer_names,
omega = omega,
coupling = coupling,
class = c("supra_adjacency", "matrix")
)
}
#' @rdname supra_adjacency
#' @export
supra <- supra_adjacency
#' Extract Layer from Supra-Adjacency Matrix
#'
#' @param x Supra-adjacency matrix
#' @param layer Layer index to extract
#' @return Intra-layer adjacency matrix
#' @export
#' @examples
#' L1 <- matrix(c(0,.5,.3,.5,0,.4,.3,.4,0), 3, 3)
#' L2 <- matrix(c(0,.2,.6,.2,0,.1,.6,.1,0), 3, 3)
#' S <- supra_adjacency(list(L1 = L1, L2 = L2), omega = 0.5)
#' supra_layer(S, 1)
supra_layer <- function(x, layer) {
n <- attr(x, "n_nodes")
L <- attr(x, "n_layers")
if (layer < 1 || layer > L) {
stop("layer must be between 1 and ", L, call. = FALSE)
}
idx <- ((layer - 1) * n + 1):(layer * n)
A <- x[idx, idx]
node_names <- attr(x, "node_names")
dimnames(A) <- list(node_names, node_names)
A
}
#' @rdname supra_layer
#' @export
#' @examples
#' L1 <- matrix(c(0,.5,.3,.5,0,.4,.3,.4,0), 3, 3)
#' L2 <- matrix(c(0,.2,.6,.2,0,.1,.6,.1,0), 3, 3)
#' S <- supra_adjacency(list(L1 = L1, L2 = L2), omega = 0.5)
#' extract_layer(S, 2)
extract_layer <- supra_layer
#' Extract Inter-Layer Block
#'
#' @param x Supra-adjacency matrix
#' @param from Source layer index
#' @param to Target layer index
#' @return Inter-layer adjacency matrix
#' @export
#' @examples
#' L1 <- matrix(c(0,.5,.3,.5,0,.4,.3,.4,0), 3, 3)
#' L2 <- matrix(c(0,.2,.6,.2,0,.1,.6,.1,0), 3, 3)
#' S <- supra_adjacency(list(L1 = L1, L2 = L2), omega = 0.5)
#' supra_interlayer(S, 1, 2)
supra_interlayer <- function(x, from, to) {
n <- attr(x, "n_nodes")
L <- attr(x, "n_layers")
if (from < 1 || from > L || to < 1 || to > L) {
stop("layer indices must be between 1 and ", L, call. = FALSE)
}
idx_from <- ((from - 1) * n + 1):(from * n)
idx_to <- ((to - 1) * n + 1):(to * n)
x[idx_from, idx_to]
}
#' @rdname supra_interlayer
#' @export
#' @examples
#' L1 <- matrix(c(0,.5,.3,.5,0,.4,.3,.4,0), 3, 3)
#' L2 <- matrix(c(0,.2,.6,.2,0,.1,.6,.1,0), 3, 3)
#' S <- supra_adjacency(list(L1 = L1, L2 = L2), omega = 0.5)
#' extract_interlayer(S, 1, 2)
extract_interlayer <- supra_interlayer
# ==============================================================================
# 6. Layer Aggregation
# ==============================================================================
#' Aggregate Layers
#'
#' Combines multiple network layers into a single network.
#'
#' @param layers List of adjacency matrices
#' @param method Aggregation: "sum", "mean", "max", "min", "union", "intersection"
#' @param weights Optional layer weights (for weighted sum)
#' @return Aggregated adjacency matrix
#' @export
#' @examples
#' # layers <- list(L1 = mat1, L2 = mat2, L3 = mat3)
#' # aggregate_layers(layers, "sum") # Total
#' # aggregate_layers(layers, "mean") # Average
#' # aggregate_layers(layers, "union") # Any edge
#' # aggregate_layers(layers, "intersection") # All edges
aggregate_layers <- function(layers,
method = c("sum", "mean", "max", "min",
"union", "intersection"),
weights = NULL) {
method <- match.arg(method)
L <- length(layers)
if (L == 0) stop("Need at least 1 layer", call. = FALSE)
if (L == 1) return(layers[[1]])
n <- nrow(layers[[1]])
arr <- array(0, dim = c(n, n, L))
for (l in seq_len(L)) {
arr[, , l] <- layers[[l]]
}
result <- switch(method,
"sum" = {
if (!is.null(weights)) {
if (length(weights) != L) {
stop("weights must have length equal to number of layers",
call. = FALSE)
}
Reduce(`+`, Map(`*`, layers, weights))
} else {
apply(arr, c(1, 2), sum)
}
},
"mean" = apply(arr, c(1, 2), mean),
"max" = apply(arr, c(1, 2), max),
"min" = apply(arr, c(1, 2), min),
"union" = {
result <- matrix(0, n, n)
for (l in seq_len(L)) {
result <- result | (layers[[l]] > 0)
}
result * 1
},
"intersection" = {
result <- matrix(1, n, n)
for (l in seq_len(L)) {
result <- result & (layers[[l]] > 0)
}
result * 1
}
)
dimnames(result) <- dimnames(layers[[1]])
result
}
#' @rdname aggregate_layers
#' @export
lagg <- aggregate_layers
# ==============================================================================
# 7. Verification (igraph compatibility)
# ==============================================================================
#' Verify Against igraph
#'
#' Confirms numerical match with igraph's contract_vertices + simplify.
#'
#' @param x Adjacency matrix
#' @param clusters Cluster specification
#' @param method Aggregation method
#' @param type Normalization type. Defaults to "raw" for igraph compatibility.
#' @return List with comparison results
#' @export
verify_with_igraph <- function(x, clusters, method = "sum", type = "raw") {
if (!requireNamespace("igraph", quietly = TRUE)) { # nocov start
message("igraph package not available for verification")
return(NULL)
} # nocov end
# Use type = "raw" by default since igraph contract+simplify gives raw values
our_result <- cluster_summary(x, clusters, method = method, directed = TRUE,
type = type)
g <- igraph::graph_from_adjacency_matrix(x, weighted = TRUE, mode = "directed")
node_names <- rownames(x)
if (is.null(node_names)) node_names <- as.character(seq_len(nrow(x)))
cluster_list <- .normalize_clusters(clusters, node_names)
membership <- integer(nrow(x))
for (k in seq_along(cluster_list)) {
idx <- match(cluster_list[[k]], node_names)
membership[idx] <- k
}
g_contracted <- igraph::contract(g, membership,
vertex.attr.comb = list(name = "first"))
g_simplified <- igraph::simplify(g_contracted,
edge.attr.comb = list(weight = method))
igraph_result <- igraph::as_adjacency_matrix(g_simplified,
attr = "weight",
sparse = FALSE)
diag(igraph_result) <- 0
# Compare off-diagonal only (diagonal = intra-cluster retention,
# not comparable to igraph's contract+simplify)
our_offdiag <- our_result$macro$weights
diag(our_offdiag) <- 0
matches <- all.equal(our_offdiag, igraph_result,
check.attributes = FALSE, tolerance = 1e-10)
list(
our_result = our_result$macro$weights,
igraph_result = igraph_result,
matches = isTRUE(matches),
difference = if (!isTRUE(matches)) matches else NULL
)
}
#' @rdname verify_with_igraph
#' @export
#' @examples
#' if (requireNamespace("igraph", quietly = TRUE)) {
#' mat <- matrix(runif(100), 10, 10)
#' diag(mat) <- 0
#' rownames(mat) <- colnames(mat) <- LETTERS[1:10]
#' clusters <- c(1,1,1,2,2,2,3,3,3,3)
#' verify_igraph(mat, clusters)
#' }
verify_igraph <- verify_with_igraph
# ==============================================================================
# Print Methods
# ==============================================================================
#' @noRd
#' @export
print.cluster_summary <- function(x, ...) {
cat("Cluster Summary\n")
cat("---------------\n")
n_clusters <- x$meta$n_clusters
n_nodes <- x$meta$n_nodes
cluster_names <- names(x$cluster_members)
cluster_sizes <- x$meta$cluster_sizes
cat("Type:", x$meta$type, "\n")
cat("Method:", x$meta$method, "\n")
cat("Clusters:", n_clusters, "\n")
cat("Nodes:", n_nodes, "\n")
cat("Cluster sizes:", paste(cluster_sizes, collapse = ", "), "\n\n")
# Macro (cluster-level) output
bw <- x$macro$weights
cat("Macro (cluster-level) weights (", nrow(bw), "x", ncol(bw), "):\n", sep = "")
cat(" Inits:", paste(round(x$macro$inits, 3), collapse = ", "), "\n")
if (nrow(bw) <= 6) {
print(round(bw, 3))
} else {
cat(" [showing first 6x6 corner]\n")
print(round(bw[1:6, 1:6], 3))
}
# Per-cluster output
if (!is.null(x$clusters)) {
cat("\nPer-cluster weights:\n")
n_show <- min(3, length(x$clusters))
for (i in seq_len(n_show)) {
cl_name <- names(x$clusters)[i]
cl_mat <- x$clusters[[cl_name]]$weights
cat(" ", cl_name, " (", nrow(cl_mat), " nodes)\n", sep = "")
}
if (length(x$clusters) > 3) {
cat(" ... and", length(x$clusters) - 3, "more clusters\n")
}
} else {
cat("\nPer-cluster: not computed\n")
}
invisible(x)
}
#' @noRd
#' @export
print.mcml <- function(x, ...) {
n_clusters <- x$meta$n_clusters
n_nodes <- x$meta$n_nodes
cluster_sizes <- x$meta$cluster_sizes
cat("MCML Network\n")
cat("============\n")
cat("Type:", x$meta$type, " | Method:", x$meta$method, "\n")
cat("Nodes:", n_nodes, " | Clusters:", n_clusters, "\n")
cat("Transitions:", nrow(x$edges), "\n")
n_between <- sum(x$edges$type == "between")
n_within <- sum(x$edges$type == "within")
cat(" Macro:", n_between, " | Per-cluster:", n_within, "\n\n")
cat("Clusters:\n")
for (cl in names(x$cluster_members)) {
cat(" ", cl, " (", cluster_sizes[cl], "): ",
paste(x$cluster_members[[cl]], collapse = ", "), "\n", sep = "")
}
cat("\nMacro (cluster-level) weights:\n")
print(round(x$macro$weights, 4))
invisible(x)
}
#' @noRd
#' @export
print.cluster_quality <- function(x, ...) {
cat("Cluster Quality Metrics\n")
cat("=======================\n\n")
cat("Global metrics:\n")
cat(" Modularity:", round(x$global$modularity, 4), "\n")
cat(" Coverage: ", round(x$global$coverage, 4), "\n")
cat(" Clusters: ", x$global$n_clusters, "\n\n")
cat("Per-cluster metrics:\n")
print(x$per_cluster, row.names = FALSE)
invisible(x)
}
# ==============================================================================
# 8. Summarize Network
# ==============================================================================
#' Summarize Network by Clusters
#'
#' Creates a summary network where each cluster becomes a single node.
#' Edge weights are aggregated from the original network using the specified
#' method. Returns a cograph_network object ready for plotting.
#'
#' @param x A weight matrix, tna object, or cograph_network.
#' @param cluster_list Cluster specification:
#' \itemize{
#' \item Named list of node vectors (e.g., \code{list(A = c("n1", "n2"), B = c("n3", "n4"))})
#' \item String column name from nodes data (e.g., "clusters", "groups")
#' \item NULL to auto-detect from common column names
#' }
#' @param method Aggregation method for edge weights: "sum", "mean", "max",
#' "min", "median", "density", "geomean". Default "sum".
#' @param directed Logical. Treat network as directed. Default TRUE.
#'
#' @return A cograph_network object with:
#' \itemize{
#' \item One node per cluster (named by cluster)
#' \item Edge weights = aggregated between-cluster weights
#' \item nodes$size = cluster sizes (number of original nodes)
#' }
#'
#' @export
#' @seealso \code{\link{cluster_summary}}, \code{\link{plot_mcml}}
#'
#' @examples
#' # Create a network with clusters
#' mat <- matrix(runif(100), 10, 10)
#' diag(mat) <- 0
#' rownames(mat) <- colnames(mat) <- LETTERS[1:10]
#'
#' # Define clusters
#' clusters <- list(
#' Group1 = c("A", "B", "C"),
#' Group2 = c("D", "E", "F"),
#' Group3 = c("G", "H", "I", "J")
#' )
#'
#' # Create summary network
#' summary_net <- summarize_network(mat, clusters)
#' splot(summary_net)
#'
#' # With cograph_network (auto-detect clusters column)
#' Net <- cograph(mat)
#' Net$nodes$clusters <- rep(c("A", "B", "C"), c(3, 3, 4))
#' summary_net <- summarize_network(Net) # Auto-detects 'clusters'
summarize_network <- function(x,
cluster_list = NULL,
method = c("sum", "mean", "max", "min",
"median", "density", "geomean"),
directed = TRUE) {
method <- match.arg(method)
# Extract weight matrix and nodes data
nodes_df <- NULL
if (inherits(x, "cograph_network")) {
mat <- to_matrix(x)
nodes_df <- get_nodes(x)
lab <- if (!is.null(nodes_df$label)) nodes_df$label else rownames(mat)
} else if (inherits(x, "tna")) {
mat <- x$weights
lab <- x$labels
if (is.null(lab)) lab <- rownames(mat)
} else if (is.matrix(x)) {
mat <- x
lab <- rownames(mat)
if (is.null(lab)) lab <- as.character(seq_len(nrow(mat)))
} else {
stop("x must be a cograph_network, tna object, or matrix", call. = FALSE)
}
# Handle cluster_list specification
if (is.character(cluster_list) && length(cluster_list) == 1) {
# Column name provided
if (is.null(nodes_df)) {
stop("To use a column name for cluster_list, x must be a cograph_network",
call. = FALSE)
}
if (!cluster_list %in% names(nodes_df)) {
stop("Column '", cluster_list, "' not found in nodes. Available: ",
paste(names(nodes_df), collapse = ", "), call. = FALSE)
}
cluster_col <- nodes_df[[cluster_list]]
cluster_list <- split(lab, cluster_col)
} else if (is.null(cluster_list) && !is.null(nodes_df)) {
# Auto-detect from common column names
cluster_cols <- c("clusters", "cluster", "groups", "group", "community", "module")
for (col in cluster_cols) {
if (col %in% names(nodes_df)) {
cluster_col <- nodes_df[[col]]
cluster_list <- split(lab, cluster_col)
message("Using '", col, "' column for clusters")
break
}
}
}
if (is.null(cluster_list)) {
stop("cluster_list required: provide a list, column name, or add a ",
"'clusters'/'groups' column to nodes", call. = FALSE)
}
# Compute cluster summary with raw aggregation (no normalization)
cs <- cluster_summary(mat, cluster_list, method = method, directed = directed,
type = "raw")
# Create cograph_network from macro (cluster-level) matrix
result <- cograph(cs$macro$weights, directed = directed)
# Add cluster sizes to nodes
result$nodes$size <- cs$meta$cluster_sizes[match(result$nodes$label, names(cs$cluster_members))]
result
}
#' @rdname summarize_network
#' @return See \code{\link{summarize_network}}.
#' @export
cluster_network <- summarize_network
#' @rdname summarize_network
#' @return See \code{\link{summarize_network}}.
#' @export
cnet <- summarize_network
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.