Nothing
#' @docType class
#' @title Networks with Unit-Level Attributes (R6 Class)
#' @description
#' The `iglm.data` class is a container for storing, validating, and analyzing
#' unit-level attributes (x_attribute, y_attribute) and connections (z_network).
#' @import R6
#' @import ragg
#' @importFrom igraph graph_from_edgelist layout_with_fr vcount add_vertices V plot.igraph
#' @importFrom grDevices colorRampPalette adjustcolor
#' @importFrom graphics legend plot
#' @importFrom Matrix sparseMatrix spMatrix bdiag diag
#' @export
iglm.data_generator <- R6::R6Class("iglm.data",
private = list(
.x_attribute = NULL,
.y_attribute = NULL,
.z_network = NULL,
.neighborhood = NULL,
.overlap = NULL,
.fix_z_alocal = NULL,
.directed = NULL,
.n_actor = NULL,
.type_x = NULL,
.type_y = NULL,
.scale_x = NULL,
.scale_y = NULL,
.fix_x = NULL,
.fix_z = NULL,
.descriptives = NULL,
.validate = function() {
errors <- character()
# Check scales
if (length(private$.scale_x) != 1 || private$.scale_x <= 0) {
errors <- c(errors, "'scale_x' must be a single positive number.")
}
if (length(private$.scale_y) != 1 || private$.scale_y <= 0) {
errors <- c(errors, "'scale_y' must be a single positive number.")
}
# Check types
valid_types <- c("binomial", "poisson", "normal")
if (!private$.type_x %in% valid_types) {
errors <- c(errors, "type_x must be one of 'binomial', 'poisson', or 'normal'.")
} else if (private$.type_x != "normal" && private$.scale_x != 1) {
warning("type_x is not 'normal', but scale_x is not 1. Setting scale_x to 1.")
private$.scale_x <- 1
}
if (!private$.type_y %in% valid_types) {
errors <- c(errors, "type_y must be one of 'binomial', 'poisson', or 'normal'.")
} else if (private$.type_y != "normal" && private$.scale_y != 1) {
warning("type_y is not 'normal', but scale_y is not 1. Setting scale_y to 1.")
private$.scale_y <- 1
}
# Check attribute lengths
if (length(private$.x_attribute) != private$.n_actor) {
errors <- c(errors, "Length of 'x_attribute' must be equal to 'n_actor'.")
}
if (length(private$.y_attribute) != private$.n_actor) {
errors <- c(errors, "Length of 'y_attribute' must be equal to 'n_actor'.")
}
if (!inherits(private$.descriptives, "list")) {
errors <- c(errors, "'descriptives' must be a list.")
}
# Check attribute value constraints
if (private$.type_x == "binomial" && !all(private$.x_attribute %in% c(0, 1))) {
errors <- c(errors, "For 'binomial' type, 'x_attribute' must be a binary vector.")
}
# browser()
if (private$.type_x == "poisson" && !all(floor(private$.x_attribute) == private$.x_attribute & private$.x_attribute >= 0)) {
errors <- c(errors, "For 'poisson' type, 'x_attribute' must be a vector of non-negative integers.")
}
if (private$.type_y == "binomial" && !all(private$.y_attribute %in% c(0, 1))) {
errors <- c(errors, "For 'binomial' type, 'y_attribute' must be a binary vector.")
}
if (!is.logical(private$.fix_z_alocal)) {
stop("`fix_z_alocal` must be a logical value (TRUE or FALSE).", call. = FALSE)
}
if (private$.type_y == "poisson" && !all(floor(private$.y_attribute) == private$.y_attribute & private$.y_attribute >= 0)) {
errors <- c(errors, "For 'poisson' type, 'y_attribute' must be a vector of non-negative integers.")
}
# Check z_network format
if (!is.matrix(private$.z_network) && !inherits(private$.z_network, "Matrix")) {
errors <- c(errors, "'z_network' must be a matrix or a sparse Matrix object.")
} else {
if (ncol(private$.z_network) == 2) {
if (sum(is.na(private$.z_network)) > 0) {
errors <- c(errors, "'z_network' edge list contains NA values.")
}
if (any(private$.z_network < 1) || any(private$.z_network > private$.n_actor)) {
errors <- c(errors, "'z_network' edge list contains invalid actor indices.")
}
} else if (ncol(private$.z_network) != private$.n_actor) {
errors <- c(errors, "'z_network' must be either an edge list with 2 columns or an adjacency matrix of size n_actor x n_actor.")
}
}
# Check neighborhood format
if (!is.null(private$.neighborhood)) {
if (!is.matrix(private$.neighborhood) && !inherits(private$.neighborhood, "Matrix")) {
errors <- c(errors, "'neighborhood' must be a matrix or a sparse Matrix object.")
} else {
if (sum(is.na(private$.neighborhood)) > 0) {
errors <- c(errors, "'neighborhood' edge list contains NA values.")
}
if (ncol(private$.neighborhood) == 2) {
if (any(private$.neighborhood < 1) || any(private$.neighborhood > private$.n_actor)) {
errors <- c(errors, "'neighborhood' edge list contains invalid actor indices.")
}
} else if (ncol(private$.neighborhood) != private$.n_actor) {
errors <- c(errors, "'neighborhood' must be either an edge list with 2 columns or an adjacency matrix of size n_actor x n_actor.")
}
}
}
# Check directed flag
if (!is.logical(private$.directed) || length(private$.directed) != 1) {
errors <- c(errors, "'directed' must be a single logical value (TRUE or FALSE).")
}
if (!is.logical(private$.fix_z) || length(private$.fix_z) != 1) {
errors <- c(errors, "'fix_z' must be a single logical value (TRUE or FALSE).")
}
if (!is.logical(private$.fix_x) || length(private$.fix_x) != 1) {
errors <- c(errors, "'fix_x' must be a single logical value (TRUE or FALSE).")
}
if (length(errors) > 0) stop(paste(errors, collapse = "\n"))
invisible(self)
}
),
public = list(
#' @description
#' Create a new `iglm.data` object, that includes data on two attributes and one network.
#'
#' @param x_attribute A numeric vector for the first unit-level attribute.
#' @param y_attribute A numeric vector for the second unit-level attribute.
#' @param z_network A matrix representing the network. Can be a 2-column
#' edgelist or a square adjacency matrix.
#' @param neighborhood An optional matrix for the neighborhood representing local dependence.
#' Can be a 2-column edgelist or a square adjacency matrix.
#' A tie in `neighborhood` between actor i and j indicates that j is in the neighborhood of i,
#' implying dependence between the respective actors.
#' @param directed A logical value indicating if `z_network` is directed.
#' If `NA` (default), directedness is inferred from the symmetry of
#' `z_network`.
#' @param n_actor An integer for the number of actors in the system.
#' If `NA` (default), `n_actor` is inferred from the attributes or
#' network matrices.
#' @param type_x Character string for the type of `x_attribute`.
#' Must be one of `"binomial"`, `"poisson"`, or `"normal"`.
#' Default is `"binomial"`.
#' @param type_y Character string for the type of `y_attribute`.
#' Must be one of `"binomial"`, `"poisson"`, or `"normal"`.
#' Default is `"binomial"`.
#' @param scale_x A positive numeric value for scaling (e.g., variance
#' for "normal" type). Default is 1.
#' @param scale_y A positive numeric value for scaling (e.g., variance
#' for "normal" type). Default is 1.
#' @param fix_x Logical. If `TRUE`, the `x_attribute` is treated as fixed
#' during model estimation and simulation. Default is `FALSE`.
#' @param fix_z Logical. If `TRUE`, the `z_network` is treated as fixed
#' during model estimation and simulation. Default is `FALSE`.
#' @param fix_z_alocal Logical. If `TRUE` (default), alocal dyads in the neighborhood are fixed.
#' @param return_neighborhood Logical. If `TRUE` (default) and
#' `neighborhood` is `NULL`, a full neighborhood (all dyads) is
#' generated implying global dependence. If `FALSE`, no neighborhood is set.
#' @param file (character) Optional file path to load a saved `iglm.data` object state.
#' @return A new `iglm.data` object.
initialize = function(x_attribute = NULL, y_attribute = NULL, z_network = NULL,
neighborhood = NULL, directed = NA, n_actor = NA,
type_x = "binomial", type_y = "binomial",
scale_x = 1,
scale_y = 1,
fix_x = FALSE,
fix_z = FALSE,
fix_z_alocal = TRUE,
return_neighborhood = TRUE,
file = NULL) {
# browser()
if (!is.null(file)) {
if (!file.exists(file)) {
stop(paste("File", file, "does not exist."))
}
data_loaded <- readRDS(file)
required_fields <- c(
"x_attribute", "y_attribute", "z_network",
"neighborhood", "directed", "n_actor",
"type_x", "type_y", "scale_x",
"scale_y", "fix_x", "fix_z", "fix_z_alocal"
)
if (!is.list(data_loaded) || !all(required_fields %in% names(data_loaded))) {
stop("File does not contain a valid iglm.data state.", call. = FALSE)
}
x_attribute <- data_loaded$x_attribute
y_attribute <- data_loaded$y_attribute
z_network <- data_loaded$z_network
neighborhood <- data_loaded$neighborhood
directed <- data_loaded$directed
n_actor <- data_loaded$n_actor
type_x <- data_loaded$type_x
type_y <- data_loaded$type_y
scale_x <- data_loaded$scale_x
scale_y <- data_loaded$scale_y
fix_x <- data_loaded$fix_x
fix_z <- data_loaded$fix_z
fix_z_alocal <- data_loaded$fix_z_alocal
}
private$.type_x <- type_x
private$.type_y <- type_y
private$.scale_x <- scale_x
private$.scale_y <- scale_y
private$.fix_x <- fix_x
private$.fix_z <- fix_z
private$.fix_z_alocal <- as.logical(fix_z_alocal)
private$.descriptives <- list()
if (return_neighborhood) {
if (is.null(neighborhood)) {
if (is.na(n_actor)) {
stop("n_actor must be provided if neighborhood is not provided.")
}
neighborhood <- expand.grid(1:n_actor, 1:n_actor)
neighborhood <- neighborhood[neighborhood$Var1 != neighborhood$Var2, ]
neighborhood <- as.matrix(neighborhood)
}
}
if (is.null(z_network)) {
private$.z_network <- matrix(0, nrow = 0, ncol = 2)
} else {
private$.z_network <- z_network
}
if (ncol(private$.z_network) > 2) {
if (!directed) {
private$.z_network[lower.tri(private$.z_network)] <- 0
}
private$.z_network <- which(private$.z_network == 1, arr.ind = T)
}
if (!directed) {
wrong_tmp <- private$.z_network[, 1] > private$.z_network[, 2]
correct_tmp <- private$.z_network[, 1] < private$.z_network[, 2]
private$.z_network <- rbind(
private$.z_network[correct_tmp, c(1, 2)],
private$.z_network[wrong_tmp, c(2, 1)]
)
private$.z_network <- private$.z_network[!duplicated(private$.z_network), ]
private$.z_network <- matrix(private$.z_network, ncol = 2)
} else {
private$.z_network <- matrix(private$.z_network, ncol = 2)
}
if (is.na(n_actor)) {
if (return_neighborhood) {
if (ncol(neighborhood) > 2) {
private$.n_actor <- nrow(neighborhood)
} else {
private$.n_actor <- max(neighborhood)
}
} else if (!is.null(x_attribute)) {
private$.n_actor <- length(x_attribute)
} else if (!is.null(y_attribute)) {
private$.n_actor <- length(y_attribute)
} else if (ncol(z_network) > 2) {
private$.n_actor <- nrow(z_network)
} else {
private$.n_actor <- max(z_network)
}
} else {
private$.n_actor <- n_actor
}
private$.fix_x <- fix_x
private$.fix_z <- fix_z
if (is.na(private$.n_actor)) {
stop("n_actor could not be inferred. Please provide n_actor.")
}
if (is.null(x_attribute) | (length(x_attribute) != private$.n_actor)) {
private$.x_attribute <- numeric(length = private$.n_actor)
} else {
private$.x_attribute <- x_attribute
}
if (is.null(y_attribute) | (length(y_attribute) != private$.n_actor)) {
private$.y_attribute <- numeric(length = private$.n_actor)
} else {
private$.y_attribute <- y_attribute
}
if (is.na(directed)) {
if (ncol(private$.z_network) == 2) {
z_network_tmp <- matrix(0, nrow = private$.n_actor, ncol = private$.n_actor)
z_network_tmp[private$.z_network] <- 1
private$.directed <- !isSymmetric(z_network_tmp)
} else {
private$.directed <- !isSymmetric(private$.z_network)
}
} else {
private$.directed <- directed
}
if (return_neighborhood) {
if (ncol(neighborhood) == 2) {
sp_nb <- spMatrix(
nrow = private$.n_actor, ncol = private$.n_actor,
i = neighborhood[, 1], j = neighborhood[, 2],
x = rep(1, length(neighborhood[, 2]))
)
sp_nb_trans <- sparseMatrix(i = sp_nb@j + 1, j = sp_nb@i + 1, dims = sp_nb@Dim)
overlap <- sp_nb %*% sp_nb_trans
overlap <- as(overlap, "TsparseMatrix")
overlap <- cbind(
overlap@i + 1,
overlap@j + 1
)
private$.overlap <- overlap[overlap[, 1] != overlap[, 2], ]
private$.neighborhood <- neighborhood
} else {
positions <- which(neighborhood == 1, arr.ind = T)
sp_nb <- spMatrix(
nrow = ncol(neighborhood), ncol = ncol(neighborhood),
i = positions[, 1], j = positions[, 2], x = rep(1, length(positions[, 2]))
)
sp_nb_trans <- sparseMatrix(i = sp_nb@j + 1, j = sp_nb@i + 1, dims = sp_nb@Dim)
overlap <- as.matrix(sp_nb %*% sp_nb_trans > 0)
diag(overlap) <- 0
private$.overlap <- which(overlap == 1, arr.ind = T)
private$.neighborhood <- which(neighborhood == 1, arr.ind = T)
}
} else {
private$.overlap <- matrix(0, nrow = 0, ncol = 2)
}
private$.validate()
invisible(self)
},
#' @description
#' Sets the `z_network` of the `iglm.data` object.
#' @param z_network A matrix representing the network. Can be a 2-column
#' edgelist or a square adjacency matrix.
#' @return The `iglm.data` object itself (`self`), invisibly.
set_z_network = function(z_network) {
if (ncol(z_network) > 2) {
if (!private$.directed) {
z_network[lower.tri(z_network)] <- 0
}
z_network <- which(z_network == 1, arr.ind = T)
}
private$.z_network <- z_network
private$.validate()
invisible(self)
},
#' @description
#' Sets the `type_x` of the `iglm.data` object.
#' @param type_x A character string for the type of `x_attribute`.
#' Must be one of `"binomial"`, `"poisson"`, or `"normal"`.
#' @return The `iglm.data` object itself (`self`), invisibly.
set_type_x = function(type_x) {
private$.type_x <- type_x
private$.validate()
invisible(self)
},
#' @description
#' Sets the `type_y` of the `iglm.data` object.
#' @param type_y A character string for the type of `y_attribute`.
#' Must be one of `"binomial"`, `"poisson"`, or `"normal"`.
#' @return The `iglm.data` object itself (`self`), invisibly.
set_type_y = function(type_y) {
private$.type_y <- type_y
private$.validate()
invisible(self)
},
#' @description
#' Sets the `scale_x` of the `iglm.data` object.
#' @param scale_x A positive numeric value for scaling (e.g., variance
#' for "normal" type).
#' @return The `iglm.data` object itself (`self`), invisibly.
set_scale_x = function(scale_x) {
private$.scale_x <- scale_x
private$.validate()
invisible(self)
},
#' @description
#' Sets the `scale_y` of the `iglm.data` object.
#' @param scale_y A positive numeric value for scaling (e.g., variance
#' for "normal" type).
#' @return The `iglm.data` object itself (`self`), invisibly.
set_scale_y = function(scale_y) {
private$.scale_y <- scale_y
private$.validate()
invisible(self)
},
#' @description
#' Sets the `x_attribute` of the `iglm.data` object.
#' @param x_attribute A numeric vector for the first unit-level attribute.
#' @return The `iglm.data` object itself (`self`), invisibly.
set_x_attribute = function(x_attribute) {
private$.x_attribute <- x_attribute
private$.validate()
invisible(self)
},
#' @description
#' Sets the `y_attribute` of the `iglm.data` object.
#' @param y_attribute A numeric vector for the first unit-level attribute.
#' @return The `iglm.data` object itself (`self`), invisibly.
set_y_attribute = function(y_attribute) {
private$.y_attribute <- y_attribute
private$.validate()
invisible(self)
},
#' @description
#' Gathers the current state of the `iglm.data` object into a list.
#' This includes all attributes, network, and configuration
#' details necessary to reconstruct the object later.
#' @return A list containing the current state of the `iglm.data` object.
gather = function() {
data_to_save <- list(
x_attribute = private$.x_attribute,
y_attribute = private$.y_attribute,
z_network = private$.z_network,
neighborhood = private$.neighborhood,
directed = private$.directed,
n_actor = private$.n_actor,
type_x = private$.type_x,
type_y = private$.type_y,
scale_x = private$.scale_x,
scale_y = private$.scale_y,
fix_x = private$.fix_x,
fix_z = private$.fix_z,
fix_z_alocal = private$.fix_z_alocal
)
return(data_to_save)
},
#' @description
#' Sets the option whether alocal edges are fixed or not.
#' @param fix_z_alocal A logical value indicating whether alocal edges should be treated as fixed or not.
set_fix_z_alocal = function(fix_z_alocal) {
if (!is.logical(fix_z_alocal)) {
stop("`fix_z_alocal` must be a logical value (TRUE or FALSE).", call. = FALSE)
}
private$.fix_z_alocal <- fix_z_alocal
private$.validate()
},
#' @description
#' Deletes isolates from the `z_network` and updates the attributes and neighborhood accordingly.
#' Isolates are actors that do not have any connections in the `z_network`. This method identifies such actors, removes them from the attributes and neighborhood, and updates the `z_network` to reflect the new actor indices.
#' @return The `iglm.data` object itself (`self`), invisibly.
delete_isolates = function() {
# browser()
if (ncol(private$.z_network) == 2) {
actors_in_network <- unique(c(private$.z_network[, 1], private$.z_network[, 2]))
isolates <- setdiff(1:private$.n_actor, actors_in_network)
actor_df <- data.frame(
id_old = 1:private$.n_actor,
in_network = 1:private$.n_actor %in% actors_in_network
)
actor_df$id_new <- NA
actor_df$id_new[actor_df$in_network] <- 1:sum(actor_df$in_network)
if (length(isolates) > 0) {
private$.x_attribute <- private$.x_attribute[-isolates]
private$.y_attribute <- private$.y_attribute[-isolates]
private$.n_actor <- length(private$.x_attribute)
private$.z_network <- private$.z_network[!private$.z_network[, 1] %in% isolates & !private$.z_network[, 2] %in% isolates, , drop = FALSE]
private$.z_network[, 1] <- actor_df$id_new[private$.z_network[, 1]]
private$.z_network[, 2] <- actor_df$id_new[private$.z_network[, 2]]
if (!is.null(private$.neighborhood)) {
private$.neighborhood <- private$.neighborhood[!private$.neighborhood[, 1] %in% isolates & !private$.neighborhood[, 2] %in% isolates, , drop = FALSE]
private$.neighborhood[, 1] <- actor_df$id_new[private$.neighborhood[, 1]]
private$.neighborhood[, 2] <- actor_df$id_new[private$.neighborhood[, 2]]
private$.overlap <- private$.overlap[!private$.overlap[, 1] %in% isolates & !private$.overlap[, 2] %in% isolates, , drop = FALSE]
private$.overlap[, 1] <- actor_df$id_new[private$.overlap[, 1]]
private$.overlap[, 2] <- actor_df$id_new[private$.overlap[, 2]]
}
}
}
invisible(self)
},
#' @description
#' Saves the current state of the `iglm.data` object to a specified file path
#' in RDS format. This includes all attributes, network, and configuration
#' details necessary to reconstruct the object later.
#' @param file (character) The file where the object state should be saved. Must have a .rds extension.
#' @return The `iglm.data` object itself (`self`), invisibly.
save = function(file) {
if (missing(file) || !is.character(file) || length(file) != 1) {
stop("A valid 'file' (character string) must be provided.", call. = FALSE)
}
extension <- tools::file_ext(file)
if (tolower(extension) != "rds") {
stop("File extension must be .rds", call. = FALSE)
}
data_to_save <- self$gather()
saveRDS(data_to_save, file = file)
message(paste("Object state saved to:", file))
invisible(self)
},
#' @description
#' Sets the `fix_x` of the `iglm.data` object.
#' @param fix_x A logical value indicating if `x_attribute` is fixed or random.
#' @return The `iglm.data` object itself (`self`), invisibly.
set_fix_x = function(fix_x) {
private$.fix_x <- fix_x
private$.validate()
invisible(self)
},
#' @description
#' Sets the `fix_z` of the `iglm.data` object.
#' @param fix_z A logical value indicating if `z_network` is fixed or random.
#' @return The `iglm.data` object itself (`self`), invisibly.
set_fix_z = function(fix_z) {
private$.fix_z <- fix_z
private$.validate()
invisible(self)
},
#' @description
#' Calculates the density of the `z_network`.
#' @return A numeric value for the network density.
mean_z = function() {
m <- nrow(private$.z_network) / (private$.n_actor * (private$.n_actor - 1) / (2 - private$.directed))
private$.descriptives$density_z <- m
invisible(m)
},
#' @description
#' Calculates the mean of the `x_attribute`.
#' @return A numeric value for the mean of `x_attribute`.
mean_x = function() {
m <- mean(private$.x_attribute)
private$.descriptives$density_x <- m
invisible(m)
},
#' @description
#' Calculates the mean of the `y_attribute`.
#' @return A numeric value for the mean of `y_attribute`.
mean_y = function() {
m <- mean(private$.y_attribute)
private$.descriptives$density_y <- m
invisible(m)
},
#' @description
#' Calculates the distribution of the `x_attribute`.
#' @param value_range (numeric vector) Optional range of values to consider for the distribution. If `NULL` (default), the range is inferred from the data.
#' @param prob (logical) If `TRUE` (default), returns probabilities; if `FALSE`, returns frequencies.
#' @param plot (logical) If `TRUE` (default), plots the distribution using a density plot for continuous data or a bar plot for discrete data.
#' @return A numeric vector representing the distribution of `x_attribute` (invisible).
x_distribution = function(value_range = NULL, prob = TRUE, plot = TRUE) {
if (private$.type_x == "normal") {
tmp_density <- density(private$.x_attribute, from = value_range[1], to = value_range[2])
names(tmp_density$y) <- tmp_density$x
if (plot) {
plot(las = 1,tmp_density,
main = "Density of x_attribute",
xlab = "x_attribute values", ylab = "Density"
)
}
private$.descriptives$x_distribution <- tmp_density$y
} else {
if (is.null(value_range)) {
value_range <- range(private$.x_attribute)
}
info <- factor(as.numeric(private$.x_attribute),
levels = seq(from = value_range[1], to = value_range[2])
)
info <- table(info)
if (sum(info) > 0) {
info <- info / (sum(info) * prob + (!prob))
}
private$.descriptives$x_distribution <- info
if (plot) {
barplot(info,
main = "Distribution of x_attribute",
xlab = "x_attribute values",
ylim = c(0, max(info) * 1.2),
ylab = ifelse(prob, "Probability", "Frequency")
)
}
}
invisible(private$.descriptives$x_distribution)
},
#' @description
#' Calculates the distribution of the `y_attribute`.
#' @param value_range (numeric vector) Optional range of values to consider for the distribution. If `NULL` (default), the range is inferred from the data.
#' @param prob (logical) If `TRUE` (default), returns probabilities; if `FALSE`, returns frequencies.
#' @param plot (logical) If `TRUE` (default), plots the distribution using a density plot for continuous data or a bar plot for discrete data.
#' @return A numeric vector representing the distribution of `y_attribute` (invisible).
y_distribution = function(value_range = NULL, prob = TRUE, plot = TRUE) {
if (is.null(value_range)) {
value_range <- range(private$.y_attribute)
}
if (private$.type_y == "normal") {
tmp_density <- density(private$.y_attribute, from = value_range[1], to = value_range[2])
names(tmp_density$y) <- tmp_density$x
if (plot) {
plot(las = 1,tmp_density,
main = "Density of y_attribute",
xlab = "y_attribute values", ylab = "Density"
)
}
private$.descriptives$y_distribution <- tmp_density$y
} else {
info <- factor(as.numeric(private$.y_attribute),
levels = seq(from = value_range[1], to = value_range[2])
)
info <- table(info)
if (sum(info) > 0) {
info <- info / (sum(info) * prob + (!prob))
}
private$.descriptives$y_distribution <- info
if (plot) {
barplot(info,
main = "Distribution of y_attribute",
ylim = c(0, max(info) * 1.2),
xlab = "y_attribute values", ylab = ifelse(prob, "Probability", "Frequency")
)
}
}
invisible(private$.descriptives$y_distribution)
},
#' @description
#' Calculates the matrix of edgewise shared partners.
#' This is a two-path matrix (e.g., $A A^T$ or $A^T A$).
#'
#' @param type (character) The type of two-path to calculate for directed
#' networks. Ignored if network is undirected.
#' Must be one of:
#' `"OTP"` (Outgoing Two-Path, \eqn{z_{i,j}\, z_{i,h} \, z_{j,h}} ),
#' `"ISP"` (Ingoing Shared Partner, \eqn{z_{i,j}\, z_{h,i} \, z_{j,h}}),
#' `"OSP"` (Outgoing Shared Partner, \eqn{z_{i,j}\, z_{i,h} \, z_{j,h}}),
#' `"ITP"` (Incoming Two-Path, \eqn{z_{i,j}\, z_{h,i} \, z_{j,h}}),
#' `"ALL"` (Any one of the above).
#' Default is `"ALL"`.
#' @return A sparse matrix (`dgCMatrix`) of shared partner counts.
edgewise_shared_partner = function(type = "ALL") {
if (is.null(private$.descriptives$edgewise_shared_partner)) {
private$.descriptives$edgewise_shared_partner <- list()
}
if (!private$.directed) {
res <- self$dyadwise_shared_partner(type = "ALL")
res <- res[private$.z_network]
private$.descriptives$edgewise_shared_partner$ALL <- res
} else {
if (type == "OTP") {
res <- self$dyadwise_shared_partner(type = "OTP")
res <- res[private$.z_network]
private$.descriptives$edgewise_shared_partner$OTP <- res
return(res)
} else if (type == "ISP") {
res <- self$dyadwise_shared_partner(type = "ISP")
res <- res[private$.z_network]
private$.descriptives$edgewise_shared_partner$ISP <- res
} else if (type == "OSP") {
res <- self$dyadwise_shared_partner(type = "OSP")
res <- res[private$.z_network]
private$.descriptives$edgewise_shared_partner$OSP <- res
} else if (type == "ITP") {
res <- self$dyadwise_shared_partner(type = "ITP")
res <- res[private$.z_network]
private$.descriptives$edgewise_shared_partner$ITP <- res
} else if (type == "ALL") {
res <- self$dyadwise_shared_partner(type = "ALL")
res <- res[private$.z_network]
private$.descriptives$edgewise_shared_partner$ALL <- res
} else {
stop("type must be one of 'OTP', 'ISP', 'OSP', 'ITP', or 'ALL'.")
}
}
return(res)
},
#' @description
#' Sets the neighborhood and overlap matrices.
#' @param neighborhood A matrix for a secondary neighborhood.
#' Can be a 2-column edgelist or a square adjacency matrix.
#' @param overlap A matrix for the overlap network.
#' Can be a 2-column edgelist or a square adjacency matrix.
#' @return None. Updates the internal neighborhood and overlap matrices.
set_neighborhood_overlap = function(neighborhood, overlap) {
if (!is.matrix(neighborhood)) {
stop("'neighborhood' must be a matrix or a sparse Matrix object.")
}
if (ncol(neighborhood) > 2) {
private$.neighborhood <- which(neighborhood == 1, arr.ind = T)
} else {
private$.neighborhood <- neighborhood
}
if (!is.matrix(overlap)) {
stop("'overlap' must be a matrix or a sparse Matrix object.")
}
if (ncol(overlap) > 2) {
private$.overlap <- which(overlap == 1, arr.ind = T)
} else {
private$.overlap <- overlap
}
},
#' @description
#' Calculates the matrix of dyadwise shared partners.
#'
#' @param type (character) The type of two-path to calculate for directed
#' networks. Ignored if network is undirected.
#' Must be one of:
#' `"OTP"` (Outgoing Two-Path, \eqn{z_{i,h} \, z_{j,h}} ),
#' `"ISP"` (Ingoing Shared Partner, \eqn{z_{h,i} \, z_{j,h}}),
#' `"OSP"` (Outgoing Shared Partner, \eqn{z_{i,h} \, z_{j,h}}),
#' `"ITP"` (Incoming Two-Path, \eqn{z_{h,i} \, z_{j,h}}),
#' `"ALL"` (Any one of the above).
#' Default is `"ALL"`.
#' @return A sparse matrix (`dgCMatrix`) of shared partner counts.
dyadwise_shared_partner = function(type = "ALL") {
adj_mat <- sparseMatrix(
i = private$.z_network[, 1],
j = private$.z_network[, 2],
symmetric = !private$.directed,
dims = c(private$.n_actor, private$.n_actor)
)
if (is.null(private$.descriptives$dyadwise_shared_partner)) {
private$.descriptives$dyadwise_shared_partner <- list()
}
if (!private$.directed) {
# browser()
res <- Matrix::t(adj_mat) %*% t(adj_mat)
diag(res) <- NA
res[lower.tri(res)] <- NA
private$.descriptives$dyadwise_shared_partner$ALL <- res
} else {
if (type == "OTP") {
res <- adj_mat %*% Matrix::t(adj_mat)
private$.descriptives$dyadwise_shared_partner$OTP <- res
return(res)
} else if (type == "ISP") {
res <- adj_mat %*% adj_mat
private$.descriptives$dyadwise_shared_partner$ISP <- res
} else if (type == "OSP") {
res <- Matrix::t(adj_mat) %*% Matrix::t(adj_mat)
private$.descriptives$dyadwise_shared_partner$OSP <- res
} else if (type == "ITP") {
res <- Matrix::t(adj_mat) %*% adj_mat
private$.descriptives$dyadwise_shared_partner$ITP <- res
} else if (type == "ALL") {
adj_mat_symm <- pmax(adj_mat, t(adj_mat))
res <- Matrix::t(adj_mat_symm) %*% adj_mat_symm
private$.descriptives$dyadwise_shared_partner$ALL <- res
} else {
stop("type must be one of 'OTP', 'ISP', 'OSP', 'ITP', or 'ALL'.")
}
}
return(res)
},
#' @description
#' Calculates the geodesic distance distribution of the symmetrized
#' `z_network`.
#'
#' @param value_range (numeric vector) A vector `c(min, max)` specifying
#' the range of distances to tabulate. If `NULL` (default), the range
#' is inferred from the data.
#' @param plot (logical) If `TRUE`, plots the distribution.
#' @param prob (logical) If `TRUE` (default), returns a probability
#' distribution (proportions). If `FALSE`, returns raw counts.
#' @return A named vector (a `table` object) with the distribution of
#' geodesic distances. Includes `Inf` for unreachable pairs.
geodesic_distances_distribution = function(value_range = NULL, prob = TRUE, plot = TRUE) {
if (is.null(private$.descriptives$geodesic_distances)) {
self$geodesic_distances()
}
D <- private$.descriptives$geodesic_distances
D_vec <- as.vector(D)
if (is.null(value_range)) {
if (length(D_vec[is.finite(D_vec) & D_vec > 0]) > 0) {
value_range <- range(D_vec[is.finite(D_vec) & D_vec > 0])
} else {
value_range <- c(1, 1)
}
}
info_factor <- factor(D_vec,
levels = c(seq(from = value_range[1], to = value_range[2]), Inf)
)
info <- table(info_factor)
if (sum(info) > 0) {
info <- info / (sum(info) * prob + (!prob))
}
private$.descriptives$geodesic_distances_distribution <- info
if (plot) {
barplot(info,
ylim = c(0, max(info) * 1.2),
xlab = "Geodesic Distance", ylab = ifelse(prob, "Proportion", "Count"),
las = 1
)
}
invisible(info)
},
#' @description
#' Calculates the all-pairs geodesic distance matrix for the
#' symmetrized `z_network` using a matrix-based BFS algorithm.
#' @return A sparse matrix (`dgCMatrix`) where `D[i, j]` is the
#' shortest path distance from i to j. `Inf` indicates no path.
#' @importFrom Matrix sparseMatrix t Matrix diag nnzero Diagonal
geodesic_distances = function() {
adj_mat <- Matrix::sparseMatrix(
i = private$.z_network[, 1],
j = private$.z_network[, 2],
dims = c(private$.n_actor, private$.n_actor)
)
# Make symmetric
adj_mat <- pmax(adj_mat, Matrix::t(adj_mat))
D <- Matrix::Matrix(Inf, nrow(adj_mat), nrow(adj_mat), dimnames = dimnames(adj_mat))
# Distance to oneself is 0
Matrix::diag(D) <- 0
F_tmp <- Matrix::Diagonal(nrow(adj_mat))
# k = current path length
k <- 0
# Loop while any search has a non-empty frontier
while (Matrix::nnzero(F_tmp) > 0) {
k <- k + 1
F_next <- (adj_mat %*% F_tmp) > 0
F_new <- F_next & (D == Inf)
D[F_new] <- k
F_tmp <- F_new
}
private$.descriptives$geodesic_distances <- D
return(D)
},
#' @description
#' Calculates the distribution of edgewise shared partners.
#'
#' @param type (character) The type of shared partner matrix to use.
#' See `edgewise_shared_partner` for details. Default is `"ALL"`.
#' @param value_range (numeric vector) A vector `c(min, max)` specifying
#' the range of counts to tabulate. If `NULL` (default), the range
#' is inferred from the data.
#' @param prob (logical) If `TRUE` (default), returns a probability
#' distribution (proportions). If `FALSE`, returns raw counts.
#' @param plot (logical) If `TRUE`, plots the distribution.
#' @return A named vector (a `table` object) with the distribution of
#' shared partner counts.
edgewise_shared_partner_distribution = function(type = "ALL",
value_range = NULL,
prob = TRUE,
plot = TRUE) {
if (!type %in% c("OTP", "ITP", "ISP", "OSP", "ALL")) {
stop("type must be one of 'OTP', 'ISP', 'OSP', 'ITP', or 'ALL'.")
}
if (length(value_range) != 2 & !is.null(value_range)) {
stop("'value_range' must be a numeric vector of length 2.")
}
if (sum(value_range < 0) > 0) {
stop("'value_range' values must be non-negative.")
}
if (is.null(private$.descriptives$edgewise_shared_partner_dist)) {
private$.descriptives$edgewise_shared_partner_dist <- list()
}
if (type == "OTP") {
# Calculate the ESP if not already done so
if (is.null(private$.descriptives$edgewise_shared_partner$OTP)) {
self$edgewise_shared_partner(type = "OTP")
}
info <- private$.descriptives$edgewise_shared_partner$OTP
} else if (type == "ITP") {
if (is.null(private$.descriptives$edgewise_shared_partner$ITP)) {
self$edgewise_shared_partner(type = "ITP")
}
info <- private$.descriptives$edgewise_shared_partner$ITP
} else if (type == "ISP") {
if (is.null(private$.descriptives$edgewise_shared_partner$ISP)) {
self$edgewise_shared_partner(type = "ISP")
}
info <- private$.descriptives$edgewise_shared_partner$ISP
} else if (type == "OSP") {
if (is.null(private$.descriptives$edgewise_shared_partner$OSP)) {
self$edgewise_shared_partner(type = "OSP")
}
info <- private$.descriptives$edgewise_shared_partner$OSP
} else if (type == "ALL") {
if (is.null(private$.descriptives$edgewise_shared_partner$ALL)) {
self$edgewise_shared_partner(type = "ALL")
}
info <- private$.descriptives$edgewise_shared_partner$ALL
}
if (is.null(value_range)) {
value_range <- range(as.numeric(info))
}
info <- factor(as.numeric(info),
levels = seq(from = value_range[1], to = value_range[2])
)
info <- table(info)
# Transform to probability from frequency
if (sum(info) > 0) {
info <- info / (sum(info) * prob + (!prob))
}
if (type == "OTP") {
private$.descriptives$edgewise_shared_partner_distribution$OTP <- info
} else if (type == "ITP") {
private$.descriptives$edgewise_shared_partner_distribution$ITP <- info
} else if (type == "ISP") {
private$.descriptives$edgewise_shared_partner_distribution$ISP <- info
} else if (type == "OSP") {
private$.descriptives$edgewise_shared_partner_distribution$OSP <- info
} else if (type == "ALL") {
private$.descriptives$edgewise_shared_partner_distribution$ALL <- info
}
if (plot) {
barplot(info,
ylim = c(0, max(info) * 1.2),
xlab = paste0("Number of ", type, "- Edgewise Shared Partners"),
ylab = ifelse(prob, "Proportion", "Count"),
las = 1
)
}
invisible(info)
},
#' @description
#' Calculates the distribution of dyadwise shared partners.
#'
#' @param type (character) The type of shared partner matrix to use.
#' See `dyadwise_shared_partner` for details. Default is `"ALL"`.
#' @param value_range (numeric vector) A vector `c(min, max)` specifying
#' the range of counts to tabulate. If `NULL` (default), the range
#' is inferred from the data.
#' @param plot (logical) If `TRUE`, plots the distribution.
#' @param prob (logical) If `TRUE` (default), returns a probability
#' distribution (proportions). If `FALSE`, returns raw counts.
#' @return A named vector (a `table` object) with the distribution of
#' shared partner counts.
dyadwise_shared_partner_distribution = function(type = "ALL",
value_range = NULL,
prob = TRUE, plot = TRUE) {
if (!type %in% c("OTP", "ITP", "ISP", "OSP", "ALL")) {
stop("type must be one of 'OTP', 'ISP', 'OSP', 'ITP', or 'ALL'.")
}
if (length(value_range) != 2 & !is.null(value_range)) {
stop("'value_range' must be a numeric vector of length 2.")
}
if (sum(value_range < 0) > 0) {
stop("'value_range' values must be non-negative.")
}
if (is.null(private$.descriptives$dyadwise_shared_partner_dist)) {
private$.descriptives$dyadwise_shared_partner_dist <- list()
}
if (type == "OTP") {
# Calculate the ESP if not already done so
if (is.null(private$.descriptives$dyadwise_shared_partner$OTP)) {
self$dyadwise_shared_partner(type = "OTP")
}
info <- private$.descriptives$dyadwise_shared_partner$OTP
} else if (type == "ITP") {
if (is.null(private$.descriptives$dyadwise_shared_partner$ITP)) {
self$dyadwise_shared_partner(type = "ITP")
}
info <- private$.descriptives$dyadwise_shared_partner$ITP
} else if (type == "ISP") {
if (is.null(private$.descriptives$dyadwise_shared_partner$ISP)) {
self$dyadwise_shared_partner(type = "ISP")
}
info <- private$.descriptives$dyadwise_shared_partner$ISP
} else if (type == "OSP") {
if (is.null(private$.descriptives$dyadwise_shared_partner$OSP)) {
self$dyadwise_shared_partner(type = "OSP")
}
info <- private$.descriptives$dyadwise_shared_partner$OSP
} else if (type == "ALL") {
if (is.null(private$.descriptives$dyadwise_shared_partner$ALL)) {
self$dyadwise_shared_partner(type = "ALL")
}
info <- private$.descriptives$dyadwise_shared_partner$ALL
}
if (is.null(value_range)) {
value_range <- range(as.numeric(info), na.rm = TRUE)
}
info <- factor(as.numeric(info),
levels = seq(from = value_range[1], to = value_range[2])
)
info <- table(info)
# Transform to probability from frequency
if (sum(info) > 0) {
info <- info / (sum(info) * prob + (!prob))
}
if (type == "OTP") {
private$.descriptives$dyadwise_shared_partner_distribution$OTP <- info
} else if (type == "ITP") {
private$.descriptives$dyadwise_shared_partner_distribution$ITP <- info
} else if (type == "ISP") {
private$.descriptives$dyadwise_shared_partner_distribution$ISP <- info
} else if (type == "OSP") {
private$.descriptives$dyadwise_shared_partner_distribution$OSP <- info
} else if (type == "ALL") {
private$.descriptives$dyadwise_shared_partner_distribution$ALL <- info
}
if (plot) {
barplot(info,
xlab = paste0("Number of ", type, "- Dyadwise Shared Partners"),
ylab = ifelse(prob, "Proportion", "Count"),
las = 1, ylim = c(0, max(info) * 1.2)
)
}
invisible(info)
},
#' @description
#' Calculates the degree distribution of the `z_network`.
#'
#' @param value_range (numeric vector) A vector `c(min, max)` specifying
#' the range of degrees to tabulate. If `NULL` (default), the range
#' is inferred from the data.
#' @param prob (logical) If `TRUE` (default), returns a probability
#' distribution (proportions). If `FALSE`, returns raw counts.
#' @param plot (logical) If `TRUE`, plots the degree distribution.
#' @return If the network is directed, a list containing two `table`
#' objects: `in_degree` and `out_degree`. If undirected, a single
#' `table` object with the degree distribution.
degree_distribution = function(value_range = NULL, prob = TRUE, plot = TRUE) {
if (is.null(private$.descriptives$degree)) {
self$degree()
}
if (is.null(value_range)) {
value_range <- range(unlist(private$.descriptives$degree))
}
if (private$.directed) {
info_in <- factor(private$.descriptives$degree$in_degree_seq,
levels = seq(from = value_range[1], to = value_range[2])
)
info_out <- factor(private$.descriptives$degree$out_degree_seq,
levels = seq(from = value_range[1], to = value_range[2])
)
info_in <- table(info_in)
info_out <- table(info_out)
if (sum(info_in) > 0) {
info_in <- info_in / (sum(info_in) * prob + (!prob))
}
if (sum(info_out) > 0) {
info_out <- info_out / (sum(info_out) * prob + (!prob))
}
info <- list(
in_degree = info_in,
out_degree = info_out
)
private$.descriptives$degree_distribution <- info
} else {
info <- factor(private$.descriptives$degree$degree_seq,
levels = seq(from = value_range[1], to = value_range[2])
)
if (is.null(value_range)) {
value_range <- range(as.numeric(info))
}
info <- table(info)
if (sum(info) > 0) {
info <- info / (sum(info) * prob + (!prob))
}
private$.descriptives$degree_distribution <- info
}
if (plot) {
if (private$.directed) {
barplot(info$in_degree,
xlab = "In-Degree",
ylab = ifelse(prob, "Proportion", "Count"),
las = 1, ylim = c(0, max(info$in_degree) * 1.2)
)
barplot(info$out_degree,
xlab = "Out-Degree",
ylab = ifelse(prob, "Proportion", "Count"),
las = 1, ylim = c(0, max(info$out_degree) * 1.2)
)
} else {
barplot(info,
xlab = "Degree",
ylab = ifelse(prob, "Proportion", "Count"),
las = 1, ylim = c(0, max(info) * 1.2)
)
}
}
invisible(info)
},
#' @description
#' Calculates the degree sequence(s) of the `z_network`.
#'
#' @return If the network is directed, a list containing two vectors:
#' `in_degree_seq` and `out_degree_seq`. If undirected, a single
#' list containing the vector `degree_seq`.
degree = function() {
res <- list()
if (private$.directed) {
if (ncol(private$.z_network) == 2) {
in_degree_seq_res <- table(private$.z_network[, 2])
out_degree_seq_res <- table(private$.z_network[, 1])
in_degree_seq <- numeric(private$.n_actor)
in_degree_seq[as.numeric(names(in_degree_seq_res))] <- in_degree_seq_res
out_degree_seq <- numeric(private$.n_actor)
out_degree_seq[as.numeric(names(out_degree_seq_res))] <- out_degree_seq_res
# in_zero_values = which(! (1:private$.n_actor %in% names(in_degree_seq)))
# in_degree_seq = c(in_degree_seq, rep(0, length(in_zero_values)))
# out_zero_values = which(! (1:private$.n_actor %in% names(out_degree_seq)))
# out_degree_seq = c(in_degree_seq, rep(0, length(out_zero_values)))
} else {
in_degree_seq <- colSums(private$.z_network)
out_degree_seq <- rowSums(private$.z_network)
in_zero_values <- which(in_degree_seq == 0)
out_zero_values <- which(out_degree_seq == 0)
}
res$in_degree_seq <- in_degree_seq
res$out_degree_seq <- out_degree_seq
} else {
if (ncol(private$.z_network) == 2) {
tmp <- table(private$.z_network)
degree_seq <- numeric(private$.n_actor)
degree_seq[as.numeric(names(tmp))] <- tmp
} else {
degree_seq <- colSums(private$.z_network)
}
res$degree_seq <- degree_seq
}
private$.descriptives$degree <- res
return(res)
},
#' @description
#' Calculates the spillover degree distribution between actors with
#' `x_attribute == 1` and actors with `y_attribute == 1`.
#'
#' @param prob (logical) If `TRUE` (default), returns a probability
#' distribution (proportions). If `FALSE`, returns raw counts.
#' @param value_range (numeric vector) A vector `c(min, max)` specifying
#' the range of degrees to tabulate. If `NULL` (default), the range
#' is inferred from the data.
#' @param plot (logical) If `TRUE`, plots the distributions.
#' @return A list containing two `table` objects:
#' `out_spillover_degree` (from x_i=1 to y_j=1) and
#' `in_spillover_degree` (from y_i=1 to x_j=1).
spillover_degree_distribution = function(prob = TRUE, value_range = NULL, plot = TRUE) {
actors_x <- which(private$.x_attribute > self$mean_x())
actors_y <- which(private$.y_attribute > self$mean_y())
adj_mat_x_y <- matrix(
data = NA, nrow = length(actors_x),
ncol = length(actors_y),
dimnames = list(actors_x, actors_y)
)
overlap_tmp <- private$.overlap[(private$.overlap[, 1] %in% actors_x) & (private$.overlap[, 2] %in% actors_y), ]
overlap_tmp[, 1] <- match(overlap_tmp[, 1], rownames(adj_mat_x_y))
overlap_tmp[, 2] <- match(overlap_tmp[, 2], colnames(adj_mat_x_y))
adj_mat_x_y[overlap_tmp] <- 0
# Exclude the units that do not have any overlaps so they cannot have spillover edges
has_valid_overlap_row <- rowSums(!is.na(adj_mat_x_y)) > 0
has_valid_overlap_col <- colSums(!is.na(adj_mat_x_y)) > 0
adj_mat_x_y <- adj_mat_x_y[has_valid_overlap_row, has_valid_overlap_col, drop = FALSE]
edges_x_y <- matrix(private$.z_network[(private$.z_network[, 1] %in% actors_x) & (private$.z_network[, 2] %in% actors_y), ],
ncol = 2
)
if (nrow(edges_x_y) > 0) {
which_overlap <- check_overlap(edges_x_y, private$.overlap)
edges_x_y_overlap <- matrix(edges_x_y[which_overlap, ], ncol = 2)
edges_x_y_overlap[, 1] <- match(edges_x_y_overlap[, 1], rownames(adj_mat_x_y))
edges_x_y_overlap[, 2] <- match(edges_x_y_overlap[, 2], colnames(adj_mat_x_y))
# edges_x_y_nonoverlap <- edges_x_y[!which_overlap,]
adj_mat_x_y[edges_x_y_overlap] <- 1
# adj_mat_x_y[edges_x_y_nonoverlap] = NA
tmp1 <- rowSums(adj_mat_x_y, na.rm = T)
tmp2 <- colSums(adj_mat_x_y, na.rm = T)
if (is.null(value_range)) {
value_range <- range(unique(c(tmp1, tmp2)))
}
out_degree_x_y <- table(factor(tmp1, levels = seq(from = value_range[1], to = value_range[2]))) /
(nrow(adj_mat_x_y) * prob + (!prob))
in_degree_x_y <- table(factor(tmp2, levels = seq(from = value_range[1], to = value_range[2]))) /
(ncol(adj_mat_x_y) * prob + (!prob))
res <- list(
out_spillover_degree = out_degree_x_y,
in_spillover_degree = in_degree_x_y
)
} else {
if (is.null(value_range)) {
value_range <- c(0, 1)
}
out_degree_x_y <- table(factor(0, levels = seq(from = value_range[1], to = value_range[2])))
in_degree_x_y <- table(factor(0, levels = seq(from = value_range[1], to = value_range[2])))
res <- list(
out_spillover_degree = out_degree_x_y,
in_spillover_degree = in_degree_x_y
)
}
private$.descriptives$spillover_degree_distribution <- res
if (plot) {
barplot(out_degree_x_y,
xlab = "Spillover Outdegree",
ylab = ifelse(prob, "Proportion", "Count"),
las = 1, ylim = c(0, max(out_degree_x_y) * 1.2)
)
barplot(in_degree_x_y,
xlab = "Spillover Indegree",
ylab = ifelse(prob, "Proportion", "Count"),
las = 1, ylim = c(0, max(in_degree_x_y) * 1.2)
)
}
invisible(res)
},
#' @description
#' Plot the network using `igraph`.
#'
#' Visualizes the `z_network` using the `igraph` package.
#' Nodes can be colored by `x_attribute` and sized by `y_attribute`.
#' `neighborhood` edges can be plotted as a background layer.
#'
#' @param node_color (character) Attribute to map to node color.
#' One of `"x"` (default), `"y"`, or `"none"`.
#' @param node_size (character) Attribute to map to node size.
#' One of `"y"` (default), `"x"`, or `"constant"`.
#' @param show_overlap (logical) If `TRUE` (default), plot the
#' `neighborhood` edges as a background layer.
#' @param layout An `igraph` layout function (e.g., `igraph::layout_with_fr`).
#' @param network_edges_col (character) Color for the `z_network` edges.
#' @param neighborhood_edges_col (character) Color for the `neighborhood` edges.
#' @param main (character) The main title for the plot.
#' @param legend_col_n_levels (integer) Number of levels for the color legend.
#' @param legend_size_n_levels (integer) Number of levels for the size legend.
#' @param legend_pos (character) Position of the legend (e.g., `"right"`).
#' @param alpha_neighborhood (numeric) Alpha transparency for neighborhood edges.
#' @param edge.width (numeric) Width of the network edges.
#' @param vertex.frame.width (numeric) Width of the vertex frame.
#' @param edge.arrow.size (numeric) Size of the arrowheads for directed edges.
#' @param coords (matrix) Optional matrix of x-y coordinates for node layout.
#' @param legend_size (numeric) Scaling factor for the size legend.
#' @param ... Additional arguments passed to `plot.igraph`.
#' @return A list containing the `igraph` object (`graph`) and the
#' layout coordinates (`coords`), invisibly.
plot = function(node_color = "x",
node_size = "y",
show_overlap = TRUE,
layout = igraph::layout_with_fr,
network_edges_col = "grey60",
neighborhood_edges_col = "orange",
main = "",
legend_col_n_levels = NULL,
legend_size_n_levels = NULL,
legend_pos = "right",
alpha_neighborhood = 0.2,
edge.width = 1,
edge.arrow.size = 1,
vertex.frame.width = 0.5,
coords = NULL, legend_size = 0.5,
...) {
# browser()
if (is.null(legend_col_n_levels)) {
if (node_color == "x") {
if (private$.type_x == "binomial") {
legend_col_n_levels <- 2
} else {
legend_col_n_levels <- 4
}
} else {
if (private$.type_y == "binomial") {
legend_col_n_levels <- 2
} else {
legend_col_n_levels <- 4
}
}
}
if (is.null(legend_size_n_levels)) {
if (node_size == "x") {
if (private$.type_x == "binomial") {
legend_size_n_levels <- 2
} else {
legend_size_n_levels <- 3
}
} else {
if (private$.type_y == "binomial") {
legend_size_n_levels <- 2
} else {
legend_size_n_levels <- 3
}
}
}
# --- build igraph object from edge list
if (!private$.directed) {
g <- igraph::graph_from_edgelist(as.matrix(private$.z_network[private$.z_network[, 1] < private$.z_network[, 2], ]),
directed = private$.directed
)
} else {
g <- igraph::graph_from_edgelist(as.matrix(private$.z_network), directed = private$.directed)
}
n <- private$.n_actor
if (igraph::vcount(g) < n) {
g <- igraph::add_vertices(g, n - igraph::vcount(g))
}
# set canonical vertex names = 1:n so mapping is stable
igraph::V(g)$name <- as.character(seq_len(igraph::vcount(g)))
# --- map attributes to nodes
xa <- private$.x_attribute
ya <- private$.y_attribute
# normalize attributes for visual mapping
# norm <- function(v) (v - min(v, na.rm = TRUE)) / (max(v, na.rm = TRUE) - min(v, na.rm = TRUE) + 1e-9)
xa_n <- find_ranks(xa)
xa_n <- xa_n / max(xa_n)
ya_n <- find_ranks(ya)
ya_n <- ya_n / max(ya_n)
# node color
if (node_color == "x") {
pal <- grDevices::colorRampPalette(c("#313695", "#74add1", "#ffffbf", "#f46d43", "#a50026"))(100)
node_cols <- pal[as.numeric(cut(xa_n, 100))]
color_attr <- xa
color_lab <- "x"
} else if (node_color == "y") {
pal <- grDevices::colorRampPalette(c("#313695", "#74add1", "#ffffbf", "#f46d43", "#a50026"))(100)
node_cols <- pal[as.numeric(cut(ya_n, 100))]
color_attr <- ya
color_lab <- "y"
} else {
node_cols <- rep("grey70", n)
color_attr <- NULL
color_lab <- NULL
}
if (node_size == "x") {
node_sizes <- xa_n
size_attr <- xa
size_lab <- "x"
} else if (node_size == "y") {
node_sizes <- ya_n
size_attr <- ya
size_lab <- "y"
} else {
node_sizes <- rep(6, n)
size_attr <- NULL
size_lab <- NULL
}
if (is.null(coords)) {
coords <- layout(g)
} else {
# sanity: make sure dimensions match
if (!is.matrix(coords) || nrow(coords) != igraph::vcount(g) || ncol(coords) < 2) {
stop("`coords` must be a matrix with nrow = vcount(g) and at least 2 columns.")
}
}
if (show_overlap && !is.null(private$.neighborhood) && nrow(private$.neighborhood) > 0) {
overlap_edges <- as.matrix(private$.neighborhood)
overlap_edges <- overlap_edges[overlap_edges[, 1] != overlap_edges[, 2], , drop = FALSE]
g2 <- igraph::graph_from_data_frame(d = overlap_edges, vertices = igraph::V(g)$name, directed = FALSE)
igraph::V(g2)$name <- as.character(seq_len(igraph::vcount(g2)))
igraph::plot.igraph(
g2,
edge.color = grDevices::adjustcolor(neighborhood_edges_col, alpha.f = alpha_neighborhood),
vertex.color = "white",
edge.width = 1,
vertex.size = 0,
vertex.label = NA,
layout = coords
)
add_mode <- TRUE
} else {
add_mode <- FALSE
}
# browser()
plot(
g,
vertex.frame.width = vertex.frame.width,
vertex.color = node_cols,
vertex.size = log(node_sizes + 1) * 10 + 2,
vertex.label = NA,
edge.color = network_edges_col,
edge.width = edge.width,
edge.arrow.size = edge.arrow.size,
add = add_mode,
layout = coords,
)
if (!is.null(color_attr) || !is.null(size_attr)) {
if (!is.null(color_attr)) {
# browser()
rng <- quantile(color_attr, na.rm = TRUE, probs = seq(from = 0, to = 1, length.out = legend_col_n_levels))
color_vals <- round(rng, 2)
color_cols <- c(pal[1], pal[round(seq(from = 0, to = 1, length.out = legend_col_n_levels) * 100)])
} else {
color_cols <- NULL
color_vals <- NULL
}
if (!is.null(size_attr)) {
vals <- sort(unique(size_attr))[findInterval(
vec = sort(unique(node_sizes)),
x = seq(from = 0, to = 0.99, length.out = legend_size_n_levels)
) + 1]
sizes <- sort(unique(node_sizes))[findInterval(
vec = sort(unique(node_sizes)),
x = seq(from = 0, to = 0.99, length.out = legend_size_n_levels)
) + 1]
sizes <- log(sizes + 1) * 10 + 2
} else {
sizes <- NULL
vals <- NULL
}
length_a <- length(color_vals)
length_b <- length(vals)
if (length_a > 0) {
labels_a <- paste(color_lab, "=", color_vals)
} else {
labels_a <- c()
}
if (length_b > 0) {
labels_b <- paste(size_lab, "=", round(vals, 2))
color_cols_b <- rep("grey70", times = length_b)
} else {
labels_b <- c()
color_cols_b <- c()
}
labels <- c(labels_a, labels_b)
colors_tmp <- c(color_cols, color_cols_b)
sizes <- c(rep(1, times = length_a), sizes / 8)
legend(
title = "Legend",
legend_pos,
legend = labels,
pt.bg = colors_tmp,
border = "black",
pch = 21,
pt.cex = sizes,
cex = legend_size
)
}
invisible(list(graph = g, coords = coords))
},
#' @description
#' Print a summary of the `iglm.data` object to the console.
#'
#' @param digits (integer) Number of digits to round numeric output to.
#' @param ... Additional arguments (not used).
#' @return The object's private environment, invisibly.
print = function(digits = 3, ...) {
n <- as.integer(private$.n_actor)
dir_flag <- isTRUE(private$.directed)
m_z <- nrow(private$.z_network)
m_nb <- nrow(private$.neighborhood)
numfmt <- function(v) format(v, digits = digits, trim = TRUE)
summarize_attr <- function(v, type, scale) {
v <- as.vector(v)
if (type == "binomial") {
n1 <- sum(v == 1, na.rm = TRUE)
n0 <- sum(v == 0, na.rm = TRUE)
p1 <- mean(v == 1, na.rm = TRUE)
paste0("binomial 1s=", n1, ", 0s=", n0, ", P(1)=", numfmt(p1))
} else if (type == "poisson") {
qs <- stats::quantile(v, c(.00, .25, .50, .75, 1), na.rm = TRUE, names = FALSE)
paste0(
"poisson min=", numfmt(qs[1]),
", q1=", numfmt(qs[2]),
", med=", numfmt(qs[3]),
", q3=", numfmt(qs[4]),
", max=", numfmt(qs[5]),
", mean=", numfmt(mean(v, na.rm = TRUE))
)
} else if (type == "normal") {
paste0(
"normal mean=", numfmt(mean(v, na.rm = TRUE)),
", sd=", numfmt(stats::sd(v, na.rm = TRUE)),
", scale= ", numfmt(scale)
)
} else {
paste0("unknown type; length=", length(v))
}
}
x_sum <- summarize_attr(
private$.x_attribute,
private$.type_x,
private$.scale_x
)
y_sum <- summarize_attr(
private$.y_attribute,
private$.type_y,
private$.scale_y
)
w <- 28
cat("iglm.data object\n")
cat(sprintf(" %-*s: %s\n", w, "units", n))
cat(sprintf(" %-*s: %s\n", w, "directed", if (dir_flag) "TRUE" else "FALSE"))
edge_label <- sprintf("edges (fixed = %s)", private$.fix_z)
cat(sprintf(" %-*s: %s\n", w, edge_label, m_z))
cat(sprintf(" %-*s: %s\n", w, "neighborhood edges", m_nb))
cat("\nAttribute summaries\n")
x_label <- sprintf("x_attribute (fixed = %s)", private$.fix_x)
cat(sprintf(" %-*s: %s\n", w, x_label, x_sum))
cat(sprintf(" %-*s: %s\n", w, "y_attribute", y_sum))
#
# cat("iglm.data object\n")
# cat(" units :", n, "\n")
# cat(" directed :", if (dir_flag) "TRUE" else "FALSE", "\n")
# cat(" edges (fixed = ", private$.fix_z,"):", m_z, "\n")
# cat(" neighborhood edges :", m_nb, "\n")
# cat("\n")
# cat("Attribute summaries\n")
# cat(" x_attribute (fixed =", private$.fix_x,"):",x_sum, "\n", sep = "")
# cat(" y_attribute ", y_sum, "\n", sep = "")
# cat("\n")
invisible(private)
}
),
# --- Active Bindings ---
active = list(
#' @field x_attribute (`numeric`) The vector for the first unit-level attribute.
x_attribute = function(value) {
if (missing(value)) private$.x_attribute else {
self$set_x_attribute(value)
}
},
#' @field y_attribute (`numeric`) The vector for the second unit-level attribute.
y_attribute = function(value) {
if (missing(value)) private$.y_attribute else self$set_y_attribute(value)
},
#' @field z_network (`matrix`) The primary network structure as a 2-column integer edgelist.
z_network = function(value) {
if (missing(value)) private$.z_network else self$set_z_network(value)
},
#' @field neighborhood (`matrix`) Read-only. The secondary/neighborhood structure as a 2-column integer edgelist. An empty matrix if not provided.
neighborhood = function(value) {
if (missing(value)) {
if (is.null(private$.neighborhood)) matrix(0, nrow = 0, ncol = 2) else private$.neighborhood
} else stop("`neighborhood` is read-only.", call. = FALSE)
},
#' @field overlap (`matrix`) Read-only. The calculated overlap relation (dyads with shared neighbors in `neighborhood`) as a 2-column integer edgelist. An empty matrix if overlap hasn't been computed or is not available.
overlap = function(value) {
if (missing(value)) {
if (is.null(private$.overlap)) matrix(0, nrow = 0, ncol = 2) else private$.overlap
} else stop("`overlap` is read-only.", call. = FALSE)
},
#' @field directed (`logical`) Indicates if the `z_network` is treated as directed.
directed = function(value) {
if (missing(value)) private$.directed else stop("`directed` is read-only.", call. = FALSE)
},
#' @field n_actor (`integer`) The total number of actors (nodes) in the network.
n_actor = function(value) {
if (missing(value)) private$.n_actor else stop("`n_actor` is read-only.", call. = FALSE)
},
#' @field type_x (`character`) The specified distribution type for the `x_attribute`.
type_x = function(value) {
if (missing(value)) private$.type_x else self$set_type_x(value)
},
#' @field type_y (`character`) The specified distribution type for the `y_attribute`.
type_y = function(value) {
if (missing(value)) private$.type_y else self$set_type_y(value)
},
#' @field scale_x (`numeric`) The scale parameter associated with the `x_attribute`.
scale_x = function(value) {
if (missing(value)) private$.scale_x else self$set_scale_x(value)
},
#' @field scale_y (`numeric`) The scale parameter associated with the `y_attribute`.
scale_y = function(value) {
if (missing(value)) private$.scale_y else self$set_scale_y(value)
},
#' @field fix_x (`logical`) Indicates if the `x_attribute` is fixed during estimation/simulation.
fix_x = function(value) {
if (missing(value)) private$.fix_x else self$set_fix_x(value)
},
#' @field fix_z (`logical`) RIndicates if the `z_network` is fixed during estimation/simulation.
fix_z = function(value) {
if (missing(value)) private$.fix_z else self$set_fix_z(value)
},
#' @field descriptives (`list`)A list storing computed descriptive statistics for the network and attributes.
descriptives = function(value) {
if (missing(value)) private$.descriptives else stop("`descriptives` is read-only.", call. = FALSE)
},
#' @field fix_z_alocal (`logical`) Flag indicating whether nonoverlap edges are treated as random.
fix_z_alocal = function(value) {
if (missing(value)) private$.fix_z_alocal else self$set_fix_z_alocal(value)
}
)
)
#' Constructor for the iglm.data R6 object
#'
#' @description
#' Creates a `iglm.data` object, which stores network and attribute data.
#' This function acts as a user-friendly interface to the `iglm.data` R6 class generator.
#' It handles data input, infers parameters like the number of actors (`n_actor`)
#' and network directedness (`directed`) if not explicitly provided, processes
#' network data into a consistent edgelist format, calculates the overlap
#' relation based on an optional neighborhood definition, and performs
#' extensive validation of all inputs.
#'
#' @param x_attribute A numeric vector for the first unit-level attribute.
#' @param y_attribute A numeric vector for the second unit-level attribute.
#' @param z_network A matrix representing the network. Can be a 2-column
#' edgelist or a square adjacency matrix.
#' @param neighborhood An optional matrix for the neighborhood representing local dependence.
#' Can be a 2-column edgelist or a square adjacency matrix.
#' A tie in `neighborhood` between actor i and j indicates that j is in the neighborhood of i,
#' implying dependence between the respective actors.
#' @param directed A logical value indicating if `z_network` is directed.
#' If `NA` (default), directedness is inferred from the symmetry of
#' `z_network`.
#' @param n_actor An integer for the number of actors in the system.
#' If `NA` (default), `n_actor` is inferred from the attributes or
#' network matrices.
#' @param type_x Character string for the type of `x_attribute`.
#' Must be one of `"binomial"`, `"poisson"`, or `"normal"`.
#' Default is `"binomial"`.
#' @param type_y Character string for the type of `y_attribute`.
#' Must be one of `"binomial"`, `"poisson"`, or `"normal"`.
#' Default is `"binomial"`.
#' @param scale_x A positive numeric value for scaling (e.g., variance
#' for "normal" type). Default is 1.
#' @param scale_y A positive numeric value for scaling (e.g., variance
#' for "normal" type). Default is 1.
#' @param fix_x (logical) If `TRUE`, the 'x' predictor is held fixed
#' during estimation/simulation (fixed design in regression). Default is `FALSE`.
#' @param fix_z (logical) If `TRUE`, the 'z' network is held fixed
#' during estimation/simulation (fixed network design). Default is `FALSE`.
#' Setting this to TRUE, allows practicioners to estimate autologistic actor attribute models,
#' which were introduced in binary settings in Daraganova, G., & Robins, G. (2013).
#' @param fix_z_alocal (logical) If `TRUE`, edges outside the overlap region
#' are fixed, else they are random (default).
#' @param return_neighborhood Logical. If `TRUE` (default) and
#' `neighborhood` is `NULL`, a full neighborhood (all dyads) is
#' generated implying global dependence. If `FALSE`, no neighborhood is set.
#' @param file (character) Optional file path to load a saved `iglm.data` object state.
#' @return An object of class `iglm.data` (and `R6`).
#' @references
#' Fritz, C., Schweinberger, M. , Bhadra S., and D. R. Hunter (2025). A Regression Framework for Studying Relationships among Attributes under Network Interference. Journal of the American Statistical Association, to appear.
#'
#' Daraganova, G., and Robins, G. (2013). Exponential random graph models for social networks: Theory, methods and applications, 102-114. Cambridge University Press.
#' @examples
#' \donttest{
#' data("state_twitter")
#' state_twitter
#' state_twitter$iglm.data$degree_distribution(prob = FALSE, plot = TRUE)
#' state_twitter$iglm.data$geodesic_distances_distribution(prob = FALSE, plot = TRUE)
#' state_twitter$iglm.data$mean_x()
#' state_twitter$iglm.data$mean_y()
#' }
#'
#' # Generate a small iglm data object either via adjacency matrix or edgelist
#' tmp_adjacency <- iglm.data(
#' z_network = matrix(c(
#' 0, 1, 1, 0,
#' 1, 0, 0, 1,
#' 1, 0, 0, 1,
#' 0, 1, 1, 0
#' ), nrow = 4, byrow = TRUE),
#' directed = FALSE,
#' n_actor = 4,
#' type_x = "binomial",
#' type_y = "binomial"
#' )
#'
#'
#' tmp_edgelist <- iglm.data(
#' z_network = tmp_adjacency$z_network,
#' directed = FALSE,
#' n_actor = 4,
#' type_x = "binomial",
#' type_y = "binomial"
#' )
#'
#' tmp_edgelist$mean_z()
#' tmp_adjacency$mean_z()
#' @export
iglm.data <- function(x_attribute = NULL, y_attribute = NULL, z_network = NULL,
neighborhood = NULL, directed = TRUE, n_actor = NA,
type_x = "binomial", type_y = "binomial",
scale_x = 1, scale_y = 1,
fix_x = FALSE,
fix_z = FALSE,
fix_z_alocal = FALSE,
return_neighborhood = TRUE, file = NULL) {
# browser()
if (!is.null(z_network)) {
z_network <- as.matrix(z_network)
}
if (!is.null(neighborhood)) {
neighborhood <- as.matrix(neighborhood)
}
iglm.data_generator$new(
x_attribute = as.numeric(x_attribute),
y_attribute = as.numeric(y_attribute),
z_network = z_network,
neighborhood = neighborhood,
directed = as.logical(directed),
n_actor = n_actor,
type_x = as.character(type_x),
type_y = as.character(type_y),
scale_x = as.numeric(scale_x),
scale_y = as.numeric(scale_y),
fix_x = as.logical(fix_x),
fix_z = as.logical(fix_z),
fix_z_alocal = fix_z_alocal,
return_neighborhood = as.logical(return_neighborhood),
file = file
)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.