Nothing
#'
#' @title Monte Carlo goodness-of-fit test for an Erdős-Rényi stochastic blockmodel (ERSBM)
#'
#' @description `goftest_ERSBM` performs chi square goodness-of-fit test for network data considering the model as ERSBM (Karwa et al. (2023))
#'
#' @param A n by n binary symmetric adjacency matrix representing an undirected graph where n is the number of nodes in the graph
#' @param K positive integer scalar representing the number of blocks; K>1
#' @param C positive integer vector of size n for block assignments of each node; from 1 to K (no of blocks)
#' @param numGraphs number of graphs to be sampled; default value is 100
#'
#' @return A list with the elements
#' \item{statistic}{the values of the chi-square test statistics on each sampled graph}
#' \item{p.value}{the p-value for the test}
#'
#' @importFrom igraph graph.empty
#' @importFrom igraph vcount
#' @importFrom igraph graph
#' @importFrom igraph ecount
#' @importFrom igraph graph.intersection
#' @importFrom igraph graph.difference
#' @importFrom igraph as.directed
#' @importFrom igraph is.simple
#' @importFrom igraph is.directed
#' @importFrom igraph graph.union
#' @importFrom igraph get.edges
#' @importFrom igraph get.edge.ids
#' @importFrom igraph as.undirected
#' @importFrom igraph get.edgelist
#' @importFrom igraph subgraph.edges
#'
#' @include Estimation_ERSBM.R
#' @include TestStatistic_ERSBM.R
#' @include Sampling_Graph_ERSBM.R
#' @include Estimation_Block.R
#'
#' @export
#'
#' @examples
#'
#' RNGkind(sample.kind = "Rounding")
#' set.seed(1729)
#'
#' # We model a network with 3 even classes
#' n1 = 10
#' n2 = 10
#' n3 = 10
#'
#' # Generating block assignments for each of the nodes
#' n = n1 + n2 + n3
#' class = rep(c(1, 2, 3), c(n1, n2, n3))
#'
#' # Generating the adjacency matrix of the network
#' # Generate the matrix of connection probabilities
#' cmat = matrix(
#' c(
#' 0.80, 0.05, 0.05,
#' 0.05, 0.80, 0.05,
#' 0.05, 0.05, 0.80
#' ),
#' ncol = 3,
#' byrow = TRUE
#' )
#' pmat = cmat / n
#'
#' # Creating the n x n adjacency matrix
#' adj <- matrix(0, n, n)
#' for (i in 2:n) {
#' for (j in 1:(i - 1)) {
#' p = pmat[class[i], class[j]] # We find the probability of connection with the weights
#' adj[i, j] = rbinom(1, 1, p) # We include the edge with probability p
#' }
#' }
#'
#' adjsymm = adj + t(adj)
#'
#' # When class assignment is known
#' out = goftest_ERSBM(adjsymm, C = class, numGraphs = 10)
#'
#' chi_sq_seq = out$statistic
#' pvalue = out$p.value
#' print(pvalue)
#'
#' # Plotting histogram of the sequence of the test statistics
#' hist(chi_sq_seq, 20, xlab = "chi-square test statistics", main = NULL)
#' abline(v = chi_sq_seq[1], col = "red", lwd = 5) # adding test statistic on the observed network
#' legend("topleft", legend = paste("observed GoF = ", chi_sq_seq[1]))
#'
#' @references
#' Karwa et al. (2023). "Monte Carlo goodness-of-fit tests for degree corrected and related stochastic blockmodels",
#' \emph{Journal of the Royal Statistical Society Series B: Statistical Methodology},
#' \doi{https://doi.org/10.1093/jrsssb/qkad084}
goftest_ERSBM <- function(A, K = NULL, C = NULL, numGraphs = 100) {
# Some compatibility checks and error message
# Check whether the input A is a matrix
if (!is.matrix(A) | !is.numeric(A)) {
stop("A should be an adjacency matrix with numeric entry of the graph.")
}
# Check whether the graph corresponding to A is an undirected
if (!isSymmetric.matrix(A)) {
stop("A should be a square symmetric matrix.")
}
# Check whether the graph corresponding to A is an unweighted
if (!all(A %in% c(0, 1))) {
stop("A can only contain 0's and 1's.")
}
# Check whether the graph corresponding to A has no self loops
if (!all(diag(A) == 0)) {
stop("All the diagonal entries of A should be 0.")
}
# Check whether numGraphs is numeric and positive
if (!is.numeric(numGraphs) || numGraphs <= 0 || numGraphs %% 1 != 0) {
stop("numGraphs, number of graphs to sample should be a positive natural number.")
}
# Check whether C or K is provided
if (is.null(C) & is.null(K)) {
stop("Either block assignment or no of blocks should be provided.")
}
# If K is not provided, getting no of blocks from C
if (is.null(K)) {
# Getting no of blocks from the block assignment vector
K <- length(unique(C))
}
# Check whether K is numeric and positive integer
if (!is.numeric(K) | K %% 1 != 0) {
stop("No of blocks K should be a positive natural number.")
}
# No of blocks K should be at least 2
if (K < 2) {
stop("No of blocks K should be at least 2.")
}
# If C is not provided, estimate C using value of K, the no of blocks
if (is.null(C)) {
# Getting block assignment for each node from adjacency matrix when block assignment is not provided
C <- block_est(A, K)
}
# Check whether C is numeric
if (!is.numeric(C)) {
stop("All the elements of C should be numeric.")
}
# Check whether all the elements of block assignment are from 1:K
if (!all(C %in% 1:K)) {
stop("C can only contain values from 1 to K (no of blocks).")
}
# Check whether there is at least one node from each of the block
if (length(unique(C)) != K) {
stop("All the blocks should have atleast one node.")
}
# Getting dimension information
n <- nrow(A)
# Check whether length of C is compatible with dimension of A
if (length(C) != n) {
stop("The C should have same length as no of rows in A.")
}
# Getting an igraph (an undirected and unweighted) object from the input adjacency matrix
G_obs <- igraph::graph_from_adjacency_matrix(A, mode = "undirected", weighted = NULL)
# Calculate estimate of the parameter from observed graph
# which will remain same after generating a new graph on same fiber
p_mle <- get_mle_ERSBM(G_obs, C)
# It will store GoF test statistic on graphs
chi_seqR <- rep(0, numGraphs)
G <- G_obs
# Storing the first entry of chi_seq as test-stat on observed graph
chi_seqR[1] <- round(graphchi_ERSBM(G, C, p_mle), 2)
for (i in 2:numGraphs) {
# Sampling a new graph
G_current <- sample_a_move_ERSBM(C, G)
# Computing GoF test statistic on new sampled graph
chi_seqR[i] <- round(graphchi_ERSBM(G_current, C, p_mle), 2)
G <- G_current
}
# pvalue i.e, proportion of sampled grpahs has larger GoF statistic than observed one
pvalue <- mean(chi_seqR > chi_seqR[1])
# Output:
# chi_seq: sequence of chi square test statistics on the sampled graphs
# pvalue: estimated p-value when true model is ERSBM
return(list(statistic = chi_seqR, p.value = pvalue))
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.