knitr::opts_chunk$set(echo = TRUE, cache = TRUE, cache.rebuild = FALSE, autodep = TRUE)
set.seed(1)

library(StartNetwork)
library(parallel)
library(ggplot2)

Purpose: To investigate why we were having problems retrieving parameter estimates when the ERGM is the mechanistic model.

Summary: We can now retrieve the parameters accurately where the ERGM is the mechanistic model, in other words, where the ERGM parameter is the parameter of interest. See examples 6,7, and 8.

Firstly we test the functions used to estimate entropy and estimate the number of graphs corresponding to a particular sorted degree sequence. We find that the entropy computation is accurate, but estimating the number of graphs corresponding to a sorted degree sequence is more complicated.

We then test whether the minimum KL divergence between mechanistic and statistical network models corresponds to the true parameter of the statistical network model. In all cases the statistical network model is an ERGM where the likelihood statistic is the number of edges.

Entropy

We compare entropy computed with the non-parameter estimator entropy_calc using samples from the binomial distribution, with the analytic expression for the limit of entropy for the binomial distribution. The same comparison is also made with the Poisson distribution.

x <- rbinom(10000, 1000, 0.9)
entropy_calc(x)

1/2 * log(2*pi*exp(1)*1000*0.9*0.1)

lambda <- 50
x <- rpois(1000, lambda)
entropy_calc(x)

k <- 1:100
lambda * (1 - log(lambda)) + exp(-lambda) * sum((lambda^k * lfactorial(k))/factorial(k))

Combinations of degree sequences

We check whether the mapping of @bianconi2009entropy from degree sequences to size of graph spaces works well we look at simple examples of graphs spaces with known sizes and known degree sequences.

Firstly, we look at graphs where the degrees of the nodes are either zero or one, where the number of nodes with degree one is even. This means that network consists only of disconnected dyads.

Disconnected dyads

disconnected_dyad <- lchoose(seq(15, 7, by = -2),2)
x <- cumsum(disconnected_dyad)

disjoint_dd_b <- function(x, n = 15){
  number_of_graphs_dd(c(rep(0,n - 2*x), rep(1, 2*x)), type = "Bianconi")
}

disjoint_dd_l <- function(x, n = 15){
  number_of_graphs_dd(c(rep(0,n - 2*x), rep(1, 2*x)), type = "Liebenau")
}

yb <- sapply(1:5, disjoint_dd_b)
yl <- sapply(1:5, disjoint_dd_l)
par(pty="s")

plot(x, yb, xlim = c(0, 20), ylim = c(0, 20), asp = 1, xlab = "analytic expression", ylab = "general estimator", col = "blue")
abline(c(0,1), col = "black")

points(x, yl, col = "red")

legend(10,5, legend = c("Bianconi", "Liebenau"), fill = c("blue", "red"))

Single triangle

n <- seq(5, 15, by = 2)
#onetriangle <- choose(choose(n,2), 1) * (n - 2)
onetriangle <- choose(n,3)
x <- log(onetriangle[1:6])

triangle_dd_b <- function(x){
  number_of_graphs_dd(c(rep(0,x - 3), c(2,2,2)), type = "Bianconi")
}

triangle_dd_l <- function(x){
  number_of_graphs_dd(c(rep(0,x - 3), c(2,2,2)), type = "Liebenau")
}

yb <- sapply(n, triangle_dd_b)
yl <- sapply(n, triangle_dd_l)
par(pty="s")

plot(x, yb, xlim = c(0, 10), ylim = c(0, 10), asp = 1, xlab = "analytic expression", ylab = "general estimator", col = "blue")
abline(c(0,1), col = "black")

points(x, yl, col = "red")

legend(5,3, legend = c("Bianconi", "Liebenau"), fill = c("blue", "red"))

Single reverse triangle

n <- seq(5, 15, by = 2)
#onetriangle <- choose(choose(n,2), 1) * (n - 2)
onetriangle <- choose(n,3)
x <- log(onetriangle[1:6])

triangle_dd_b <- function(x){
  number_of_graphs_dd(c(rep(x - 1,x - 3), rep(x - 1 - 2, 3)), type = "Bianconi")
}

triangle_dd_l <- function(x){
  number_of_graphs_dd(c(rep(x - 1,x - 3), rep(x - 1 - 2, 3)), type = "Liebenau")
}

yb <- sapply(n, triangle_dd_b)
yl <- sapply(n, triangle_dd_l)
par(pty="s")

plot(x, yb, xlim = c(0, 20), ylim = c(0, 20), asp = 1, xlab = "analytic expression", ylab = "general estimator", col = "blue")
abline(c(0,1), col = "black")

points(x, yl, col = "red")

legend(10,5, legend = c("Bianconi", "Liebenau"), fill = c("blue", "red"))

It seems that the base measure for the sorted degree sequence is more accurately measured for sparse graphs than dense graphs. Let's look now at graphs in the middle, regular graphs where the degree is n/2.

Regular graph

The analytic expression for the number of regular graphs is reported by @mckay1991asymptotic.

n <- seq(7, 21, by = 2)
#onetriangle <- choose(choose(n,2), 1) * (n - 2)
d <- (n - 1) / 2
lambda <- d / (n - 1)

regulargraphs <- sqrt(2) * exp(0.25) * (lambda^lambda * (1-lambda)^(1-lambda))^choose(n,2) * (choose(n-1,d)^n)
x <- log(regulargraphs)

regular_dd_b <- function(x){
  number_of_graphs_dd(rep((x - 1)/2,x), type = "Bianconi")
}

regular_dd_l <- function(x){
  number_of_graphs_dd(rep((x - 1)/2,x), type = "Liebenau")
}

yb <- sapply(n, regular_dd_b)
yl <- sapply(n, regular_dd_l)
par(pty="s")

plot(x, yb, xlim = c(0, 180), ylim = c(0, 180), asp = 1, xlab = "analytic expression", ylab = "general estimator", col = "blue")
abline(c(0,1), col = "black")

points(x, yl, col = "red")

legend(25,150, legend = c("Bianconi", "Liebenau"), fill = c("blue", "red"))

The base measure seems to accurately compute the sorted degree sequence for a regular graph midway between sparse and dense.

We demonstrate the application of these formulas with a simple example. The mechanistic model is GNP, and the statistical model is an ERGM where the summary statistic is the number of edges. We start with a sparse model where the parameter is 0.1, and then the second example is a dense model where the parameter is 0.9. Finally in example 3, we fix the expression so that it is more accurate for dense graphs.

Examples

n = 15
replicates = 400

mech_net_gnp = purrr::partial(igraph::sample_gnp, n = !!n, ... = , directed = FALSE, loops = FALSE)

box_replicates <- 10

Example 1

Mechanistic model: Erdös-Renyi (target)
Statistical model: ERGM
Integral stat: sorted degree sequence
Likelihood stat: number of edges

true_value <- 0.1
theta_p <- rep(seq(0.01, 0.2, by = 0.01), box_replicates)
theta_s <- log(true_value/(1 - true_value))

cl <- parallel::makeCluster(parallel::detectCores())

g <- parallel::parLapply(cl, theta_p, StartNetwork::KL_ss, theta_s = theta_s, replicates = replicates, sorted = TRUE, mech_net = mech_net_gnp, lstat = igraph::gsize, mirror = FALSE, type = c("Bianconi", "Liebenau"))

parallel::stopCluster(cl)
g_tidy <- StartNetwork::tidy_g(g)

ggplot(g_tidy) + aes(x = parameter, y = value, group = interaction(parameter, type), col = type) + geom_boxplot(outlier.shape = NULL, position = "identity") + facet_wrap( ~ key, scales = "free_y") + 
  ggplot2::geom_vline(mapping = aes(xintercept = true_value))

Example 2

Mechanistic model: Erdös-Renyi (target)
Statistical model: ERGM
Integral stat: sorted degree sequence
Likelihood stat: number of edges

true_value <- 0.9
theta_p <- rep(seq(0.85, 0.95, by = 0.01), box_replicates)
theta_s <- log(true_value/(1 - true_value))

cl <- parallel::makeCluster(parallel::detectCores())

g <- parallel::parLapply(cl, theta_p, StartNetwork::KL_ss, theta_s = theta_s, replicates = replicates, sorted = TRUE, mech_net = mech_net_gnp, lstat = igraph::gsize, mirror = FALSE, type = c("Bianconi", "Liebenau"))

parallel::stopCluster(cl)
g_tidy <- StartNetwork::tidy_g(g)

ggplot(g_tidy) + aes(x = parameter, y = value, group = interaction(parameter, type), col = type) + geom_boxplot(outlier.shape = NULL, position = "identity") + facet_wrap( ~ key, scales = "free_y") + 
  ggplot2::geom_vline(mapping = aes(xintercept = true_value))

Example 3

Mechanistic model: Erdös-Renyi (target)
Statistical model: ERGM
Integral stat: sorted degree sequence
Likelihood stat: number of edges

true_value <- 0.5
theta_p <- rep(seq(0.4, 0.6, by = 0.01), box_replicates)
theta_s <- log(true_value/(1 - true_value))

cl <- parallel::makeCluster(parallel::detectCores())

g <- parallel::parLapply(cl, theta_p, StartNetwork::KL_ss, theta_s = theta_s, replicates = replicates, sorted = TRUE, mech_net = mech_net_gnp, lstat = igraph::gsize, mirror = FALSE, type = c("Bianconi", "Liebenau"))

parallel::stopCluster(cl)
g_tidy <- StartNetwork::tidy_g(g)

ggplot(g_tidy) + aes(x = parameter, y = value, group = interaction(parameter, type), col = type) + geom_boxplot(outlier.shape = NULL, position = "identity") + facet_wrap( ~ key, scales = "free_y") + 
  ggplot2::geom_vline(mapping = aes(xintercept = true_value))

The graphs don't look correct in examples 2 and 3. This is because the space of dense graphs is counted inaccurately when the equation of @bianconi2009entropy is naively applied, we map the space of dense graphs to the space of sparse graphs. In other words, we pretend a dense graph is really a sparse graph by converting all non-edges to edges and all edges to non-edges. The conversion from the original degree sequence $d$ to the new degree sequence $d^$ is the following (where $n$ is the number of nodes): $$ d^ = n - d - 1. $$

Example 4

Mechanistic model: Erdös-Renyi (target)
Statistical model: ERGM
Integral stat: sorted degree sequence
Likelihood stat: number of edges

true_value <- 0.9
theta_p <- rep(seq(0.85, 0.95, by = 0.01), box_replicates)
theta_s <- log(true_value/(1 - true_value))

cl <- parallel::makeCluster(parallel::detectCores())

g <- parallel::parLapply(cl, theta_p, StartNetwork::KL_ss, theta_s = theta_s, replicates = replicates, sorted = TRUE, mech_net = mech_net_gnp, lstat = igraph::gsize, mirror = TRUE, type = c("Bianconi", "Liebenau"))

parallel::stopCluster(cl)
g_tidy <- StartNetwork::tidy_g(g)

ggplot(g_tidy) + aes(x = parameter, y = value, group = interaction(parameter, type), col = type) + geom_boxplot(outlier.shape = NULL, position = "identity") + facet_wrap( ~ key, scales = "free_y") + 
  ggplot2::geom_vline(mapping = aes(xintercept = true_value))

Example 5

Mechanistic model: Erdös-Renyi (target)
Statistical model: ERGM
Integral stat: sorted degree sequence
Likelihood stat: number of edges

true_value <- 0.5
theta_p <- rep(seq(0.4, 0.6, by = 0.01), 5)
theta_s <- log(true_value/(1 - true_value))

cl <- parallel::makeCluster(parallel::detectCores())

g <- parallel::parLapply(cl, theta_p, StartNetwork::KL_ss, theta_s = theta_s, replicates = replicates, sorted = TRUE, mech_net = mech_net_gnp, lstat = igraph::gsize, mirror = TRUE, type = c("Bianconi", "Liebenau"))

parallel::stopCluster(cl)
g_tidy <- StartNetwork::tidy_g(g)

ggplot(g_tidy) + aes(x = parameter, y = value, group = interaction(parameter, type), col = type) + geom_boxplot(outlier.shape = NULL, position = "identity") + facet_wrap( ~ key, scales = "free_y") + 
  ggplot2::geom_vline(mapping = aes(xintercept = true_value))

We have successfully retrieved the parameter for a dense graph. To check

Single reverse triangle (mirrored degree sequence)

n <- seq(5, 15, by = 2)
#onetriangle <- choose(choose(n,2), 1) * (n - 2)
onetriangle <- choose(n,3)
x <- log(onetriangle[1:6])

triangle_dd_b <- function(x){
  number_of_graphs_dd(c(rep(x - 1,x - 3), rep(x - 1 - 2, 3)), mirror = TRUE, type = "Bianconi")
}

triangle_dd_l <- function(x){
  number_of_graphs_dd(c(rep(x - 1,x - 3), rep(x - 1 - 2, 3)), mirror = TRUE, type = "Liebenau")
}

yb <- sapply(n, triangle_dd_b)
yl <- sapply(n, triangle_dd_l)
par(pty="s")

plot(x, yb, xlim = c(0, 20), ylim = c(0, 20), asp = 1, xlab = "analytic expression", ylab = "general estimator", col = "blue")
abline(c(0,1), col = "black")

points(x, yl, col = "red")

legend(10,4, legend = c("Bianconi", "Liebenau"), fill = c("blue", "red"))

ERGM as mechanistic models

Example 6

Mechanistic model: ERGM (target)
Statistical model: ERGM
Integral stat: sorted degree sequence
Likelihood stat: number of edges

n <- 15

mech_net_ergm <- purrr::partial(mech_net_ergm_n, n = !!n)

true_value <- 0.1
theta_p <- rep(seq(0.05, 0.15, by = 0.01), box_replicates)
theta_s <- log(true_value/(1 - true_value))

cl <- parallel::makeCluster(parallel::detectCores())

g <- parallel::parLapply(cl, theta_p, StartNetwork::KL_ss, theta_s = theta_s, replicates = replicates, sorted = TRUE, mech_net = mech_net_ergm, lstat = igraph::gsize, mirror = TRUE, type = c("Bianconi", "Liebenau"))

parallel::stopCluster(cl)
g_tidy <- StartNetwork::tidy_g(g)

ggplot(g_tidy) + aes(x = parameter, y = value, group = interaction(parameter, type), col = type) + geom_boxplot(outlier.shape = NULL, position = "identity") + facet_wrap( ~ key, scales = "free_y") + 
  ggplot2::geom_vline(mapping = aes(xintercept = true_value))

Example 7

Mechanistic model: ERGM (target)
Statistical model: ERGM
Integral stat: sorted degree sequence
Likelihood stat: number of edges

n <- 15

mech_net_ergm <- purrr::partial(mech_net_ergm_n, n = !!n)

true_value <- 0.9
theta_p <- rep(seq(0.85, 0.95, by = 0.01), box_replicates)
theta_s <- log(true_value/(1 - true_value))

cl <- parallel::makeCluster(parallel::detectCores())

g7 <- parallel::parLapply(cl, theta_p, StartNetwork::KL_ss, theta_s = theta_s, replicates = replicates, sorted = TRUE, mech_net = mech_net_ergm, lstat = igraph::gsize, mirror = TRUE, type = c("Bianconi", "Liebenau"))

parallel::stopCluster(cl)
library(ggplot2)

g_tidy <- StartNetwork::tidy_g(g7)

ggplot(g_tidy) + aes(x = parameter, y = value, group = interaction(parameter, type), col = type) + geom_boxplot(outlier.shape = NULL, position = "identity") + facet_wrap( ~ key, scales = "free_y") + 
  ggplot2::geom_vline(mapping = aes(xintercept = true_value))

Example 8

Mechanistic model: ERGM (target)
Statistical model: ERGM
Integral stat: sorted degree sequence
Likelihood stat: number of edges

n <- 15

mech_net_ergm <- purrr::partial(mech_net_ergm_n, n = !!n)

true_value <- 0.5
theta_p <- rep(seq(0.4, 0.6, by = 0.01), box_replicates)
theta_s <- log(true_value/(1 - true_value))

cl <- parallel::makeCluster(parallel::detectCores())

g8 <- parallel::parLapply(cl, theta_p, StartNetwork::KL_ss, theta_s = theta_s, replicates = replicates, sorted = TRUE, mech_net = mech_net_ergm, lstat = igraph::gsize, mirror = TRUE, type = c("Bianconi", "Liebenau"))

parallel::stopCluster(cl)
library(ggplot2)

g_tidy <- StartNetwork::tidy_g(g8)

ggplot(g_tidy) + aes(x = parameter, y = value, group = interaction(parameter, type), col = type) + geom_boxplot(outlier.shape = NULL, position = "identity") + facet_wrap( ~ key, scales = "free_y") + 
  ggplot2::geom_vline(mapping = aes(xintercept = true_value))
sessionInfo()

Bibliography



AnthonyEbert/StartNetwork documentation built on April 24, 2020, 3:28 a.m.