Nothing
## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>"
)
## -----------------------------------------------------------------------------
library(dplyr)
library(ggplot2)
library(fastRG)
library(irlba)
library(Matrix)
library(purrr)
library(scales)
library(tidyr)
set.seed(27)
model_distinct <- function(n, k = 5) {
B <- matrix(0.05, nrow = k, ncol = k)
diag(B) <- seq(0.8, 0.4, length.out = k)
latent <- dcsbm(
theta = rexp(n) + 1,
B = B,
expected_degree = 0.1 * n
)
}
model_repeated <- function(n, k = 5) {
latent <- dcsbm(
theta = rep(1, n),
B = diag(0.5, k),
expected_degree = 0.1 * n
)
}
## -----------------------------------------------------------------------------
# sample the latent parameters of the blockmodel
latent <- model_distinct(1000)
# compute the population singular value decomposition of the blockmodel
s_pop <- svds(latent)
# sample a network conditional on the latent factors
A <- sample_sparse(latent)
# singular value decomposition of the observed network
s_obs <- irlba(A, 5)
# difference between population and sample singular values
s_pop$d - s_obs$d
## -----------------------------------------------------------------------------
sin_theta_distance <- function(u, v) {
s <- svd(crossprod(u, v))
ncol(u) - sum(s$d^2)
}
find_procrustes_rotation <- function(X, Y) {
s <- svd(crossprod(X, Y))
tcrossprod(s$v, s$u) # NOTE U, V swap versus next one
}
procrustes_align <- function(X, Y) {
s <- svd(crossprod(X, Y))
rotation <- tcrossprod(s$v, s$u)
Y %*% rotation
}
two_to_infinity_loss <- function(X, Y) {
s <- svd(crossprod(X, Y))
rotation <- tcrossprod(s$v, s$u)
Yrot <- Y %*% rotation
diff <- X - Yrot
max(sqrt(Matrix::rowSums(diff^2)))
}
loss_helper <- function(s_pop, s_obs) {
u_pop <- s_pop$u
u_obs <- s_obs$u
d_pop <- s_pop$d
d_obs <- s_obs$d
x_pop <- u_pop %*% sqrt(diag(d_pop))
x_obs <- u_obs %*% sqrt(diag(d_obs))
spectral_loss_relative <- abs(d_pop - d_obs) / d_pop
column_sin_theta_loss <- map_dbl(
1:ncol(u_pop),
\(j) {
sin_theta_distance(u_pop[, j, drop = FALSE], u_obs[, j, drop = FALSE])
}
)
tibble(
sin_theta_loss = sin_theta_distance(u_pop, u_obs),
u_two_inf_loss = two_to_infinity_loss(u_pop, u_obs),
x_two_inf_loss = two_to_infinity_loss(x_pop, x_obs),
spectral_loss_relative1 = spectral_loss_relative[1],
spectral_loss_relative2 = spectral_loss_relative[2],
spectral_loss_relative3 = spectral_loss_relative[3],
spectral_loss_relative4 = spectral_loss_relative[4],
spectral_loss_relative5 = spectral_loss_relative[5],
sin_theta_loss1 = column_sin_theta_loss[1],
sin_theta_loss2 = column_sin_theta_loss[2],
sin_theta_loss3 = column_sin_theta_loss[3],
sin_theta_loss4 = column_sin_theta_loss[4],
sin_theta_loss5 = column_sin_theta_loss[5]
)
}
## -----------------------------------------------------------------------------
run_simulation <- function(model, num_reps = 30) {
expand_grid(
n = c(100, 180, 330, 600, 1100, 2000),
reps = 1:num_reps
) |>
mutate(
pop = map(n, model),
s_pop = map(pop, svds),
A = map(pop, sample_sparse),
s_obs = map(A, irlba, 5), # rank five svd
loss = map2(s_pop, s_obs, loss_helper)
)
}
sims <- run_simulation(model_distinct)
sims
## -----------------------------------------------------------------------------
summarize_simulations <- function(sims) {
sims |>
unnest_wider(c(loss)) |>
select(contains("loss"), everything()) |>
summarize(
across(contains("loss"), mean),
.by = n
) |>
pivot_longer(
contains("loss"),
names_to = "loss_type",
values_to = "loss"
) |>
mutate(
loss_type = recode(
loss_type,
"sin_theta_loss" = "Sin Theta Loss",
"u_two_inf_loss" = "Two-to-infinity (un-scaled)",
"x_two_inf_loss" = "Two-to-infinity (scaled)",
"spectral_loss" = "Spectral",
"sin_theta_loss1" = "Sin Theta Loss (column 1)",
"sin_theta_loss2" = "Sin Theta Loss (column 2)",
"sin_theta_loss3" = "Sin Theta Loss (column 3)",
"sin_theta_loss4" = "Sin Theta Loss (column 4)",
"sin_theta_loss5" = "Sin Theta Loss (column 5)",
"spectral_loss_relative1" = "Rel. error (sigma 1)",
"spectral_loss_relative2" = "Rel. error (sigma 2)",
"spectral_loss_relative3" = "Rel. error (sigma 3)",
"spectral_loss_relative4" = "Rel. error (sigma 4)",
"spectral_loss_relative5" = "Rel. error (sigma 5)",
)
)
}
results <- summarize_simulations(sims)
results
## -----------------------------------------------------------------------------
plot_results <- function(results) {
results |>
ggplot() +
aes(x = n, y = loss, color = loss_type) +
geom_line() +
scale_x_log10(labels = label_log(digits = 2)) +
scale_y_log10(labels = label_log(digits = 2)) +
scale_color_viridis_d(begin = 0.15, end = 0.85, guide = "none") +
facet_wrap(vars(loss_type), scales = "free_y") +
labs(
title = "Estimation error in adjacency spectral embedding",
y = "Estimation error (log scale)",
x = "Number of nodes (log scale)"
) +
theme_minimal()
}
plot_results(results)
## -----------------------------------------------------------------------------
model_repeated |>
run_simulation() |>
summarize_simulations() |>
plot_results()
## -----------------------------------------------------------------------------
graph_laplacian <- function(A) {
degrees <- rowSums(A)
tau <- mean(degrees)
DA <- rowScale(A, 1 / sqrt(degrees + tau))
colScale(DA, 1 / sqrt(degrees + tau))
}
svds_laplacian <- function(pop) {
# regularize by expected mean degree (scalar)
tau <- expected_degree(pop)
# vector!!
degrees_pop <- expected_degrees(pop)
# rescale X in the population model so that XSX' is the expected
# graph Laplacian. we can't use this to sample networks anymore, but
# we can use it to bootstrap a population SVD calculation
pop$X <- rowScale(pop$X, 1 / sqrt(degrees_pop + tau))
svds(pop)
}
run_laplacian_simulation <- function(model, num_reps = 30) {
expand_grid(
n = c(100, 180, 330, 600, 1100, 2000),
reps = 1:num_reps
) |>
mutate(
pop = map(n, model),
s_pop = map(pop, svds_laplacian),
A = map(pop, sample_sparse),
L = map(A, graph_laplacian),
s_obs = map(L, irlba, 5), # rank five svd,
loss = map2(s_pop, s_obs, loss_helper)
)
}
## -----------------------------------------------------------------------------
model_distinct |>
run_laplacian_simulation() |>
summarize_simulations() |>
plot_results()
## -----------------------------------------------------------------------------
sims |>
mutate(diagnostics = map2(s_pop, s_obs, function(pop, obs) {
tibble(
rank = 1:length(pop$d),
val_pop = pop$d,
val_obs = obs$d
)
})) |>
select(n, reps, diagnostics) |>
unnest(diagnostics) |>
pivot_longer(
cols = c(val_pop, val_obs),
names_to = "type",
values_to = "value"
) |>
mutate(type = recode(type, val_pop = "Population", val_obs = "Observed")) |>
ggplot(aes(
x = factor(rank),
y = value,
color = type,
group = interaction(reps, type)
)) +
geom_point(position = position_jitter(width = 0.1),
alpha = 0.5,
size = 1.5) +
geom_line(alpha = 0.3) +
facet_wrap( ~ n, scales = "free_y", labeller = label_both) +
scale_color_manual(values = c(
"Population" = "#377eb8",
"Observed" = "#e41a1c"
)) +
labs(
title = "Scree Plots: Population vs Observed",
subtitle = "Comparing singular values across multiple simulation replicates",
x = "Index of singular value",
y = "Singular Value",
color = "Spectrum"
) +
theme_minimal() +
theme(legend.position = "bottom")
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.