consistency.R
In fastRG: Sample Generalized Random Dot Product Graphs in Linear Time

## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## -----------------------------------------------------------------------------
library(dplyr)
library(ggplot2)
library(fastRG)
library(irlba)
library(Matrix)
library(purrr)
library(scales)
library(tidyr)

set.seed(27)

model_distinct <- function(n, k = 5) {
  B <- matrix(0.05, nrow = k, ncol = k)
  diag(B) <- seq(0.8, 0.4, length.out = k)
  latent <- dcsbm(
    theta = rexp(n) + 1,
    B = B,
    expected_degree = 0.1 * n
  )
}

model_repeated <- function(n, k = 5) {
  latent <- dcsbm(
    theta = rep(1, n),
    B = diag(0.5, k),
    expected_degree = 0.1 * n
  )
}

## -----------------------------------------------------------------------------
# sample the latent parameters of the blockmodel
latent <- model_distinct(1000)

# compute the population singular value decomposition of the blockmodel
s_pop <- svds(latent)

# sample a network conditional on the latent factors
A <- sample_sparse(latent)

# singular value decomposition of the observed network
s_obs <- irlba(A, 5)

# difference between population and sample singular values
s_pop$d - s_obs$d

## -----------------------------------------------------------------------------
sin_theta_distance <- function(u, v) {
  s <- svd(crossprod(u, v))
  ncol(u) - sum(s$d^2)
}

find_procrustes_rotation <- function(X, Y) {
  s <- svd(crossprod(X, Y))
  tcrossprod(s$v, s$u) # NOTE U, V swap versus next one
}

procrustes_align <- function(X, Y) {
  s <- svd(crossprod(X, Y))
  rotation <- tcrossprod(s$v, s$u)
  Y %*% rotation
}

two_to_infinity_loss <- function(X, Y) {
  s <- svd(crossprod(X, Y))
  rotation <- tcrossprod(s$v, s$u)
  Yrot <- Y %*% rotation
  diff <- X - Yrot
  max(sqrt(Matrix::rowSums(diff^2)))
}

loss_helper <- function(s_pop, s_obs) {
  u_pop <- s_pop$u
  u_obs <- s_obs$u

  d_pop <- s_pop$d
  d_obs <- s_obs$d

  x_pop <- u_pop %*% sqrt(diag(d_pop))
  x_obs <- u_obs %*% sqrt(diag(d_obs))

  spectral_loss_relative <- abs(d_pop - d_obs) / d_pop
  
  column_sin_theta_loss <- map_dbl(
    1:ncol(u_pop),
    \(j) {
      sin_theta_distance(u_pop[, j, drop = FALSE], u_obs[, j, drop = FALSE])
    }
  )

  tibble(
    sin_theta_loss = sin_theta_distance(u_pop, u_obs),
    u_two_inf_loss = two_to_infinity_loss(u_pop, u_obs),
    x_two_inf_loss = two_to_infinity_loss(x_pop, x_obs),
    spectral_loss_relative1 = spectral_loss_relative[1],
    spectral_loss_relative2 = spectral_loss_relative[2],
    spectral_loss_relative3 = spectral_loss_relative[3],
    spectral_loss_relative4 = spectral_loss_relative[4],
    spectral_loss_relative5 = spectral_loss_relative[5],
    sin_theta_loss1 = column_sin_theta_loss[1],
    sin_theta_loss2 = column_sin_theta_loss[2],
    sin_theta_loss3 = column_sin_theta_loss[3],
    sin_theta_loss4 = column_sin_theta_loss[4],
    sin_theta_loss5 = column_sin_theta_loss[5]
  )
}

## -----------------------------------------------------------------------------

run_simulation <- function(model, num_reps = 30) {
  expand_grid(
    n = c(100, 180, 330, 600, 1100, 2000),
    reps = 1:num_reps
  ) |>
    mutate(
      pop = map(n, model),
      s_pop = map(pop, svds),
      A = map(pop, sample_sparse),
      s_obs = map(A, irlba, 5), # rank five svd
      loss = map2(s_pop, s_obs, loss_helper)
    )
}

sims <- run_simulation(model_distinct)
sims

## -----------------------------------------------------------------------------
summarize_simulations <- function(sims) {
  sims |>
    unnest_wider(c(loss)) |>
    select(contains("loss"), everything()) |>
    summarize(
      across(contains("loss"), mean),
      .by = n
    ) |>
    pivot_longer(
      contains("loss"),
      names_to = "loss_type",
      values_to = "loss"
    ) |>
    mutate(
      loss_type = recode(
        loss_type,
        "sin_theta_loss" = "Sin Theta Loss",
        "u_two_inf_loss" = "Two-to-infinity (un-scaled)",
        "x_two_inf_loss" = "Two-to-infinity (scaled)",
        "spectral_loss" = "Spectral",
        "sin_theta_loss1" = "Sin Theta Loss (column 1)",
        "sin_theta_loss2" = "Sin Theta Loss (column 2)",
        "sin_theta_loss3" = "Sin Theta Loss (column 3)",
        "sin_theta_loss4" = "Sin Theta Loss (column 4)",
        "sin_theta_loss5" = "Sin Theta Loss (column 5)",
        "spectral_loss_relative1" = "Rel. error (sigma 1)",
        "spectral_loss_relative2" = "Rel. error (sigma 2)",
        "spectral_loss_relative3" = "Rel. error (sigma 3)",
        "spectral_loss_relative4" = "Rel. error (sigma 4)",
        "spectral_loss_relative5" = "Rel. error (sigma 5)",
      )
    )
}

results <- summarize_simulations(sims)
results

## -----------------------------------------------------------------------------
plot_results <- function(results) {
  results |>
    ggplot() +
    aes(x = n, y = loss, color = loss_type) +
    geom_line() +
    scale_x_log10(labels = label_log(digits = 2)) +
    scale_y_log10(labels = label_log(digits = 2)) +
    scale_color_viridis_d(begin = 0.15, end = 0.85, guide = "none") +
    facet_wrap(vars(loss_type), scales = "free_y") +
    labs(
      title = "Estimation error in adjacency spectral embedding",
      y = "Estimation error (log scale)",
      x = "Number of nodes (log scale)"
    ) +
    theme_minimal()
}

plot_results(results)

## -----------------------------------------------------------------------------
model_repeated |>
  run_simulation() |>
  summarize_simulations() |>
  plot_results()

## -----------------------------------------------------------------------------
graph_laplacian <- function(A) {
  degrees <- rowSums(A)
  tau <- mean(degrees)
  DA <- rowScale(A, 1 / sqrt(degrees + tau))
  colScale(DA, 1 / sqrt(degrees + tau))
}

svds_laplacian <- function(pop) {
  # regularize by expected mean degree (scalar)
  tau <- expected_degree(pop)
  # vector!!
  degrees_pop <- expected_degrees(pop)

  # rescale X in the population model so that XSX' is the expected
  # graph Laplacian. we can't use this to sample networks anymore, but
  # we can use it to bootstrap a population SVD calculation
  pop$X <- rowScale(pop$X, 1 / sqrt(degrees_pop + tau))
  svds(pop)
}

run_laplacian_simulation <- function(model, num_reps = 30) {
  expand_grid(
    n = c(100, 180, 330, 600, 1100, 2000),
    reps = 1:num_reps
  ) |>
    mutate(
      pop = map(n, model),
      s_pop = map(pop, svds_laplacian),
      A = map(pop, sample_sparse),
      L = map(A, graph_laplacian),
      s_obs = map(L, irlba, 5), # rank five svd,
      loss = map2(s_pop, s_obs, loss_helper)
    )
}

## -----------------------------------------------------------------------------
model_distinct |>
  run_laplacian_simulation() |>
  summarize_simulations() |>
  plot_results()

## -----------------------------------------------------------------------------
sims |>
  mutate(diagnostics = map2(s_pop, s_obs, function(pop, obs) {
    tibble(
      rank = 1:length(pop$d),
      val_pop = pop$d,
      val_obs = obs$d
    )
  })) |>
  select(n, reps, diagnostics) |>
  unnest(diagnostics) |>
  pivot_longer(
    cols = c(val_pop, val_obs),
    names_to = "type",
    values_to = "value"
  ) |>
  mutate(type = recode(type, val_pop = "Population", val_obs = "Observed")) |>
  ggplot(aes(
    x = factor(rank),
    y = value,
    color = type,
    group = interaction(reps, type)
  )) +
  geom_point(position = position_jitter(width = 0.1),
             alpha = 0.5,
             size = 1.5) +
  geom_line(alpha = 0.3) +
  facet_wrap( ~ n, scales = "free_y", labeller = label_both) +
  scale_color_manual(values = c(
    "Population" = "#377eb8",
    "Observed" = "#e41a1c"
  )) +
  labs(
    title = "Scree Plots: Population vs Observed",
    subtitle = "Comparing singular values across multiple simulation replicates",
    x = "Index of singular value",
    y = "Singular Value",
    color = "Spectrum"
  ) +
  theme_minimal() +
  theme(legend.position = "bottom")

Any scripts or data that you put into this service are public.

fastRG documentation built on Dec. 6, 2025, 5:08 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

fastRG
Sample Generalized Random Dot Product Graphs in Linear Time

inst/doc/consistency.R
In fastRG: Sample Generalized Random Dot Product Graphs in Linear Time

Try the fastRG package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

fastRG Sample Generalized Random Dot Product Graphs in Linear Time

inst/doc/consistency.R In fastRG: Sample Generalized Random Dot Product Graphs in Linear Time

Try the fastRG package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

fastRG
Sample Generalized Random Dot Product Graphs in Linear Time

inst/doc/consistency.R
In fastRG: Sample Generalized Random Dot Product Graphs in Linear Time