# performance-benchmarks.R
# In riemtan: Riemannian Metrics for Symmetric Positive Definite Matrices

## ----include = FALSE----------------------------------------------------------
# Set the knitr chunk options `collapse` and `comment` used when the
# vignette is rendered.
knitr::opts_chunk$set(comment = "#>", collapse = TRUE)

## ----eval=FALSE---------------------------------------------------------------
# library(riemtan)
# library(Matrix)
# library(microbenchmark)  # For benchmarking
# 
# # Load AIRM metric
# data(airm)
# 
# # Create example dataset (100 matrices of size 50x50)
# set.seed(42)
# create_spd_matrix <- function(p) {
#   mat <- diag(p) + matrix(rnorm(p*p, 0, 0.1), p, p)
#   mat <- (mat + t(mat)) / 2
#   mat <- mat + diag(p) * 0.5
#   Matrix::pack(Matrix::Matrix(mat, sparse = FALSE))
# }
# 
# connectomes <- lapply(1:100, function(i) create_spd_matrix(50))

## ----eval=FALSE---------------------------------------------------------------
# # Create sample
# sample <- CSample$new(conns = connectomes, metric_obj = airm)
# 
# # Sequential baseline
# set_parallel_plan("sequential")
# time_seq <- system.time(sample$compute_tangents())
# print(paste("Sequential:", round(time_seq[3], 2), "seconds"))
# 
# # Parallel with 4 workers
# set_parallel_plan("multisession", workers = 4)
# time_par4 <- system.time(sample$compute_tangents())
# print(paste("Parallel (4 workers):", round(time_par4[3], 2), "seconds"))
# print(paste("Speedup:", round(time_seq[3] / time_par4[3], 2), "x"))
# 
# # Parallel with 8 workers
# set_parallel_plan("multisession", workers = 8)
# time_par8 <- system.time(sample$compute_tangents())
# print(paste("Parallel (8 workers):", round(time_par8[3], 2), "seconds"))
# print(paste("Speedup:", round(time_seq[3] / time_par8[3], 2), "x"))
# 
# # Reset
# reset_parallel_plan()

## ----eval=FALSE---------------------------------------------------------------
# # Function to benchmark Frechet mean
# benchmark_fmean <- function(n, workers = 1) {
#   conns_subset <- connectomes[1:n]
#   sample <- CSample$new(conns = conns_subset, metric_obj = airm)
# 
#   if (workers == 1) {
#     set_parallel_plan("sequential")
#   } else {
#     set_parallel_plan("multisession", workers = workers)
#   }
# 
#   time <- system.time(sample$compute_fmean(tol = 0.01, max_iter = 50))
#   reset_parallel_plan()
# 
#   time[3]
# }
# 
# # Benchmark different sample sizes (at most length(connectomes) = 100,
# # because benchmark_fmean() subsets the `connectomes` list)
# sample_sizes <- c(20, 50, 100)
# results <- data.frame(
#   n = sample_sizes,
#   sequential = sapply(sample_sizes, benchmark_fmean, workers = 1),
#   parallel_4 = sapply(sample_sizes, benchmark_fmean, workers = 4),
#   parallel_8 = sapply(sample_sizes, benchmark_fmean, workers = 8)
# )
# 
# # Calculate speedups
# results$speedup_4 <- results$sequential / results$parallel_4
# results$speedup_8 <- results$sequential / results$parallel_8
# 
# print(results)

## ----eval=FALSE---------------------------------------------------------------
# # Create Parquet dataset
# write_connectomes_to_parquet(
#   connectomes,
#   output_dir = "benchmark_data",
#   subject_ids = paste0("subj_", 1:100)
# )
# 
# # Sequential loading
# backend_seq <- create_parquet_backend("benchmark_data")
# set_parallel_plan("sequential")
# time_load_seq <- system.time({
#   conns <- backend_seq$get_all_matrices()
# })
# 
# # Parallel loading
# backend_par <- create_parquet_backend("benchmark_data")
# set_parallel_plan("multisession", workers = 4)
# time_load_par <- system.time({
#   conns <- backend_par$get_all_matrices(parallel = TRUE)
# })
# 
# print(paste("Sequential load:", round(time_load_seq[3], 2), "seconds"))
# print(paste("Parallel load:", round(time_load_par[3], 2), "seconds"))
# print(paste("Speedup:", round(time_load_seq[3] / time_load_par[3], 2), "x"))
# 
# # Cleanup
# reset_parallel_plan()
# unlink("benchmark_data", recursive = TRUE)

## ----eval=FALSE---------------------------------------------------------------
# # Function to measure scaling
# measure_scaling <- function(workers_list) {
#   sample <- CSample$new(conns = connectomes, metric_obj = airm)
# 
#   times <- sapply(workers_list, function(w) {
#     if (w == 1) {
#       set_parallel_plan("sequential")
#     } else {
#       set_parallel_plan("multisession", workers = w)
#     }
# 
#     time <- system.time(sample$compute_tangents())[3]
#     reset_parallel_plan()
#     time
#   })
# 
#   data.frame(
#     workers = workers_list,
#     time = times,
#     speedup = times[1] / times,
#     efficiency = (times[1] / times) / workers_list * 100
#   )
# }
# 
# # Test with different worker counts
# workers <- c(1, 2, 4, 8)
# scaling_results <- measure_scaling(workers)
# 
# print(scaling_results)
# 
# # Plot scaling (if plotting package available)
# if (requireNamespace("ggplot2", quietly = TRUE)) {
#   library(ggplot2)
# 
#   p <- ggplot(scaling_results, aes(x = workers, y = speedup)) +
#     geom_line() +
#     geom_point(size = 3) +
#     geom_abline(slope = 1, intercept = 0, linetype = "dashed", color = "red") +
#     labs(
#       title = "Parallel Scaling Performance",
#       x = "Number of Workers",
#       y = "Speedup",
#       subtitle = "Dashed line = ideal linear scaling"
#     ) +
#     theme_minimal()
# 
#   print(p)
# }

## ----eval=FALSE---------------------------------------------------------------
# # Test different dataset sizes
# test_sizes <- c(10, 25, 50, 100, 200)
# 
# size_benchmark <- function(n) {
#   conns_subset <- lapply(1:n, function(i) create_spd_matrix(50))
#   sample <- CSample$new(conns = conns_subset, metric_obj = airm)
# 
#   # Sequential
#   set_parallel_plan("sequential")
#   time_seq <- system.time(sample$compute_tangents())[3]
# 
#   # Parallel
#   set_parallel_plan("multisession", workers = 4)
#   time_par <- system.time(sample$compute_tangents())[3]
# 
#   reset_parallel_plan()
# 
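#   # unname() drops the "elapsed" label inherited from system.time(), so the
#   # result columns are named "sequential", not "sequential.elapsed"
#   c(
#     sequential = unname(time_seq),
#     parallel = unname(time_par),
#     speedup = unname(time_seq / time_par)
#   )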
# }
# 
# results_by_size <- t(sapply(test_sizes, size_benchmark))
# results_by_size <- data.frame(n = test_sizes, results_by_size)
# 
# print(results_by_size)

## ----eval=FALSE---------------------------------------------------------------
# # Conservative (recommended for most users)
# # detectCores() can return 1 or NA, so always keep at least one worker
# n_workers <- max(1, parallel::detectCores() - 1, na.rm = TRUE)
# set_parallel_plan("multisession", workers = n_workers)
# 
# # Aggressive (maximum performance, may slow system)
# n_workers <- parallel::detectCores()
# set_parallel_plan("multisession", workers = n_workers)
# 
# # Custom (based on benchmarking)
# # Test different worker counts and choose optimal

## ----eval=FALSE---------------------------------------------------------------
# # For memory-constrained environments:
# 
# # 1. Use Parquet backend with small cache
# backend <- create_parquet_backend("large_dataset", cache_size = 5)
# 
# # 2. Use moderate worker count
# set_parallel_plan("multisession", workers = 2)
# 
# # 3. Use batch loading for very large datasets
# sample <- CSample$new(backend = backend, metric_obj = airm)
# conns_batch <- sample$load_connectomes_batched(
#   indices = 1:500,
#   batch_size = 50,   # Small batches
#   progress = TRUE
# )
# 
# # 4. Clear cache frequently
# backend$clear_cache()

## ----eval=FALSE---------------------------------------------------------------
# # With progress (slightly slower)
# set_parallel_plan("multisession", workers = 4)
# time_with_progress <- system.time({
#   sample$compute_tangents(progress = TRUE)
# })[3]
# 
# # Without progress (slightly faster)
# time_no_progress <- system.time({
#   sample$compute_tangents(progress = FALSE)
# })[3]
# 
# overhead <- (time_with_progress - time_no_progress) / time_no_progress * 100
# print(paste("Progress overhead:", round(overhead, 1), "%"))

## ----eval=FALSE---------------------------------------------------------------
# library(microbenchmark)
# 
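# # Run multiple times to get stable estimates
# # NOTE(review): set_parallel_plan() is called inside each timed expression,
# # so whatever it costs to switch plans is included in the reported timings.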
# mb_result <- microbenchmark(
#   sequential = {
#     set_parallel_plan("sequential")
#     sample$compute_tangents()
#   },
#   parallel_4 = {
#     set_parallel_plan("multisession", workers = 4)
#     sample$compute_tangents()
#   },
#   times = 10  # Run 10 times each
# )
# 
# print(mb_result)
# plot(mb_result)

## ----eval=FALSE---------------------------------------------------------------
# # Clear any caches between runs
# sample <- CSample$new(conns = connectomes, metric_obj = airm)
# 
# # Warm up (first run may be slower)
# sample$compute_tangents()
# 
# # Now benchmark
# time <- system.time(sample$compute_tangents())[3]

## ----eval=FALSE---------------------------------------------------------------
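# # Record system specs with benchmarks
# # `wmic` exists only on Windows; record NA for total memory on other systems
# total_memory_gb <- if (.Platform$OS.type == "windows") {
#   as.numeric(system("wmic ComputerSystem get TotalPhysicalMemory", intern = TRUE)[2]) / 1e9
# } else {
#   NA_real_
# }
# system_info <- list(
#   cores = parallel::detectCores(),
#   memory = total_memory_gb,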
#   r_version = R.version.string,
#   riemtan_version = packageVersion("riemtan"),
#   os = Sys.info()["sysname"]
# )
# 
# print(system_info)

## ----eval=FALSE---------------------------------------------------------------
# # Expect near-linear scaling up to physical cores
# set_parallel_plan("multisession", workers = 4)
# sample$compute_tangents()  # 3-4x speedup
# sample$compute_vecs()       # 2-4x speedup
# sample$compute_conns()      # 3-4x speedup

## ----eval=FALSE---------------------------------------------------------------
# # Expect 2-3x speedup (less than compute-bound)
# set_parallel_plan("multisession", workers = 4)
# sample$compute_fmean()  # 2-3x speedup

## ----eval=FALSE---------------------------------------------------------------
# # May see >4x speedup with 4 workers (parallel disk I/O)
# backend <- create_parquet_backend("dataset")
# set_parallel_plan("multisession", workers = 4)
# conns <- backend$get_all_matrices(parallel = TRUE)  # 5-10x speedup

## ----eval=FALSE---------------------------------------------------------------
# # Use multisession (multicore not available)
# set_parallel_plan("multisession", workers = 4)
# 
# # Expect slightly higher overhead than Unix
# # Typical speedup: 70-80% of Unix performance

## ----eval=FALSE---------------------------------------------------------------
# # Can use multicore for lower overhead
# set_parallel_plan("multicore", workers = 4)
# 
# # Or multisession for better stability
# set_parallel_plan("multisession", workers = 4)

## ----eval=FALSE---------------------------------------------------------------
# # Use cluster strategy for distributed computing
# library(future)
# plan(cluster, workers = c("node1", "node2", "node3", "node4"))
# 
# # Or use batchtools for SLURM integration
# library(future.batchtools)
# plan(batchtools_slurm, workers = 16)

## ----eval=FALSE---------------------------------------------------------------
# # Setup
# library(riemtan)
# set_parallel_plan("multisession", workers = parallel::detectCores() - 1)
# 
# # Load data
# backend <- create_parquet_backend("large_dataset")
# sample <- CSample$new(backend = backend, metric_obj = airm)
# 
# # Compute with progress
# sample$compute_tangents(progress = TRUE)
# sample$compute_fmean(progress = TRUE)
# 
# # Cleanup
# reset_parallel_plan()

Any scripts or data that you put into this service are public.

riemtan documentation built on Nov. 11, 2025, 1:06 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

riemtan
Riemannian Metrics for Symmetric Positive Definite Matrices

inst/doc/performance-benchmarks.R
In riemtan: Riemannian Metrics for Symmetric Positive Definite Matrices

Try the riemtan package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

riemtan Riemannian Metrics for Symmetric Positive Definite Matrices

inst/doc/performance-benchmarks.R In riemtan: Riemannian Metrics for Symmetric Positive Definite Matrices

Try the riemtan package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

riemtan
Riemannian Metrics for Symmetric Positive Definite Matrices

inst/doc/performance-benchmarks.R
In riemtan: Riemannian Metrics for Symmetric Positive Definite Matrices