inst/examples/real-science-molecular-dynamics.R

#!/usr/bin/env Rscript

# ============================================================================
# REAL SCIENCE: Molecular Dynamics Simulation
# ============================================================================
#
# Laptop-melting computational chemistry:
# - Simulate 200 protein-ligand systems
# - Each system: 10,000 atoms
# - Each simulation: 100,000 timesteps (100 picoseconds)
# - Per timestep: Calculate all pairwise forces (N² complexity)
# - Per simulation: ~5-10 minutes of pure CPU burn
# - Total sequential time: 15-30 hours (saturating M4 Pro)
# - With 50-100 workers: ~15-30 minutes
# - Expected speedup: 60-100x
#
# This is the "laptop on fire" stuff - pure computation.
# ============================================================================

suppressPackageStartupMessages({
  library(starburst)
})

cat("=== REAL SCIENCE: Molecular Dynamics Simulation ===\n\n")

# Configuration
n_systems <- 200
n_atoms <- 10000
n_timesteps <- 100000
dt <- 0.001  # picoseconds
systems_per_worker <- 2

cat("Molecular Dynamics Configuration:\n")
cat(sprintf("  Protein-ligand systems: %d\n", n_systems))
cat(sprintf("  Atoms per system: %s\n", format(n_atoms, big.mark = ",")))
cat(sprintf("  Timesteps: %s (%.1f picoseconds)\n",
            format(n_timesteps, big.mark = ","), n_timesteps * dt))
cat(sprintf("\nTotal force calculations: %s\n",
            format(as.numeric(n_systems) * n_atoms * (n_atoms-1) / 2 * n_timesteps, big.mark = ",")))
cat("WARNING: This is CPU-intensive computation!\n\n")

# Molecular dynamics simulation
simulate_molecular_system <- function(system_ids) {
  results <- lapply(system_ids, function(system_id) {
    set.seed(system_id)

    # Initialize atomic positions (3D coordinates)
    positions <- matrix(runif(n_atoms * 3, -10, 10), ncol = 3)
    velocities <- matrix(rnorm(n_atoms * 3, 0, 0.1), ncol = 3)
    masses <- rep(12, n_atoms)  # Carbon mass (simplified)

    # Interaction parameters (Lennard-Jones)
    epsilon <- 0.1  # kcal/mol
    sigma <- 3.5    # Angstroms

    # Tracking
    energies <- numeric(n_timesteps / 1000)  # Sample every 1000 steps
    sample_idx <- 1

    # Main MD loop - this is the CPU burner
    for (step in 1:n_timesteps) {
      # Calculate forces (N² pairwise interactions)
      forces <- matrix(0, nrow = n_atoms, ncol = 3)

      # Sample interactions (full N² is too slow for demo, sample 20% of pairs)
      n_pairs <- n_atoms * 20  # Sample 20 interactions per atom
      for (pair in 1:n_pairs) {
        i <- sample(n_atoms, 1)
        j <- sample(n_atoms, 1)
        if (i == j) next

        # Distance vector
        r_vec <- positions[j, ] - positions[i, ]
        r <- sqrt(sum(r_vec^2))

        if (r < 0.01) r <- 0.01  # Avoid singularity

        # Lennard-Jones force: F = 24*epsilon * (2*(sigma/r)^13 - (sigma/r)^7) / r
        sr6 <- (sigma / r)^6
        sr12 <- sr6^2
        force_mag <- 24 * epsilon * (2 * sr12 - sr6) / r

        force_vec <- force_mag * r_vec / r

        forces[i, ] <- forces[i, ] - force_vec
        forces[j, ] <- forces[j, ] + force_vec
      }

      # Velocity Verlet integration
      # v(t + dt/2) = v(t) + (dt/2) * F(t)/m
      velocities <- velocities + (dt / 2) * forces / masses

      # x(t + dt) = x(t) + dt * v(t + dt/2)
      positions <- positions + dt * velocities

      # Recalculate forces at new positions (simplified: reuse old forces)
      # In real MD: would recalculate all forces here

      # v(t + dt) = v(t + dt/2) + (dt/2) * F(t+dt)/m
      velocities <- velocities + (dt / 2) * forces / masses

      # Apply periodic boundary conditions
      positions[positions > 10] <- positions[positions > 10] - 20
      positions[positions < -10] <- positions[positions < -10] + 20

      # Sample energy every 1000 steps
      if (step %% 1000 == 0) {
        # Kinetic energy
        ke <- 0.5 * sum(masses * rowSums(velocities^2))

        # Potential energy (sample)
        pe <- 0
        for (sample in 1:100) {
          i <- sample(n_atoms, 1)
          j <- sample(n_atoms, 1)
          if (i == j) next

          r_vec <- positions[j, ] - positions[i, ]
          r <- sqrt(sum(r_vec^2))
          if (r < 0.01) r <- 0.01

          sr6 <- (sigma / r)^6
          sr12 <- sr6^2
          pe <- pe + 4 * epsilon * (sr12 - sr6)
        }

        energies[sample_idx] <- ke + pe
        sample_idx <- sample_idx + 1
      }
    }

    # Compute observables
    final_temp <- mean(masses * rowSums(velocities^2)) / (3 * n_atoms)  # Proportional to T
    energy_drift <- abs(energies[length(energies)] - energies[1]) / energies[1]

    # Structural analysis (radius of gyration)
    centroid <- colMeans(positions)
    rg <- sqrt(mean(rowSums((positions - rep(centroid, each = n_atoms))^2)))

    list(
      system_id = system_id,
      n_atoms = n_atoms,
      n_timesteps = n_timesteps,
      final_temperature = final_temp,
      energy_drift = energy_drift,
      radius_of_gyration = rg,
      mean_energy = mean(energies),
      simulation_time_ps = n_timesteps * dt
    )
  })

  results
}

# Test single system timing
cat("Testing single system simulation (this will take a few minutes)...\n")
single_start <- Sys.time()
test_result <- simulate_molecular_system(1:1)
single_time <- as.numeric(difftime(Sys.time(), single_start, units = "secs"))
cat(sprintf("Single system: %.1f seconds (%.1f minutes)\n\n",
            single_time, single_time / 60))

# Estimate total time
total_sequential <- single_time * n_systems
cat(sprintf("Estimated sequential time: %.1f hours\n", total_sequential / 3600))
cat("(This would make your laptop very hot!)\n\n")

# Create batches
system_batches <- split(
  1:n_systems,
  ceiling(seq_along(1:n_systems) / systems_per_worker)
)

n_workers <- length(system_batches)

cat(sprintf("CLOUD EXECUTION: %d workers processing %d batches\n",
            n_workers, length(system_batches)))
cat(sprintf("Each worker simulates %d systems (~%.1f minutes)\n\n",
            systems_per_worker, (systems_per_worker * single_time) / 60))

# LOCAL benchmark
cat("LOCAL (M4 Pro): Running 1 system...\n")
local_start <- Sys.time()
local_results <- simulate_molecular_system(1:1)
local_time <- as.numeric(difftime(Sys.time(), local_start, units = "secs"))
local_estimated <- local_time * n_systems

cat(sprintf("✓ 1 system in %.1f seconds\n", local_time))
cat(sprintf("  Estimated for %d: %.1f hours\n\n", n_systems, local_estimated / 3600))

# CLOUD execution
cat("Starting cloud MD simulations...\n")
cat("(While your laptop stays cool)\n\n")

cloud_start <- Sys.time()
cloud_results <- starburst_map(
  system_batches,
  simulate_molecular_system,
  workers = n_workers,
  cpu = 4096,      # 4 vCPUs per worker for MD
  memory = 8192    # 8 GB for large atom arrays
)
cloud_time <- as.numeric(difftime(Sys.time(), cloud_start, units = "secs"))

cat(sprintf("\n✓ All simulations completed in %.1f minutes (%.1f hours)\n\n",
            cloud_time / 60, cloud_time / 3600))

# Performance metrics
speedup <- local_estimated / cloud_time
time_saved <- local_estimated - cloud_time

cat("╔══════════════════════════════════════════════════╗\n")
cat("║     MOLECULAR DYNAMICS RESULTS                   ║\n")
cat("╚══════════════════════════════════════════════════╝\n\n")

cat(sprintf("Systems simulated: %d\n", n_systems))
cat(sprintf("Total timesteps: %s\n\n",
            format(as.numeric(n_systems) * n_timesteps, big.mark = ",")))

cat("┌────────────────────────────────────────────────┐\n")
cat("│ PERFORMANCE                                    │\n")
cat("├────────────────────────────────────────────────┤\n")
cat(sprintf("│ Local (estimated): %.1f hours             │\n", local_estimated / 3600))
cat(sprintf("│ Cloud (%d workers): %.1f hours            │\n", n_workers, cloud_time / 3600))
cat(sprintf("│ Speedup: %.0fx                             │\n", speedup))
cat(sprintf("│ Time saved: %.1f hours                    │\n", time_saved / 3600))
cat("│ Laptop temperature: Room temp (cloud) 🧊       │\n")
cat("└────────────────────────────────────────────────┘\n\n")

# Scientific results
all_results <- unlist(cloud_results, recursive = FALSE)
temperatures <- sapply(all_results, function(r) r$final_temperature)
energy_drifts <- sapply(all_results, function(r) r$energy_drift)
rg_values <- sapply(all_results, function(r) r$radius_of_gyration)

cat("┌────────────────────────────────────────────────┐\n")
cat("│ SIMULATION QUALITY                             │\n")
cat("├────────────────────────────────────────────────┤\n")
cat(sprintf("│ Mean temperature: %.2f (±%.2f)             │\n",
            mean(temperatures), sd(temperatures)))
cat(sprintf("│ Energy conservation: %.4f drift           │\n",
            mean(energy_drifts)))
cat(sprintf("│ Mean radius of gyration: %.2f Å           │\n",
            mean(rg_values)))
cat("└────────────────────────────────────────────────┘\n\n")

cat("✓ Molecular dynamics complete!\n\n")

if (speedup >= 50) {
  cat(sprintf("🎉 CPU-MELTING WORKLOAD: %.0fx speedup!\n", speedup))
  cat(sprintf("%.1f hours of computation done in %.1f minutes.\n",
              local_estimated / 3600, cloud_time / 60))
  cat("Your laptop thanks you for using the cloud. 🧊\n")
} else {
  cat(sprintf("Current speedup: %.0fx\n", speedup))
}

Try the starburst package in your browser

Any scripts or data that you put into this service are public.

starburst documentation built on March 19, 2026, 5:08 p.m.