## ----setup, include = FALSE---------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  eval = FALSE
)
## -----------------------------------------------------------------------------
# # Install from GitHub
# remotes::install_github("yourname/starburst")
## -----------------------------------------------------------------------------
# library(starburst)
#
# # Interactive setup wizard (takes ~2 minutes)
# starburst_setup()
## -----------------------------------------------------------------------------
# library(furrr)
# library(starburst)
#
# # Define your work
# expensive_simulation <- function(i) {
#   # Some moderately expensive computation
#   results <- replicate(1000, {
#     x <- rnorm(10000)
#     mean(x^2)
#   })
#   mean(results)
# }
#
# # Local execution (single core); seed first so both runs draw the
# # same random streams and the results really are identical
# plan(sequential)
# set.seed(42)
# system.time({
#   results_local <- future_map(1:100, expensive_simulation,
#                               .options = furrr_options(seed = TRUE))
# })
# #> ~16 minutes on a typical laptop
#
# # Cloud execution (50 workers)
# plan(future_starburst, workers = 50)
# set.seed(42)
# system.time({
#   results_cloud <- future_map(1:100, expensive_simulation,
#                               .options = furrr_options(seed = TRUE))
# })
# #> ~2 minutes (including 45s startup)
# #> Cost: ~$0.85
#
# # Results are identical
# identical(results_local, results_cloud)
# #> [1] TRUE
## -----------------------------------------------------------------------------
# library(starburst)
# library(furrr)
#
# # Simulate portfolio returns
# simulate_portfolio <- function(seed) {
#   set.seed(seed)
#
#   # Random walk for 252 trading days
#   returns <- rnorm(252, mean = 0.0003, sd = 0.02)
#   prices <- cumprod(1 + returns)
#
#   list(
#     final_value = prices[252],
#     # drawdown is measured relative to the running peak
#     max_drawdown = max((cummax(prices) - prices) / cummax(prices)),
#     sharpe_ratio = mean(returns) / sd(returns) * sqrt(252)
#   )
# }
#
# # Run 10,000 simulations on 100 workers
# plan(future_starburst, workers = 100)
#
# results <- future_map(1:10000, simulate_portfolio,
#                       .options = furrr_options(seed = TRUE))
#
# # Analyze results
# final_values <- sapply(results, `[[`, "final_value")
# hist(final_values, breaks = 50, main = "Distribution of Portfolio Final Values")
#
# # 95% confidence interval
# quantile(final_values, c(0.025, 0.975))
## -----------------------------------------------------------------------------
# library(starburst)
# library(furrr)
#
# # Your data
# data <- read.csv("my_data.csv")
#
# # Bootstrap function
# bootstrap_regression <- function(i, data) {
#   # Resample with replacement
#   boot_indices <- sample(nrow(data), replace = TRUE)
#   boot_data <- data[boot_indices, ]
#
#   # Fit model
#   model <- lm(y ~ x1 + x2 + x3, data = boot_data)
#
#   # Return coefficients
#   coef(model)
# }
#
# # Run 10,000 bootstrap samples
# plan(future_starburst, workers = 50)
#
# boot_results <- future_map(1:10000, bootstrap_regression, data = data,
#                            .options = furrr_options(seed = TRUE))
#
# # Convert to matrix
# boot_coefs <- do.call(rbind, boot_results)
#
# # 95% confidence intervals for each coefficient
# apply(boot_coefs, 2, quantile, probs = c(0.025, 0.975))
## -----------------------------------------------------------------------------
# library(starburst)
# library(furrr)
#
# # Process one sample
# process_sample <- function(sample_id) {
#   # Read from S3 (data already in cloud)
#   fastq_path <- sprintf("s3://my-genomics-data/samples/%s.fastq", sample_id)
#   data <- read_fastq(fastq_path)
#
#   # Align reads
#   aligned <- align_reads(data, reference = "hg38")
#
#   # Call variants
#   variants <- call_variants(aligned)
#
#   # Return summary
#   list(
#     sample_id = sample_id,
#     num_variants = nrow(variants),
#     variants = variants
#   )
# }
#
# # Process 1000 samples on 100 workers
# # (base list.files() cannot enumerate S3 objects; in practice, list the
# # bucket with an S3 client such as the aws.s3 package)
# sample_files <- list.files("s3://my-genomics-data/samples/", pattern = "\\.fastq$")
# sample_ids <- sub("\\.fastq$", "", sample_files)  # process_sample() re-appends .fastq
#
# plan(future_starburst, workers = 100)
#
# results <- future_map(sample_ids, process_sample, .progress = TRUE)
#
# # Combine results
# all_variants <- do.call(rbind, lapply(results, `[[`, "variants"))
## -----------------------------------------------------------------------------
# plan(future_starburst, workers = 50)
#
# results <- future_map(file_list, function(file) {
#   # Workers read directly from S3
#   data <- read.csv(sprintf("s3://my-bucket/%s", file))
#   process(data)
# })
## -----------------------------------------------------------------------------
# # Load data locally
# data <- read.csv("local_file.csv")
#
# # staRburst automatically uploads to S3 and distributes
# plan(future_starburst, workers = 50)
#
# results <- future_map(1:1000, function(i) {
#   # Each worker gets a copy of 'data'
#   bootstrap_analysis(data, i)
# })
## -----------------------------------------------------------------------------
# # Upload once
# large_data <- read.csv("huge_file.csv")
# s3_path <- starburst_upload(large_data, "s3://my-bucket/large_data.rds")
#
# # Workers read from S3
# plan(future_starburst, workers = 100)
#
# results <- future_map(1:1000, function(i) {
#   # Read from S3 inside worker
#   data <- readRDS(s3_path)
#   process(data, i)
# })
## -----------------------------------------------------------------------------
# # Check cost before running
# plan(future_starburst, workers = 100, cpu = 4, memory = "8GB")
# #> Estimated cost: ~$3.50/hour
## -----------------------------------------------------------------------------
# # Set maximum cost per job
# starburst_config(
#   max_cost_per_job = 10,     # Don't start jobs that would cost >$10
#   cost_alert_threshold = 5   # Warn when approaching $5
# )
#
# # Now jobs exceeding limit will error before starting
# plan(future_starburst, workers = 1000) # Would cost ~$35/hour
# #> Error: Estimated cost ($35/hr) exceeds limit ($10/hr)
## -----------------------------------------------------------------------------
# plan(future_starburst, workers = 50)
#
# results <- future_map(data, process)
#
# #> Cluster runtime: 23 minutes
# #> Total cost: $1.34
## -----------------------------------------------------------------------------
# starburst_quota_status()
# #> Fargate vCPU Quota: 100 / 100 used
# #> Allows: ~25 workers with 4 vCPUs each
# #>
# #> Recommended: Request increase to 500 vCPUs
## -----------------------------------------------------------------------------
# starburst_request_quota_increase(vcpus = 500)
# #> Requesting Fargate vCPU quota increase:
# #> Current: 100 vCPUs
# #> Requested: 500 vCPUs
# #>
# #> ✓ Quota increase requested (Case ID: 12345678)
# #> ✓ AWS typically approves within 1-24 hours
## -----------------------------------------------------------------------------
# # Quota allows 25 workers, but you request 100
# plan(future_starburst, workers = 100, cpu = 4)
#
# #> ⚠ Requested: 100 workers (400 vCPUs)
# #> ⚠ Current quota: 100 vCPUs (allows 25 workers max)
# #>
# #> 📋 Execution plan:
# #> • Running in 4 waves of 25 workers each
# #>
# #> 💡 Request quota increase to 500 vCPUs? [y/n]: y
# #>
# #> ✓ Quota increase requested
# #> ⚡ Starting wave 1 (25 workers)...
#
# results <- future_map(1:1000, expensive_function)
#
# #> ⚡ Wave 1: 100% complete (250 tasks)
# #> ⚡ Wave 2: 100% complete (500 tasks)
# #> ⚡ Wave 3: 100% complete (750 tasks)
# #> ⚡ Wave 4: 100% complete (1000 tasks)
## -----------------------------------------------------------------------------
# # View logs from most recent cluster
# starburst_logs()
#
# # View logs from specific task
# starburst_logs(task_id = "abc-123")
#
# # View last 100 log lines
# starburst_logs(last_n = 100)
## -----------------------------------------------------------------------------
# starburst_status()
# #> Active Clusters:
# #> • starburst-xyz123: 50 workers running
# #> • starburst-abc456: 25 workers running
## -----------------------------------------------------------------------------
# # Rebuild environment
# starburst_rebuild_environment()
## -----------------------------------------------------------------------------
# # Check logs
# starburst_logs(task_id = "failed-task-id")
#
# # Often due to memory limits - increase worker memory
# plan(future_starburst, workers = 50, memory = "16GB") # Default is 8GB
## -----------------------------------------------------------------------------
# # Use Arrow for data frames
# library(arrow)
# write_parquet(my_data, "s3://bucket/data.parquet")
#
# # Workers read Arrow
# results <- future_map(1:100, function(i) {
#   data <- read_parquet("s3://bucket/data.parquet")
#   process(data, i)
# })
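#
# # A hedged variant: if workers only need a few columns, arrow's
# # read_parquet() accepts col_select to read just a subset (the column
# # names "x" and "y" here are purely illustrative):
# results <- future_map(1:100, function(i) {
#   data <- read_parquet("s3://bucket/data.parquet", col_select = c("x", "y"))
#   process(data, i)
# })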
## -----------------------------------------------------------------------------
# # 100 tasks, each takes 10 minutes
# # Local: 1000 minutes, Cloud: ~10 minutes
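#
# # Back-of-envelope sketch using the numbers above plus the ~45s
# # startup seen earlier: wall time ~= startup + total compute / workers
# n_tasks <- 100; mins_per_task <- 10; workers <- 100
# local_mins <- n_tasks * mins_per_task                        # 1000 minutes
# cloud_mins <- 45 / 60 + (n_tasks * mins_per_task) / workers  # ~10.8 minutes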
## -----------------------------------------------------------------------------
# # 10,000 tasks, each takes 30 seconds
# # Startup (45s) plus per-task overhead eats into the speedup;
# # batch small tasks (see below)
## -----------------------------------------------------------------------------
# # 10,000 tiny tasks
# results <- future_map(1:10000, small_function)
## -----------------------------------------------------------------------------
# # 100 batches of 100 tasks each
# batches <- split(1:10000, ceiling((1:10000) / 100))
#
# results <- future_map(batches, function(batch) {
#   lapply(batch, small_function)
# })
#
# # Flatten results
# results <- unlist(results, recursive = FALSE)
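#
# # Alternative sketch: furrr can batch for you. furrr_options() takes a
# # chunk_size argument controlling how many elements each future gets,
# # which avoids the manual split()/unlist() dance above.
# results <- future_map(1:10000, small_function,
#                       .options = furrr_options(chunk_size = 100))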
## -----------------------------------------------------------------------------
# big_data <- read.csv("10GB_file.csv") # Upload for every task
# results <- future_map(1:1000, function(i) process(big_data, i))
## -----------------------------------------------------------------------------
# # Upload once to S3 (base write.csv() cannot write to s3:// paths;
# # use the uploader shown earlier)
# s3_path <- starburst_upload(big_data, "s3://my-bucket/big_data.rds")
#
# # Workers read from S3
# results <- future_map(1:1000, function(i) {
#   data <- readRDS(s3_path)
#   process(data, i)
# })
## -----------------------------------------------------------------------------
# starburst_config(
#   max_cost_per_job = 50,      # Prevent accidents
#   cost_alert_threshold = 25   # Get warned early
# )
## -----------------------------------------------------------------------------
# # staRburst auto-cleans, but you can force it
# plan(sequential) # Switch back to local
# # Old cluster resources are cleaned up automatically
## -----------------------------------------------------------------------------
# # High CPU, low memory (CPU-bound work)
# plan(future_starburst, workers = 50, cpu = 8, memory = "16GB")
#
# # Low CPU, high memory (memory-bound work)
# plan(future_starburst, workers = 25, cpu = 4, memory = "32GB")
## -----------------------------------------------------------------------------
# # Increase timeout for long-running tasks (default 1 hour)
# plan(future_starburst, workers = 10, timeout = 7200) # 2 hours
## -----------------------------------------------------------------------------
# # Use specific region (default from config)
# plan(future_starburst, workers = 50, region = "us-west-2")