inst/doc/example-reports.R

## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup, eval=FALSE--------------------------------------------------------
# library(starburst)
# library(rmarkdown)

## ----template, eval=FALSE-----------------------------------------------------
# # Create report template
# report_template <- '
# ---
# title: "Monthly Analytics Report"
# output: html_document
# params:
#   customer_id: ""
#   customer_name: ""
#   month: ""
#   data: NULL
# ---
# 
# `​``{r template-setup, include=FALSE}
# knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE)
# `​``
# 
# # Monthly Report for `r params$customer_name`
# 
# **Customer ID:** `r params$customer_id`
# **Period:** `r params$month`
# **Report Generated:** `r format(Sys.time(), "%Y-%m-%d %H:%M")`
# 
# ---
# 
# ## Executive Summary
# 
# This report summarizes activity for `r params$customer_name` during `r params$month`.
# 
# \`\`\`{r summary}
# data <- params$data
# cat(sprintf("Total transactions: %d\\n", nrow(data)))
# cat(sprintf("Total revenue: $%.2f\\n", sum(data$revenue)))
# cat(sprintf("Average order value: $%.2f\\n", mean(data$revenue)))
# cat(sprintf("Active days: %d\\n", length(unique(data$date))))
# \`\`\`
# 
# ## Revenue Trend
# 
# \`\`\`{r revenue-plot, fig.width=8, fig.height=4}
# daily_revenue <- aggregate(revenue ~ date, data, sum)
# plot(daily_revenue$date, daily_revenue$revenue,
#      type = "l", lwd = 2, col = "steelblue",
#      main = "Daily Revenue Trend",
#      xlab = "Date", ylab = "Revenue ($)")
# grid()
# \`\`\`
# 
# ## Top Products
# 
# \`\`\`{r top-products}
# top_products <- head(
#   aggregate(revenue ~ product, data, sum),
#   10
# )
# top_products <- top_products[order(-top_products$revenue), ]
# knitr::kable(top_products, format.args = list(big.mark = ","))
# \`\`\`
# 
# ## Summary Statistics
# 
# \`\`\`{r stats}
# stats <- data.frame(
#   Metric = c("Total Orders", "Avg Order Value", "Max Order",
#              "Min Order", "Std Dev"),
#   Value = c(
#     nrow(data),
#     round(mean(data$revenue), 2),
#     round(max(data$revenue), 2),
#     round(min(data$revenue), 2),
#     round(sd(data$revenue), 2)
#   )
# )
# knitr::kable(stats, format.args = list(big.mark = ","))
# \`\`\`
# 
# ---
# 
# *This report was automatically generated using staRburst parallel processing.*
# '
# 
# # Save template to file
# writeLines(report_template, "report_template.Rmd")

## ----data, eval=FALSE---------------------------------------------------------
# # Function to generate data for one customer
# generate_customer_data <- function(customer_id) {
#   set.seed(customer_id)
# 
#   n_transactions <- sample(100:500, 1)
#   dates <- sort(sample(seq.Date(
#     from = as.Date("2026-01-01"),
#     to = as.Date("2026-01-31"),
#     by = "day"
#   ), n_transactions, replace = TRUE))
# 
#   products <- c("Product A", "Product B", "Product C",
#                 "Product D", "Product E")
# 
#   data.frame(
#     customer_id = customer_id,
#     date = dates,
#     product = sample(products, n_transactions, replace = TRUE),
#     revenue = round(rnorm(n_transactions, mean = 150, sd = 50), 2),
#     stringsAsFactors = FALSE
#   )
# }
# 
# # Generate customer list
# n_customers <- 50
# customers <- data.frame(
#   customer_id = sprintf("CUST%03d", 1:n_customers),
#   customer_name = paste("Company", LETTERS[1:n_customers %% 26 + 1],
#                        (1:n_customers %/% 26) + 1),
#   stringsAsFactors = FALSE
# )
# 
# head(customers)

## ----render-fn, eval=FALSE----------------------------------------------------
# generate_report <- function(customer_info) {
#   customer_id <- customer_info$customer_id
#   customer_name <- customer_info$customer_name
# 
#   # Generate data for this customer
#   data <- generate_customer_data(as.numeric(gsub("CUST", "", customer_id)))
# 
#   # Output file path
#   output_file <- sprintf("report_%s.html", customer_id)
# 
#   tryCatch({
#     # Render report
#     rmarkdown::render(
#       input = "report_template.Rmd",
#       output_file = output_file,
#       params = list(
#         customer_id = customer_id,
#         customer_name = customer_name,
#         month = "January 2026",
#         data = data
#       ),
#       quiet = TRUE
#     )
# 
#     list(
#       customer_id = customer_id,
#       success = TRUE,
#       output_file = output_file,
#       file_size = file.size(output_file),
#       render_time = Sys.time()
#     )
#   }, error = function(e) {
#     list(
#       customer_id = customer_id,
#       success = FALSE,
#       error = as.character(e),
#       render_time = Sys.time()
#     )
#   })
# }

## ----local, eval=FALSE--------------------------------------------------------
# # Test with 5 reports
# test_customers <- head(customers, 5)
# 
# cat(sprintf("Rendering %d reports locally...\n", nrow(test_customers)))
# local_start <- Sys.time()
# 
# local_results <- lapply(
#   split(test_customers, 1:nrow(test_customers)),
#   generate_report
# )
# 
# local_time <- as.numeric(difftime(Sys.time(), local_start, units = "secs"))
# 
# cat(sprintf("✓ Completed in %.1f seconds\n", local_time))
# cat(sprintf("  Average: %.1f seconds per report\n", local_time / 5))
# cat(sprintf("  Estimated time for %d reports: %.1f minutes\n\n",
#             n_customers, (local_time / 5 * n_customers) / 60))

## ----cloud, eval=FALSE--------------------------------------------------------
# cat(sprintf("Rendering %d reports on AWS...\n", n_customers))
# 
# # Convert data frame rows to list for starburst_map
# customer_list <- split(customers, 1:nrow(customers))
# 
# results <- starburst_map(
#   customer_list,
#   generate_report,
#   workers = 25,
#   cpu = 2,
#   memory = "4GB"
# )

## ----results, eval=FALSE------------------------------------------------------
# # Check success rate
# success_count <- sum(sapply(results, function(x) x$success))
# failure_count <- sum(!sapply(results, function(x) x$success))
# 
# cat("\n=== Report Generation Summary ===\n\n")
# cat(sprintf("Total reports: %d\n", length(results)))
# cat(sprintf("Successfully generated: %d (%.1f%%)\n",
#             success_count, (success_count / length(results)) * 100))
# cat(sprintf("Failed: %d\n\n", failure_count))
# 
# # File size summary
# successful_results <- results[sapply(results, function(x) x$success)]
# file_sizes <- sapply(successful_results, function(x) x$file_size)
# 
# cat("File size statistics:\n")
# cat(sprintf("  Total size: %.2f MB\n", sum(file_sizes) / 1024^2))
# cat(sprintf("  Average size: %.1f KB\n", mean(file_sizes) / 1024))
# cat(sprintf("  Range: %.1f - %.1f KB\n\n",
#             min(file_sizes) / 1024, max(file_sizes) / 1024))
# 
# # Show sample of generated reports
# cat("Generated reports:\n")
# report_files <- sapply(successful_results[1:10], function(x) x$output_file)
# print(report_files)

## ----distribution, eval=FALSE-------------------------------------------------
# generate_and_distribute <- function(customer_info) {
#   # Generate report
#   result <- generate_report(customer_info)
# 
#   if (result$success) {
#     # Upload to S3 (example)
#     tryCatch({
#       # paws::s3()$put_object(
#       #   Bucket = "my-reports-bucket",
#       #   Key = sprintf("reports/2026-01/%s", result$output_file),
#       #   Body = readBin(result$output_file, "raw",
#       #                  file.size(result$output_file))
#       # )
# 
#       result$uploaded <- TRUE
#       result$s3_url <- sprintf("s3://my-reports-bucket/reports/2026-01/%s",
#                               result$output_file)
#     }, error = function(e) {
#       result$uploaded <- FALSE
#       result$upload_error <- as.character(e)
#     })
#   }
# 
#   result
# }

## ----eval=FALSE---------------------------------------------------------------
# system.file("examples/reports.R", package = "starburst")

## ----eval=FALSE---------------------------------------------------------------
# source(system.file("examples/reports.R", package = "starburst"))

Try the starburst package in your browser

Any scripts or data that you put into this service are public.

starburst documentation built on March 19, 2026, 5:08 p.m.