Nothing
#' Locate (and lazily create) the sprtt cache directory
#'
#' Resolves the per-user data directory for the application name `"sprtt"`
#' via `rappdirs::user_data_dir()`. If that directory is not present yet it is
#' created, including any missing parent directories.
#'
#' @return The cache directory path as returned by `rappdirs::user_data_dir()`.
#' @keywords internal
get_sprtt_cache_dir <- function() {
  sprtt_dir <- rappdirs::user_data_dir("sprtt")

  # Nothing to do when the directory is already in place
  if (dir.exists(sprtt_dir)) {
    return(sprtt_dir)
  }

  # recursive = TRUE also creates missing parent directories
  dir.create(sprtt_dir, recursive = TRUE)
  sprtt_dir
}
#' Download simulation data for sample size planning
#'
#' @description
#' `r lifecycle::badge("experimental")`
#'
#' Downloads pre-computed simulation results from GitHub releases.
#' Data is cached locally and only needs to be downloaded once.
#'
#' Data is hosted at:
#' \href{https://github.com/MeikeSteinhilber/sprtt_plan_sample_size}{MeikeSteinhilber/sprtt_plan_sample_size}
#'
#' @details
#' In interactive sessions the function asks for confirmation before starting
#' the download. The prompt is skipped when the environment variable
#' `SPRTT_CONSENT_DOWNLOAD` is set to a value that `as.logical()` converts to
#' `TRUE` (e.g. `"true"`). Non-interactive sessions are never prompted.
#'
#' After the download call returns, the function verifies that the data file
#' is actually present in the cache directory and raises an error otherwise,
#' so a download that ended without producing the file is not reported as a
#' success.
#'
#' @param force Logical. If TRUE, re-download even if data exists. Default FALSE.
#' @return Invisibly returns the path to the cached data file, or
#'   `NULL` (invisibly) if the download was declined at the prompt.
#' @export
#' @examples
#' \dontrun{
#' # Download data (only needed once)
#' download_sample_size_data()
#'
#' # Force re-download (e.g., after data update)
#' download_sample_size_data(force = TRUE)
#' }
#'
download_sample_size_data <- function(force = FALSE) {
  cache_dir <- get_sprtt_cache_dir()
  data_file <- file.path(cache_dir, "sprtt_external_data_plan_sample_size.rds")

  # Return early if already cached and no force re-download
  if (file.exists(data_file) && !force) {
    message("Simulation data already cached at: ", data_file)
    return(invisible(data_file))
  }

  # Ask for consent before downloading in interactive sessions
  auto_consent <- isTRUE(
    as.logical(Sys.getenv("SPRTT_CONSENT_DOWNLOAD", "false"))
  )
  if (interactive() && !auto_consent) {
    answer <- utils::menu(
      c("Yes", "No"),
      title = "This will download ~150 MB of simulation data to your local cache. Proceed?"
    )
    # menu() returns 0 when the user exits without choosing; treat as "No"
    if (answer != 1) {
      message("Download cancelled.")
      return(invisible(NULL))
    }
  }

  message("Downloading simulation data for sample size planning...")
  message("This is a one-time download (~150 MB).")

  tryCatch({
    piggyback::pb_download(
      file = "sprtt_external_data_plan_sample_size.rds",
      repo = "MeikeSteinhilber/sprtt_plan_sample_size",
      tag  = "latest",
      dest = cache_dir
    )
    # The download call may finish without raising an error even though no
    # file was written, so confirm the file exists before claiming success.
    if (!file.exists(data_file)) {
      stop(
        "the download finished but the data file was not found at: ",
        data_file,
        call. = FALSE
      )
    }
    message("\u2713 Download complete! Data cached at: ", cache_dir)
    invisible(data_file)
  }, error = function(e) {
    stop(
      "Failed to download simulation data.\n",
      "Please check:\n",
      "  1. Your internet connection\n",
      "  2. The repository 'MeikeSteinhilber/sprtt_plan_sample_size' is public\n",
      "  3. A release with 'sprtt_external_data_plan_sample_size.rds' exists\n",
      "Error message: ", conditionMessage(e),
      call. = FALSE
    )
  })
}
#' Access sample size simulation data
#'
#' @description
#' `r lifecycle::badge("experimental")`
#'
#' Reads the pre-computed simulation results for SPRT sample size planning from
#' the local cache. When no cached copy exists yet, the data (~150 MB) is first
#' fetched from GitHub releases via [`download_sample_size_data()`]. Use this
#' function to work with the complete dataset for custom analysis and
#' visualization. The available columns are listed in the **Data Structure**
#' section below.
#'
#' Data is hosted at:
#' \href{https://github.com/MeikeSteinhilber/sprtt_plan_sample_size}{MeikeSteinhilber/sprtt_plan_sample_size}
#'
#'
#' @return A named list with the following elements (or `NULL`, invisibly, if
#'   the data had to be downloaded and the download was cancelled):
#' \itemize{
#'   \item \code{description}: Short description of the dataset
#'   \item \code{version}: GitHub release tag of the dataset (e.g., \code{"v0.1.0-data"})
#'   \item \code{created}: Date the dataset was created (as character string)
#'   \item \code{n_rep}: Number of simulation iterations per condition
#'   \item \code{data}: A data frame with simulation results (see **Data Structure**)
#' }
#' @export
#' @section Data Structure:
#' The \code{data} element contains simulation results with the following columns:
#'
#' **Simulation Metadata:**
#' \itemize{
#'   \item \code{batch}: Batch identifier for the simulation run
#'   \item \code{iteration}: Individual simulation iteration within a batch
#'   \item \code{source_file}: Path to the file containing simulation parameters or results
#' }
#'
#' **Input Parameters:**
#' \itemize{
#'   \item \code{f_simulated}: The true effect size used to generate the simulated data
#'   \item \code{f_expected}: The expected effect size specified for the SPRT
#'   \item \code{k_groups}: Number of groups in the design
#'   \item \code{alpha}: Significance level (Type I error rate)
#'   \item \code{power}: Desired statistical power (1 - Type II error rate)
#'   \item \code{distribution}: Data distribution used for simulation
#'   \item \code{sd}: Standard deviation(s) used in data generation in each group
#'   \item \code{sample_ratio}: Ratio of sample sizes between groups (e.g., 1:1, 2:1)
#'   \item \code{n_raw_data}: Total number of raw observations generated in each group
#'   \item \code{fix_n}: Fixed sample size
#' }
#'
#' **Individual Test Results:**
#' \itemize{
#'   \item \code{n}: Actual sample size at which the SPRT terminated
#'   \item \code{decision}: Test decision
#'   \item \code{decision_error}: Whether the decision was erroneous (Type I or Type II error)
#'   \item \code{log_lr}: Log-likelihood ratio at termination
#'   \item \code{f}: Calculated effect size from the data
#'   \item \code{f_adj}: Adjusted effect size
#'   \item \code{f_statistic}: F-statistic from ANOVA test
#' }
#'
#' **Summary Statistics (Aggregated across iterations):**
#' \itemize{
#'   \item \code{decision_error_rate}: Proportion of incorrect decisions
#'   \item \code{mean_n}: Mean sample size across all iterations
#'   \item \code{sd_error_n}: Standard error of the mean sample size (sd(n)/sqrt(n))
#'   \item \code{median_n}: Median sample size (50th percentile)
#'   \item \code{min_n}, \code{max_n}: Minimum and maximum sample sizes observed
#'   \item \code{q25_n}, \code{q50_n}, \code{q75_n}, \code{q90_n}, \code{q95_n}: Sample size quantiles
#'   \item \code{decision_rate_25}, \code{decision_rate_50}, \code{decision_rate_75},
#'         \code{decision_rate_90}, \code{decision_rate_95}, \code{decision_rate_100}:
#'         Cumulative decision rates at various percentages of maximum sample size
#' }
#' @examples
#' \dontrun{
#' # Load data (downloads automatically if needed)
#' loaded <- load_sample_size_data()
#'
#' # Access the simulation data frame
#' head(loaded$data)
#'
#' # Check dataset version
#' loaded$version   # e.g. "v0.1.0-data"
#' loaded$created
#' }
load_sample_size_data <- function() {
  rds_path <- file.path(
    get_sprtt_cache_dir(),
    "sprtt_external_data_plan_sample_size.rds"
  )

  # Only trigger a download when there is no cached copy. A NULL result means
  # the download was cancelled, so there is nothing to read.
  needs_download <- !file.exists(rds_path)
  if (needs_download && is.null(download_sample_size_data())) {
    return(invisible(NULL))
  }

  readRDS(rds_path)
}
#' Clear cached simulation data
#'
#' @description
#' `r lifecycle::badge("experimental")`
#'
#' Removes locally cached simulation data (~150 MB) used by [`plan_sample_size()`].
#' Data will be automatically re-downloaded on next use of sample size planning functions.
#'
#' This function is useful when:
#' * You want to free up disk space
#' * The cached data may be outdated and you want to force a fresh download
#' * Troubleshooting cache-related issues
#'
#' @return Invisibly returns `TRUE` if cache was cleared, `FALSE` if no cache
#'   existed. If the cached file exists but could not be removed, a warning is
#'   issued and `FALSE` is returned invisibly.
#' @export
#' @examples
#' \dontrun{
#' # Clear cache
#' cache_clear()
#' }
cache_clear <- function() {
  cache_dir <- get_sprtt_cache_dir()
  data_file <- file.path(cache_dir, "sprtt_external_data_plan_sample_size.rds")

  if (!file.exists(data_file)) {
    message("No cached data found.")
    return(invisible(FALSE))
  }

  # unlink() signals failure only through its status code (and not at all in
  # some cases), so check both the status and that the path is really gone
  # before reporting success.
  status <- unlink(data_file)
  if (status != 0L || file.exists(data_file)) {
    warning(
      "Could not remove cached simulation data at: ", data_file,
      call. = FALSE
    )
    return(invisible(FALSE))
  }

  message("Cached simulation data cleared.")
  message("Data will be re-downloaded on next use.")
  invisible(TRUE)
}
#' Cache information
#' @description
#' `r lifecycle::badge("experimental")`
#'
#' Displays information about cached simulation data (~150 MB) used by [`plan_sample_size()`].
#' Shows the cache directory location, whether data is cached, file size, and dataset
#' version metadata.
#'
#' The simulation data is automatically downloaded on first use of sample size planning
#' functions and stored locally for faster subsequent access.
#'
#' @details
#' The version metadata is read from the cached file itself. If the file
#' exists but cannot be read (for example because it is corrupt or was only
#' partially downloaded), a warning is issued and the metadata fields are
#' reported as `NA` instead of aborting with an error.
#'
#' @return Invisibly returns a list with:
#' * `cache_dir`: Character string with the cache directory path
#' * `data_cached`: Logical indicating if simulation data is cached
#' * `file_size_mb`: Numeric file size in MB (or `NA` if not cached)
#' * `data_version`: GitHub release tag of the cached dataset (or `NA` if not
#'   cached or not available)
#' * `data_created`: Date the dataset was created (or `NA` if not cached or not
#'   available)
#'
#' @seealso
#' * [`cache_clear()`] to remove cached data
#' * [`download_sample_size_data()`] to manually download simulation data
#' * [`plan_sample_size()`] which uses the cached data
#'
#' @export
cache_info <- function() {
  cache_dir <- get_sprtt_cache_dir()
  data_file <- file.path(cache_dir, "sprtt_external_data_plan_sample_size.rds")
  data_cached <- file.exists(data_file)

  # Defaults used when nothing is cached or the metadata is unavailable
  data_version <- NA
  data_created <- NA
  file_size_mb <- NA

  if (data_cached) {
    file_size_mb <- round(file.size(data_file) / 1e6, 2)

    # Reading can fail on a damaged cache file; this function is meant for
    # troubleshooting, so degrade to NA metadata instead of erroring out.
    meta <- tryCatch(
      {
        loaded <- readRDS(data_file)
        list(
          version = loaded[["version"]],
          created = loaded[["created"]]
        )
      },
      error = function(e) {
        warning(
          "Cached data file could not be read: ", conditionMessage(e), "\n",
          "Consider running cache_clear() and downloading the data again.",
          call. = FALSE
        )
        list()
      }
    )

    # Keep the NA defaults when a metadata field is absent from the file
    if (!is.null(meta[["version"]])) {
      data_version <- meta[["version"]]
    }
    if (!is.null(meta[["created"]])) {
      data_created <- meta[["created"]]
    }
  }

  info <- list(
    cache_dir = cache_dir,
    data_cached = data_cached,
    file_size_mb = file_size_mb,
    data_version = data_version,
    data_created = data_created
  )

  cat("SPRTT Simulation Data Cache\n")
  cat("---------------------------\n")
  cat("Cache directory:", info$cache_dir, "\n")
  cat("Data cached:", info$data_cached, "\n")
  if (info$data_cached) {
    cat("File size:", info$file_size_mb, "MB\n")
    cat("Dataset version:", info$data_version, "\n")
    cat("Dataset created:", info$data_created, "\n")
  }

  invisible(info)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.