R/script_generation.R

Defines functions .format_r_value create_repro_report generate_repro_script

Documented in create_repro_report .format_r_value generate_repro_script

#' Generate Reproducible Script
#'
#' @description
#' Generate an executable R script that includes all reproducibility information
#' including package versions, seeds, parameters, and data verification.
#'
#' @param script_file Character. Path to save the generated script
#' @param source_script Character. Original analysis script to include
#' @param analysis_name Character. Name for this analysis
#' @param include_renv Logical. Include renv initialization. Default TRUE.
#' @param include_data_check Logical. Include data verification. Default TRUE.
#' @param include_session_info Logical. Include session info at end. Default TRUE.
#'
#' @return Path to generated script
#'
#' @importFrom utils packageVersion sessionInfo
#' @export
#'
#' @examples
#' \dontrun{
#' generate_repro_script(
#'   "analysis_reproducible.R",
#'   source_script = "analysis.R",
#'   analysis_name = "main_analysis"
#' )
#' }
generate_repro_script <- function(script_file,
                                  source_script = NULL,
                                  analysis_name = "analysis",
                                  include_renv = TRUE,
                                  include_data_check = TRUE,
                                  include_session_info = TRUE) {
  # Build script content
  script_lines <- c(
    "#!/usr/bin/env Rscript",
    "#",
    paste0("# Reproducible Analysis Script: ", analysis_name),
    paste0("# Generated: ", Sys.time()),
    paste0("# Generated by: Capsule v", packageVersion("Capsule")),
    "#",
    "# This script includes complete reproducibility information",
    "#",
    ""
  )

  # Add renv initialization
  if (include_renv) {
    script_lines <- c(
      script_lines,
      "# Initialize renv for package management",
      "if (!requireNamespace('renv', quietly = TRUE)) {",
      "  stop('renv package is required. Please install it with: install.packages(\"renv\")')",
      "}",
      "renv::restore()",
      ""
    )
  }

  # Add Capsule initialization
  script_lines <- c(
    script_lines,
    "# Load Capsule",
    "library(Capsule)",
    ""
  )

  # Add seed restoration
  seed_registry <- .load_seed_registry(".capsule/seed_registry.json")
  if (analysis_name %in% names(seed_registry$seeds)) {
    seed <- seed_registry$seeds[[analysis_name]]$seed
    script_lines <- c(
      script_lines,
      "# Restore random seed",
      paste0("set.seed(", seed, ")"),
      ""
    )
  }

  # Add parameter loading
  param_registry <- .load_param_registry(".capsule/param_registry.json")
  if (analysis_name %in% names(param_registry$analyses)) {
    params <- param_registry$analyses[[analysis_name]]$parameters
    script_lines <- c(
      script_lines,
      "# Analysis parameters",
      "params <- list("
    )
    for (i in seq_along(params)) {
      param <- params[[i]]
      value_str <- .format_r_value(param$value)
      comma <- if (i < length(params)) "," else ""
      script_lines <- c(
        script_lines,
        paste0("  ", param$name, " = ", value_str, comma)
      )
    }
    script_lines <- c(
      script_lines,
      ")",
      ""
    )
  }

  # Add data verification
  if (include_data_check) {
    data_registry <- .load_registry(".capsule/data_registry.json")
    if (!is.null(data_registry$data) && length(data_registry$data) > 0) {
      script_lines <- c(
        script_lines,
        "# Verify data integrity",
        "cat('Verifying data files...\\n')",
        "if (!Capsule::verify_data()) {",
        "  stop('Data verification failed! Files have been modified.')",
        "}",
        "cat('Data verification passed.\\n')",
        ""
      )
    }
  }

  # Add main analysis code
  script_lines <- c(
    script_lines,
    "# ========================================",
    "# Main Analysis Code",
    "# ========================================",
    ""
  )

  if (!is.null(source_script)) {
    if (file.exists(source_script)) {
      source_code <- readLines(source_script)
      script_lines <- c(script_lines, source_code, "")
    } else {
      stop("Source script not found at path: ", source_script)
    }
  } else {
    script_lines <- c(
      script_lines,
      "# Insert your analysis code here",
      ""
    )
  }

  # Add session info
  if (include_session_info) {
    script_lines <- c(
      script_lines,
      "",
      "# ========================================",
      "# Session Information",
      "# ========================================",
      "cat('\\n\\n=== Session Information ===\\n')",
      "print(sessionInfo())",
      ""
    )
  }

  # Write script file
  dir.create(dirname(script_file), recursive = TRUE, showWarnings = FALSE)
  writeLines(script_lines, script_file)

  # Make executable on Unix-like systems
  if (.Platform$OS.type == "unix") {
    Sys.chmod(script_file, mode = "0755")
  }

  cli::cli_alert_success("Reproducible script generated: {.file {script_file}}")

  invisible(script_file)
}


#' Create Reproducibility Report
#'
#' @description
#' Generate a comprehensive markdown report documenting all reproducibility information
#'
#' @param output_file Character. Path to save the report (required).
#' @param analysis_name Character. Name of the analysis
#' @param include_package_list Logical. Include full package list. Default TRUE.
#'
#' @return Path to generated report
#'
#' @export
#'
#' @examples
#' \dontrun{
#' create_repro_report(tempfile(fileext = ".md"), "main_analysis")
#' }
create_repro_report <- function(output_file,
                                analysis_name = NULL,
                                include_package_list = TRUE) {
  report_lines <- c(
    paste("#", "Reproducibility Report"),
    "",
    paste("**Generated:", Sys.time(), "**"),
    "",
    "---",
    ""
  )

  # R Environment Section
  report_lines <- c(
    report_lines,
    "## R Environment",
    "",
    paste("- **R Version:**", R.version.string),
    paste("- **Platform:**", R.version$platform),
    paste("- **OS:**", Sys.info()["sysname"], Sys.info()["release"]),
    ""
  )

  # Session Info
  si <- sessionInfo()
  report_lines <- c(
    report_lines,
    "## Loaded Packages",
    ""
  )

  if (!is.null(si$otherPkgs)) {
    for (pkg_name in names(si$otherPkgs)) {
      pkg <- si$otherPkgs[[pkg_name]]
      report_lines <- c(
        report_lines,
        paste0("- **", pkg$Package, "** ", pkg$Version)
      )
    }
    report_lines <- c(report_lines, "")
  }

  # Data Files
  data_registry <- .load_registry(".capsule/data_registry.json")
  if (!is.null(data_registry$data) && length(data_registry$data) > 0) {
    report_lines <- c(
      report_lines,
      "## Data Files",
      ""
    )
    for (file_path in names(data_registry$data)) {
      data_info <- data_registry$data[[file_path]]
      report_lines <- c(
        report_lines,
        paste0("### ", basename(file_path)),
        "",
        paste("- **Path:**", file_path),
        paste("- **Size:**", data_info$size_readable),
        paste("- **Checksum:**", substr(data_info$checksum_sha256, 1, 16), "..."),
        paste("- **Source:**", data_info$source),
        if (!is.null(data_info$source_url)) paste("- **URL:**", data_info$source_url) else NULL,
        paste("- **Modified:**", data_info$modified),
        ""
      )
    }
  }

  # Parameters
  param_registry <- .load_param_registry(".capsule/param_registry.json")
  if (!is.null(param_registry$analyses) && length(param_registry$analyses) > 0) {
    report_lines <- c(
      report_lines,
      "## Analysis Parameters",
      ""
    )

    analyses_to_report <- if (!is.null(analysis_name) && analysis_name %in% names(param_registry$analyses)) {
      list(param_registry$analyses[[analysis_name]])
    } else {
      param_registry$analyses
    }

    for (analysis in analyses_to_report) {
      report_lines <- c(
        report_lines,
        paste0("### ", analysis$analysis_name),
        ""
      )
      if (!is.null(analysis$description)) {
        report_lines <- c(report_lines, analysis$description, "")
      }
      for (param in analysis$parameters) {
        report_lines <- c(
          report_lines,
          paste0("- **", param$name, ":** ", .format_r_value(param$value))
        )
      }
      report_lines <- c(report_lines, "")
    }
  }

  # Random Seeds
  seed_registry <- .load_seed_registry(".capsule/seed_registry.json")
  if (!is.null(seed_registry$seeds) && length(seed_registry$seeds) > 0) {
    report_lines <- c(
      report_lines,
      "## Random Seeds",
      ""
    )

    seeds_to_report <- if (!is.null(analysis_name) && analysis_name %in% names(seed_registry$seeds)) {
      list(seed_registry$seeds[[analysis_name]])
    } else {
      seed_registry$seeds
    }

    for (seed_info in seeds_to_report) {
      report_lines <- c(
        report_lines,
        paste0("### ", seed_info$analysis_name),
        "",
        paste("- **Seed:**", seed_info$seed),
        paste("- **RNG Kind:**", seed_info$kind),
        paste("- **Set at:**", seed_info$timestamp),
        ""
      )
    }
  }

  # Full package list if requested
  if (include_package_list) {
    # Get all available packages from all library paths (avoid installed.packages())
    pkg_names <- unique(unlist(lapply(.libPaths(), function(lib) {
      list.files(lib)
    })))

    # Filter to only valid R packages
    pkg_names <- pkg_names[sapply(pkg_names, function(pkg) {
      !is.null(tryCatch(find.package(pkg, quiet = TRUE), error = function(e) NULL))
    })]

    report_lines <- c(
      report_lines,
      "## All Installed Packages",
      "",
      "| Package | Version | Built |",
      "|---------|---------|-------|"
    )

    for (pkg_name in pkg_names) {
      desc <- utils::packageDescription(pkg_name, fields = c("Version", "Built"))
      if (is.list(desc)) {
        version <- as.character(desc$Version)
        built <- as.character(desc$Built)
        report_lines <- c(
          report_lines,
          paste0("| ", pkg_name, " | ", version, " | ", built, " |")
        )
      }
    }
    report_lines <- c(report_lines, "")
  }

  # Write report
  dir.create(dirname(output_file), recursive = TRUE, showWarnings = FALSE)
  writeLines(report_lines, output_file)

  cli::cli_alert_success("Reproducibility report generated: {.file {output_file}}")

  invisible(output_file)
}


#' Format R Value for Script
#'
#' @description
#' Internal function to format R values as code strings
#'
#' @param value Any R value
#'
#' @return Character representation
#' @keywords internal
.format_r_value <- function(value) {
  if (is.character(value)) {
    paste0('"', value, '"')
  } else if (is.numeric(value)) {
    if (length(value) == 1) {
      as.character(value)
    } else {
      paste0("c(", paste(value, collapse = ", "), ")")
    }
  } else if (is.logical(value)) {
    as.character(value)
  } else if (is.null(value)) {
    "NULL"
  } else {
    deparse(value, width.cutoff = 500)
  }
}

Try the Capsule package in your browser

Any scripts or data that you put into this service are public.

Capsule documentation built on Nov. 11, 2025, 5:14 p.m.