# tests/testthat/test-large-files.R

# tests/testthat/test-large-files.R

library(Capsule)

test_that("track_data uses SHA-256 for small files", {
  skip_on_cran()

  # Work inside a throwaway directory that is removed when the test exits.
  work_dir <- tempfile("data_test_")
  dir.create(work_dir, recursive = TRUE)
  on.exit(unlink(work_dir, recursive = TRUE), add = TRUE)

  # Two short lines of text: far smaller than the 1 GB threshold used below.
  small_path <- file.path(work_dir, "small.txt")
  registry_path <- file.path(work_dir, "data_registry.json")
  writeLines(c("line1", "line2"), small_path)

  # Fast hashing is enabled, but the file is below the size threshold.
  tracked <- track_data(
    small_path,
    source = "generated",
    registry_file = registry_path,
    fast_hash = TRUE,
    size_threshold_gb = 1
  )

  # Expect a list that reports SHA-256, and a registry file written to disk.
  expect_type(tracked, "list")
  expect_equal(tracked$checksum_algorithm, "sha256")
  expect_true(file.exists(registry_path))
})

test_that("track_data can use fast hash for simulated large files", {
  skip_on_cran()

  temp_dir <- tempfile("data_test_")
  dir.create(temp_dir, recursive = TRUE)
  on.exit(unlink(temp_dir, recursive = TRUE), add = TRUE)

  # Very low threshold (roughly 1 KB) so an ordinary text file counts as
  # "large" without writing gigabytes to disk.
  threshold_gb <- 0.000001

  # 10,000 lines of "DATA" is at least 50,000 bytes, which is above the
  # threshold whether a gigabyte is taken as 1e9 or 1024^3 bytes. The previous
  # 100-line file (about 500 bytes) was *below* the threshold, so the fast-hash
  # branch was never actually requested.
  test_file <- file.path(temp_dir, "data.txt")
  writeLines(rep("DATA", 10000), test_file)

  # Guard the precondition so a future change to the fixture cannot silently
  # turn this back into a small-file test.
  expect_gt(file.size(test_file), threshold_gb * 1024^3)

  registry_file <- file.path(temp_dir, "data_registry.json")

  result <- track_data(
    test_file,
    source = "generated",
    registry_file = registry_file,
    fast_hash = TRUE,
    size_threshold_gb = threshold_gb
  )

  expect_type(result, "list")
  # With the threshold exceeded, only the fast algorithms are acceptable;
  # "sha256" would mean the fast-hash path was not taken.
  # NOTE(review): assumes track_data() always selects one of these two for
  # files above the threshold -- confirm against the implementation.
  fast_algorithms <- c("xxhash64", "metadata_hash")
  expect_true(result$checksum_algorithm %in% fast_algorithms)
  expect_false(is.null(result$size_gb))
})

test_that("verify_data works with different checksum algorithms", {
  skip_on_cran()

  # Run from inside a scratch directory so the data file can be referenced by
  # a relative path; restore the working directory and clean up on exit.
  sandbox <- tempfile("data_test_")
  dir.create(sandbox, recursive = TRUE)
  previous_wd <- setwd(sandbox) # setwd() returns the directory it replaced
  on.exit(
    {
      setwd(previous_wd)
      unlink(sandbox, recursive = TRUE)
    },
    add = TRUE
  )

  data_path <- "test.txt"
  registry_path <- file.path(sandbox, "data_registry.json")

  # Register the file in its initial state.
  writeLines(c("test"), data_path)
  track_data(
    data_path,
    source = "generated",
    registry_file = registry_path
  )

  # An untouched file is expected to verify successfully.
  unchanged_ok <- verify_data(data_path, registry_file = registry_path)
  expect_true(unchanged_ok)

  # After the contents are overwritten, verification is expected to fail.
  writeLines(c("modified"), data_path)
  changed_ok <- verify_data(data_path, registry_file = registry_path)
  expect_false(changed_ok)
})

test_that("track_data handles reference source type", {
  skip_on_cran()

  # Scratch directory for the fixture and registry, removed on exit.
  work_dir <- tempfile("data_test_")
  dir.create(work_dir, recursive = TRUE)
  on.exit(unlink(work_dir, recursive = TRUE), add = TRUE)

  # Minimal FASTA-style fixture: one header line and one sequence line.
  fasta_path <- file.path(work_dir, "ref.fa")
  registry_path <- file.path(work_dir, "data_registry.json")
  writeLines(c(">chr1", "ATCG"), fasta_path)

  tracked <- track_data(
    fasta_path,
    source = "reference",
    description = "Test reference",
    registry_file = registry_path
  )

  # The returned entry should be a list that echoes the requested source type.
  expect_type(tracked, "list")
  expect_equal(tracked$source, "reference")
})

# Try the Capsule package in your browser
#
# Any scripts or data that you put into this service are public.
#
# Capsule documentation built on Nov. 11, 2025, 5:14 p.m.