# tests/testthat/test-io-immundata.R

# A dataset whose metadata.json has been replaced by a hand-built legacy (v1)
# payload must still load: read_immundata() is expected to warn about the
# legacy metadata and return an ImmunData object with usable provenance.
test_that("read_immundata() upgrades legacy v1 metadata on the fly", {
  legacy_dir <- create_test_output_dir("legacy_v1_")
  on.exit(cleanup_output_dir(legacy_dir), add = TRUE)

  receptor_features <- c("cdr3_aa", "v_call")

  # Step 1: write a regular dataset into the scratch folder.
  read_repertoires(
    path = system.file("extdata/tsv", "sample_0_1k.tsv", package = "immundata"),
    schema = receptor_features,
    output_folder = legacy_dir,
    preprocess = NULL,
    postprocess = NULL
  )

  # Step 2: overwrite its metadata.json with the legacy-shaped payload.
  v1_payload <- list(
    version = as.character(packageVersion("immundata")),
    receptor_schema = list(
      features = receptor_features,
      chains = "TCRB"
    ),
    repertoire_schema = list()
  )
  jsonlite::write_json(
    v1_payload,
    path = file.path(legacy_dir, "metadata.json"),
    auto_unbox = TRUE,
    null = "null"
  )

  # Step 3: reading must warn, yet still produce a populated object.
  expect_warning(
    idata <- read_immundata(legacy_dir, verbose = FALSE),
    "legacy v1 metadata"
  )

  checkmate::expect_r6(idata, classes = "ImmunData")
  expect_gt(length(names(idata$annotations)), 0)

  # Provenance must point at the folder that was read and carry a non-empty
  # snapshot identifier.
  provenance <- imd_get_provenance(idata)
  expected_path <- normalizePath(legacy_dir, mustWork = FALSE)
  expect_equal(provenance$current_path, expected_path)
  expect_true(is.character(provenance$snapshot_id))
  expect_gt(nchar(provenance$snapshot_id), 0)
})

# `idata$provenance` must return exactly what imd_get_provenance() returns,
# and assigning to it must fail with a "read-only" error.
test_that("ImmunData$provenance is read-only and matches helper output", {
  layout <- create_snapshot_test_layout()
  # add = TRUE: append to, rather than replace, any exit handler already
  # registered for this frame.
  on.exit(cleanup_snapshot_test_root(), add = TRUE)
  output_dir <- layout$projectA

  sample_file <- system.file("extdata/tsv", "sample_0_1k.tsv", package = "immundata")
  idata <- read_repertoires(
    path = sample_file,
    schema = c("cdr3_aa", "v_call"),
    output_folder = output_dir,
    preprocess = NULL,
    postprocess = NULL
  )

  # Field access and helper must agree exactly.
  expect_identical(idata$provenance, imd_get_provenance(idata))

  # Direct assignment to the field must be rejected.
  expect_error(
    idata$provenance <- list(),
    "read-only"
  )
})

# After ingestion, metadata.json on disk must contain format version 2, a
# snapshot id, the producer, a one-element lineage list describing the
# ingestion, and provenance paths that point at the output folder.
test_that("read_repertoires() writes metadata with lineage array and provenance", {
  layout <- create_snapshot_test_layout()
  # add = TRUE: append to, rather than replace, any exit handler already
  # registered for this frame.
  on.exit(cleanup_snapshot_test_root(), add = TRUE)
  output_dir <- layout$projectA

  sample_file <- system.file("extdata/tsv", "sample_0_1k.tsv", package = "immundata")

  read_repertoires(
    path = sample_file,
    schema = c("cdr3_aa", "v_call"),
    output_folder = output_dir,
    preprocess = NULL,
    postprocess = NULL
  )

  # simplifyVector = FALSE keeps JSON arrays as R lists, so the lineage can be
  # inspected element by element.
  metadata_path <- file.path(output_dir, "metadata.json")
  metadata_json <- jsonlite::read_json(metadata_path, simplifyVector = FALSE)

  # Top-level fields.
  expect_equal(metadata_json$format_version, 2)
  expect_true(is.character(metadata_json$snapshot_id))
  expect_equal(metadata_json$producer[["function"]], "read_repertoires")
  expect_true(is.list(metadata_json$lineage))
  expect_length(metadata_json$lineage, 1)
  expect_true(is.list(metadata_json$provenance))

  # The single lineage entry describes the ingestion itself.
  ingestion_event <- metadata_json$lineage[[1]]
  expect_equal(ingestion_event$event, "ingestion")
  expect_equal(ingestion_event$producer[["function"]], "read_repertoires")
  # Flatten before comparing: with simplifyVector = FALSE a JSON array of
  # paths is read as a list, while a bare JSON string is read as a character
  # scalar; unlist() yields a character vector in both cases.
  expect_equal(
    unlist(ingestion_event$inputs$files, use.names = FALSE),
    normalizePath(sample_file)
  )
  expect_false(isTRUE(ingestion_event$inputs$metadata_joined))

  # Provenance paths are compared against the normalised output folder.
  normalized_out <- normalizePath(output_dir, mustWork = FALSE)
  expect_equal(metadata_json$provenance$home_path, normalized_out)
  expect_equal(metadata_json$provenance$current_path, normalized_out)
  expect_equal(
    normalizePath(metadata_json$provenance$snapshot_root, mustWork = FALSE),
    normalizePath(file.path(normalized_out, "snapshots"), mustWork = FALSE)
  )
})

# Writing an already-ingested dataset back to the same folder must add a
# second lineage entry of type "snapshot", switch the producer to
# write_immundata, and issue a new snapshot id.
test_that("write_immundata() appends snapshot lineage event", {
  layout <- create_snapshot_test_layout()
  # add = TRUE: append to, rather than replace, any exit handler already
  # registered for this frame.
  on.exit(cleanup_snapshot_test_root(), add = TRUE)
  output_dir <- layout$projectA

  sample_file <- system.file("extdata/tsv", "sample_0_1k.tsv", package = "immundata")
  idata <- read_repertoires(
    path = sample_file,
    schema = c("cdr3_aa", "v_call"),
    output_folder = output_dir,
    preprocess = NULL,
    postprocess = NULL
  )

  # State of metadata.json right after ingestion.
  metadata_path <- file.path(output_dir, "metadata.json")
  before_json <- jsonlite::read_json(metadata_path, simplifyVector = FALSE)
  expect_length(before_json$lineage, 1)
  previous_snapshot_id <- before_json$snapshot_id

  # Re-write into the same folder and re-read the metadata.
  write_immundata(idata, output_folder = output_dir)
  after_json <- jsonlite::read_json(metadata_path, simplifyVector = FALSE)

  expect_equal(after_json$producer[["function"]], "write_immundata")
  expect_length(after_json$lineage, 2)
  expect_false(identical(after_json$snapshot_id, previous_snapshot_id))

  # The appended entry records the snapshot and its producer.
  snapshot_event <- after_json$lineage[[2]]
  expect_equal(snapshot_event$event, "snapshot")
  expect_equal(snapshot_event$producer[["function"]], "write_immundata")

  # Source and destination are the same folder in this scenario.
  normalized_out <- normalizePath(output_dir, mustWork = FALSE)
  expect_equal(snapshot_event$source_path, normalized_out)
  expect_equal(snapshot_event$snapshot_path, normalized_out)
})

# When the input files come from a metadata table (path = "<metadata>"), the
# ingestion lineage entry must flag the metadata join, list the files taken
# from the table's `File` column, and record that column name.
test_that("read_repertoires() writes metadata-derived files in ingestion lineage", {
  layout <- create_snapshot_test_layout()
  # add = TRUE: append to, rather than replace, any exit handler already
  # registered for this frame.
  on.exit(cleanup_snapshot_test_root(), add = TRUE)
  output_dir <- layout$projectA

  md_path <- system.file("extdata/tsv", "metadata.tsv", package = "immundata")
  metadata_df <- read_metadata(md_path)

  read_repertoires(
    path = "<metadata>",
    schema = c("cdr3_aa", "v_call"),
    metadata = metadata_df,
    output_folder = output_dir,
    preprocess = NULL,
    postprocess = NULL
  )

  metadata_path <- file.path(output_dir, "metadata.json")
  metadata_json <- jsonlite::read_json(metadata_path, simplifyVector = FALSE)

  expect_equal(metadata_json$producer[["function"]], "read_repertoires")
  expect_length(metadata_json$lineage, 1)

  ingestion_event <- metadata_json$lineage[[1]]
  expect_true(isTRUE(ingestion_event$inputs$metadata_joined))
  # The JSON array is read as a list (simplifyVector = FALSE); flatten it
  # before comparing with the character vector of expected paths.
  expect_equal(
    unlist(ingestion_event$inputs$files, use.names = FALSE),
    normalizePath(metadata_df$File)
  )
  expect_equal(ingestion_event$args$metadata_file_col, "File")
})

# With output_folder = NULL and a tag, write_immundata() must create
# snapshots/<tag>/v001 under the dataset folder, then v002 on the next write
# with the same tag; the returned object's provenance must point at the
# latest version folder.
test_that("write_immundata() auto-creates snapshot folders and increments versions", {
  layout <- create_snapshot_test_layout()
  # add = TRUE: append to, rather than replace, any exit handler already
  # registered for this frame.
  on.exit(cleanup_snapshot_test_root(), add = TRUE)
  output_dir <- layout$projectA

  sample_file <- system.file("extdata/tsv", "sample_0_1k.tsv", package = "immundata")
  idata <- read_repertoires(
    path = sample_file,
    schema = c("cdr3_aa", "v_call"),
    output_folder = output_dir,
    preprocess = NULL,
    postprocess = NULL
  )

  # Two consecutive auto-snapshots under the same tag.
  idata_v1 <- write_immundata(idata, output_folder = NULL, tag = "baseline")
  idata_v2 <- write_immundata(idata_v1, output_folder = NULL, tag = "baseline")

  expect_true(dir.exists(file.path(output_dir, "snapshots", "baseline", "v001")))
  expect_true(dir.exists(file.path(output_dir, "snapshots", "baseline", "v002")))

  prov_v2 <- imd_get_provenance(idata_v2)
  expect_equal(
    prov_v2$current_path,
    normalizePath(file.path(output_dir, "snapshots", "baseline", "v002"), mustWork = FALSE)
  )
})

# End-to-end check of the on-disk tree: after ingestion into projectA, two
# "baseline" snapshots, one "treated" snapshot and a rehoming write into
# projectB, the expected files and version folders must all exist.
test_that("snapshot tests use projectA/projectB tree in temporary snapshot root", {
  layout <- create_snapshot_test_layout()
  # add = TRUE: append to, rather than replace, any exit handler already
  # registered for this frame.
  on.exit(cleanup_snapshot_test_root(), add = TRUE)
  output_dir <- layout$projectA
  rehome_dir <- layout$projectB

  sample_file <- system.file("extdata/tsv", "sample_0_1k.tsv", package = "immundata")
  idata <- read_repertoires(
    path = sample_file,
    schema = c("cdr3_aa", "v_call"),
    output_folder = output_dir,
    preprocess = NULL,
    postprocess = NULL
  )

  # baseline v001, written from the freshly ingested object.
  write_immundata(idata, output_folder = NULL, tag = "baseline")
  # baseline v002, written from v001 re-read from disk.
  write_immundata(
    read_immundata(output_dir, tag = "baseline", version = 1),
    output_folder = NULL,
    tag = "baseline"
  )
  # A second tag, plus an explicit write into projectB with rehome = TRUE.
  write_immundata(idata, output_folder = NULL, tag = "treated")
  write_immundata(idata, output_folder = rehome_dir, rehome = TRUE)

  # projectA: dataset files and all snapshot version folders.
  expect_true(file.exists(file.path(output_dir, "annotations.parquet")))
  expect_true(file.exists(file.path(output_dir, "metadata.json")))
  expect_true(dir.exists(file.path(output_dir, "snapshots", "baseline", "v001")))
  expect_true(dir.exists(file.path(output_dir, "snapshots", "baseline", "v002")))
  expect_true(dir.exists(file.path(output_dir, "snapshots", "treated", "v001")))
  # projectB: dataset files written by the rehoming call.
  expect_true(file.exists(file.path(rehome_dir, "annotations.parquet")))
  expect_true(file.exists(file.path(rehome_dir, "metadata.json")))
})

# read_immundata(path, tag = ...) must resolve to the highest version folder
# of that tag when no version is given, and to the requested folder when
# `version` is given.
test_that("read_immundata() resolves tag latest and specific versions", {
  layout <- create_snapshot_test_layout()
  # add = TRUE: append to, rather than replace, any exit handler already
  # registered for this frame.
  on.exit(cleanup_snapshot_test_root(), add = TRUE)
  output_dir <- layout$projectA

  sample_file <- system.file("extdata/tsv", "sample_0_1k.tsv", package = "immundata")
  idata <- read_repertoires(
    path = sample_file,
    schema = c("cdr3_aa", "v_call"),
    output_folder = output_dir,
    preprocess = NULL,
    postprocess = NULL
  )

  # Create baseline v001, then v002 from v001 re-read from disk.
  write_immundata(idata, output_folder = NULL, tag = "baseline")
  write_immundata(
    read_immundata(output_dir, tag = "baseline", version = 1),
    output_folder = NULL,
    tag = "baseline"
  )

  # No version given: expect the v002 folder.
  latest <- read_immundata(output_dir, tag = "baseline")
  latest_prov <- imd_get_provenance(latest)
  expect_equal(
    latest_prov$current_path,
    normalizePath(file.path(output_dir, "snapshots", "baseline", "v002"), mustWork = FALSE)
  )

  # Explicit version = 1: expect the v001 folder.
  version1 <- read_immundata(output_dir, tag = "baseline", version = 1)
  v1_prov <- imd_get_provenance(version1)
  expect_equal(
    v1_prov$current_path,
    normalizePath(file.path(output_dir, "snapshots", "baseline", "v001"), mustWork = FALSE)
  )
})

# read_immundata() must reject unresolvable or inconsistent snapshot
# selectors: an unknown tag, an unknown version, a version without a tag, and
# a tag combined with a path that is already a snapshot folder.
test_that("snapshot path resolution validates missing tags and versions", {
  layout <- create_snapshot_test_layout()
  # add = TRUE: append to, rather than replace, any exit handler already
  # registered for this frame.
  on.exit(cleanup_snapshot_test_root(), add = TRUE)
  output_dir <- layout$projectA

  sample_file <- system.file("extdata/tsv", "sample_0_1k.tsv", package = "immundata")
  idata <- read_repertoires(
    path = sample_file,
    schema = c("cdr3_aa", "v_call"),
    output_folder = output_dir,
    preprocess = NULL,
    postprocess = NULL
  )
  # Only baseline/v001 exists after this call.
  write_immundata(idata, output_folder = NULL, tag = "baseline")

  # Tag that was never written.
  expect_error(
    read_immundata(output_dir, tag = "ghost"),
    "not found"
  )

  # Existing tag, version that was never written.
  expect_error(
    read_immundata(output_dir, tag = "baseline", version = 99),
    "not found"
  )

  # `version` supplied without `tag`.
  expect_error(
    read_immundata(output_dir, version = 1),
    "only.*tag"
  )

  # `tag` supplied although the path is itself a snapshot version folder.
  expect_error(
    read_immundata(file.path(output_dir, "snapshots", "baseline", "v001"), tag = "baseline"),
    "already points"
  )
})

# Auto-snapshot writes (output_folder = NULL) must fail when the object
# carries no provenance to derive a home path from, and when the tag
# contains path separators.
test_that("write_immundata() validates tags and missing provenance for auto-snapshots", {
  # The layout is still created as in the other snapshot tests; its return
  # value is not needed because nothing is written to disk here.
  create_snapshot_test_layout()
  # add = TRUE: append to, rather than replace, any exit handler already
  # registered for this frame.
  on.exit(cleanup_snapshot_test_root(), add = TRUE)

  idata <- get_test_idata_tsv_no_metadata()
  # Rebuild an ImmunData object directly from the collected annotations, so
  # it is constructed without going through the read/write path.
  annotations_tbl <- idata$annotations |> collect()
  idata_no_provenance <- ImmunData$new(
    schema = idata$schema_receptor,
    annotations = annotations_tbl
  )

  # No provenance: there is nowhere to place an auto-snapshot.
  expect_error(
    write_immundata(idata_no_provenance, output_folder = NULL),
    "Cannot infer snapshot home path"
  )

  # Tag containing path separators must be rejected.
  expect_error(
    write_immundata(idata, output_folder = NULL, tag = "../bad"),
    "must not include path separators"
  )
})

# `rehome` decides where later auto-snapshots go: after a write to another
# folder with rehome = FALSE they must still land under the original folder;
# after rehome = TRUE they must land under the new folder.
test_that("write_immundata() rehome controls future auto-snapshot root", {
  layout <- create_snapshot_test_layout()
  # add = TRUE: append to, rather than replace, any exit handler already
  # registered for this frame.
  on.exit(cleanup_snapshot_test_root(), add = TRUE)
  output_dir <- layout$projectA
  alt_output_dir <- layout$projectB

  sample_file <- system.file("extdata/tsv", "sample_0_1k.tsv", package = "immundata")
  idata <- read_repertoires(
    path = sample_file,
    schema = c("cdr3_aa", "v_call"),
    output_folder = output_dir,
    preprocess = NULL,
    postprocess = NULL
  )

  # rehome = FALSE: the auto-snapshot is expected under projectA.
  moved_without_rehome <- write_immundata(idata, output_folder = alt_output_dir, rehome = FALSE)
  auto_from_old_home <- write_immundata(moved_without_rehome, output_folder = NULL, tag = "baseline")
  prov_old_home <- imd_get_provenance(auto_from_old_home)
  expect_equal(
    prov_old_home$current_path,
    normalizePath(file.path(output_dir, "snapshots", "baseline", "v001"), mustWork = FALSE)
  )

  # rehome = TRUE: the auto-snapshot is expected under projectB.
  moved_with_rehome <- write_immundata(idata, output_folder = alt_output_dir, rehome = TRUE)
  auto_from_new_home <- write_immundata(moved_with_rehome, output_folder = NULL, tag = "baseline")
  prov_new_home <- imd_get_provenance(auto_from_new_home)
  expect_equal(
    prov_new_home$current_path,
    normalizePath(file.path(alt_output_dir, "snapshots", "baseline", "v001"), mustWork = FALSE)
  )
})

# An object produced by an operation (here filter_immundata()) must keep
# enough provenance for an auto-snapshot to be written under the original
# dataset folder.
test_that("operation outputs preserve provenance for auto-snapshots", {
  layout <- create_snapshot_test_layout()
  # add = TRUE: append to, rather than replace, any exit handler already
  # registered for this frame.
  on.exit(cleanup_snapshot_test_root(), add = TRUE)
  output_dir <- layout$projectA

  sample_file <- system.file("extdata/tsv", "sample_0_1k.tsv", package = "immundata")
  idata <- read_repertoires(
    path = sample_file,
    schema = c("cdr3_aa", "v_call"),
    output_folder = output_dir,
    preprocess = NULL,
    postprocess = NULL
  )

  # Filter with the constant predicate TRUE, then auto-snapshot the result.
  filtered <- filter_immundata(idata, TRUE)
  snap <- write_immundata(filtered, output_folder = NULL, tag = "ops")
  prov <- imd_get_provenance(snap)

  expect_equal(
    prov$current_path,
    normalizePath(file.path(output_dir, "snapshots", "ops", "v001"), mustWork = FALSE)
  )
})

# Passing only `metadata_lineage_inputs` to write_immundata_internal() must
# be rejected with a "complete set" error.
test_that("write_immundata_internal() validates lineage as complete set", {
  layout <- create_snapshot_test_layout()
  # add = TRUE: append to, rather than replace, any exit handler already
  # registered for this frame.
  on.exit(cleanup_snapshot_test_root(), add = TRUE)
  output_dir <- layout$projectA

  idata <- get_test_idata_tsv_no_metadata()

  expect_error(
    write_immundata_internal(
      idata = idata,
      output_folder = output_dir,
      producer_function = "read_repertoires",
      # Only this one lineage argument is supplied.
      metadata_lineage_inputs = list(
        files = c("/tmp/sample.tsv"),
        metadata_joined = FALSE,
        enforce_schema = TRUE
      )
    ),
    "complete set"
  )
})

# With all four lineage arguments supplied, write_immundata_internal() must
# still fail when a lineage argument lacks an expected field.
test_that("write_immundata_internal() validates lineage fields", {
  layout <- create_snapshot_test_layout()
  # add = TRUE: append to, rather than replace, any exit handler already
  # registered for this frame.
  on.exit(cleanup_snapshot_test_root(), add = TRUE)
  output_dir <- layout$projectA

  idata <- get_test_idata_tsv_no_metadata()

  expect_error(
    write_immundata_internal(
      idata = idata,
      output_folder = output_dir,
      producer_function = "read_repertoires",
      metadata_lineage_inputs = list(
        files = c("/tmp/sample.tsv"),
        metadata_joined = FALSE,
        enforce_schema = TRUE
      ),
      # No `metadata_file_col` entry here; presumably that omission is what
      # triggers the error (the pattern below accepts either wording).
      metadata_lineage_args = list(
        barcode_col = NULL,
        count_col = NULL,
        locus_col = NULL,
        umi_col = NULL
      ),
      metadata_lineage_columns = list(
        renamed = list(requested = character(), applied = character(), not_found = character()),
        dropped = list(applied = character())
      ),
      metadata_lineage_pipeline = list(preprocess = character(), postprocess = character())
    ),
    "metadata_file_col|must include"
  )
})

# Try the immundata package in your browser
#
# Any scripts or data that you put into this service are public.
#
# immundata documentation built on April 4, 2026, 9:09 a.m.