tests/testthat/test-agg-strata.R

test_that("agg_strata adds id and name to repertoires, id only to annotations", {
  strata_col <- imd_schema("strata")
  strata_name_col <- imd_schema("strata_name")

  idata <- get_test_immundata() |>
    agg_repertoires(c("Response", "Therapy"))

  stratified <- agg_strata(idata, by = "Response")
  rep_tbl <- stratified$repertoires

  expect_true(strata_col %in% names(rep_tbl))
  expect_true(strata_name_col %in% names(rep_tbl))
  expect_true(strata_col %in% names(stratified$annotations))
  expect_false(strata_name_col %in% names(stratified$annotations))

  expected_n <- length(unique(rep_tbl$Response))
  observed_n <- length(unique(rep_tbl[[strata_col]]))
  expect_equal(observed_n, expected_n)

  rep_names <- unique(rep_tbl[c(strata_col, strata_name_col)])
  expect_equal(anyDuplicated(rep_names[[strata_col]]), 0)
  expect_equal(anyDuplicated(rep_names[[strata_name_col]]), 0)
})

test_that("agg_strata validates grouping columns", {
  idata <- get_test_immundata() |>
    agg_repertoires("Response")

  expect_error(
    agg_strata(idata, by = "not_a_metadata_column"),
    "not found in idata\\$repertoires"
  )
})

test_that("agg_strata keeps repertoire-strata mapping consistent", {
  repertoire_col <- imd_schema("repertoire")
  strata_col <- imd_schema("strata")

  idata <- get_test_immundata() |>
    agg_repertoires(c("Response", "Therapy"))

  stratified <- agg_strata(idata, by = c("Response", "Therapy"))

  rep_map <- unique(stratified$repertoires[c(repertoire_col, strata_col)])
  rep_map <- rep_map[order(rep_map[[repertoire_col]]), , drop = FALSE]

  ann_map <- stratified$annotations |>
    dplyr::select(all_of(c(repertoire_col, strata_col))) |>
    dplyr::collect()
  ann_map <- unique(ann_map[c(repertoire_col, strata_col)])
  ann_map <- ann_map[order(ann_map[[repertoire_col]]), , drop = FALSE]

  expect_equal(nrow(rep_map), nrow(ann_map))
  expect_equal(anyDuplicated(rep_map[[repertoire_col]]), 0)
  expect_equal(anyDuplicated(ann_map[[repertoire_col]]), 0)
  expect_equal(
    as.data.frame(rep_map, stringsAsFactors = FALSE),
    as.data.frame(ann_map, stringsAsFactors = FALSE),
    ignore_attr = TRUE
  )
})

test_that("agg_strata handles NA group values", {
  repertoire_col <- imd_schema("repertoire")
  strata_col <- imd_schema("strata")

  idata <- get_test_immundata() |>
    agg_repertoires(c("Response", "Therapy"))

  rep_tbl <- idata$repertoires
  rep_tbl$Response[1] <- NA_character_

  idata_with_na <- ImmunData$new(
    schema = idata$schema_receptor,
    annotations = idata$annotations,
    repertoires = rep_tbl
  )

  stratified <- agg_strata(idata_with_na, by = "Response")
  strat_rep <- stratified$repertoires

  expect_equal(
    length(unique(strat_rep[[strata_col]])),
    length(unique(strat_rep$Response))
  )

  na_rep <- unique(strat_rep[is.na(strat_rep$Response), c(repertoire_col, strata_col), drop = FALSE])
  expect_equal(nrow(na_rep), 1)

  ann_na_strata <- stratified$annotations |>
    dplyr::select(all_of(c(repertoire_col, strata_col))) |>
    dplyr::collect()
  ann_na_strata <- unique(
    ann_na_strata[ann_na_strata[[repertoire_col]] == na_rep[[repertoire_col]], strata_col, drop = FALSE]
  )

  expect_equal(nrow(ann_na_strata), 1)
  expect_equal(ann_na_strata[[strata_col]], na_rep[[strata_col]])
})

test_that("agg_strata requires repertoire aggregation", {
  idata <- get_test_immundata()

  expect_error(
    agg_strata(idata, by = "Response"),
    "agg_repertoires"
  )
})

test_that("agg_strata keeps repertoire schema and strata metadata are dropped on re-aggregation", {
  strata_col <- imd_schema("strata")
  strata_name_col <- imd_schema("strata_name")

  idata <- get_test_immundata() |>
    agg_repertoires(c("Response", "Therapy"))

  stratified <- agg_strata(idata, by = "Response")

  expect_equal(stratified$schema_repertoire, idata$schema_repertoire)
  expect_true(strata_col %in% names(stratified$annotations))
  expect_true(strata_col %in% names(stratified$repertoires))
  expect_true(strata_name_col %in% names(stratified$repertoires))

  reaggregated <- agg_repertoires(stratified, schema = stratified$schema_repertoire)

  expect_false(strata_col %in% names(reaggregated$annotations))
  expect_false(strata_name_col %in% names(reaggregated$annotations))
  expect_false(strata_col %in% names(reaggregated$repertoires))
  expect_false(strata_name_col %in% names(reaggregated$repertoires))
})

test_that("re-aggregation rebuilds repertoire ids and removes strata metadata", {
  repertoire_col <- imd_schema("repertoire")
  strata_col <- imd_schema("strata")
  strata_name_col <- imd_schema("strata_name")

  idata <- get_test_immundata() |>
    agg_repertoires(c("Response", "Therapy")) |>
    agg_strata(by = "Response")

  poisoned_annotations <- idata$annotations |>
    dplyr::mutate(!!repertoire_col := 999999L, !!strata_col := 999999L)

  poisoned_repertoires <- idata$repertoires
  poisoned_repertoires[[repertoire_col]] <- 999999L
  poisoned_repertoires[[strata_col]] <- 999999L
  poisoned_repertoires[[strata_name_col]] <- "BrokenName"

  poisoned_idata <- ImmunData$new(
    schema = idata$schema_receptor,
    annotations = poisoned_annotations,
    repertoires = poisoned_repertoires
  )

  reaggregated <- agg_repertoires(poisoned_idata, schema = poisoned_idata$schema_repertoire)

  expect_false(strata_col %in% names(reaggregated$annotations))
  expect_false(strata_name_col %in% names(reaggregated$annotations))
  expect_false(strata_col %in% names(reaggregated$repertoires))
  expect_false(strata_name_col %in% names(reaggregated$repertoires))

  expect_true(repertoire_col %in% names(reaggregated$annotations))
  expect_true(repertoire_col %in% names(reaggregated$repertoires))

  ann_tbl <- reaggregated$annotations |>
    dplyr::select(all_of(repertoire_col)) |>
    dplyr::collect()
  rep_tbl <- reaggregated$repertoires

  expect_false(any(ann_tbl[[repertoire_col]] == 999999L))
  expect_false(any(rep_tbl[[repertoire_col]] == 999999L))
  expect_equal(length(unique(rep_tbl[[repertoire_col]])), nrow(rep_tbl))
  expect_true(all(ann_tbl[[repertoire_col]] %in% rep_tbl[[repertoire_col]]))
})

test_that("agg_strata can be re-run and overwrites previous strata assignment", {
  strata_col <- imd_schema("strata")
  strata_name_col <- imd_schema("strata_name")

  idata <- get_test_immundata() |>
    agg_repertoires(c("Response", "Therapy"))

  rep_tbl <- idata$repertoires
  rep_tbl$Response <- "mixed"

  mutated <- ImmunData$new(
    schema = idata$schema_receptor,
    annotations = idata$annotations,
    repertoires = rep_tbl
  )

  stratified_once <- agg_strata(mutated, by = "Response")
  expect_equal(length(unique(stratified_once$repertoires[[strata_col]])), 1)

  stratified_twice <- agg_strata(stratified_once, by = "Therapy")
  expect_equal(length(unique(stratified_twice$repertoires[[strata_col]])), 2)
  expect_true(strata_name_col %in% names(stratified_twice$repertoires))
  expect_false(strata_name_col %in% names(stratified_twice$annotations))
})

test_that("agg_strata validates by argument contract", {
  idata <- get_test_immundata() |>
    agg_repertoires(c("Response", "Therapy"))

  expect_error(agg_strata(idata, by = character()), "Assertion on 'by' failed")
  expect_error(agg_strata(idata, by = NA_character_), "Assertion on 'by' failed")
  expect_error(agg_strata(idata, by = c("Response", "Response")), "Assertion on 'by' failed")
  expect_error(agg_strata(idata, by = 1), "Assertion on 'by' failed")
})

test_that("agg_strata errors clearly when repertoire id column is missing", {
  repertoire_col <- imd_schema("repertoire")

  idata <- get_test_immundata() |>
    agg_repertoires(c("Response", "Therapy"))

  bad_annotations <- idata$annotations |>
    dplyr::select(-all_of(repertoire_col))

  bad_ann_idata <- ImmunData$new(
    schema = idata$schema_receptor,
    annotations = bad_annotations,
    repertoires = idata$repertoires
  )

  expect_error(
    agg_strata(bad_ann_idata, by = "Response"),
    "missing in .*idata\\$annotations"
  )

  bad_repertoires <- idata$repertoires
  bad_repertoires[[repertoire_col]] <- NULL

  bad_rep_idata <- ImmunData$new(
    schema = idata$schema_receptor,
    annotations = idata$annotations,
    repertoires = bad_repertoires
  )

  expect_error(
    agg_strata(bad_rep_idata, by = "Response"),
    "missing in .*idata\\$repertoires"
  )
})

test_that("rename_strata renames using a full named vector mapping", {
  strata_col <- imd_schema("strata")
  strata_name_col <- imd_schema("strata_name")

  idata <- get_test_immundata() |>
    agg_repertoires(c("Response", "Therapy")) |>
    agg_strata(by = "Response")

  strata_ids <- sort(unique(idata$repertoires[[strata_col]]))
  new_labels <- paste0("Group_", strata_ids)
  names(new_labels) <- as.character(strata_ids)

  renamed <- rename_strata(idata, names = new_labels)
  rep_tbl <- renamed$repertoires

  expect_equal(
    sort(unique(rep_tbl[[strata_name_col]])),
    sort(unname(new_labels))
  )
  expect_false(strata_name_col %in% names(renamed$annotations))
})

test_that("rename_strata supports partial renaming with unnamed policy", {
  strata_col <- imd_schema("strata")
  strata_name_col <- imd_schema("strata_name")

  idata <- get_test_immundata() |>
    agg_repertoires(c("Response", "Therapy")) |>
    agg_strata(by = "Response")

  strata_ids <- sort(unique(idata$repertoires[[strata_col]]))
  partial_map <- c("CustomA")
  names(partial_map) <- as.character(strata_ids[1])

  expect_error(
    rename_strata(idata, names = partial_map),
    "Missing names for strata ID"
  )

  auto_named <- rename_strata(idata, names = partial_map, unnamed = "auto", auto_prefix = "Auto")
  rep_tbl <- auto_named$repertoires

  expect_true("CustomA" %in% rep_tbl[[strata_name_col]])
  expect_true(any(grepl("^Auto", rep_tbl[[strata_name_col]])))

  keep_named <- rename_strata(idata, names = partial_map, unnamed = "keep")
  keep_tbl <- keep_named$repertoires
  expect_true("CustomA" %in% keep_tbl[[strata_name_col]])
})

test_that("rename_strata validates mapping integrity", {
  strata_col <- imd_schema("strata")
  strata_name_col <- imd_schema("strata_name")

  idata <- get_test_immundata() |>
    agg_repertoires(c("Response", "Therapy")) |>
    agg_strata(by = "Response")

  strata_ids <- sort(unique(idata$repertoires[[strata_col]]))

  bad_dup_label <- data.frame(
    imd_strata_id = strata_ids,
    strata_name = rep("dup", length(strata_ids))
  )
  expect_error(rename_strata(idata, names = bad_dup_label), "duplicated strata labels")

  bad_unknown_id <- data.frame(
    imd_strata_id = 999999L,
    strata_name = "Unknown"
  )
  expect_error(rename_strata(idata, names = bad_unknown_id), "Unknown strata ID")

  bad_empty <- data.frame(
    imd_strata_id = strata_ids[1],
    strata_name = ""
  )
  expect_error(rename_strata(idata, names = bad_empty), "non-empty strings")

  rep_tbl <- rename_strata(idata, names = setNames("OkName", as.character(strata_ids[1])), unnamed = "auto")$repertoires
  expect_true(strata_name_col %in% names(rep_tbl))
})

Try the immundata package in your browser

Any scripts or data that you put into this service are public.

immundata documentation built on April 4, 2026, 9:09 a.m.