Nothing
# ============================================================================
# USE CASE 1: SINGLE CHAIN
# ============================================================================
test_that("Single chain: correct barcode and receptor counts", {
output_dir <- create_test_output_dir()
on.exit(cleanup_output_dir(output_dir))
# Create toy data: 5 cells, some with IGH, some without
test_data <- data.frame(
cell_id = c("cell1", "cell2", "cell3", "cell4", "cell5"),
v_call = c("IGHV1", "IGHV2", "IGHV1", "IGHV3", "IGHV1"),
j_call = c("IGHJ1", "IGHJ2", "IGHJ1", "IGHJ3", "IGHJ1"),
junction_aa = c("CARW", "CBRW", "CARW", "CCRW", "CDRW"),
locus = c("IGH", "IGH", "IGH", "IGH", "IGH"),
umi_count = c(100, 200, 150, 300, 250)
)
temp_file <- tempfile(fileext = ".tsv")
readr::write_tsv(test_data, temp_file)
on.exit(unlink(temp_file), add = TRUE)
idata <- read_repertoires(
path = temp_file,
schema = make_receptor_schema(
features = c("v_call", "j_call", "junction_aa"),
chains = "IGH"
),
barcode_col = "cell_id",
locus_col = "locus",
umi_col = "umi_count",
output_folder = output_dir,
preprocess = NULL,
postprocess = NULL,
rename_columns = NULL
)
annotations <- idata$annotations |> collect()
# Expected counts:
# - 5 barcodes (all cells have IGH)
# - 4 unique receptors (cell1 and cell3 have identical receptors)
expect_equal(n_distinct(annotations$imd_barcode), 5)
expect_equal(n_distinct(annotations$imd_receptor_id), 4)
# Verify receptor sharing
receptor_counts <- annotations |>
group_by(imd_receptor_id) |>
summarise(n_cells = n_distinct(imd_barcode), .groups = "drop")
expect_true(any(receptor_counts$n_cells == 2))
})
test_that("Single chain with mixed loci: filters correctly", {
output_dir <- create_test_output_dir()
on.exit(cleanup_output_dir(output_dir))
# Create toy data with mixed loci
test_data <- data.frame(
cell_id = c("cell1", "cell2", "cell3", "cell4", "cell5"),
v_call = c("IGHV1", "IGLV1", "IGHV2", "IGKV1", "IGHV3"),
j_call = c("IGHJ1", "IGLJ1", "IGHJ2", "IGKJ1", "IGHJ3"),
junction_aa = c("CARW", "CASL", "CBRW", "CASK", "CCRW"),
locus = c("IGH", "IGL", "IGH", "IGK", "IGH"),
umi_count = c(100, 200, 150, 300, 250)
)
temp_file <- tempfile(fileext = ".tsv")
readr::write_tsv(test_data, temp_file)
on.exit(unlink(temp_file), add = TRUE)
# Request only IGH chains
idata <- read_repertoires(
path = temp_file,
schema = make_receptor_schema(
features = c("v_call", "j_call", "junction_aa"),
chains = "IGH"
),
barcode_col = "cell_id",
locus_col = "locus",
umi_col = "umi_count",
output_folder = output_dir,
preprocess = NULL,
postprocess = NULL,
rename_columns = NULL
)
annotations <- idata$annotations |> collect()
# Expected: Only cells 1, 3, and 5 (those with IGH)
expect_equal(n_distinct(annotations$imd_barcode), 3,
info = "Should have only 3 cells with IGH"
)
expect_setequal(annotations$imd_barcode, c("cell1", "cell3", "cell5"))
})
# ============================================================================
# USE CASE 2: STRICT PAIRING
# ============================================================================
test_that("Strict pairing: correct counts for perfect pairs", {
output_dir <- create_test_output_dir()
on.exit(cleanup_output_dir(output_dir))
# Create toy data with perfect IGH+IGL pairs
test_data <- data.frame(
cell_id = c("cell1", "cell1", "cell2", "cell2", "cell3", "cell3"),
v_call = c("IGHV1", "IGLV1", "IGHV2", "IGLV2", "IGHV1", "IGLV1"),
j_call = c("IGHJ1", "IGLJ1", "IGHJ2", "IGLJ2", "IGHJ1", "IGLJ1"),
junction_aa = c("CARW", "CASL", "CBRW", "CBSL", "CARW", "CASL"),
locus = c("IGH", "IGL", "IGH", "IGL", "IGH", "IGL"),
umi_count = c(100, 80, 120, 90, 110, 85)
)
temp_file <- tempfile(fileext = ".tsv")
readr::write_tsv(test_data, temp_file)
on.exit(unlink(temp_file), add = TRUE)
idata <- read_repertoires(
path = temp_file,
schema = make_receptor_schema(
features = c("v_call", "j_call", "junction_aa"),
chains = c("IGH", "IGL")
),
barcode_col = "cell_id",
locus_col = "locus",
umi_col = "umi_count",
output_folder = output_dir,
preprocess = NULL,
postprocess = NULL,
rename_columns = NULL
)
annotations <- idata$annotations |> collect()
# Expected:
# - 3 barcodes (all have complete pairs)
# - 2 unique receptors (cell1 and cell3 share the same receptor)
expect_equal(n_distinct(annotations$imd_barcode), 3)
expect_equal(n_distinct(annotations$imd_receptor_id), 2)
})
test_that("Strict pairing: excludes incomplete pairs", {
output_dir <- create_test_output_dir()
on.exit(cleanup_output_dir(output_dir))
# Create toy data with some incomplete pairs
test_data <- data.frame(
cell_id = c("cell1", "cell1", "cell2", "cell3", "cell4", "cell4"),
v_call = c("IGHV1", "IGLV1", "IGHV2", "IGLV3", "IGHV4", "IGKV4"),
j_call = c("IGHJ1", "IGLJ1", "IGHJ2", "IGLJ3", "IGHJ4", "IGKJ4"),
junction_aa = c("CARW", "CASL", "CBRW", "CCSL", "CDRW", "CDSK"),
locus = c("IGH", "IGL", "IGH", "IGL", "IGH", "IGK"),
umi_count = c(100, 80, 120, 90, 110, 85)
)
temp_file <- tempfile(fileext = ".tsv")
readr::write_tsv(test_data, temp_file)
on.exit(unlink(temp_file), add = TRUE)
# Request strict IGH+IGL pairing
idata <- read_repertoires(
path = temp_file,
schema = make_receptor_schema(
features = c("v_call", "j_call", "junction_aa"),
chains = c("IGH", "IGL")
),
barcode_col = "cell_id",
locus_col = "locus",
umi_col = "umi_count",
output_folder = output_dir,
preprocess = NULL,
postprocess = NULL,
rename_columns = NULL
)
annotations <- idata$annotations |> collect()
# Expected: Only cell1 has both IGH and IGL
# cell2 has only IGH, cell3 has only IGL, cell4 has IGH+IGK (wrong light chain)
expect_equal(n_distinct(annotations$imd_barcode), 1)
expect_equal(unique(annotations$imd_barcode), "cell1")
})
test_that("Strict pairing: handles duplicate chains correctly", {
output_dir <- create_test_output_dir()
on.exit(cleanup_output_dir(output_dir))
# Create data with multiple chains of same locus per cell
test_data <- data.frame(
cell_id = c("cell1", "cell1", "cell1", "cell1"),
v_call = c("IGHV1", "IGHV2", "IGLV1", "IGLV2"),
j_call = c("IGHJ1", "IGHJ2", "IGLJ1", "IGLJ2"),
junction_aa = c("CARW", "CBRW", "CASL", "CBSL"),
locus = c("IGH", "IGH", "IGL", "IGL"),
umi_count = c(100, 150, 80, 60) # IGHV2 and IGLV1 have highest counts
)
temp_file <- tempfile(fileext = ".tsv")
readr::write_tsv(test_data, temp_file)
on.exit(unlink(temp_file), add = TRUE)
idata <- read_repertoires(
path = temp_file,
schema = make_receptor_schema(
features = c("v_call", "j_call", "junction_aa"),
chains = c("IGH", "IGL")
),
barcode_col = "cell_id",
locus_col = "locus",
umi_col = "umi_count",
output_folder = output_dir,
preprocess = NULL,
postprocess = NULL,
rename_columns = NULL
)
annotations <- idata$annotations |> collect()
# Should select highest UMI for each locus
expect_equal(nrow(annotations), 2)
igh_chain <- annotations |> filter(locus == "IGH")
igl_chain <- annotations |> filter(locus == "IGL")
expect_equal(igh_chain$v_call, "IGHV2")
expect_equal(igl_chain$v_call, "IGLV1")
})
# ============================================================================
# USE CASE 3: RELAXED PAIRING WITH ARTIFACT EXCLUSION
# ============================================================================
test_that("Relaxed pairing: correct counts with artifact exclusion", {
output_dir <- create_test_output_dir()
on.exit(cleanup_output_dir(output_dir))
# Create toy data with various scenarios
test_data <- data.frame(
cell_id = c(
"cell1", "cell1", # IGH + IGL only
"cell2", "cell2", # IGH + IGK only
"cell3", "cell3", "cell3", # IGH + IGL + IGK (artifact!)
"cell4", # IGH only
"cell5", # IGL only
"cell6", "cell6"
), # IGL + IGK only (no IGH)
v_call = c(
"IGHV1", "IGLV1",
"IGHV2", "IGKV2",
"IGHV3", "IGLV3", "IGKV3",
"IGHV4",
"IGLV5",
"IGLV6", "IGKV6"
),
j_call = c(
"IGHJ1", "IGLJ1",
"IGHJ2", "IGKJ2",
"IGHJ3", "IGLJ3", "IGKJ3",
"IGHJ4",
"IGLJ5",
"IGLJ6", "IGKJ6"
),
junction_aa = c(
"CARW", "CASL",
"CBRW", "CBSK",
"CCRW", "CCSL", "CCSK",
"CDRW",
"CESL",
"CFSL", "CFSK"
),
locus = c(
"IGH", "IGL",
"IGH", "IGK",
"IGH", "IGL", "IGK",
"IGH",
"IGL",
"IGL", "IGK"
),
umi_count = c(
100, 80,
120, 90,
110, 85, 75,
130,
70,
60, 65
)
)
temp_file <- tempfile(fileext = ".tsv")
readr::write_tsv(test_data, temp_file)
on.exit(unlink(temp_file), add = TRUE)
idata <- read_repertoires(
path = temp_file,
schema = make_receptor_schema(
features = c("v_call", "j_call", "junction_aa"),
chains = c("IGH", "IGL|IGK") # Relaxed pairing
),
barcode_col = "cell_id",
locus_col = "locus",
umi_col = "umi_count",
output_folder = output_dir,
preprocess = NULL,
postprocess = NULL,
rename_columns = NULL
)
annotations <- idata$annotations |> collect()
# Expected results:
# - cell1: IGH + IGL ✓ (valid)
# - cell2: IGH + IGK ✓ (valid)
# - cell3: EXCLUDED (artifact - has both IGL and IGK)
# - cell4: EXCLUDED (missing light chain)
# - cell5: EXCLUDED (missing heavy chain)
# - cell6: EXCLUDED (missing heavy chain)
expect_equal(n_distinct(annotations$imd_barcode), 2)
expect_setequal(unique(annotations$imd_barcode), c("cell1", "cell2"))
expect_equal(n_distinct(annotations$imd_receptor_id), 2)
})
# ============================================================================
# USE CASE 3.5: BULK DATA (no barcodes, with counts)
# ============================================================================
test_that("Bulk data: correct receptor counts without barcodes", {
output_dir <- create_test_output_dir()
on.exit(cleanup_output_dir(output_dir))
# Create bulk data (no cell IDs, but with clone counts)
test_data <- data.frame(
v_call = c("IGHV1", "IGHV1", "IGHV2", "IGHV3"),
j_call = c("IGHJ1", "IGHJ1", "IGHJ2", "IGHJ3"),
junction_aa = c("CARW", "CARW", "CBRW", "CCRW"),
locus = c("IGH", "IGH", "IGH", "IGH"),
clone_count = c(100, 50, 200, 75) # Frequency counts
)
temp_file <- tempfile(fileext = ".tsv")
readr::write_tsv(test_data, temp_file)
on.exit(unlink(temp_file), add = TRUE)
idata <- read_repertoires(
path = temp_file,
schema = make_receptor_schema(
features = c("v_call", "j_call", "junction_aa"),
chains = "IGH"
),
count_col = "clone_count", # Bulk mode with counts
locus_col = "locus",
output_folder = output_dir,
preprocess = NULL,
postprocess = NULL,
rename_columns = NULL
)
annotations <- idata$annotations |> collect()
# Expected:
# - 4 synthetic barcodes (one per row)
# - 3 unique receptors (rows 1 and 2 have identical receptors)
# - Chain counts should match clone_count values
expect_equal(n_distinct(annotations$imd_barcode), 4)
expect_equal(n_distinct(annotations$imd_receptor_id), 3)
expect_equal(annotations$imd_n_chains, test_data$clone_count)
})
# ============================================================================
# COMPARISON TESTS
# ============================================================================
test_that("Comparison: relaxed vs strict pairing counts", {
output_dir_strict <- create_test_output_dir()
output_dir_relaxed <- create_test_output_dir()
on.exit({
cleanup_output_dir(output_dir_strict)
cleanup_output_dir(output_dir_relaxed)
})
# Create data with artifacts
test_data <- data.frame(
cell_id = c(
"normal1", "normal1", # IGH + IGL
"normal2", "normal2", # IGH + IGK
"artifact", "artifact", "artifact"
), # IGH + IGL + IGK
v_call = c(
"IGHV1", "IGLV1",
"IGHV2", "IGKV2",
"IGHV3", "IGLV3", "IGKV3"
),
j_call = c(
"IGHJ1", "IGLJ1",
"IGHJ2", "IGKJ2",
"IGHJ3", "IGLJ3", "IGKJ3"
),
junction_aa = c(
"CARW", "CASL",
"CBRW", "CBSK",
"CCRW", "CCSL", "CCSK"
),
locus = c(
"IGH", "IGL",
"IGH", "IGK",
"IGH", "IGL", "IGK"
),
umi_count = c(
100, 80,
120, 90,
110, 85, 75
)
)
temp_file <- tempfile(fileext = ".tsv")
readr::write_tsv(test_data, temp_file)
on.exit(unlink(temp_file), add = TRUE)
# Strict IGH + IGL
idata_strict <- read_repertoires(
path = temp_file,
schema = make_receptor_schema(
features = c("v_call", "j_call", "junction_aa"),
chains = c("IGH", "IGL")
),
barcode_col = "cell_id",
locus_col = "locus",
umi_col = "umi_count",
output_folder = output_dir_strict,
preprocess = NULL,
postprocess = NULL,
rename_columns = NULL
)
# Relaxed IGH + (IGL|IGK)
idata_relaxed <- read_repertoires(
path = temp_file,
schema = make_receptor_schema(
features = c("v_call", "j_call", "junction_aa"),
chains = c("IGH", "IGL|IGK")
),
barcode_col = "cell_id",
locus_col = "locus",
umi_col = "umi_count",
output_folder = output_dir_relaxed,
preprocess = NULL,
postprocess = NULL,
rename_columns = NULL
)
strict_cells <- idata_strict$annotations |>
collect() |>
pull(imd_barcode) |>
unique()
relaxed_cells <- idata_relaxed$annotations |>
collect() |>
pull(imd_barcode) |>
unique()
# Strict should include: normal1, artifact (picking IGL)
# Relaxed should include: normal1, normal2 (excluding artifact)
expect_true("artifact" %in% strict_cells,
info = "Strict pairing includes artifact cell"
)
expect_false("artifact" %in% relaxed_cells,
info = "Relaxed pairing excludes artifact cell"
)
expect_equal(length(relaxed_cells), 2,
info = "Relaxed should have 2 valid cells"
)
})
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.