Nothing
# test_that("downsample_immundata downsamples single-cell repertoires and is deterministic with seed", {
# output_dir <- create_test_output_dir()
# on.exit(cleanup_output_dir(output_dir))
#
# test_data <- data.frame(
# cell_id = c("c1", "c2", "c3", "c4", "c5", "c6", "c7", "c8"),
# sample_id = c("S1", "S1", "S1", "S1", "S2", "S2", "S2", "S2"),
# v_call = c("IGHV1", "IGHV2", "IGHV3", "IGHV4", "IGHV1", "IGHV2", "IGHV3", "IGHV4"),
# j_call = c("IGHJ1", "IGHJ2", "IGHJ3", "IGHJ4", "IGHJ1", "IGHJ2", "IGHJ3", "IGHJ4"),
# junction_aa = c("CARA", "CARB", "CARC", "CARD", "CARE", "CARF", "CARG", "CARH"),
# locus = "IGH",
# umi_count = c(10, 11, 12, 13, 14, 15, 16, 17)
# )
#
# temp_file <- tempfile(fileext = ".tsv")
# readr::write_tsv(test_data, temp_file)
# on.exit(unlink(temp_file), add = TRUE)
#
# idata <- read_repertoires(
# path = temp_file,
# schema = make_receptor_schema(
# features = c("v_call", "j_call", "junction_aa"),
# chains = "IGH"
# ),
# barcode_col = "cell_id",
# locus_col = "locus",
# umi_col = "umi_count",
# output_folder = output_dir,
# preprocess = NULL,
# postprocess = NULL,
# rename_columns = NULL
# )
# idata <- agg_repertoires_with_integrity(
# idata,
# schema = "sample_id",
# context = "downsample single-cell deterministic"
# )
#
# ds1 <- downsample_immundata(idata, n = 2, seed = 100)
# ds2 <- downsample_immundata(idata, n = 2, seed = 100)
#
# reps <- ds1$repertoires |> collect()
# expect_true(all(reps$n_barcodes == 2))
#
# sampled1 <- ds1$annotations |>
# select(sample_id, imd_barcode) |>
# distinct() |>
# arrange(sample_id, imd_barcode) |>
# collect()
#
# sampled2 <- ds2$annotations |>
# select(sample_id, imd_barcode) |>
# distinct() |>
# arrange(sample_id, imd_barcode) |>
# collect()
#
# expect_equal(sampled1, sampled2)
# })
#
# test_that("downsample_immundata downsampled bulk repertoires by count", {
# output_dir <- create_test_output_dir()
# on.exit(cleanup_output_dir(output_dir))
#
# test_data <- data.frame(
# sample_id = c("S1", "S1", "S2", "S2"),
# v_call = c("TRBV1", "TRBV2", "TRBV3", "TRBV4"),
# j_call = c("TRBJ1", "TRBJ2", "TRBJ1", "TRBJ2"),
# junction_aa = c("AAAA", "BBBB", "CCCC", "DDDD"),
# clone_count = c(8, 7, 6, 9)
# )
#
# temp_file <- tempfile(fileext = ".tsv")
# readr::write_tsv(test_data, temp_file)
# on.exit(unlink(temp_file), add = TRUE)
#
# idata <- read_repertoires(
# path = temp_file,
# schema = c("v_call", "j_call", "junction_aa"),
# count_col = "clone_count",
# output_folder = output_dir,
# preprocess = NULL,
# postprocess = NULL,
# rename_columns = NULL
# )
# idata <- agg_repertoires_with_integrity(
# idata,
# schema = "sample_id",
# context = "downsample bulk count mode"
# )
#
# ds <- downsample_immundata(idata, n = 5, seed = 42)
# reps <- ds$repertoires |> collect()
#
# expect_equal(sort(reps$n_barcodes), c(5, 5))
# })
#
# test_that("downsample_immundata keeps paired chains intact", {
# output_dir <- create_test_output_dir()
# on.exit(cleanup_output_dir(output_dir))
#
# test_data <- data.frame(
# cell_id = c(
# "a1", "a1", "a2", "a2", "a3", "a3",
# "b1", "b1", "b2", "b2", "b3", "b3"
# ),
# sample_id = c(
# "S1", "S1", "S1", "S1", "S1", "S1",
# "S2", "S2", "S2", "S2", "S2", "S2"
# ),
# v_call = c("IGHV1", "IGLV1", "IGHV2", "IGLV2", "IGHV3", "IGLV3", "IGHV1", "IGLV1", "IGHV2", "IGLV2", "IGHV3", "IGLV3"),
# j_call = c("IGHJ1", "IGLJ1", "IGHJ2", "IGLJ2", "IGHJ3", "IGLJ3", "IGHJ1", "IGLJ1", "IGHJ2", "IGLJ2", "IGHJ3", "IGLJ3"),
# junction_aa = c("CA1", "CL1", "CA2", "CL2", "CA3", "CL3", "CB1", "CK1", "CB2", "CK2", "CB3", "CK3"),
# locus = c("IGH", "IGL", "IGH", "IGL", "IGH", "IGL", "IGH", "IGL", "IGH", "IGL", "IGH", "IGL"),
# umi_count = rep(100, 12)
# )
#
# temp_file <- tempfile(fileext = ".tsv")
# readr::write_tsv(test_data, temp_file)
# on.exit(unlink(temp_file), add = TRUE)
#
# idata <- read_repertoires(
# path = temp_file,
# schema = make_receptor_schema(
# features = c("v_call", "j_call", "junction_aa"),
# chains = c("IGH", "IGL")
# ),
# barcode_col = "cell_id",
# locus_col = "locus",
# umi_col = "umi_count",
# output_folder = output_dir,
# preprocess = NULL,
# postprocess = NULL,
# rename_columns = NULL
# )
# idata <- agg_repertoires_with_integrity(
# idata,
# schema = "sample_id",
# context = "downsample paired-chain integrity"
# )
#
# ds <- downsample_immundata(idata, n = 2, seed = 7)
# reps <- ds$repertoires |> collect()
# expect_true(all(reps$n_barcodes == 2))
#
# chain_stats <- ds$annotations |>
# collect() |>
# summarise(
# .by = c(sample_id, imd_barcode),
# n_loci = n_distinct(locus),
# n_rows = n()
# )
#
# expect_true(all(chain_stats$n_loci == 2))
# expect_true(all(chain_stats$n_rows == 2))
# })
#
# test_that("downsample_immundata works on IG test data with proportion n", {
# output_dir <- create_test_output_dir()
# on.exit(cleanup_output_dir(output_dir))
#
# idata <- read_repertoires(
# path = test_ig_data(),
# schema = make_receptor_schema(
# features = c("v_call", "j_call", "junction_aa"),
# chains = c("IGH", "IGK|IGL")
# ),
# barcode_col = "cell_id",
# locus_col = "locus",
# umi_col = "duplicate_count",
# output_folder = output_dir,
# preprocess = NULL,
# postprocess = NULL,
# rename_columns = NULL
# ) |>
# mutate_immundata(cohort = "all")
# idata <- agg_repertoires_with_integrity(
# idata,
# schema = "cohort",
# context = "downsample IG proportion"
# )
#
# n_before <- idata$repertoires |>
# collect() |>
# pull(n_barcodes)
#
# ds <- downsample_immundata(idata, n = 0.5, seed = 123)
# n_after <- ds$repertoires |>
# collect() |>
# pull(n_barcodes)
#
# expect_equal(n_after, floor(n_before * 0.5))
#
# chain_stats <- ds$annotations |>
# collect() |>
# summarise(
# .by = imd_barcode,
# n_loci = n_distinct(locus)
# )
# expect_true(all(chain_stats$n_loci == 2))
# })
#
# test_that("downsample_immundata with n = 1 keeps one receptor per repertoire", {
# output_dir <- create_test_output_dir()
# on.exit(cleanup_output_dir(output_dir))
#
# test_data <- data.frame(
# cell_id = c("s1c1", "s1c2", "s1c3", "s1c4", "s2c1", "s2c2", "s2c3", "s2c4"),
# sample_id = c("S1", "S1", "S1", "S1", "S2", "S2", "S2", "S2"),
# v_call = c("IGHV1", "IGHV1", "IGHV2", "IGHV3", "IGHV4", "IGHV5", "IGHV5", "IGHV6"),
# j_call = c("IGHJ1", "IGHJ1", "IGHJ2", "IGHJ3", "IGHJ4", "IGHJ5", "IGHJ5", "IGHJ6"),
# junction_aa = c("A", "A", "B", "C", "D", "E", "E", "F"),
# locus = "IGH",
# umi_count = c(12, 11, 10, 9, 8, 7, 6, 5)
# )
#
# temp_file <- tempfile(fileext = ".tsv")
# readr::write_tsv(test_data, temp_file)
# on.exit(unlink(temp_file), add = TRUE)
#
# idata <- read_repertoires(
# path = temp_file,
# schema = make_receptor_schema(
# features = c("v_call", "j_call", "junction_aa"),
# chains = "IGH"
# ),
# barcode_col = "cell_id",
# locus_col = "locus",
# umi_col = "umi_count",
# output_folder = output_dir,
# preprocess = NULL,
# postprocess = NULL,
# rename_columns = NULL
# )
# idata <- agg_repertoires_with_integrity(
# idata,
# schema = "sample_id",
# context = "downsample n=1 receptor mode"
# )
#
# ds1 <- downsample_immundata(idata, n = 1, seed = 321)
# ds2 <- downsample_immundata(idata, n = 1, seed = 321)
#
# receptor_stats <- ds1$annotations |>
# collect() |>
# summarise(
# .by = sample_id,
# n_receptors = n_distinct(imd_receptor_id)
# ) |>
# arrange(sample_id)
#
# expect_true(all(receptor_stats$n_receptors == 1))
#
# reps <- ds1$repertoires |>
# collect() |>
# arrange(sample_id)
# expect_true(all(reps$n_receptors == 1))
#
# sampled1 <- ds1$annotations |>
# select(sample_id, imd_receptor_id) |>
# distinct() |>
# arrange(sample_id, imd_receptor_id) |>
# collect()
#
# sampled2 <- ds2$annotations |>
# select(sample_id, imd_receptor_id) |>
# distinct() |>
# arrange(sample_id, imd_receptor_id) |>
# collect()
#
# expect_equal(sampled1, sampled2)
# })
#
# test_that("downsample_immundata validates n and handles no-repertoire fallback", {
# idata <- get_test_idata_tsv_no_metadata()
#
# expect_error(
# downsample_immundata(idata, n = 2.5),
# "integer count"
# )
#
# n_before <- idata$annotations |>
# distinct(imd_barcode) |>
# collect() |>
# nrow()
#
# ds <- downsample_immundata(idata, n = 0.1, seed = 1)
#
# n_after <- ds$annotations |>
# distinct(imd_barcode) |>
# collect() |>
# nrow()
#
# expect_lt(n_after, n_before)
# expect_null(ds$repertoires)
# })
#
# test_that("downsample_immundata warns and keeps repertoire unchanged when n exceeds available units", {
# output_dir <- create_test_output_dir()
# on.exit(cleanup_output_dir(output_dir))
#
# test_data <- data.frame(
# cell_id = c("c1", "c2", "c3", "c4", "c5", "c6"),
# sample_id = c("S1", "S1", "S1", "S2", "S2", "S2"),
# v_call = c("IGHV1", "IGHV2", "IGHV3", "IGHV1", "IGHV2", "IGHV3"),
# j_call = c("IGHJ1", "IGHJ2", "IGHJ3", "IGHJ1", "IGHJ2", "IGHJ3"),
# junction_aa = c("A1", "A2", "A3", "B1", "B2", "B3"),
# locus = "IGH",
# umi_count = c(1, 1, 1, 1, 1, 1)
# )
#
# temp_file <- tempfile(fileext = ".tsv")
# readr::write_tsv(test_data, temp_file)
# on.exit(unlink(temp_file), add = TRUE)
#
# idata <- read_repertoires(
# path = temp_file,
# schema = make_receptor_schema(features = c("v_call", "j_call", "junction_aa"), chains = "IGH"),
# barcode_col = "cell_id",
# locus_col = "locus",
# umi_col = "umi_count",
# output_folder = output_dir,
# preprocess = NULL,
# postprocess = NULL,
# rename_columns = NULL
# )
# idata <- agg_repertoires_with_integrity(
# idata,
# schema = "sample_id",
# context = "downsample n exceeds units"
# )
#
# reps_before <- idata$repertoires |>
# collect() |>
# arrange(sample_id)
#
# ds <- NULL
# expect_warning(
# ds <- downsample_immundata(idata, n = 10, seed = 42),
# "returned unchanged"
# )
#
# reps_after <- ds$repertoires |>
# collect() |>
# arrange(sample_id)
#
# expect_equal(reps_after$n_barcodes, reps_before$n_barcodes)
# })
#
# test_that("downsample_immundata supports count-mode proportion downsampling", {
# output_dir <- create_test_output_dir()
# on.exit(cleanup_output_dir(output_dir))
#
# test_data <- data.frame(
# sample_id = c("S1", "S1", "S2", "S2"),
# v_call = c("TRBV1", "TRBV2", "TRBV3", "TRBV4"),
# j_call = c("TRBJ1", "TRBJ2", "TRBJ1", "TRBJ2"),
# junction_aa = c("AAAA", "BBBB", "CCCC", "DDDD"),
# clone_count = c(8, 7, 6, 9)
# )
#
# temp_file <- tempfile(fileext = ".tsv")
# readr::write_tsv(test_data, temp_file)
# on.exit(unlink(temp_file), add = TRUE)
#
# idata <- read_repertoires(
# path = temp_file,
# schema = c("v_call", "j_call", "junction_aa"),
# count_col = "clone_count",
# output_folder = output_dir,
# preprocess = NULL,
# postprocess = NULL,
# rename_columns = NULL
# )
# idata <- agg_repertoires_with_integrity(
# idata,
# schema = "sample_id",
# context = "downsample count proportion"
# )
#
# ds <- downsample_immundata(idata, n = 0.5, seed = 101)
# reps <- ds$repertoires |> collect()
#
# expect_equal(sort(reps$n_barcodes), c(7, 7))
# })
#
# test_that("downsample_immundata is deterministic in count mode with seed", {
# output_dir <- create_test_output_dir()
# on.exit(cleanup_output_dir(output_dir))
#
# test_data <- data.frame(
# sample_id = c("S1", "S1", "S2", "S2"),
# v_call = c("TRBV1", "TRBV2", "TRBV3", "TRBV4"),
# j_call = c("TRBJ1", "TRBJ2", "TRBJ1", "TRBJ2"),
# junction_aa = c("AAAA", "BBBB", "CCCC", "DDDD"),
# clone_count = c(8, 7, 6, 9)
# )
#
# temp_file <- tempfile(fileext = ".tsv")
# readr::write_tsv(test_data, temp_file)
# on.exit(unlink(temp_file), add = TRUE)
#
# idata <- read_repertoires(
# path = temp_file,
# schema = c("v_call", "j_call", "junction_aa"),
# count_col = "clone_count",
# output_folder = output_dir,
# preprocess = NULL,
# postprocess = NULL,
# rename_columns = NULL
# )
# idata <- agg_repertoires_with_integrity(
# idata,
# schema = "sample_id",
# context = "downsample count deterministic"
# )
#
# ds1 <- downsample_immundata(idata, n = 5, seed = 222)
# ds2 <- downsample_immundata(idata, n = 5, seed = 222)
#
# ann1 <- ds1$annotations |>
# select(sample_id, imd_barcode, imd_n_chains) |>
# arrange(sample_id, imd_barcode) |>
# collect()
# ann2 <- ds2$annotations |>
# select(sample_id, imd_barcode, imd_n_chains) |>
# arrange(sample_id, imd_barcode) |>
# collect()
#
# expect_equal(ann1, ann2)
# })
#
# test_that("downsample_immundata errors when proportion results in zero target", {
# idata <- get_test_idata_tsv_no_metadata()
#
# expect_error(
# downsample_immundata(idata, n = 0.0001, seed = 1),
# "Increase `n`"
# )
# })
#
# test_that("downsample_immundata produces consistent imd_count and imd_proportion invariants", {
# output_dir <- create_test_output_dir()
# on.exit(cleanup_output_dir(output_dir))
#
# test_data <- data.frame(
# cell_id = c("c1", "c2", "c3", "c4", "c5", "c6"),
# sample_id = c("S1", "S1", "S1", "S2", "S2", "S2"),
# v_call = c("IGHV1", "IGHV1", "IGHV2", "IGHV1", "IGHV2", "IGHV2"),
# j_call = c("IGHJ1", "IGHJ1", "IGHJ2", "IGHJ1", "IGHJ2", "IGHJ2"),
# junction_aa = c("A1", "A1", "A2", "B1", "B2", "B2"),
# locus = "IGH",
# umi_count = c(10, 11, 12, 13, 14, 15)
# )
#
# temp_file <- tempfile(fileext = ".tsv")
# readr::write_tsv(test_data, temp_file)
# on.exit(unlink(temp_file), add = TRUE)
#
# idata <- read_repertoires(
# path = temp_file,
# schema = make_receptor_schema(features = c("v_call", "j_call", "junction_aa"), chains = "IGH"),
# barcode_col = "cell_id",
# locus_col = "locus",
# umi_col = "umi_count",
# output_folder = output_dir,
# preprocess = NULL,
# postprocess = NULL,
# rename_columns = NULL
# )
# idata <- agg_repertoires_with_integrity(
# idata,
# schema = "sample_id",
# context = "downsample count/proportion invariants"
# )
#
# ds <- downsample_immundata(idata, n = 2, seed = 33)
#
# reps <- ds$repertoires |> collect()
# ann <- ds$annotations |> collect()
#
# receptor_stats <- ann |>
# select(imd_repertoire_id, imd_receptor_id, imd_count, imd_proportion) |>
# distinct() |>
# summarise(
# .by = imd_repertoire_id,
# sum_count = sum(imd_count),
# sum_prop = sum(imd_proportion)
# ) |>
# arrange(imd_repertoire_id)
#
# reps <- reps |> arrange(imd_repertoire_id)
#
# expect_equal(receptor_stats$sum_count, reps$n_barcodes)
# expect_true(all(abs(receptor_stats$sum_prop - 1) < 1e-8))
# })
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.