Nothing
#' @title ImmunData: A Unified Structure for Immune Receptor Repertoire Data
#'
#' @description
#' `ImmunData` is an abstract R6 class for managing and transforming immune receptor repertoire data.
#' It supports flexible backends (e.g., Arrow, DuckDB, dbplyr) and lazy evaluation,
#' and provides tools for filtering, aggregation, and receptor-to-repertoire mapping.
#'
#' @seealso [read_repertoires()], [read_immundata()]
#'
#' @concept core_immundata
#' @export
ImmunData <- R6Class(
  "ImmunData",
  private = list(
    # .annotations A barcode-level table that links each barcode (i.e., cell ID)
    # to a receptor. It can also store cell-level metadata such as
    # sample ID, donor, or tissue source. This table is **not aggregated** and
    # typically contains one row per barcode.
    .annotations = NULL,

    # .repertoire_table A duckplyr table with repertoire names and receptor counts.
    # Stays NULL until a repertoire table is passed to the constructor.
    .repertoire_table = NULL,

    # .provenance Internal snapshot/provenance metadata used by IO helpers.
    .provenance = NULL
  ),
  public = list(
    #' @field schema_receptor A named list describing how to interpret receptor-level data.
    #' This includes the fields used for aggregation (e.g., `CDR3`, `V_gene`, `J_gene`),
    #' and optionally unique identifiers for each receptor row. Used to ensure consistency
    #' across processing steps.
    schema_receptor = NULL,

    #' @field schema_repertoire A character vector of the repertoire-table columns that
    #' define how barcodes or annotations are grouped into repertoires (e.g., `sample_id`,
    #' `donor_id`). It is derived in the constructor as every column of the repertoire
    #' table that is not one of the reserved columns listed by `imd_schema()`.
    schema_repertoire = NULL,

    #' @description Creates a new `ImmunData` object.
    #' This constructor expects receptor-level and barcode-level data,
    #' along with a receptor schema defining aggregation and identity fields.
    #'
    #' @param schema A receptor schema, or a character vector of receptor features.
    #' A character vector is converted with `make_receptor_schema(features = schema, chains = NULL)`.
    #' @param annotations A cell/barcode-level dataset mapping barcodes to receptor rows.
    #' Must be a data frame.
    #' @param repertoires A repertoire table, created inside the body of [agg_repertoires].
    #' @param provenance Internal provenance metadata for snapshot lineage (a list or `NULL`).
    initialize = function(schema,
                          annotations,
                          repertoires = NULL,
                          provenance = NULL) {
      # `checkmate::check_*()` only returns TRUE or a message string, so a bare
      # call validates nothing; `assert_*()` aborts on invalid input.
      checkmate::assert_data_frame(annotations)
      checkmate::assert_list(provenance, null.ok = TRUE)

      # A plain character vector is shorthand for a schema with those features
      # and `chains = NULL`.
      if (checkmate::test_character(schema)) {
        schema <- make_receptor_schema(features = schema, chains = NULL)
      }

      private$.annotations <- annotations
      self$schema_receptor <- schema
      private$.provenance <- provenance

      if (!is.null(repertoires)) {
        # Columns reserved for repertoire identity and statistics; whatever
        # remains in the table is the user-defined repertoire schema.
        schema_cols <- imd_schema()
        reserved_cols <- c(
          schema_cols$repertoire,
          schema_cols$strata,
          schema_cols$strata_name,
          schema_cols$n_receptors,
          schema_cols$n_barcodes,
          schema_cols$n_cells
        )
        self$schema_repertoire <- setdiff(colnames(repertoires), reserved_cols)
        private$.repertoire_table <- repertoires
      }
    }
  ),
  active = list(
    #' @field receptors Accessor for the dynamically-created table with receptors.
    #' For a schema with exactly two chains, the distinct receptor rows of the first
    #' locus are full-joined by receptor ID with the rows of the second locus; the
    #' second chain may list several alternative loci separated by `|`.
    #' Otherwise the table holds one row per receptor ID with the schema features.
    receptors = function() {
      receptor_id_col <- imd_schema("receptor")
      locus_col <- imd_schema("locus")
      features <- imd_receptor_features(self$schema_receptor)
      chains <- imd_receptor_chains(self$schema_receptor)

      if (length(chains) != 2) {
        # No chain pairing: keep a single row per receptor identifier.
        return(
          private$.annotations |>
            select(all_of(c(receptor_id_col, features))) |>
            distinct(!!rlang::sym(receptor_id_col), .keep_all = TRUE)
        )
      }

      receptor_data <- private$.annotations |>
        select(all_of(c(receptor_id_col, features, locus_col))) |>
        distinct()

      # Loci are copied into plain locals before being used inside `filter()`.
      locus_1 <- chains[1]
      first_chain <- receptor_data |>
        filter(!!rlang::sym(locus_col) == locus_1)

      if (!grepl("\\|", chains[2])) {
        # Strict pairing: the second chain is exactly one locus.
        locus_2 <- chains[2]
        second_chain <- receptor_data |>
          filter(!!rlang::sym(locus_col) == locus_2)
      } else {
        # Relaxed pairing: the second chain may be any of the `|`-separated
        # loci. Every listed alternative is honoured, not only the first two.
        relaxed_chain_alternatives <- trimws(unlist(strsplit(chains[2], "\\|")))
        second_chain <- receptor_data |>
          filter(!!rlang::sym(locus_col) %in% relaxed_chain_alternatives)
      }

      first_chain |>
        full_join(second_chain, by = receptor_id_col)
    },

    #' @field annotations Accessor for the annotation-level table (`.annotations`).
    annotations = function() {
      private$.annotations
    },

    #' @field repertoires Get a table of repertoires and their basic statistics,
    #' collected and sorted by its first column. `NULL` if no repertoire table is set.
    repertoires = function() {
      # TODO: cache repertoire table to memory if not very big?
      if (is.null(private$.repertoire_table)) {
        return(NULL)
      }
      private$.repertoire_table |>
        collect() |>
        arrange_at(vars(1))
    },

    #' @field metadata Get a table of repertoires without their basic statistics:
    #' only the repertoire column and the `schema_repertoire` columns, collected and
    #' sorted by the first column. `NULL` if no repertoire table is set.
    metadata = function() {
      if (is.null(private$.repertoire_table)) {
        return(NULL)
      }
      metadata_cols <- c(imd_schema("repertoire"), self$schema_repertoire)
      private$.repertoire_table |>
        select(all_of(metadata_cols)) |>
        collect() |>
        arrange_at(vars(1))
    },

    #' @field provenance Read-only accessor for snapshot provenance metadata.
    provenance = function(value) {
      # Active bindings receive `value` only on assignment; reads fall through
      # to the helper, writes are rejected explicitly.
      if (missing(value)) {
        return(imd_get_provenance(self))
      }
      cli::cli_abort("`provenance` is read-only and cannot be assigned directly.")
    }
  )
)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.