Benchmark 1. MCA lung dataset annotation using ref_tabula_muris_drop reference


# Query data: MCA lung expression matrix and its per-cell metadata.
l_mat <- clustifyrdata::MCA_lung_mat
l_meta <- clustifyrdata::MCA_lung_meta

# Find lung references, remove generic terms.
# grep() needs the vector to search: the reference's column names, since
# `lung_cols` is used below to select columns of `ref_tabula_muris_drop`.
lung_cols <- grep(
  "-Lung",
  colnames(ref_tabula_muris_drop),
  value = TRUE
)

tml_ref <- ref_tabula_muris_drop[, lung_cols]
# NOTE(review): columns 8 and 13 are presumably the "generic terms" mentioned
# above; the positions are hard-coded, so confirm them against the current
# column order of the reference.
tml_ref <- tml_ref[, -c(8, 13)]

# Default run with all genes; time the correlation and the calling step.
start <- proc.time()

res <- clustify(
  input = l_mat,
  ref_mat = tml_ref,
  metadata = l_meta,
  cluster_col = "Annotation"
)

res_allgenes <- cor_to_call(
  cor_mat = res,
  metadata = l_meta,
  cluster_col = "Annotation"
)

end <- proc.time()

# Relabel the result columns for display, then report timing and all rows.
names(res_allgenes) <- c("MCA annotation", "clustifyr call", "r")
print(end - start)
print(res_allgenes, n = nrow(res_allgenes))

Benchmark 2. Using sorted microarray data to classify 10x PBMC example data, available in the clustifyrdata package

# Query data (10x PBMC matrix and metadata) and the microarray reference.
full_pbmc_matrix <- clustifyrdata::pbmc_matrix
full_pbmc_meta <- clustifyrdata::pbmc_meta
microarray_ref <- clustifyrdata::ref_hema_microarray

# Time the correlation and the calling step together.
start <- proc.time()

# Restrict the comparison to the first 500 entries of `pbmc_vargenes`.
res <- clustify(
  input = full_pbmc_matrix,
  ref_mat = microarray_ref,
  metadata = full_pbmc_meta,
  query_genes = pbmc_vargenes[1:500],
  cluster_col = "classified"
)

# NOTE(review): `threshold = 0.5` is presumably the minimum correlation
# required to assign a call -- confirm against the cor_to_call() docs.
res2 <- cor_to_call(res, threshold = 0.5)

end <- proc.time()

# Relabel the result columns for display, then report timing and all rows.
names(res2) <- c("manual annotation", "clustifyr call", "r")
print(end - start)
print(res2, n = nrow(res2))
  1. Please see manuscript for full benchmarking.

Comparison with other methods

Using Tabula Muris (drop and FACS samples) data from 12 shared tissues, which can be downloaded as Seurat objects

  1. Building reference and then mapping:

default clustify, with all genes

clustify, pulling var.genes from seurat objects

clustify, using M3Drop for feature selection

clustify, using per_cell = TRUE option, and then assign cluster consensus ident with collapse_to_cluster = TRUE

clustify, after ALRA imputation, using per_cell = TRUE option, and then assign cluster consensus ident with collapse_to_cluster = TRUE


  1. Mapping from prebuilt all-encompassing references to the drop samples:

clustify, using ref_tabula_muris_facs

SingleR, using default built-in mouse references without fine-tuning

  1. Generate marker gene list (of 30 genes per reference identity), and then mapping

default clustify_list


