knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  fig.path = "man/figures/",
  dpi = 300
)
st <- data.table::fread("https://bioconductor.org/packages/stats/bioc/clustifyr/clustifyr_stats.tab", data.table = FALSE, verbose = FALSE)
st_all <- dplyr::filter(st, Month == "all")
cl <- as.numeric(data.table::fread("https://raw.githubusercontent.com/raysinensis/clone_counts_public/main/clustifyr_total.txt", verbose = FALSE))

clustifyr

R-CMD-check-bioc Codecov test coverage platforms bioc #downloads

clustifyr classifies cells and clusters in single-cell RNA sequencing experiments using reference bulk RNA-seq data sets, sorted microarray expression data, single-cell gene signatures, or lists of marker genes.

Installation

Install the Bioconductor version with:

if (!requireNamespace("BiocManager", quietly = TRUE))
    install.packages("BiocManager")

BiocManager::install("clustifyr")

Install the development version with:

BiocManager::install("rnabioco/clustifyr")

Example usage

In this example we use the following built-in input data:

We then calculate correlation coefficients and plot them on a pre-calculated projection (stored in pbmc_meta).

library(clustifyr)

# calculate correlation
res <- clustify(
  input = pbmc_matrix_small,
  metadata = pbmc_meta$classified,
  ref_mat = cbmc_ref,
  query_genes = pbmc_vargenes
)

# print assignments
cor_to_call(res)

# plot assignments on a projection
plot_best_call(
  cor_mat = res,
  metadata = pbmc_meta,
  cluster_col = "classified"
)

clustify() can take a clustered SingleCellExperiment or seurat object (both v2 and v3) and assign identities.

# for SingleCellExperiment
clustify(
  input = sce_small,          # an SCE object
  ref_mat = cbmc_ref,         # matrix of RNA-seq expression data for each cell type
  cluster_col = "cell_type1", # name of column in meta.data containing cell clusters
  obj_out = TRUE              # output SCE object with cell type inserted as "type" column
) 

library(Seurat)
# for Seurat3/4
clustify(
  input = s_small3,
  cluster_col = "RNA_snn_res.1",
  ref_mat = cbmc_ref,
  seurat_out = TRUE
)

# New output option, directly as a vector (in the order of the metadata), which can then be inserted into metadata dataframes and other workflows
clustify(
  input = s_small3,
  cluster_col = "RNA_snn_res.1",
  ref_mat = cbmc_ref,
  vec_out = TRUE
)

New reference matrix can be made directly from SingleCellExperiment and Seurat objects as well. Other scRNAseq experiment object types are supported as well.

# make reference from SingleCellExperiment objects
sce_ref <- object_ref(
  input = sce_small,               # SCE object
  cluster_col = "cell_type1"       # name of column in colData containing cell identities
)

# make reference from seurat objects
s_ref <- seurat_ref(
  seurat_object = s_small3,
  cluster_col = "RNA_snn_res.1"
)

head(s_ref)

clustify_lists() handles identity assignment of matrix or SingleCellExperiment and seurat objects based on marker gene lists.

clustify_lists(
  input = pbmc_matrix_small,
  metadata = pbmc_meta,
  cluster_col = "classified",
  marker = pbmc_markers,
  marker_inmatrix = FALSE
)

clustify_lists(
  input = s_small3,
  marker = pbmc_markers,
  marker_inmatrix = FALSE,
  cluster_col = "RNA_snn_res.1",
  seurat_out = TRUE
)

Additional resources



NCBI-Hackathons/clustifyR documentation built on April 9, 2024, 7:33 a.m.