# Document-wide knitr chunk options.
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>",
  # figures produced by chunks are written under this path
  fig.path = "man/figures/",
  dpi = 300
)
# Fetch usage statistics referenced elsewhere in this document.
# Each statement sits on its own line: R cannot parse several assignments
# placed on one line without a separator.

# Bioconductor download statistics table for clustifyr, read as a plain
# data.frame (data.table = FALSE).
st <- data.table::fread(
  "https://bioconductor.org/packages/stats/bioc/clustifyr/clustifyr_stats.tab",
  data.table = FALSE,
  verbose = FALSE
)

# Keep only the rows whose Month column equals "all".
st_all <- dplyr::filter(st, Month == "all")

# Contents of clustifyr_total.txt coerced to numeric
# (presumably a clone count, judging by the repository name; confirm).
cl <- as.numeric(
  data.table::fread(
    "https://raw.githubusercontent.com/raysinensis/clone_counts_public/main/clustifyr_total.txt",
    verbose = FALSE
  )
)
clustifyr classifies cells and clusters in single-cell RNA sequencing experiments using reference bulk RNA-seq data sets, sorted microarray expression data, single-cell gene signatures, or lists of marker genes.
Install the Bioconductor version with:
# Install BiocManager only when it is not already available, then use it to
# install clustifyr. The `if` body is braced and the install call is a
# separate statement so the snippet parses and runs when pasted as-is.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}

BiocManager::install("clustifyr")
Install the development version with:
# "owner/repo" form: installs from the rnabioco/clustifyr repository.
BiocManager::install(pkgs = "rnabioco/clustifyr")
In this example we use the following built-in input data:
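- an expression matrix of single-cell RNA-seq data (`pbmc_matrix_small`)
- a metadata data.frame (`pbmc_meta`), with cluster information stored (`"classified"`)
- a vector of variable genes (`pbmc_vargenes`)
- a reference matrix of expression data for each cell type (`cbmc_ref`)

We then calculate correlation coefficients and plot them on a pre-calculated projection (stored in `pbmc_meta`).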
# Quick-start example using the built-in PBMC data.
# Line breaks are restored: on a single line everything after the first `#`
# is treated as a comment, so none of the calls below would run.
library(clustifyr)

# calculate correlation
res <- clustify(
  input = pbmc_matrix_small,
  metadata = pbmc_meta$classified,
  ref_mat = cbmc_ref,
  query_genes = pbmc_vargenes
)

# print assignments
cor_to_call(res)

# plot assignments on a projection
plot_best_call(
  cor_mat = res,
  metadata = pbmc_meta,
  cluster_col = "classified"
)
`clustify()` can take a clustered SingleCellExperiment or Seurat object (from v2 up to v5) and assign identities.
# clustify() on SingleCellExperiment and Seurat objects.
# Line breaks are restored: collapsed onto one line, the leading `#` turns
# the whole example into a comment.

# for SingleCellExperiment
sce_small <- sce_pbmc()
clustify(
  input = sce_small, # an SCE object
  ref_mat = cbmc_ref, # matrix of RNA-seq expression data for each cell type
  cluster_col = "cell_type", # name of column in meta.data containing cell clusters
  obj_out = TRUE # output SCE object with cell type inserted as "type" column
)

# for Seurat
library(Seurat)
s_small <- so_pbmc()
clustify(
  input = s_small,
  cluster_col = "RNA_snn_res.0.5",
  ref_mat = cbmc_ref,
  seurat_out = TRUE
)

# New output option, directly as a vector (in the order of the metadata),
# which can then be inserted into metadata dataframes and other workflows
clustify(
  input = s_small,
  cluster_col = "RNA_snn_res.0.5",
  ref_mat = cbmc_ref,
  vec_out = TRUE
)[1:10]
A new reference matrix can also be built directly from SingleCellExperiment and Seurat objects. Other scRNA-seq experiment object types are supported as well.
# Build reference matrices from existing single-cell objects.
# Line breaks are restored: collapsed onto one line, the leading `#` turns
# the whole example into a comment.

# make reference from SingleCellExperiment objects
sce_small <- sce_pbmc()
sce_ref <- object_ref(
  input = sce_small, # SCE object
  cluster_col = "cell_type" # name of column in colData containing cell identities
)

# make reference from seurat objects
s_small <- so_pbmc()
s_ref <- seurat_ref(
  seurat_object = s_small,
  cluster_col = "RNA_snn_res.0.5"
)

head(s_ref)
`clustify_lists()` assigns identities to a matrix, SingleCellExperiment, or Seurat object based on marker gene lists.
# Marker-list based assignment. The two calls are separate statements;
# fused on one line they do not parse.

# matrix input: clusters are read from the "classified" column of pbmc_meta
clustify_lists(
  input = pbmc_matrix_small,
  metadata = pbmc_meta,
  cluster_col = "classified",
  marker = pbmc_markers,
  marker_inmatrix = FALSE
)

# Seurat input: `s_small` must already exist (e.g. s_small <- so_pbmc())
clustify_lists(
  input = s_small,
  marker = pbmc_markers,
  marker_inmatrix = FALSE,
  cluster_col = "RNA_snn_res.0.5",
  seurat_out = TRUE
)
Script for benchmarking, compatible with scRNAseq_Benchmark
Additional reference data (including Tabula Muris, ImmGen, etc.) are available in a supplemental package, clustifyrdatahub. Also see list for individual downloads.
See the FAQ for more details.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.