# Use a wide console so printed output is not wrapped at the default width.
options(width = 150)

# Default knitr chunk options applied to every chunk in this document.
knitr::opts_chunk$set(
  message = FALSE,
  warning = FALSE,
  tidy = FALSE,
  collapse = TRUE
)
We will analyze the CITE-seq human cord blood mononuclear cell (CBMC) data. The dataset can be installed with the SeuratData package.
# Install Seurat v4.0.0 and SeuratData when they are not already available.
# `||` short-circuits, so packageVersion() is only queried when Seurat is
# actually installed; the elementwise `|` evaluates both sides and would fail
# with an error on a machine that has no Seurat at all.
if (!requireNamespace("Seurat", quietly = TRUE) ||
      utils::packageVersion("Seurat") < "4.0.0") {
  remotes::install_version("Seurat", version = "4.0.0")
}
if (!requireNamespace("SeuratData", quietly = TRUE)) {
  devtools::install_github('satijalab/seurat-data')
}

library(FuseNet)
library(Seurat)
library(SeuratData)

# Install the cbmc dataset package, then attach it.
InstallData("cbmc")
library(cbmc.SeuratData)

# Find highly variable features/genes
cbmc <- FindVariableFeatures(cbmc, selection.method = "vst", verbose = FALSE)
# RNA counts restricted to the variable features of cbmc.
variable_genes <- VariableFeatures(cbmc)
data_rna <- cbmc@assays$RNA@counts[variable_genes, ]

# Create the FuseNet object for the RNA modality.
cbmc_rna <- InitiateFuseNet(
  raw_data = data_rna,
  project_name = "RNA",
  normalization = "cosine",
  pca_dims = 20,
  kernel = "gaussian",
  k = 15
)

# Geometric sketching step (3000 cells requested via geom_size).
cbmc_rna <- GeomSketch(
  object = cbmc_rna,
  geom_pca_dims = 20,
  geom_size = 3000,
  sketch_n_pca = 20,
  sketch_k = 15
)

# Run FuseNet on the RNA modality and report how long the call took.
rna_start <- proc.time()
cbmc_rna <- RunFuseNet(
  object = cbmc_rna,
  n_iters = 100,
  pca_dims = 20,
  k = 15,
  ratio = 0.5,
  norm_type = "l1"
)
print(x = proc.time() - rna_start)
# Protein (ADT) counts; all features are used.
data_prt <- cbmc@assays$ADT@counts

# Create the FuseNet object for the protein modality.
cbmc_prt <- InitiateFuseNet(
  raw_data = data_prt,
  project_name = "Protein",
  normalization = "cosine",
  kernel = "gaussian",
  k = 15,
  verbose = FALSE
)

# Geometric sketching step (3000 cells requested via geom_size).
cbmc_prt <- GeomSketch(
  object = cbmc_prt,
  geom_size = 3000,
  geom_pca_dims = 3,
  sketch_n_pca = 0,
  sketch_k = 15
)

# Run FuseNet on the protein modality and report how long the call took.
protein_start <- proc.time()
cbmc_prt <- RunFuseNet(
  object = cbmc_prt,
  n_iters = 100,
  ratio = 0.5,
  k = 15,
  norm_type = "l1"
)
print(x = proc.time() - protein_start)
We see that the mouse cells, which were used as spike-in controls, were defined almost exclusively by the RNA modality (RNA weight > 0.8). This is because no anti-mouse antibodies were used in the study.
In general, lymphocytes were well defined in the ADT modality (protein weights > 0.6). In particular, CD8+ and CD4+ naive and memory T cells were better defined by the ADT data. On the other hand, dendritic cells (DCs and pDCs) were better defined by the RNA data because their surface markers are absent from the ADT data.
# Fuse the RNA and protein FuseNet objects.
fused <- FuseData(cbmc_rna, cbmc_prt, project_k = 15)

# Copy each modality's row of the fused weights into a "<modality>_Weight"
# column of the cell metadata.
for (modality in c("RNA", "Protein")) {
  weight_column <- paste0(modality, "_Weight")
  cbmc@meta.data[weight_column] <- fused$fused_weight[modality, ]
}

# Violin plots of both weights, grouped by the RNA-based annotations.
VlnPlot(
  object = cbmc,
  features = c("RNA_Weight", "Protein_Weight"),
  sort = TRUE,
  group.by = "rna_annotations"
)
We can perform UMAP dimensionality reduction or clustering on the fused data for downstream analysis.
# Compute a two-component UMAP embedding from the fused output.
umap.dims <- uwot::umap(X = fused$fused_dist, n_components = 2)

# Label rows with the cell names and columns with names matching the
# "UMAP_" key used below.
dimnames(x = umap.dims) <- list(Cells(cbmc), c("UMAP_1", "UMAP_2"))

# Attach the embedding to the Seurat object as the "umap" reduction.
# CreateDimReducObject() stores `embeddings` as the reduction's cell
# embeddings, so no separate assignment to the cell.embeddings slot is needed.
# NOTE(review): `assay` is given two names here; confirm that downstream code
# does not expect a single assay name for this reduction.
cbmc@reductions$umap <- CreateDimReducObject(
  embeddings = umap.dims,
  key = "UMAP_",
  assay = c("RNA", "ADT")
)

# Plot the embedding, colored and labeled by the RNA-based annotations.
DimPlot(
  object = cbmc,
  group.by = "rna_annotations",
  reduction = "umap",
  label = TRUE,
  repel = TRUE,
  label.box = TRUE
)
# Print the R session details (R version and loaded packages).
print(x = utils::sessionInfo())
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.