knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)
library(rscrublet)
library(Matrix)

Run rscrublet on example dataset

dim(pbmc8k)
pbmc8k[1:2,1:2]

pbmc8k dataset is sparse cell*gene dgTMatrix with UMI counts

scrr = scrub_doublets(E_obs = pbmc8k,expected_doublet_rate=0.06,min_counts=2, min_cells=3, min_gene_variability_pctl=85, n_prin_comps=30)
# set threshould automatically 
scrr=call_doublets(scrr)
# examine score distribution
plot_doublet_histogram(scrr)
# find predicted doublets
head(rownames(pbmc8k)[scrr$predicted_doublets])

Run rscrublet on text files


Define temporary folder to store the data

tmpdir = '~/rscrublet.test'

download the data

if(dir.exists(tmpdir))
  unlink(tmpdir)
dir.create(tmpdir)
download.file("https://cf.10xgenomics.com/samples/cell-exp/4.0.0/SC3_v3_NextGem_SI_Neuron_10K/SC3_v3_NextGem_SI_Neuron_10K_filtered_feature_bc_matrix.tar.gz", destfile =  paste0(tmpdir,"/data.tar.gz"))
system(paste0('cd ',tmpdir,'; tar -xzvf data.tar.gz'))

load into R

m = t(readMM(paste0(tmpdir,'/filtered_feature_bc_matrix/matrix.mtx.gz')))
rownames(m) = read.table(paste0(tmpdir,'/filtered_feature_bc_matrix/barcodes.tsv.gz'))[,1]
colnames(m) = read.table(paste0(tmpdir,'/filtered_feature_bc_matrix/features.tsv.gz'))[,1]
dim(m)
m[1:2,1:2]

run rscrublet

scrr = scrub_doublets(E_obs = m,expected_doublet_rate=0.06,min_counts=2, min_cells=3, min_gene_variability_pctl=85, n_prin_comps=30)

set threshould automatically

scrr=call_doublets(scrr)
# examine score distribution
plot_doublet_histogram(scrr,breaks = 100)

Run rscrublet on Seurat dataset

prepare data

library(Seurat)
download.file("https://cf.10xgenomics.com/samples/cell-exp/4.0.0/Parent_NGSC3_DI_PBMC/Parent_NGSC3_DI_PBMC_filtered_feature_bc_matrix.h5",
              destfile =  paste0(tmpdir,"/pbmc10k_filt.h5"))
filt.matrix = Read10X_h5(paste0(tmpdir,"/pbmc10k_filt.h5"),use.names = T)
srat    = CreateSeuratObject(counts = filt.matrix)
srat = NormalizeData(srat)
srat = ScaleData(srat)
srat = FindVariableFeatures(srat)
srat = RunPCA(srat, verbose = FALSE,npcs=15,approx=FALSE)
srat = RunUMAP(srat, reduction = "pca", dims = 1:15, verbose = FALSE)
DimPlot(srat)

run rscrublet

count_matrix = t(as(srat@assays$RNA@counts,'dgTMatrix'))
count_matrix[1:2,1:3]
scrr = scrub_doublets(E_obs = count_matrix,expected_doublet_rate=0.06,min_counts=2, min_cells=3, min_gene_variability_pctl=85, n_prin_comps=30)
scrr=call_doublets(scrr)
plot_doublet_histogram(scrr)
srat$doublet.score = scrr$doublet_scores_obs
FeaturePlot(srat,features = 'doublet.score',cols=c('gray','red'))

Run rscrublet on SCE dataset

prepare data

library(scRNAseq)
sce.zeisel = ZeiselBrainData()
count_matrix = as(t(sce.zeisel@assays@data$counts),'dgTMatrix')
count_matrix[1:2,1:2]
scrr = scrub_doublets(E_obs = count_matrix,expected_doublet_rate=0.06,min_counts=2, min_cells=3, min_gene_variability_pctl=85, n_prin_comps=30)

seems there are no doublets

scrr=call_doublets(scrr)
plot_doublet_histogram(scrr)


iaaaka/Rscrublet documentation built on Dec. 20, 2021, 5:57 p.m.