knitr::opts_chunk$set( collapse = TRUE, comment = "#>" )
library(dsb) library(MASS) library(mclust)
To speed up compute time for normalization ~ 10-fold, set fast.km = TRUE
.
Specify this argument with either the original method using empty droplets DSBNormalizeProtein()
, or the dsb method that only requires the raw counts for cells and no empty drops ModelNegativeADTnorm()
.
Below is a benchmark showing 3 normalizations on a 1e6 cell benchmark
How to use the fast method for datasets without empty drops:
isotypes = c("MouseIgG1kappaisotype_PROT", "MouseIgG2akappaisotype_PROT", "Mouse IgG2bkIsotype_PROT", "RatIgG2bkIsotype_PROT") norm.adt = ModelNegativeADTnorm( cell_protein_matrix = cells_citeseq_mtx, fast.km = TRUE, denoise.counts = TRUE, use.isotype.control = TRUE, isotype.control.name.vec = isotypes )
How to use the fast method for datasets with empty droplets specified (see main vignette):
norm.adt = DSBNormalizeProtein( cell_protein_matrix = dsb::raw.adt.matrix, empty_drop_matrix = dsb::empty_drop_citeseq_mtx, fast.km = TRUE, denoise.counts = TRUE, use.isotype.control = TRUE, isotype.control.name.vec = isotypes )
Differences in the resulting normalized values between the two methods:
r = "deepskyblue3" library(dsb) # specify isotypes isotypes.names = rownames(cells_citeseq_mtx)[67:70] norm = DSBNormalizeProtein( # set fast.km = TRUE to run the fast method fast.km = TRUE, cell_protein_matrix = dsb::cells_citeseq_mtx, empty_drop_matrix = dsb::empty_drop_citeseq_mtx, denoise.counts = TRUE, use.isotype.control = TRUE, isotype.control.name.vec = rownames(cells_citeseq_mtx)[67:70], ) # original method norm.original = dsb::DSBNormalizeProtein( cell_protein_matrix = dsb::cells_citeseq_mtx, empty_drop_matrix = dsb::empty_drop_citeseq_mtx, denoise.counts = TRUE, use.isotype.control = TRUE, isotype.control.name.vec = rownames(cells_citeseq_mtx)[67:70], ) n.original = norm.original$dsb_normalized_matrix n.fast = norm$dsb_normalized_matrix # individual correlations par(mfrow=c(1,2)) plot(n.original['CD8_PROT', ], n.fast['CD8_PROT', ], pch = 16, font.main = 1, col = adjustcolor(r, alpha.f = 0.2), cex = 0.6, xlab = "dsb original", ylab = "dsb km.fast", main = 'CD8 Normalized ADT' ) plot(n.original['CD4_PROT', ], n.fast['CD4_PROT', ], pch = 16, font.main = 1, cex = 0.6, col = adjustcolor(r, alpha.f = 0.2), xlab = "dsb original", ylab = "dsb km.fast", main = 'CD4 Normalized ADT' )
Correlation of normalized values:
correlations <- sapply(seq_len(nrow(n.original)), function(x){ cor(n.original[x, ], n.fast[x, ], method = 'pearson') }) # plot hist(correlations, breaks = 20, xlim = c(0.97, 1), main = "correlation per protein\n km.fast vs original method", font.main = 1, xlab = "Pearson correlation", freq = FALSE, col = "lightgray", border = "white") rug(correlations)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.