Nothing
## ---- include = FALSE---------------------------------------------------------
knitr::opts_chunk$set(
collapse = TRUE,
comment = "#>",
eval = FALSE,
# fig.path = "man/figures/README-",
out.width = "100%"
)
## ---- eval = FALSE------------------------------------------------------------
# install.packages('dsb')
# library(dsb)
#
# adt_norm = DSBNormalizeProtein(
# cell_protein_matrix = cells_citeseq_mtx,
# empty_drop_matrix = empty_drop_citeseq_mtx,
# denoise.counts = TRUE,
# use.isotype.control = TRUE,
# isotype.control.name.vec = rownames(cells_citeseq_mtx)[67:70]
# )
## ---- eval = FALSE------------------------------------------------------------
# library(dsb)
#
# # read raw data using the Seurat function "Read10X"
# raw = Seurat::Read10X("data/raw_feature_bc_matrix/")
# cells = Seurat::Read10X("data/filtered_feature_bc_matrix/")
#
# # define cell-containing barcodes and separate cells and empty drops
# stained_cells = colnames(cells$`Gene Expression`)
# background = setdiff(colnames(raw$`Gene Expression`), stained_cells)
#
# # split the data into separate matrices for RNA and ADT
# prot = raw$`Antibody Capture`
# rna = raw$`Gene Expression`
## ---- eval = FALSE------------------------------------------------------------
# # create metadata of droplet QC stats used in standard scRNAseq processing
# mtgene = grep(pattern = "^MT-", rownames(rna), value = TRUE) # used below
#
# md = data.frame(
# rna.size = log10(Matrix::colSums(rna)),
# prot.size = log10(Matrix::colSums(prot)),
# n.gene = Matrix::colSums(rna > 0),
# mt.prop = Matrix::colSums(rna[mtgene, ]) / Matrix::colSums(rna)
# )
# # add indicator for barcodes Cell Ranger called as cells
# md$drop.class = ifelse(rownames(md) %in% stained_cells, 'cell', 'background')
#
# # remove barcodes with no evidence of capture in the experiment
# md = md[md$rna.size > 0 & md$prot.size > 0, ]
#
## ---- eval = FALSE------------------------------------------------------------
# ggplot(md, aes(x = log10(n.gene), y = prot.size )) +
# theme_bw() +
# geom_bin2d(bins = 300) +
# scale_fill_viridis_c(option = "C") +
# facet_wrap(~drop.class)
## ---- eval = FALSE------------------------------------------------------------
# background_drops = rownames(
# md[ md$prot.size > 1.5 &
# md$prot.size < 3 &
# md$rna.size < 2.5, ]
# )
# background.adt.mtx = as.matrix(prot[ , background_drops])
## ---- eval = FALSE------------------------------------------------------------
#
# # calculate statistical thresholds for droplet filtering.
# cellmd = md[md$drop.class == 'cell', ]
#
# # filter drops with + / - 3 median absolute deviations from the median library size
# rna.mult = (3*mad(cellmd$rna.size))
# prot.mult = (3*mad(cellmd$prot.size))
# rna.lower = median(cellmd$rna.size) - rna.mult
# rna.upper = median(cellmd$rna.size) + rna.mult
# prot.lower = median(cellmd$prot.size) - prot.mult
# prot.upper = median(cellmd$prot.size) + prot.mult
#
# # filter rows based on droplet qualty control metrics
# qc_cells = rownames(
# cellmd[cellmd$prot.size > prot.lower &
# cellmd$prot.size < prot.upper &
# cellmd$rna.size > rna.lower &
# cellmd$rna.size < rna.upper &
# cellmd$mt.prop < 0.14, ]
# )
## ---- eval = FALSE------------------------------------------------------------
# length(qc_cells)
## ---- eval = FALSE------------------------------------------------------------
# cell.adt.raw = as.matrix(prot[ , qc_cells])
# cell.rna.raw = rna[ ,qc_cells]
# cellmd = cellmd[qc_cells, ]
## ---- eval = FALSE------------------------------------------------------------
# # flter
# pm = sort(apply(cell.adt.raw, 1, max))
# pm2 = apply(background.adt.mtx, 1, max)
# head(pm)
## ---- eval = FALSE------------------------------------------------------------
# # remove the non staining protein
# cell.adt.raw = cell.adt.raw[!rownames(cell.adt.raw) == 'CD34_TotalSeqB', ]
# background.adt.mtx = background.adt.mtx[!rownames(background.adt.mtx) == 'CD34_TotalSeqB', ]
#
## ---- eval = FALSE------------------------------------------------------------
# # define isotype controls
# isotype.controls = c("IgG1_control_TotalSeqB", "IgG2a_control_TotalSeqB",
# "IgG2b_control_TotalSeqB")
#
# # normalize and denoise with dsb with
# cells.dsb.norm = DSBNormalizeProtein(
# cell_protein_matrix = cell.adt.raw,
# empty_drop_matrix = background.adt.mtx,
# denoise.counts = TRUE,
# use.isotype.control = TRUE,
# isotype.control.name.vec = isotype.controls
# )
# # note: normalization takes ~ 20 seconds
# # system.time()
# # user system elapsed
# # 20.799 0.209 21.783
## ---- eval = FALSE------------------------------------------------------------
# # dsb with non-standard options
# cell.adt.dsb.2 = DSBNormalizeProtein(
# cell_protein_matrix = cell.adt.raw,
# empty_drop_matrix = background.adt.mtx,
# denoise.counts = TRUE,
# use.isotype.control = TRUE,
# isotype.control.name.vec = rownames(cell.adt.raw)[29:31],
# define.pseudocount = TRUE,
# pseudocount.use = 1,
# scale.factor = 'mean.subtract',
# quantile.clipping = TRUE,
# quantile.clip = c(0.01, 0.99),
# return.stats = TRUE
# )
#
#
## ---- eval = FALSE------------------------------------------------------------
# # Seurat workflow
# library(Seurat)
#
# # integrating with Seurat
# stopifnot(isTRUE(all.equal(rownames(cellmd), colnames(cell.adt.raw))))
# stopifnot(isTRUE(all.equal(rownames(cellmd), colnames(cell.rna.raw))))
#
# # create Seurat object note: min.cells is a gene filter, not a cell filter
# s = Seurat::CreateSeuratObject(counts = cell.rna.raw,
# meta.data = cellmd,
# assay = "RNA",
# min.cells = 20)
#
# # add dsb normalized matrix "cell.adt.dsb" to the "CITE" data (not counts!) slot
# s[["CITE"]] = Seurat::CreateAssayObject(data = cells.dsb.norm)
#
## ---- eval = FALSE------------------------------------------------------------
# # define proteins to use in clustering (non-isptype controls)
# prots = rownames(s@assays$CITE@data)[1:28]
#
# # cluster and run umap
# s = Seurat::FindNeighbors(object = s, dims = NULL,assay = 'CITE',
# features = prots, k.param = 30,
# verbose = FALSE)
#
# # direct graph clustering
# s = Seurat::FindClusters(object = s, resolution = 1,
# algorithm = 3,
# graph.name = 'CITE_snn',
# verbose = FALSE)
# # umap (optional)
# # s = Seurat::RunUMAP(object = s, assay = "CITE", features = prots,
# # seed.use = 1990, min.dist = 0.2, n.neighbors = 30,
# # verbose = FALSE)
#
# # make results dataframe
# d = cbind(s@meta.data,
# as.data.frame(t(s@assays$CITE@data))
# # s@reductions$umap@cell.embeddings)
# )
## ---- eval = FALSE------------------------------------------------------------
# library(magrittr)
# # calculate the median protein expression separately for each cluster
# adt_plot = d %>%
# dplyr::group_by(CITE_snn_res.1) %>%
# dplyr::summarize_at(.vars = prots, .funs = median) %>%
# tibble::remove_rownames() %>%
# tibble::column_to_rownames("CITE_snn_res.1")
# # plot a heatmap of the average dsb normalized values for each cluster
# pheatmap::pheatmap(t(adt_plot),
# color = viridis::viridis(25, option = "B"),
# fontsize_row = 8, border_color = NA)
#
## -----------------------------------------------------------------------------
# # use pearson residuals as normalized values for pca
# DefaultAssay(s) = "RNA"
# s = NormalizeData(s, verbose = FALSE) %>%
# FindVariableFeatures(selection.method = 'vst', verbose = FALSE) %>%
# ScaleData(verbose = FALSE) %>%
# RunPCA(verbose = FALSE)
#
# # set up dsb values to use in WNN analysis (do not normalize with CLR, use dsb normalized values)
# DefaultAssay(s) = "CITE"
# VariableFeatures(s) = prots
# s = s %>%
# ScaleData() %>%
# RunPCA(reduction.name = 'apca', verbose = FALSE)
#
# # run WNN
# s = FindMultiModalNeighbors(
# s, reduction.list = list("pca", "apca"),
# dims.list = list(1:30, 1:18),
# modality.weight.name = "RNA.weight",
# verbose = FALSE
# )
#
# # cluster
# s <- FindClusters(s, graph.name = "wsnn",
# algorithm = 3,
# resolution = 1.5,
# verbose = FALSE,
# random.seed = 1990)
## -----------------------------------------------------------------------------
# ## WNN with dsb values
# # use RNA pearson residuals as normalized values for RNA pca
# DefaultAssay(s) = "RNA"
# s = NormalizeData(s, verbose = FALSE) %>%
# FindVariableFeatures(selection.method = 'vst', verbose = FALSE) %>%
# ScaleData(verbose = FALSE) %>%
# RunPCA(verbose = FALSE)
#
#
# # set up dsb values to use in WNN analysis
# DefaultAssay(s) = "CITE"
# VariableFeatures(s) = prots
#
# # run true pca to initialize dr pca slot for WNN
# ## Not used {
# s = ScaleData(s, assay = 'CITE', verbose = FALSE)
# s = RunPCA(s, reduction.name = 'pdsb',features = VariableFeatures(s), verbose = FALSE)
# # }
#
# # make matrix of normalized protein values to add as dr embeddings
# pseudo = t(GetAssayData(s,slot = 'data',assay = 'CITE'))[,1:29]
# colnames(pseudo) = paste('pseudo', 1:29, sep = "_")
# s@reductions$pdsb@cell.embeddings = pseudo
#
# # run WNN directly using dsb normalized values.
# s = FindMultiModalNeighbors(
# object = s,
# reduction.list = list("pca", "pdsb"),
# weighted.nn.name = "dsb_wnn",
# knn.graph.name = "dsb_knn",
# modality.weight.name = "dsb_weight",
# snn.graph.name = "dsb_snn",
# dims.list = list(1:30, 1:29),
# verbose = FALSE
# )
#
# # cluster based on the join RNA and protein graph
# s = FindClusters(s, graph.name = "dsb_knn",
# algorithm = 3,
# resolution = 1.5,
# random.seed = 1990,
# verbose = FALSE)
## -----------------------------------------------------------------------------
# # create multimodal heatmap
# vf = VariableFeatures(s,assay = "RNA")
#
# # find marker genes for the joint clusters
# Idents(s) = "dsb_knn_res.1.5"
# DefaultAssay(s) = "RNA"
# rnade = FindAllMarkers(s, features = vf, only.pos = TRUE, verbose = FALSE)
# gene_plot = rnade %>%
# dplyr::filter(avg_log2FC > 1 ) %>%
# dplyr::group_by(cluster) %>%
# dplyr::top_n(3) %$% gene %>% unique
#
#
# cite_data = GetAssayData(s,slot = 'data',assay = 'CITE') %>% t()
# rna_subset = GetAssayData(s,assay = 'RNA',slot = 'data')[gene_plot, ] %>%
# as.data.frame() %>%
# t() %>%
# as.matrix()
#
# # combine into dataframe
# d = cbind(s@meta.data, cite_data, rna_subset)
#
# # calculate the median protein expression per cluster
# dat_plot = d %>%
# dplyr::group_by(dsb_knn_res.1.5) %>%
# dplyr::summarize_at(.vars = c(prots, gene_plot), .funs = median) %>%
# tibble::remove_rownames() %>%
# tibble::column_to_rownames("dsb_knn_res.1.5")
#
## ---- fig.height=4, fig.width=3-----------------------------------------------
# # make a combined plot
# suppressMessages(library(ComplexHeatmap)); ht_opt$message = FALSE
#
# # protein heatmap
# # protein heatmap
# prot_col = circlize::colorRamp2(breaks = seq(-1,25, by = 1),
# colors = viridis::viridis(n = 27, option = "B"))
# p1 = Heatmap(t(dat_plot)[prots, ],
# name = "protein",
# col = prot_col,
# use_raster = T,
# row_names_gp = gpar(color = "black", fontsize = 5)
# )
#
#
# # mRNA heatmap
# mrna = t(dat_plot)[gene_plot, ]
# rna_col = circlize::colorRamp2(breaks = c(-2,-1,0,1,2),
# colors = colorspace::diverge_hsv(n = 5))
# p2 = Heatmap(t(scale(t(mrna))),
# name = "mRNA",
# col = rna_col,
# use_raster = T,
# clustering_method_columns = 'average',
# column_names_gp = gpar(color = "black", fontsize = 7),
# row_names_gp = gpar(color = "black", fontsize = 5))
#
#
# # combine heatmaps
# ht_list = p1 %v% p2
# draw(ht_list)
#
## ---- eval = FALSE------------------------------------------------------------
# library(Seurat)
# umi = Read10X(data.dir = 'data/raw_feature_bc_matrix/')
# k = 3
# barcode.whitelist =
# rownames(
# CreateSeuratObject(counts = umi,
# min.features = k, # retain all barcodes with at least k raw mRNA
# min.cells = 800, # this just speeds up the function by removing genes.
# )@meta.data
# )
#
# write.table(barcode.whitelist,
# file =paste0(your_save_path,"barcode.whitelist.tsv"),
# sep = '\t', quote = FALSE, col.names = FALSE, row.names = FALSE)
#
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.