options(tinytex.verbose = TRUE) knitr::opts_chunk$set( cache = TRUE, cache.lazy = FALSE, tidy = TRUE )
library(LinQView) library(Seurat) library(cowplot) library(ggplot2)
In this tutorial, we use a public CITE-seq dataset to illustrate Joint analysis using LinQ-seq. Data could be download from NCBI: RNA (ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE100nnn/GSE100866/suppl/GSE100866_CBMC_8K_13AB_10X-RNA_umi.csv.gz) ADT (ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE100nnn/GSE100866/suppl/GSE100866_CBMC_8K_13AB_10X-ADT_umi.csv.gz)
# Load in the RNA UMI matrix cbmc.rna <- as.sparse(x = read.csv(file = "../../../Data/citeseq/GSE100866_CBMC_8K_13AB_10X-RNA_umi.csv.gz", sep = ",", header = TRUE, row.names = 1)) cbmc.rna <- CollapseSpeciesExpressionMatrix(object = cbmc.rna) # Load in the ADT UMI matrix cbmc.adt <- as.sparse(x = read.csv(file = "../../../Data/citeseq/GSE100866_CBMC_8K_13AB_10X-ADT_umi.csv.gz", sep = ",", header = TRUE, row.names = 1)) cbmc.adt <- cbmc.adt[setdiff(x = rownames(x = cbmc.adt), y = c("CCR5", "CCR7", "CD10")), ]
t1 <- Sys.time() cbmc <- createObject(rna = cbmc.rna, adt = cbmc.adt) t2 <- Sys.time() t2 - t1
for this dataset, we don't need to filter out unwanted cells
cbmc <- subset(cbmc, subset = nFeature_RNA > 200 & nFeature_RNA < 2500 & percent.mt < softThreshold(cbmc))
for this dataset, we don't need to filter out unwanted genes
# remove Ig genes #cbmc <- removeGene(object = cbmc,pattern = '^IG[HKL]')
data Normalization for both ADT (CLR) and RNA (log)
t1 <- Sys.time() cbmc <- dataNormalization(object = cbmc) t2 <- Sys.time() t2 - t1
Call seurat function to identify highly variable genes (HVG) for RNA data
t1 <- Sys.time() cbmc <- FindVariableFeatures(object = cbmc) # directly use Seurat Function t2 <- Sys.time() t2 - t1
Scale data for both ADT and RNA
t1 <- Sys.time() cbmc <- dataScaling(object = cbmc) t2 <- Sys.time() t2 - t1
directly call Seurat function for linear dimension reduction (PCA)
t1 <- Sys.time() cbmc <- RunPCA(cbmc, features = VariableFeatures(object = cbmc), verbose = FALSE) # directly use Seurat Function t2 <- Sys.time() t2 - t1
call Seurat function JackStraw to determine number of PCs
#cbmc <- JackStraw(cbmc, num.replicate = 100) #cbmc <- ScoreJackStraw(cbmc, dims = 1:20) #JackStrawPlot(cbmc, dims = 1:20) #ElbowPlot(cbmc)
calculate cell-cell distances for RNA, ADT and joint. number of PC was set to 20 by default.
t1 <- Sys.time() cbmc <- jointDistance(object = cbmc, keep.rna = TRUE, keep.adt = TRUE, dims = 25) t2 <- Sys.time() t2 - t1
run UMAP as Non-linear dimension reduction for RNA, ADT and joint analysis.
t1 <- Sys.time() cbmc <- tsneFromDistane(object = cbmc, assay = "All") t2 <- Sys.time() t2 - t1
t1 <- Sys.time() cbmc <- clusteringFromDistance(object = cbmc, assay = "All", resolution = c(0.9,0.9,0.9)) t2 <- Sys.time() t2 - t1 # contribution of two modalities distHeatMap(object = cbmc)
plots <- generateGridDimPlot(cbmc, legend = FALSE, darkTheme = FALSE) listPlot(object = plots, align = "h") ###### user also can only plot some of those plots by index, figure ident or figure map info #listPlot(object = plots, fig.ident = "RNA") #listPlot(object = plots, fig.ident = "RNA", fig.map = "RNA") #user can use plotInfo() function to get index, figure ident and figure map information, then plot figures by index plotInfo(plots) #listPlot(object = plots, fig.id = 1)
# Heatmap for joint clusters heatMapPlot(object = cbmc, group.by = "jointClusterID", height.rel = 1, adt.label = TRUE) # Heatmap for RNA clusters heatMapPlot(object = cbmc, group.by = "rnaClusterID", height.rel = 1, adt.label = TRUE) # Heatmap for ADT clusters heatMapPlot(object = cbmc, group.by = "adtClusterID", height.rel = 1, adt.label = TRUE)
VlnPlot(cbmc, features = c("rna_CD8A", "adt_CD8", "rna_NCAM1", "adt_CD56", "rna_CD3G", "adt_CD3"), group.by = 'jointClusterID', pt.size = 0, ncol = 2)
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.