In Ghosn-Lab/BatchNorm: Data Matrix Normalization and Merging Strategies Minimize Batch-specific Systemic Variation in scRNA-Seq Data

library(knitr)
library(dplyr)
library(Seurat)
library(ggplot2)
library(lisi)

opts_chunk$set(
  fig.pos = "!h", out.extra = "", warning = F, 
  fig.align = "center", fig.height = 7, fig.width = 7
)

Harmony Batch Correction

Processing Samples 1-3 Seurat objects

Data filtering & normalization

library(BatchNorm)
library(harmony)
# Import unfiltered Seurat object (included with 'BatchNorm' package)
data(PBMCs)

# Run "standard" Seurat + Harmony workflow:
## "htmlpreview.github.io/?https://github.com/immunogenomics/harmony/blob/master/docs/SeuratV3.html"
# Including filtering by mitochondrial percentage (+5 SD)
# Including data normalization, variable gene selection and gene scaling (performed on all samples together)
PBMCs <- PBMCs %>% 
         MitoFilter() %>% 
         NormalizeData(normalization.method = "LogNormalize", assay = "RNA", scale.factor = 10000) %>% 
         NormalizeData(verbose = FALSE, assay = "ADT", normalization.method = "CLR") %>%
         FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
         ScaleData() %>% 
         RunPCA(npcs = 30) %>% 
         RunHarmony("orig.ident", plot_convergence = TRUE)

Generating a UMAP and clusters

## Generating a UMAP and clusters
PBMCs <- PBMCs %>% 
         RunUMAP(reduction = "harmony", dims = 1:14) %>%
         FindNeighbors(reduction = "harmony", dims = 1:14) %>%
         FindClusters(resolution = .8)

UMAPPlot(PBMCs, cols = colors.use, group.by = "orig.ident") + ggtitle("Harmony: Sample Identity")

Measuring sample-UMAP integration (generating an iLISI score)

GetiLISI(object = PBMCs, nSamples = 3)

Cell Typing of "harmonized" PBMCs object for CMS

Convert cluster classifications to cell type classifications

# For complete cell classification workflow see our vignette "Biaxial Gating of a Single Sample"
# More details can be found in figure S3 of our manuscript "Data Matrix Normalization and Merging Strategies Minimize Batch-specific Systemic Variation in scRNA-Seq Data."
UMAPPlot(PBMCs, cols = colors.use, pt.size = 2, 
         group.by = "seurat_clusters", label = T) + ggtitle("Harmony: Clusters")
# B_Cells = 10, 11
# T_CD4 = 0, 1, 13, 16
# No TReg cluster (contained within cluster 1)
# T_CD8 = 7, 2
# NK_T = 5
# NK = 3, 18
# NKCD56Hi = 9
# Monocyte_Classical = 4, 6, 12
# Monocyte_NonClassical = 8
# Dendritic_Cells = 14, 15
# HSPCs = 19
# Cycling_Cells = 17
Idents(PBMCs) <- PBMCs[["seurat_clusters"]]
Idents(PBMCs) <- plyr::mapvalues(Idents(PBMCs), from = c(10, 11, 0, 1, 13, 16,
                                                         7, 2, 5, 3, 18, 9,
                                                         4, 6, 12, 
                                                         8, 14, 15,
                                                         19, 17), 
                to = c('B_Cells', 'B_Cells', 'T_CD4', 'T_CD4', 'T_CD4', 'T_CD4',
                       'T_CD8', 'T_CD8', 'NK_T', 'NK', "NK", "NK_CD56Hi",
                       'Monocyte_Classical', 'Monocyte_Classical', 'Monocyte_Classical',
                       'Monocyte_NonClassical', 'Dendritic_Cells', 'Dendritic_Cells',
                       'HSPCs', 'Cycling_Cells'))
Idents(PBMCs) <- factor(Idents(PBMCs),
                                 levels = c("B_Cells", "T_CD4", "TReg", 
                                            "T_CD8", "NK_T", "NK", "NK_CD56Hi",
                                            "Monocyte_Classical", "Monocyte_NonClassical",
                                            "Dendritic_Cells", "HSPCs", "Cycling_Cells"))
PBMCs[["Cell_Type"]] <- Idents(PBMCs)
UMAPPlot(PBMCs, cols = colors.use, label = F) + ggtitle("Harmony: Cell Type Classifications")

Compare single-sample workflow cell type classifications to joint classifications (generating a CMS)

# PBMC Sample 1
data(PBMC1_Single_ID)
S1cms <- GetCMS(object = PBMCs, 
                sample.ID = "Sample_01", 
                reference.ID = PBMC1_Single_ID)
# PBMC Sample 2
data(PBMC2_Single_ID)
S2cms <- GetCMS(object = PBMCs, 
                sample.ID = "Sample_02", 
                reference.ID = PBMC2_Single_ID)
# PBMC Sample 3
data(PBMC3_Single_ID)
S3cms <- GetCMS(object = PBMCs,
                sample.ID = "Sample_03",
                reference.ID = PBMC3_Single_ID)

# Average CMS
mean(c(S1cms, S2cms, S3cms))

LIGER Batch Correction

Processing Samples 1-3 Seurat objects

Data filtering & normalization

library(rliger)
library(SeuratWrappers)

# Import unfiltered Seurat object (included with 'BatchNorm' package)
data(PBMCs)

# Run "standard" Seurat + LIGER workflow:
## "https://github.com/satijalab/seurat-wrappers/blob/master/docs/liger.md"
# Including filtering by mitochondrial percentage (+5 SD)
# Including data normalization, variable gene selection and gene scaling (performed on all samples together)
PBMCs <- PBMCs %>% 
         MitoFilter() %>% 
         NormalizeData(normalization.method = "LogNormalize", assay = "RNA", scale.factor = 10000) %>% 
         NormalizeData(verbose = FALSE, assay = "ADT", normalization.method = "CLR") %>%
         FindVariableFeatures(selection.method = "vst", nfeatures = 2000) %>%
         ScaleData(split.by = "orig.ident", do.center = FALSE) %>% 
         RunOptimizeALS(k = 20, lambda = 5, split.by = "orig.ident") %>% 
         RunQuantileNorm(split.by = "orig.ident") %>%
         RunUMAP(dims = 1:14, reduction = "iNMF") %>% 
         FindNeighbors(reduction = "iNMF", dims = 1:14) %>%
         FindClusters(resolution = 0.8)

UMAPPlot(PBMCs, cols = colors.use, group.by = "orig.ident") + ggtitle("LIGER: Sample Identity")

Measuring sample-UMAP integration (generating an iLISI score)

GetiLISI(object = PBMCs, nSamples = 3)

Cell Typing of LIGER-corrected PBMCs object for CMS

Convert cluster classifications to cell type classifications

# For complete cell classification workflow see our vignette "Biaxial Gating of a Single Sample"
# More details can be found in figure S3 of our manuscript "Data Matrix Normalization and Merging Strategies Minimize Batch-specific Systemic Variation in scRNA-Seq Data."
UMAPPlot(PBMCs, cols = colors.use, pt.size = 2, 
         group.by = "clusters", label = T) + ggtitle("LIGER: Clusters")
# B_Cells = 3, 10, 12, 17
# T_CD4 = 2, 19
# No TReg cluster (contained within cluster 2)
# T_CD8 = 20, 15
# NK_T = 4
# NK = 6, 16
# NK_CD56Hi = 8
# Monocyte_Classical = 1, 7, 9, 14
# Monocyte_NonClassical = 18
# Dendritic_Cells = 11, 13
# HSPCs = 5
# Cycling_Cells no cluster

Idents(PBMCs) <- PBMCs[["clusters"]]
Idents(PBMCs) <- plyr::mapvalues(Idents(PBMCs), from = c(3, 10, 12, 17, 2, 19,
                                                         20, 15, 4, 6, 16, 8,
                                                         1, 7, 9, 
                                                         14, 18, 11,
                                                         13, 5), 
                to = c('B_Cells', 'B_Cells', 'B_Cells', 'B_Cells', 'T_CD4', 'T_CD4',
                       'T_CD8', 'T_CD8', 'NK_T', 'NK', "NK", "NK_CD56Hi",
                       'Monocyte_Classical', 'Monocyte_Classical', 'Monocyte_Classical',
                       'Monocyte_Classical', 'Monocyte_NonClassical', 'Dendritic_Cells',
                       'Dendritic_Cells', 'HSPCs'))
Idents(PBMCs) <- factor(Idents(PBMCs),
                                 levels = c("B_Cells", "T_CD4", "TReg", 
                                            "T_CD8", "NK_T", "NK", "NK_CD56Hi",
                                            "Monocyte_Classical", "Monocyte_NonClassical",
                                            "Dendritic_Cells", "HSPCs", "Cycling_Cells"))
PBMCs[["Cell_Type"]] <- Idents(PBMCs)
UMAPPlot(PBMCs, cols = colors.use, label = F) + ggtitle("LIGER: Cell Type Classifications")

Compare single-sample workflow cell type classifications to joint classifications (generating a CMS)

# PBMC Sample 1
data(PBMC1_Single_ID)
S1cms <- GetCMS(object = PBMCs, 
                sample.ID = "Sample_01", 
                reference.ID = PBMC1_Single_ID)
# PBMC Sample 2
data(PBMC2_Single_ID)
S2cms <- GetCMS(object = PBMCs, 
                sample.ID = "Sample_02", 
                reference.ID = PBMC2_Single_ID)
# PBMC Sample 3
data(PBMC3_Single_ID)
S3cms <- GetCMS(object = PBMCs,
                sample.ID = "Sample_03",
                reference.ID = PBMC3_Single_ID)

# Average CMS
mean(c(S1cms, S2cms, S3cms))

Seurat 3 Batch Correction

Processing Samples 1-3 Seurat objects

Data filtering & normalization

# Import unfiltered Seurat object (included with 'BatchNorm' package)
data(PBMCs)

# Run "standard" Seurat 3 data integration workflow:
# "https://satijalab.org/seurat/archive/v3.0/integration.html"
# Including filtering by mitochondrial percentage (+5 SD)
PBMCs <- MitoFilter(PBMCs)
# Including SCTransform data normalization and feature selection
PBMCs.list <- SplitObject(PBMCs, split.by = "orig.ident")
for (i in 1:length(PBMCs.list)) {
    PBMCs.list[[i]] <- SCTransform(PBMCs.list[[i]], verbose = FALSE)
}
# Next, SelectIntegrationFeatures identifies a given number of features to use during the integration steps (here, 3,000).
# PrepSCTIntegration ensures all pearson residuals have been calculated and that the data is ready for integration.
PBMCs.features <- SelectIntegrationFeatures(object.list = PBMCs.list, nfeatures = 3000)
# Set global object size to 2 gb to avoid errors in this next step.
options(future.globals.maxSize = 2000 * 1024^2)
PBMCs.list <- PrepSCTIntegration(object.list = PBMCs.list, anchor.features = PBMCs.features,
                                 verbose = FALSE)
# Identify integration "anchors" and integrate the datasets.
# This may take approximately 10 minutes or more on this data set
PBMCs.anchors <- FindIntegrationAnchors(object.list = PBMCs.list, 
                                        normalization.method = "SCT", 
                                        anchor.features = PBMCs.features, 
                                        verbose = FALSE)
PBMCs <- IntegrateData(anchorset = PBMCs.anchors, 
                       normalization.method = "SCT", verbose = FALSE)
# Next, we proceed with a standard UMAP/clustering/cell-typing analysis on the integrated dataset
# We are careful not to run the ScaleData function after integration (which would override the integrated features).
PBMCs <- RunPCA(PBMCs, npcs = 30)
PBMCs <- PBMCs %>% 
         RunUMAP(reduction = "pca", dims = 1:14) %>%
         FindNeighbors(reduction = "pca", dims = 1:14) %>%
         FindClusters(resolution = .8)
PBMCs <- NormalizeData(PBMCs, verbose = FALSE, assay = "ADT", normalization.method = "CLR")

UMAPPlot(PBMCs, cols = colors.use, group.by = "orig.ident") + ggtitle("Seurat 3: Sample Identity")

Measuring sample-UMAP integration (generating an iLISI score)

GetiLISI(object = PBMCs, nSamples = 3)

Cell Typing of Seurat 3-corrected PBMCs object for CMS

Convert cluster classifications to cell type classifications

# For complete cell classification workflow see our vignette "Biaxial Gating of a Single Sample"
# More details can be found in figure S3 of our manuscript "Data Matrix Normalization and Merging Strategies Minimize Batch-specific Systemic Variation in scRNA-Seq Data."
UMAPPlot(PBMCs, cols = colors.use, pt.size = 2, 
         group.by = "seurat_clusters", label = T) + ggtitle("Seurat 3: Clusters")
# B_Cells = 11, 14, 16
# T_CD4 = 0, 2, 3, 15
# TReg = 13
# T_CD8 = 5, 8
# NK_T = 7
# NK = 1
# NK_CD56Hi = 12
# Monocyte_Classical = 4, 6, 9
# Monocyte_NonClassical = 10
# Dendritic_Cells = 17
# HSPCs = 18
# Cycling_Cells no cluster
Idents(PBMCs) <- PBMCs[["seurat_clusters"]]
Idents(PBMCs) <- plyr::mapvalues(Idents(PBMCs), from = c(11, 14, 16, 0, 2, 3, 15,
                                                         13, 5, 8, 7, 1, 12,
                                                         4, 6, 9, 
                                                         10, 17, 18), 
                to = c('B_Cells', 'B_Cells', 'B_Cells', 'T_CD4', 'T_CD4', 'T_CD4', 'T_CD4',
                       'TReg', 'T_CD8', 'T_CD8', 'NK_T', "NK", "NK_CD56Hi",
                       'Monocyte_Classical', 'Monocyte_Classical', 'Monocyte_Classical',
                       'Monocyte_NonClassical', 'Dendritic_Cells', 'HSPCs'))
Idents(PBMCs) <- factor(Idents(PBMCs),
                                 levels = c("B_Cells", "T_CD4", "TReg", 
                                            "T_CD8", "NK_T", "NK", "NK_CD56Hi",
                                            "Monocyte_Classical", "Monocyte_NonClassical",
                                            "Dendritic_Cells", "HSPCs", "Cycling_Cells"))
PBMCs[["Cell_Type"]] <- Idents(PBMCs)
UMAPPlot(PBMCs, cols = colors.use, label = F) + ggtitle("Seurat 3: Cell Type Classifications")

Compare single-sample workflow cell type classifications to joint classifications (generating a CMS)

# PBMC Sample 1
data(PBMC1_Single_ID)
S1cms <- GetCMS(object = PBMCs, 
                sample.ID = "Sample_01", 
                reference.ID = PBMC1_Single_ID)
# PBMC Sample 2
data(PBMC2_Single_ID)
S2cms <- GetCMS(object = PBMCs, 
                sample.ID = "Sample_02", 
                reference.ID = PBMC2_Single_ID)
# PBMC Sample 3
data(PBMC3_Single_ID)
S3cms <- GetCMS(object = PBMCs,
                sample.ID = "Sample_03",
                reference.ID = PBMC3_Single_ID)

# Average CMS
mean(c(S1cms, S2cms, S3cms))

Ghosn-Lab/BatchNorm documentation built on Dec. 17, 2021, 9:32 p.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

Ghosn-Lab/BatchNorm
Data Matrix Normalization and Merging Strategies Minimize Batch-specific Systemic Variation in scRNA-Seq Data

In Ghosn-Lab/BatchNorm: Data Matrix Normalization and Merging Strategies Minimize Batch-specific Systemic Variation in scRNA-Seq Data

Harmony Batch Correction

Processing Samples 1-3 Seurat objects

Data filtering & normalization

Generating a UMAP and clusters

Measuring sample-UMAP integration (generating an iLISI score)

Cell Typing of "harmonized" PBMCs object for CMS

Convert cluster classifications to cell type classifications

Compare single-sample workflow cell type classifications to joint classifications (generating a CMS)

LIGER Batch Correction

Processing Samples 1-3 Seurat objects

Data filtering & normalization

Measuring sample-UMAP integration (generating an iLISI score)

Cell Typing of LIGER-corrected PBMCs object for CMS

Convert cluster classifications to cell type classifications

Compare single-sample workflow cell type classifications to joint classifications (generating a CMS)

Seurat 3 Batch Correction

Processing Samples 1-3 Seurat objects

Data filtering & normalization

Measuring sample-UMAP integration (generating an iLISI score)

Cell Typing of Seurat 3-corrected PBMCs object for CMS

Convert cluster classifications to cell type classifications

Compare single-sample workflow cell type classifications to joint classifications (generating a CMS)

R Package Documentation

Browse R Packages

We want your feedback!

Ghosn-Lab/BatchNorm Data Matrix Normalization and Merging Strategies Minimize Batch-specific Systemic Variation in scRNA-Seq Data

In Ghosn-Lab/BatchNorm: Data Matrix Normalization and Merging Strategies Minimize Batch-specific Systemic Variation in scRNA-Seq Data

Harmony Batch Correction

Processing Samples 1-3 Seurat objects

Data filtering & normalization

Generating a UMAP and clusters

Measuring sample-UMAP integration (generating an iLISI score)

Cell Typing of "harmonized" PBMCs object for CMS

Convert cluster classifications to cell type classifications

Compare single-sample workflow cell type classifications to joint classifications (generating a CMS)

LIGER Batch Correction

Processing Samples 1-3 Seurat objects

Data filtering & normalization

Measuring sample-UMAP integration (generating an iLISI score)

Cell Typing of LIGER-corrected PBMCs object for CMS

Convert cluster classifications to cell type classifications

Compare single-sample workflow cell type classifications to joint classifications (generating a CMS)

Seurat 3 Batch Correction

Processing Samples 1-3 Seurat objects

Data filtering & normalization

Measuring sample-UMAP integration (generating an iLISI score)

Cell Typing of Seurat 3-corrected PBMCs object for CMS

Convert cluster classifications to cell type classifications

Compare single-sample workflow cell type classifications to joint classifications (generating a CMS)

R Package Documentation

Browse R Packages

We want your feedback!

Ghosn-Lab/BatchNorm
Data Matrix Normalization and Merging Strategies Minimize Batch-specific Systemic Variation in scRNA-Seq Data