knitr::opts_chunk$set(echo = TRUE,  cache=TRUE)

This guide will show the perfomance of methods (bindSC, Seurat, LIGER, and Harmony) on A549 dataset. Download sci-RNA-seq data from https://drive.google.com/file/d/1tjMA9yERXYBx-GKKrGORKF1-4Zs9xScM/view?usp=sharing Download sci-ATAC-seq data from https://drive.google.com/file/d/1REKXV-TTm1rkWEyX82Q1R6dOtE-dPsza/view?usp=sharing change the input to your own file path.

Stage 1: Load A549 datasets

library(bindSC)
library(peakRAM)
source("./runMethod.R")
source("./Eval.R")
A549_RNA <- readRDS("../../data/A549_rna.rds")
A549_ATAC <- readRDS("../../data/A549_atac.rds")


summary(A549_RNA)
summary(A549_ATAC)
dim(A549_RNA$X)
dim(A549_ATAC$Z0)
dim(A549_ATAC$Y)
X <- A549_RNA$X
Y <- A549_ATAC$Y
Z0 <- A549_ATAC$Z0

#X <- (X-rowMeans(X))
#Y <- (Y -rowMeans(Y))
#Z0 <- (Z0-rowMeans(Z0))


treatmentTime <- c(A549_RNA$RNA_meta$treatment_time, A549_ATAC$ATAC_meta$group)
#treatmentTime <- c(A549_rna@meta.data$treatment_time, A549_atac@meta.data$group)
treatmentTime[treatmentTime=="A549_0h"] <- 0
treatmentTime[treatmentTime=="A549_1h"] <- 1
treatmentTime[treatmentTime=="A549_3h"] <- 3

type <- c(rep("scRNA-seq",nrow(A549_RNA$RNA_meta)), rep("scATAC-seq",nrow(A549_ATAC$ATAC_meta)))
#type <- c(rep("scRNA-seq",nrow(A549_rna@meta.data)), rep("scATAC-seq",nrow(A549_atac@meta.data)))
gene.overlap <- intersect(rownames(X), rownames(Z0))
cell.overlap <- intersect(colnames(Y), colnames(Z0))

X <- as.matrix(X[gene.overlap,])
Z0 <- as.matrix(Z0[gene.overlap, cell.overlap])
Y <- as.matrix(Y[,cell.overlap])


out <- dimReduce( dt1 =  X, dt2 = Z0,  K = 15)
x <- out$dt1
z0 <- out$dt2
y  <- dimReduce(dt1 = Y, K=15)


X.clst <- treatmentTime[type=="scRNA-seq"]
Y.clst <- treatmentTime[type=="scATAC-seq"]


celltype <- c(X.clst, Y.clst)

# Given 1/3 h is not seperated, we only consider pre/post treatment difference 
celltype[celltype==3] <- 1

rd_time <- c()
rd_mem <- c()

Stage 2: run four methods

Run bindSC. The time used is also shown.

K <- 5
out <- peakRAM(
  bindsc <- BiCCA(X=t(x),
             Z0=t(z0), 
             Y=t(y), 
             alpha = 0.1,
             lambda = 0.5,
             K = K, 
             X.clst = X.clst,
             Y.clst = Y.clst,
             num.iteration = 50, 
             temp.path = "./tp",
             tolerance = 0.01, 
             save = TRUE, 
             block.size = 0)
)


rd_mem <- c(rd_mem, out$Peak_RAM_Used_MiB)
rd_time <- c(rd_time, out$Elapsed_Time_sec)


bindsc_umap <- get_UMAP(rbind(bindsc$u, bindsc$r))

Run Seurat. The time used is also shown.

out <- peakRAM(seurat <- runSeurat(X =X, Z0=Z0, K = K))

rd_mem <- c(rd_mem, out$Peak_RAM_Used_MiB)
rd_time <- c(rd_time, out$Elapsed_Time_sec)

seurat_umap <- get_UMAP(seurat)

Run LIGER. The time used is also shown.

out<-peakRAM(liger <- runLIGER(X = X, Z0=Z0, K = K))


rd_mem <- c(rd_mem, out$Peak_RAM_Used_MiB)
rd_time <- c(rd_time, out$Elapsed_Time_sec)

liger_umap <- get_UMAP(liger)

Run Harmony. The time used is also shown.

out <- peakRAM(harmony <- runHarmony(X =X, Z0=Z0, K = K))

rd_mem <- c(rd_mem, out$Peak_RAM_Used_MiB)
rd_time <- c(rd_time, out$Elapsed_Time_sec)
harmony_umap <- get_UMAP(harmony)

Stage 3: Visulization of integration for each method.

UMAP shows results from co-embeddings of each method, split by different modalites. For convenience, the cells are colored by annotation from co-assyed profiles. This means that the color is consistent between two modalites

source("./Eval.R")
tp <- rbind(bindsc_umap, seurat_umap, liger_umap,  harmony_umap)
merged_dt <- data.frame("UMAP1"=tp[,1], "UMAP2"=tp[,2], 
                        "celltype"=as.factor(celltype), 
                        tech=c(rep("A",ncol(X)), rep("B",ncol(Y))),
                        "method"=c(rep("bindSC", nrow(bindsc_umap)),  
                                   rep("Seurat", nrow(seurat_umap)),
                                   rep("LIGER",  nrow(liger_umap)),
                                   rep("Harmony", nrow(harmony_umap))))

p <- umapPlot_a549(merged_dt = merged_dt, tech_label = c("scRNA-seq","scATAC-seq"))

p


tiff("Fig2_a549_umap.tiff", width=9, height =3, res =300, units = "in", compression = "lzw")
print(p)
dev.off()

Stage 4: Visulization of integration accuracy.

We use three indexs: 1) silhouette score; 2) alignment mixing score; 3) anchoring distance. Higher silhoouette score means cell type are well separated in the co-embedding space. Higher alignment mixing score means two modalites are aligned well in the co-embedding space. Lower Anchoring distance means the cell with measurement from two modalites is closer in the co-embedding space.

library(rdist)
bindsc_coembed <- rbind(bindsc$u, bindsc$r)
coembed <- rbind(bindsc,seurat, liger, harmony)
co_assay <- intersect(colnames(X), colnames(Y))
lst1  <- match(co_assay, colnames(X))
lst2 <- match(co_assay, colnames(Y))
out <- alignmentScore(coembed = merged_dt[,c("UMAP1","UMAP2")], merged_dt = merged_dt, coassay_1 = lst1, coassay_2 =lst2)
p <- score_plot(out)
print(p)


tiff("Fig2_a549_eval.tiff", width=9, height =3, res =300, units = "in", compression = "lzw")
print(p)
dev.off()

Stage 5: Running time and memory usage

cpu_time <- data.frame("Time"=rd_time,"Memory"=rd_mem,"Method"=c("bindSC","Seurat","LIGER","Harmony"))
cpu_time$Memory <- cpu_time$Memory/1024
p <- ggscatter(cpu_time,x="Time", y="Memory", color="Method",label="Method", repel = TRUE)  + 
  theme_classic() + NoLegend() + xlim(0, max(cpu_time$Time))+ 
  xlab("Elapsed time (Sec)") + ylab("Maximum memory usage (Gb)") + ggtitle(paste0("Cell size (", length(X.clst),",", length(Y.clst),")" ))
p
saveRDS(cpu_time,file="./a549.cpu_usage.RDS")

Stage 6: Save resutls

result <- list()
result$coembed <- coembed
result$umap <- merged_dt
result$score <- out
saveRDS(result, file="A549.methodEval.RDS")


KChen-lab/bindSC documentation built on Sept. 29, 2022, 4:24 a.m.