knitr::opts_chunk$set(echo = TRUE, cache=TRUE)
This guide will show the perfomance of methods (bindSC, Seurat, LIGER, and Harmony) on A549 dataset. Download sci-RNA-seq data from https://drive.google.com/file/d/1tjMA9yERXYBx-GKKrGORKF1-4Zs9xScM/view?usp=sharing Download sci-ATAC-seq data from https://drive.google.com/file/d/1REKXV-TTm1rkWEyX82Q1R6dOtE-dPsza/view?usp=sharing change the input to your own file path.
library(bindSC) library(peakRAM) source("./runMethod.R") source("./Eval.R") A549_RNA <- readRDS("../../data/A549_rna.rds") A549_ATAC <- readRDS("../../data/A549_atac.rds") summary(A549_RNA) summary(A549_ATAC) dim(A549_RNA$X) dim(A549_ATAC$Z0) dim(A549_ATAC$Y) X <- A549_RNA$X Y <- A549_ATAC$Y Z0 <- A549_ATAC$Z0 #X <- (X-rowMeans(X)) #Y <- (Y -rowMeans(Y)) #Z0 <- (Z0-rowMeans(Z0)) treatmentTime <- c(A549_RNA$RNA_meta$treatment_time, A549_ATAC$ATAC_meta$group) #treatmentTime <- c(A549_rna@meta.data$treatment_time, A549_atac@meta.data$group) treatmentTime[treatmentTime=="A549_0h"] <- 0 treatmentTime[treatmentTime=="A549_1h"] <- 1 treatmentTime[treatmentTime=="A549_3h"] <- 3 type <- c(rep("scRNA-seq",nrow(A549_RNA$RNA_meta)), rep("scATAC-seq",nrow(A549_ATAC$ATAC_meta))) #type <- c(rep("scRNA-seq",nrow(A549_rna@meta.data)), rep("scATAC-seq",nrow(A549_atac@meta.data))) gene.overlap <- intersect(rownames(X), rownames(Z0)) cell.overlap <- intersect(colnames(Y), colnames(Z0)) X <- as.matrix(X[gene.overlap,]) Z0 <- as.matrix(Z0[gene.overlap, cell.overlap]) Y <- as.matrix(Y[,cell.overlap]) out <- dimReduce( dt1 = X, dt2 = Z0, K = 15) x <- out$dt1 z0 <- out$dt2 y <- dimReduce(dt1 = Y, K=15) X.clst <- treatmentTime[type=="scRNA-seq"] Y.clst <- treatmentTime[type=="scATAC-seq"] celltype <- c(X.clst, Y.clst) # Given 1/3 h is not seperated, we only consider pre/post treatment difference celltype[celltype==3] <- 1 rd_time <- c() rd_mem <- c()
Run bindSC. The time used is also shown.
K <- 5 out <- peakRAM( bindsc <- BiCCA(X=t(x), Z0=t(z0), Y=t(y), alpha = 0.1, lambda = 0.5, K = K, X.clst = X.clst, Y.clst = Y.clst, num.iteration = 50, temp.path = "./tp", tolerance = 0.01, save = TRUE, block.size = 0) ) rd_mem <- c(rd_mem, out$Peak_RAM_Used_MiB) rd_time <- c(rd_time, out$Elapsed_Time_sec) bindsc_umap <- get_UMAP(rbind(bindsc$u, bindsc$r))
Run Seurat. The time used is also shown.
out <- peakRAM(seurat <- runSeurat(X =X, Z0=Z0, K = K)) rd_mem <- c(rd_mem, out$Peak_RAM_Used_MiB) rd_time <- c(rd_time, out$Elapsed_Time_sec) seurat_umap <- get_UMAP(seurat)
Run LIGER. The time used is also shown.
out<-peakRAM(liger <- runLIGER(X = X, Z0=Z0, K = K)) rd_mem <- c(rd_mem, out$Peak_RAM_Used_MiB) rd_time <- c(rd_time, out$Elapsed_Time_sec) liger_umap <- get_UMAP(liger)
Run Harmony. The time used is also shown.
out <- peakRAM(harmony <- runHarmony(X =X, Z0=Z0, K = K)) rd_mem <- c(rd_mem, out$Peak_RAM_Used_MiB) rd_time <- c(rd_time, out$Elapsed_Time_sec) harmony_umap <- get_UMAP(harmony)
UMAP shows results from co-embeddings of each method, split by different modalites. For convenience, the cells are colored by annotation from co-assyed profiles. This means that the color is consistent between two modalites
source("./Eval.R") tp <- rbind(bindsc_umap, seurat_umap, liger_umap, harmony_umap) merged_dt <- data.frame("UMAP1"=tp[,1], "UMAP2"=tp[,2], "celltype"=as.factor(celltype), tech=c(rep("A",ncol(X)), rep("B",ncol(Y))), "method"=c(rep("bindSC", nrow(bindsc_umap)), rep("Seurat", nrow(seurat_umap)), rep("LIGER", nrow(liger_umap)), rep("Harmony", nrow(harmony_umap)))) p <- umapPlot_a549(merged_dt = merged_dt, tech_label = c("scRNA-seq","scATAC-seq")) p tiff("Fig2_a549_umap.tiff", width=9, height =3, res =300, units = "in", compression = "lzw") print(p) dev.off()
We use three indexs: 1) silhouette score; 2) alignment mixing score; 3) anchoring distance. Higher silhoouette score means cell type are well separated in the co-embedding space. Higher alignment mixing score means two modalites are aligned well in the co-embedding space. Lower Anchoring distance means the cell with measurement from two modalites is closer in the co-embedding space.
library(rdist) bindsc_coembed <- rbind(bindsc$u, bindsc$r) coembed <- rbind(bindsc,seurat, liger, harmony) co_assay <- intersect(colnames(X), colnames(Y)) lst1 <- match(co_assay, colnames(X)) lst2 <- match(co_assay, colnames(Y)) out <- alignmentScore(coembed = merged_dt[,c("UMAP1","UMAP2")], merged_dt = merged_dt, coassay_1 = lst1, coassay_2 =lst2) p <- score_plot(out) print(p) tiff("Fig2_a549_eval.tiff", width=9, height =3, res =300, units = "in", compression = "lzw") print(p) dev.off()
cpu_time <- data.frame("Time"=rd_time,"Memory"=rd_mem,"Method"=c("bindSC","Seurat","LIGER","Harmony")) cpu_time$Memory <- cpu_time$Memory/1024 p <- ggscatter(cpu_time,x="Time", y="Memory", color="Method",label="Method", repel = TRUE) + theme_classic() + NoLegend() + xlim(0, max(cpu_time$Time))+ xlab("Elapsed time (Sec)") + ylab("Maximum memory usage (Gb)") + ggtitle(paste0("Cell size (", length(X.clst),",", length(Y.clst),")" )) p saveRDS(cpu_time,file="./a549.cpu_usage.RDS")
result <- list() result$coembed <- coembed result$umap <- merged_dt result$score <- out saveRDS(result, file="A549.methodEval.RDS")
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.