Here, we used a version of IA-SVA (iasva2) using residuals as dependent variables in each iteration.
rm(list = ls()) library(Seurat) library(leedonghyung) library(SummarizedExperiment) library(irlba) library(Rtsne) library(RColorBrewer) library(pheatmap) library(corrplot) library(tmod) library(tagcloud) library(limma) library(VennDiagram) library(ggrepel) library(data.table) # normalize function normalize <- function (counts) { normfactor <- colSums(counts) return(t(t(counts)/normfactor)*median(normfactor)) } color.vec <- c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "black", "#a65628", "#f781bf", "#999999")
path <- ("~/Dropbox/CZI_project/Public_Data/Kang_2017/") name <- "Kang.2017.all.cells" ctrl.data <- as.data.frame(fread(paste0("gzcat ", path,"immune_control_expression_matrix.txt.gz"))) dim(ctrl.data) rownames(ctrl.data) <- ctrl.data$V1 ctrl.data$V1 <- NULL dim(ctrl.data) #head(ctrl.data[,1:5]) stim.data <- as.data.frame(fread(paste0("gzcat ", path,"immune_stimulated_expression_matrix.txt.gz"))) dim(stim.data) rownames(stim.data) <- stim.data$V1 stim.data$V1 <- NULL dim(stim.data) #head(stim.data[,1:5]) # Set up control object ctrl <- CreateSeuratObject(raw.data = ctrl.data, project = "IMMUNE_CTRL", min.cells = 5) ctrl@meta.data$stim <- "CTRL" ctrl <- FilterCells(ctrl, subset.names = "nGene", low.thresholds = 500, high.thresholds = Inf) ctrl <- NormalizeData(ctrl) ctrl <- ScaleData(ctrl, display.progress = F) # Set up stimulated object stim <- CreateSeuratObject(raw.data = stim.data, project = "IMMUNE_STIM", min.cells = 5) stim@meta.data$stim <- "STIM" stim <- FilterCells(stim, subset.names = "nGene", low.thresholds = 500, high.thresholds = Inf) stim <- NormalizeData(stim) stim <- ScaleData(stim, display.progress = F) # Gene selection for input to CCA ctrl <- FindVariableGenes(ctrl, do.plot = F) stim <- FindVariableGenes(stim, do.plot = F) g.1 <- head(rownames(ctrl@hvg.info), 2000) g.2 <- head(rownames(stim@hvg.info), 2000) genes.use <- union(g.1, g.2) # combine data ctrl.data.sel <- ctrl.data[genes.use, rownames(ctrl@meta.data)] stim.data.sel <- stim.data[genes.use, rownames(stim@meta.data)] all.counts <- cbind(ctrl.data.sel, stim.data.sel) dim(all.counts) # overdispersed genes #saveRDS(all.counts, file = "immune_control_stimulated_filtered_counts.Rds")
# load data all.counts <- readRDS("~/Dropbox/Kang.2017/IASVA/By_Celltype/Kang.2017.by.cell.CD8 T.overdispersed_raw_counts.Rds") dim(all.counts) # load in seurat annotations imm.comb <- readRDS("~/Dropbox/Kang.2017/Kang.2017.PBMC.stimulated.Seurat.immune.combined.Rds") imm.meta <- imm.comb@meta.data imm.meta <- cbind(imm.meta, imm.comb@ident) colnames(imm.meta)[ncol(imm.meta)] <- "celltype" dim(imm.meta) all.meta <- imm.meta imm.meta <- imm.meta[imm.meta$celltype == "CD8 T",] table(rownames(imm.meta) == colnames(all.counts)) # calculate lib size #pdf(file = paste(name, cell_interest, "IASVA.genes.detected.hist.pdf", sep = ".")) Num_Detected_Genes <- colSums(all.counts > 0) hist(Num_Detected_Genes, col = "dodgerblue4", breaks = 20, ylab = "Number of Cells") summ <- summary(Num_Detected_Genes) legend("topright", legend = paste(names(summ), summ)) abline(v = 500, col = "red", lty = 2) legend("right", legend = paste("< 500 genes = ", length(which(Num_Detected_Genes < 500)), " Cells\n", "> 500 genes = ", length(which(Num_Detected_Genes > 500)), " Cells", sep = "")) #dev.off()
Here, the only known factors are stimulation, celltype, and library size
# make vector of condition Condition <- factor(imm.meta$stim) CellType <- factor(imm.meta$celltype) # normalize counts counts.filt <- normalize(all.counts) Geo_Lib <- colSums(log(counts.filt+1)) Num_Detected_Genes <- colSums(all.counts > 0) # categorize lib size Geo_Lib_Cat <- cut(Num_Detected_Genes, breaks = 4) summary(Geo_Lib) barplot(Geo_Lib, xlab="Cell", las=2, ylab = "Geometric Library Size") lcounts <- log(counts.filt + 1) lcounts <- as.matrix(lcounts) # PC1 and Geometric library size correlation pc1 = irlba(lcounts - rowMeans(lcounts), 1)$v[,1] ## partial SVD cor(Geo_Lib, pc1) cor(Num_Detected_Genes, pc1) cor(as.numeric(Condition), pc1) cor(Num_Detected_Genes, Geo_Lib)
#tsne.res <- Rtsne(unique(t(lcounts)), dims = 2) #saveRDS(tsne.res, file = paste(name, "tsne.preIASVA.Rds", sep = ".")) # load tsne results tsne.res <- readRDS("~/Dropbox/Kang.2017/IASVA/By_Celltype/Kang.2017.by.cell.CD8 T.tsne.preIASVA.Rds") # by celltype par(mfrow=c(2,2)) #par(mar=c(5.1, 4.1, 4.1, 8.1), xpd=TRUE) plot(tsne.res$Y[,1:2], main="Pre IA-SVA", xlab="tSNE Dim1", ylab="tSNE Dim2", pch=20, col=color.vec[CellType], bg=color.vec[CellType], cex = 0.5) #par(xpd=TRUE) legend(50,20, levels(CellType), border="white", fill=color.vec, bty="n", title = "Cell Type") # by condition #par(mar=c(5, 4, 4, 2), xpd=FALSE) plot(tsne.res$Y[,1:2], main="Pre IA-SVA", xlab="tSNE Dim1", ylab="tSNE Dim2", pch=20, col=color.vec[Condition], bg=color.vec[Condition], oma=c(4,4,6,12), cex = 0.5) legend("topleft", levels(Condition), border="white", fill=color.vec, bty="n", title = "Condition") # by genes detect plot(tsne.res$Y[,1:2], main="Pre IA-SVA", xlab="tSNE Dim1", ylab="tSNE Dim2", pch=20, col=color.vec[Geo_Lib_Cat], bg=color.vec[Geo_Lib_Cat], oma=c(4,4,6,12), cex = 0.5) legend("topleft", levels(Geo_Lib_Cat), border="white", fill=color.vec, bty="n", title = "Genes Detected")
# run iasva mod <- model.matrix(~Num_Detected_Genes) summ_exp <- SummarizedExperiment(assays = as.matrix(counts.filt)) pct_cutt <- 1 iasva.res<- fast_iasva2(summ_exp, mod[,-1, drop = F], verbose=FALSE, pct.cutoff = pct_cutt, num.sv = 10) # load results #iasva.res <- readRDS("/Users/lawlon/Documents/CZI/Seurat_Public_Data/immune_alignment_expression_matrices/IASVA/By_Celltype/Kang.2017.by.cell.CD8 T.IASVA.1.output.Rds") iasva.sv <- iasva.res$sv #saveRDS(iasva.res, file = paste(name, "IASVA", pct_cutt, "output.Rds", sep = ".")) #markers_pct1 <- find_markers(Y = summ_exp, iasva.sv = iasva.sv) #markers_pct1 <- readRDS("/Users/lawlon/Documents/CZI/Seurat_Public_Data/immune_alignment_expression_matrices/IASVA/By_Celltype/Kang.2017.by.cell.CD8 T.IASVA.1.markers.Rds") # revised find markers function #source("/Users/lawlon/iasva_markers_update/R/find_markers.R") #all_markers <- .GlobalEnv$find_markers(Y = summ_exp, iasva.sv = iasva.sv) # all_markers <- readRDS("/Users/lawlon/Documents/CZI/Seurat_Public_Data/immune_alignment_expression_matrices/IASVA/All_Cells/Kang.2017.all.cells.IASVA.by.SV.markers.Rds") #saveRDS(markers_pct1, file = paste(name, "IASVA", pct_cutt, "markers.Rds", sep = ".")) # tsne on markers # set.seed(100) # if (ncol(markers_pct1) > 100) { # perplex <- 30 # } else { # perplex <- round(ncol(markers_pct1)/4) # } # tsne.res.iasva <- Rtsne(unique(t(log(markers_pct1+1))), dims = 2, perplexity = perplex) # saveRDS(tsne.res.iasva, file = paste(name, cell_interest, stim_cond, "IASVA", pct_cutt, "tsne.markers.Rds", sep = "."))
ncol(iasva.sv) # if (ncol(iasva.sv) > 5) { # pairs(iasva.sv[,1:5], main="IA-SVA", pch=20, col=color.vec[CellType], # bg=color.vec[CellType], cex=0.5, oma=c(4,4,6,12)) # legend("right", levels(CellType), border="white", fill=color.vec, bty="n") # pairs(iasva.sv[,1:5], main="IA-SVA", pch=20, col=color.vec[Condition], # bg=color.vec[Condition], cex=0.5, oma=c(4,4,6,12)) # legend("right", levels(Condition), border="white", fill=color.vec, bty="n") # pairs(iasva.sv[,1:5], main="IA-SVA", pch=20, col=color.vec[Geo_Lib_Cat], # bg=color.vec[Geo_Lib_Cat], cex=0.5, oma=c(4,4,6,12)) # legend("right", levels(Geo_Lib_Cat), border="white", fill=color.vec, bty="n") # } else { par(mar=c(5.1, 4.1, 4.1, 8.2), xpd=TRUE) pairs(iasva.sv, main="IA-SVA", pch=20, col=color.vec[Condition], bg=color.vec[Condition], cex=0.5, oma=c(4,4,6,12)) legend("right", levels(Condition), border="white", fill=color.vec, bty="n") # }
Use SVs separate cells. For example, SV1, SV4, SV8 are good candidates for further analysis.
iasva_vars <- cbind(iasva.sv, Geo_Lib, Condition) corrplot(cor(iasva_vars))
SV1.marker.counts <- leedonghyung::find_markers(summ_exp, as.matrix(iasva.sv[,1]), rsq.cutoff=0.2) dim(SV1.marker.counts) anno.col <- data.frame(Condition = Condition, Genes_Detect = Geo_Lib_Cat, #CellType = CellType, SV1 = iasva.sv[,1]) #SV4= iasva.sv[,4]) rownames(anno.col) <- colnames(SV1.marker.counts) pheatmap(log(SV1.marker.counts+1), show_colnames =FALSE, clustering_method = "ward.D2",cutree_cols = 3,annotation_col = anno.col) #annotation_colors = anno.colors)
SV4.marker.counts <- leedonghyung::find_markers(summ_exp, as.matrix(iasva.sv[,4]), rsq.cutoff=0.2) dim(SV4.marker.counts) anno.col <- data.frame(Condition = Condition, Genes_Detect = Geo_Lib_Cat, #CellType = CellType, SV4 = iasva.sv[,4]) rownames(anno.col) <- colnames(SV4.marker.counts) pheatmap(log(SV4.marker.counts+1), show_colnames =FALSE, clustering_method = "ward.D2",cutree_cols = 3,annotation_col = anno.col) #annotation_colors = anno.colors)
SV8.marker.counts <- leedonghyung::find_markers(summ_exp, as.matrix(iasva.sv[,8]), rsq.cutoff=0.1) dim(SV8.marker.counts) anno.col <- data.frame(Condition = Condition, Genes_Detect = Geo_Lib_Cat, #CellType = CellType, SV8 = iasva.sv[,8]) rownames(anno.col) <- colnames(SV8.marker.counts) pheatmap(log(SV8.marker.counts+1), show_colnames =FALSE, clustering_method = "ward.D2",cutree_cols = 3,annotation_col = anno.col) #annotation_colors = anno.colors)
SV1.SV4.marker.counts <- leedonghyung::find_markers(summ_exp, iasva.sv[,c(1,4)], rsq.cutoff=0.2) dim(SV1.SV4.marker.counts) anno.col <- data.frame(Condition = Condition, Genes_Detect = Geo_Lib_Cat, #CellType = CellType, SV1 = iasva.sv[,1], SV4= iasva.sv[,4]) rownames(anno.col) <- colnames(SV1.SV4.marker.counts) pheatmap(log(SV1.SV4.marker.counts+1), show_colnames =FALSE, clustering_method = "ward.D2",cutree_cols = 3,annotation_col = anno.col) #annotation_colors = anno.colors)
all_markers <- find_markers2(summ_exp, iasva.sv[,c(1,4,8)], rsq.cutoff=0.1) design <- model.matrix(~Condition) fit <- eBayes(lmFit(counts.filt, design)) tt <- topTable(fit, coef=2, number=Inf, genelist = rownames(counts.filt)) head(tt, 10) # iterative analysis of SVs an modules all_sv_res <- list() par(mfrow = c(3,1), mar = c(4,2,2,2)) for (j in 1:3) { sv_genes <- all_markers[[j]] # module analysis fg <- tt[sv_genes,] fg.ord <- fg[order(fg$logFC, decreasing = T),] res <- tmodHGtest(fg = fg.ord$ID, bg = tt$ID) res.ord <- res[order(res$E, decreasing = T),] # append results all_sv_res[[j]] <- res.ord names(all_sv_res)[j] <- names(all_markers)[j] if (nrow(res.ord) > 1) { # make plots bp1 <- barplot(res.ord$E, main = paste(names(all_markers)[j], " Module Enrichment", sep = ""), xlab = "", horiz = T, cex.main = 2, cex.lab = 2) #barplot(res.ord$E, xlab = "Module Enrichment", main = paste(names(all_markers)[j], " Genes", sep = ""), horiz = T) par(xpd=TRUE) text(x = max(res.ord$E)/2, y = bp1[,1], labels = paste(res.ord$Title, " (", res$b, "/", res$B, ")", sep = ""), cex = 1, font = 2) } }
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.