rm(list = ls()) library(Seurat) library(iasva) library(SummarizedExperiment) library(irlba) library(Rtsne) library(RColorBrewer) library(pheatmap) library(corrplot) library(tmod) library(tagcloud) library(limma) library(VennDiagram) #setwd("/Users/lawlon/Documents/CZI/Seurat_Public_Data/immune_alignment_expression_matrices/") name <- "Kang.2017.all.cells" pct_cutt <- 1 ctrl.data <- readRDS("/Users/lawlon/Documents/CZI/Seurat_Public_Data/immune_alignment_expression_matrices/immune_control_expression_matrix.Rds") dim(ctrl.data) stim.data <- readRDS("/Users/lawlon/Documents/CZI/Seurat_Public_Data/immune_alignment_expression_matrices/immune_stimulated_expression_matrix.Rds") dim(stim.data) # normalize function normalize <- function (counts) { normfactor <- colSums(counts) return(t(t(counts)/normfactor)*median(normfactor)) } color.vec <- c("#e41a1c", "#377eb8", "#4daf4a", "#984ea3", "#ff7f00", "black", "#a65628", "#f781bf", "#999999")
# load in seurat annotations imm.comb <- readRDS("/Users/lawlon/Documents/CZI/Seurat_Public_Data/immune_alignment_expression_matrices/Seurat/Kang.2017.PBMC.stimulated.Seurat.immune.combined.Rds") imm.meta <- imm.comb@meta.data imm.meta <- cbind(imm.meta, imm.comb@ident) colnames(imm.meta)[ncol(imm.meta)] <- "celltype" dim(imm.meta) all.meta <- imm.meta imm.meta <- imm.meta[imm.meta$celltype == "CD8 T" & imm.meta$stim == "CTRL",] # Set up control object ctrl <- CreateSeuratObject(raw.data = ctrl.data[, rownames(imm.meta)], project = "IMMUNE_CTRL", min.cells = 5) ctrl@meta.data$stim <- "CTRL" ctrl <- FilterCells(ctrl, subset.names = "nGene", low.thresholds = 500, high.thresholds = Inf) ctrl <- NormalizeData(ctrl) ctrl <- ScaleData(ctrl, display.progress = F) # Gene selection for input to CCA ctrl <- FindVariableGenes(ctrl, do.plot = F) g.1 <- head(rownames(ctrl@hvg.info), 2000) genes.use <- g.1 # combine data ctrl.data.sel <- ctrl.data[genes.use, rownames(ctrl@meta.data)] all.counts <- ctrl.data.sel dim(all.counts) # overdispersed genes #saveRDS(all.counts, file = "immune_control_stimulated_filtered_counts.Rds")
# calculate lib size #pdf(file = paste(name, cell_interest, "IASVA.genes.detected.hist.pdf", sep = ".")) Num_Detected_Genes <- colSums(all.counts > 0) hist(Num_Detected_Genes, col = "dodgerblue4", breaks = 20, ylab = "Number of Cells") summ <- summary(Num_Detected_Genes) legend("topright", legend = paste(names(summ), summ)) abline(v = 500, col = "red", lty = 2) legend("right", legend = paste("< 500 genes = ", length(which(Num_Detected_Genes < 500)), " Cells\n", "> 500 genes = ", length(which(Num_Detected_Genes > 500)), " Cells", sep = "")) #dev.off()
Here, the only known factors are stimulation, celltype, and library size
# make vector of condition Condition <- factor(imm.meta$stim) CellType <- factor(imm.meta$celltype) # normalize counts counts.filt <- normalize(all.counts) Geo_Lib <- colSums(log(counts.filt+1)) # categorize lib size Geo_Lib_Cat <- cut(Num_Detected_Genes, breaks = 4) summary(Geo_Lib) barplot(Geo_Lib, xlab="Cell", las=2, ylab = "Geometric Library Size") lcounts <- log(counts.filt + 1) lcounts <- as.matrix(lcounts) # PC1 and Geometric library size correlation pc1 = irlba(lcounts - rowMeans(lcounts), 1)$v[,1] ## partial SVD cor(Geo_Lib, pc1)
tsne.res <- Rtsne(unique(t(lcounts)), dims = 2) #saveRDS(tsne.res, file = paste(name, "tsne.preIASVA.Rds", sep = ".")) # by celltype par(mar=c(5.1, 4.1, 4.1, 8.1), xpd=TRUE) plot(tsne.res$Y[,1:2], main="Pre IA-SVA", xlab="tSNE Dim1", ylab="tSNE Dim2", pch=20, col=color.vec[CellType], bg=color.vec[CellType], cex = 0.5) par(xpd=TRUE) legend(50,20, levels(CellType), border="white", fill=color.vec, bty="n", title = "Cell Type") # by condition par(mar=c(5, 4, 4, 2), xpd=FALSE) plot(tsne.res$Y[,1:2], main="Pre IA-SVA", xlab="tSNE Dim1", ylab="tSNE Dim2", pch=20, col=color.vec[Condition], bg=color.vec[Condition], oma=c(4,4,6,12), cex = 0.5) legend("topright", levels(Condition), border="white", fill=color.vec, bty="n", title = "Condition") # by genes detect plot(tsne.res$Y[,1:2], main="Pre IA-SVA", xlab="tSNE Dim1", ylab="tSNE Dim2", pch=20, col=color.vec[Geo_Lib_Cat], bg=color.vec[Geo_Lib_Cat], oma=c(4,4,6,12), cex = 0.5) legend("topright", levels(Geo_Lib_Cat), border="white", fill=color.vec, bty="n", title = "Genes Detected")
# run iasva mod <- model.matrix(~Geo_Lib) summ_exp <- SummarizedExperiment(assays = as.matrix(counts.filt)) iasva.res<- fast_iasva(summ_exp, mod[,-1, drop = F], verbose=FALSE, pct.cutoff = pct_cutt, num.sv = 10) # load results iasva.sv <- iasva.res$sv #saveRDS(iasva.res, file = paste(name, "IASVA", pct_cutt, "output.Rds", sep = ".")) markers_pct1 <- iasva::find_markers(Y = summ_exp, iasva.sv = iasva.sv) # revised find markers function source("/Users/lawlon/iasva_markers_update/R/find_markers.R") all_markers <- .GlobalEnv$find_markers(Y = summ_exp, iasva.sv = iasva.sv) # all_markers <- readRDS("/Users/lawlon/Documents/CZI/Seurat_Public_Data/immune_alignment_expression_matrices/IASVA/All_Cells/Kang.2017.all.cells.IASVA.by.SV.markers.Rds") #saveRDS(markers_pct1, file = paste(name, "IASVA", pct_cutt, "markers.Rds", sep = ".")) #tsne on markers set.seed(100) if (ncol(markers_pct1) > 100) { perplex <- 30 } else { perplex <- round(ncol(markers_pct1)/4) } tsne.res.iasva <- Rtsne(unique(t(log(markers_pct1+1))), dims = 2, perplexity = perplex) # saveRDS(tsne.res.iasva, file = paste(name, cell_interest, stim_cond, "IASVA", pct_cutt, "tsne.markers.Rds", sep = "."))
ncol(iasva.sv) # if (ncol(iasva.sv) > 5) { # pairs(iasva.sv[,1:5], main="IA-SVA", pch=20, col=color.vec[CellType], # bg=color.vec[CellType], cex=0.5, oma=c(4,4,6,12)) # legend("right", levels(CellType), border="white", fill=color.vec, bty="n") # pairs(iasva.sv[,1:5], main="IA-SVA", pch=20, col=color.vec[Condition], # bg=color.vec[Condition], cex=0.5, oma=c(4,4,6,12)) # legend("right", levels(Condition), border="white", fill=color.vec, bty="n") # pairs(iasva.sv[,1:5], main="IA-SVA", pch=20, col=color.vec[Geo_Lib_Cat], # bg=color.vec[Geo_Lib_Cat], cex=0.5, oma=c(4,4,6,12)) # legend("right", levels(Geo_Lib_Cat), border="white", fill=color.vec, bty="n") # } else { par(mar=c(5.1, 4.1, 4.1, 8.2), xpd=TRUE) pairs(iasva.sv, main="IA-SVA", pch=20, col=color.vec[CellType], bg=color.vec[CellType], cex=0.5, oma=c(4,4,6,12)) legend(0.85,0.5, levels(CellType), border="white", fill=color.vec, bty="n") pairs(iasva.sv, main="IA-SVA", pch=20, col=color.vec[Condition], bg=color.vec[Condition], cex=0.5, oma=c(4,4,6,12)) legend("right", levels(Condition), border="white", fill=color.vec, bty="n") pairs(iasva.sv, main="IA-SVA", pch=20, col=color.vec[Geo_Lib_Cat], bg=color.vec[Geo_Lib_Cat], cex=0.5, oma=c(4,4,6,12)) legend("right", levels(Geo_Lib_Cat), border="white", fill=color.vec, bty="n", title = "Genes Detected") # }
iasva_vars <- cbind(iasva.sv, Geo_Lib) corrplot(cor(iasva_vars))
# iterative analysis of SVs an modules all_sv_res <- list() par(mfrow = c(4,2), mar = c(4,2,2,2)) for (j in 1:ncol(iasva.sv)) { sv_genes <- all_markers[[j]] design <- model.matrix(~iasva.sv[,j]) fit <- eBayes(lmFit(counts.filt, design)) tt <- topTable(fit, coef=2, number=Inf, genelist = rownames(counts.filt)) # module analysis fg <- tt[sv_genes,] fg.ord <- fg[order(fg$logFC, decreasing = T),] res <- tmodHGtest(fg = fg.ord$ID, bg = tt$ID) if (is.null(res)) {} else { res.ord <- res[order(res$E, decreasing = T),] # append results all_sv_res[[j]] <- res.ord names(all_sv_res)[j] <- names(all_markers)[j] if (nrow(res.ord) > 1) { # make plots bp1 <- barplot(res.ord$E, main = paste(names(all_markers)[j], " Module Enrichment", sep = ""), xlab = "", horiz = T, cex.main = 2, cex.lab = 2) #barplot(res.ord$E, xlab = "Module Enrichment", main = paste(names(all_markers)[j], " Genes", sep = ""), horiz = T) par(xpd=TRUE) text(x = max(res.ord$E)/2, y = bp1[,1], labels = paste(res.ord$Title, " (", res$b, "/", res$B, ")", sep = ""), cex = 1, font = 2) } } }
anno.col <- data.frame(Condition = Condition, Genes_Detect = Geo_Lib_Cat, CellType = CellType) rownames(anno.col) <- colnames(markers_pct1) # how many markers are there dim(markers_pct1) pheatmap(log(markers_pct1+1), show_colnames = FALSE, show_rownames = TRUE, clustering_method = "ward.D2", fontsize_row = 10, annotation_col = anno.col)
# load results par(mfrow = c(1,1)) par(mar=c(5.1, 4.1, 4.1, 8.2), xpd=TRUE) plot(tsne.res.iasva$Y[,1:2], main="IA-SVA", xlab="tSNE Dim1", ylab="tSNE Dim2", pch=20, col=color.vec[CellType], bg=color.vec[CellType], oma=c(4,4,6,12), cex = 0.5) legend(40, 20, levels(CellType), border="white", fill=color.vec, bty="n") par(mar=c(5, 4, 4, 2), xpd=FALSE) plot(tsne.res.iasva$Y[,1:2], main="IA-SVA", xlab="tSNE Dim1", ylab="tSNE Dim2", pch=20, col=color.vec[Geo_Lib_Cat], bg=color.vec[Geo_Lib_Cat], oma=c(4,4,6,12), cex = 0.5) legend("topright", levels(Geo_Lib_Cat), border="white", fill=color.vec, bty="n", title = "Genes Detected") plot(tsne.res.iasva$Y[,1:2], main="IA-SVA", xlab="tSNE Dim1", ylab="tSNE Dim2", pch=20, col=color.vec[Condition], bg=color.vec[Condition], oma=c(4,4,6,12), cex = 0.5) legend("topright", levels(Condition), border="white", fill=color.vec, bty="n")
# load in seurat annotations imm.comb <- readRDS("/Users/lawlon/Documents/CZI/Seurat_Public_Data/immune_alignment_expression_matrices/Seurat/Kang.2017.PBMC.stimulated.Seurat.immune.combined.Rds") imm.meta <- imm.comb@meta.data imm.meta <- cbind(imm.meta, imm.comb@ident) colnames(imm.meta)[ncol(imm.meta)] <- "celltype" dim(imm.meta) all.meta <- imm.meta imm.meta <- imm.meta[imm.meta$celltype == "CD8 T" & imm.meta$stim == "STIM",] # Set up control object stim <- CreateSeuratObject(raw.data = stim.data[, rownames(imm.meta)], project = "IMMUNE_STIM", min.cells = 5) stim@meta.data$stim <- "STIM" stim <- FilterCells(stim, subset.names = "nGene", low.thresholds = 500, high.thresholds = Inf) stim <- NormalizeData(stim) stim <- ScaleData(stim, display.progress = F) # Gene selection for input to CCA stim <- FindVariableGenes(stim, do.plot = F) g.2 <- head(rownames(stim@hvg.info), 2000) genes.use <- g.2 # combine data stim.data.sel <- stim.data[genes.use, rownames(stim@meta.data)] all.counts <- stim.data.sel dim(all.counts) # overdispersed genes #saveRDS(all.counts, file = "immune_control_stimulated_filtered_counts.Rds")
# calculate lib size #pdf(file = paste(name, cell_interest, "IASVA.genes.detected.hist.pdf", sep = ".")) Num_Detected_Genes <- colSums(all.counts > 0) hist(Num_Detected_Genes, col = "dodgerblue4", breaks = 20, ylab = "Number of Cells") summ <- summary(Num_Detected_Genes) legend("topright", legend = paste(names(summ), summ)) abline(v = 500, col = "red", lty = 2) legend("right", legend = paste("< 500 genes = ", length(which(Num_Detected_Genes < 500)), " Cells\n", "> 500 genes = ", length(which(Num_Detected_Genes > 500)), " Cells", sep = "")) #dev.off()
Here, the only known factors are stimulation, celltype, and library size
# make vector of condition Condition <- factor(imm.meta$stim) CellType <- factor(imm.meta$celltype) # normalize counts counts.filt <- normalize(all.counts) Geo_Lib <- colSums(log(counts.filt+1)) # categorize lib size Geo_Lib_Cat <- cut(Num_Detected_Genes, breaks = 4) summary(Geo_Lib) barplot(Geo_Lib, xlab="Cell", las=2, ylab = "Geometric Library Size") lcounts <- log(counts.filt + 1) lcounts <- as.matrix(lcounts) # PC1 and Geometric library size correlation pc1 = irlba(lcounts - rowMeans(lcounts), 1)$v[,1] ## partial SVD cor(Geo_Lib, pc1)
tsne.res <- Rtsne(unique(t(lcounts)), dims = 2) #saveRDS(tsne.res, file = paste(name, "tsne.preIASVA.Rds", sep = ".")) # by celltype par(mar=c(5.1, 4.1, 4.1, 8.1), xpd=TRUE) plot(tsne.res$Y[,1:2], main="Pre IA-SVA", xlab="tSNE Dim1", ylab="tSNE Dim2", pch=20, col=color.vec[CellType], bg=color.vec[CellType], cex = 0.5) par(xpd=TRUE) legend(50,20, levels(CellType), border="white", fill=color.vec, bty="n", title = "Cell Type") # by condition par(mar=c(5, 4, 4, 2), xpd=FALSE) plot(tsne.res$Y[,1:2], main="Pre IA-SVA", xlab="tSNE Dim1", ylab="tSNE Dim2", pch=20, col=color.vec[Condition], bg=color.vec[Condition], oma=c(4,4,6,12), cex = 0.5) legend("topright", levels(Condition), border="white", fill=color.vec, bty="n", title = "Condition") # by genes detect plot(tsne.res$Y[,1:2], main="Pre IA-SVA", xlab="tSNE Dim1", ylab="tSNE Dim2", pch=20, col=color.vec[Geo_Lib_Cat], bg=color.vec[Geo_Lib_Cat], oma=c(4,4,6,12), cex = 0.5) legend("topright", levels(Geo_Lib_Cat), border="white", fill=color.vec, bty="n", title = "Genes Detected")
# run iasva mod <- model.matrix(~Geo_Lib) summ_exp <- SummarizedExperiment(assays = as.matrix(counts.filt)) iasva.res<- fast_iasva(summ_exp, mod[,-1, drop = F], verbose=FALSE, pct.cutoff = pct_cutt, num.sv = 10) # load results iasva.sv <- iasva.res$sv #saveRDS(iasva.res, file = paste(name, "IASVA", pct_cutt, "output.Rds", sep = ".")) markers_pct1 <- iasva::find_markers(Y = summ_exp, iasva.sv = iasva.sv) # revised find markers function source("/Users/lawlon/iasva_markers_update/R/find_markers.R") all_markers <- .GlobalEnv$find_markers(Y = summ_exp, iasva.sv = iasva.sv) # all_markers <- readRDS("/Users/lawlon/Documents/CZI/Seurat_Public_Data/immune_alignment_expression_matrices/IASVA/All_Cells/Kang.2017.all.cells.IASVA.by.SV.markers.Rds") #saveRDS(markers_pct1, file = paste(name, "IASVA", pct_cutt, "markers.Rds", sep = ".")) #tsne on markers set.seed(100) if (ncol(markers_pct1) > 100) { perplex <- 30 } else { perplex <- round(ncol(markers_pct1)/4) } tsne.res.iasva <- Rtsne(unique(t(log(markers_pct1+1))), dims = 2, perplexity = perplex) # saveRDS(tsne.res.iasva, file = paste(name, cell_interest, stim_cond, "IASVA", pct_cutt, "tsne.markers.Rds", sep = "."))
ncol(iasva.sv) # if (ncol(iasva.sv) > 5) { # pairs(iasva.sv[,1:5], main="IA-SVA", pch=20, col=color.vec[CellType], # bg=color.vec[CellType], cex=0.5, oma=c(4,4,6,12)) # legend("right", levels(CellType), border="white", fill=color.vec, bty="n") # pairs(iasva.sv[,1:5], main="IA-SVA", pch=20, col=color.vec[Condition], # bg=color.vec[Condition], cex=0.5, oma=c(4,4,6,12)) # legend("right", levels(Condition), border="white", fill=color.vec, bty="n") # pairs(iasva.sv[,1:5], main="IA-SVA", pch=20, col=color.vec[Geo_Lib_Cat], # bg=color.vec[Geo_Lib_Cat], cex=0.5, oma=c(4,4,6,12)) # legend("right", levels(Geo_Lib_Cat), border="white", fill=color.vec, bty="n") # } else { par(mar=c(5.1, 4.1, 4.1, 8.2), xpd=TRUE) pairs(iasva.sv, main="IA-SVA", pch=20, col=color.vec[CellType], bg=color.vec[CellType], cex=0.5, oma=c(4,4,6,12)) legend(0.85,0.5, levels(CellType), border="white", fill=color.vec, bty="n") pairs(iasva.sv, main="IA-SVA", pch=20, col=color.vec[Condition], bg=color.vec[Condition], cex=0.5, oma=c(4,4,6,12)) legend("right", levels(Condition), border="white", fill=color.vec, bty="n") pairs(iasva.sv, main="IA-SVA", pch=20, col=color.vec[Geo_Lib_Cat], bg=color.vec[Geo_Lib_Cat], cex=0.5, oma=c(4,4,6,12)) legend("right", levels(Geo_Lib_Cat), border="white", fill=color.vec, bty="n", title = "Genes Detected") # }
iasva_vars <- cbind(iasva.sv, Geo_Lib) corrplot(cor(iasva_vars))
# iterative analysis of SVs an modules all_sv_res <- list() par(mfrow = c(4,2), mar = c(4,2,2,2)) for (j in 1:ncol(iasva.sv)) { sv_genes <- all_markers[[j]] design <- model.matrix(~iasva.sv[,j]) fit <- eBayes(lmFit(counts.filt, design)) tt <- topTable(fit, coef=2, number=Inf, genelist = rownames(counts.filt)) # module analysis fg <- tt[sv_genes,] if (nrow(fg) > 1) { fg.ord <- fg[order(fg$logFC, decreasing = T),] } else { fg.ord <- fg } res <- tmodHGtest(fg = fg.ord$ID, bg = tt$ID) if (is.null(res)) {} else { res.ord <- res[order(res$E, decreasing = T),] # append results all_sv_res[[j]] <- res.ord names(all_sv_res)[j] <- names(all_markers)[j] if (nrow(res.ord) > 1) { # make plots bp1 <- barplot(res.ord$E, main = paste(names(all_markers)[j], " Module Enrichment", sep = ""), xlab = "", horiz = T, cex.main = 2, cex.lab = 2) #barplot(res.ord$E, xlab = "Module Enrichment", main = paste(names(all_markers)[j], " Genes", sep = ""), horiz = T) par(xpd=TRUE) text(x = max(res.ord$E)/2, y = bp1[,1], labels = paste(res.ord$Title, " (", res$b, "/", res$B, ")", sep = ""), cex = 1, font = 2) } } }
anno.col <- data.frame(Condition = Condition, Genes_Detect = Geo_Lib_Cat, CellType = CellType) rownames(anno.col) <- colnames(markers_pct1) # how many markers are there dim(markers_pct1) pheatmap(log(markers_pct1+1), show_colnames = FALSE, show_rownames = TRUE, clustering_method = "ward.D2", fontsize_row = 10, annotation_col = anno.col)
# load results par(mfrow = c(1,1)) par(mar=c(5.1, 4.1, 4.1, 8.2), xpd=TRUE) plot(tsne.res.iasva$Y[,1:2], main="IA-SVA", xlab="tSNE Dim1", ylab="tSNE Dim2", pch=20, col=color.vec[CellType], bg=color.vec[CellType], oma=c(4,4,6,12), cex = 0.5) legend(40, 20, levels(CellType), border="white", fill=color.vec, bty="n") par(mar=c(5, 4, 4, 2), xpd=FALSE) plot(tsne.res.iasva$Y[,1:2], main="IA-SVA", xlab="tSNE Dim1", ylab="tSNE Dim2", pch=20, col=color.vec[Geo_Lib_Cat], bg=color.vec[Geo_Lib_Cat], oma=c(4,4,6,12), cex = 0.5) legend("topright", levels(Geo_Lib_Cat), border="white", fill=color.vec, bty="n", title = "Genes Detected") plot(tsne.res.iasva$Y[,1:2], main="IA-SVA", xlab="tSNE Dim1", ylab="tSNE Dim2", pch=20, col=color.vec[Condition], bg=color.vec[Condition], oma=c(4,4,6,12), cex = 0.5) legend("topright", levels(Condition), border="white", fill=color.vec, bty="n")
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.