Reanalysis of http://www.cell.com/cell/fulltext/S0092-8674(18)30116-8
Based on http://satijalab.org/seurat/mca
NB This analysis is run on a large RAM server as the PCA takes 128GB of RAM, to run on a laptop reduce the number of hv.genes as in the seurat tutorial
wget -O ../data/previous_studies/MCA/MCA.zip https://www.dropbox.com/s/dl/8d8t4od38oojs6i/MCA.zip unzip ../data/previous_studies/MCA/MCA.zip -d ../data/previous_studies/
library(Matrix) library(Seurat) mca.matrix <- readRDS(file = "../data/previous_studies/MCA/MCA_merged_mat.rds") mca.metadata <- read.csv("../data/previous_studies/MCA/MCA_All-batch-removed-assignments.csv", row.names = 1) mca <- CreateSeuratObject(raw.data = mca.matrix, meta.data = mca.metadata, project = "MouseCellAtlas") # Only keep annotated cells mca <- SubsetData(mca, cells.use = rownames(mca@meta.data[!is.na(mca@meta.data$ClusterID), ]), do.clean = TRUE) # Leaves us with 242k cells mca
mca <- NormalizeData(object = mca, normalization.method = "LogNormalize", scale.factor = 10000) mca <- FindVariableGenes(object = mca, mean.function = ExpMean, dispersion.function = LogVMR, do.plot = FALSE) hv.genes <- head(rownames(mca@hvg.info), 10000)
# mito.genes <- grep(pattern = "^mt-", x = rownames(x = mca@data), value = TRUE) # percent.mito <- Matrix::colSums(mca@raw.data[mito.genes, ])/Matrix::colSums(mca@raw.data) # mca <- AddMetaData(object = mca, metadata = percent.mito, col.name = "percent.mito") mca <- ScaleData(object = mca, genes.use = hv.genes, display.progress = TRUE)
mca <- RunPCA(object = mca, pcs.compute = 3, do.print = TRUE, pcs.print = 1:3, genes.print = 25, pc.genes = hv.genes)
library(data.table) mca_pca <- data.table(mca@dr$pca@cell.embeddings, keep.rownames=T) mca_pca$Tissue <- mca@meta.data$Tissue saveRDS(mca_pca, "../data/mca_pca2.rds") tissue_cols <- rep("black", nlevels(mca_pca$Tissue)) names(tissue_cols) <- levels(mca_pca$Tissue) c1 <- RColorBrewer::brewer.pal(9, "Set1") tissue_cols["Testis"] <- c1[1] tissue_cols["Thymus"] <- c1[2] tissue_cols[grep("Stem-Cell", levels(mca_pca$Tissue))] <- c1[3] tissue_cols[grep("Lactation", levels(mca_pca$Tissue))] <- c1[4] tissue_cols[grep("Blood", levels(mca_pca$Tissue))] <- c1[5] tissue_cols["Prostate"] <- c1[6] tissue_cols["Small-Intestine"] <- c1[7] tissue_cols["Fetal_Brain"] <- c1[8] tissue_cols["Uterus"] <- c1[9] #devtools::install_github('VPetukhov/ggrastr') library(ggrastr) plot <- ggplot(mca_pca, aes(PC1, PC2, colour=Tissue)) + geom_point_rast(size=1, alpha=0.3, stroke=0) + scale_color_manual(values = tissue_cols) + theme(legend.text = element_text(size=8), legend.position = "bottom") + guides(colour = guide_legend(override.aes = list(size=4, alpha=1), title = "Tissue", ncol=6)) pdf("../results/MCA_PCA.pdf", height=6, width = 10.5) plot dev.off() pdf("../results/MCA_PCA_by_tissue.pdf", width = 20) ggplot(tmp, aes(PC1,PC2, colour=Tissue=="Testis")) + geom_point_rast(data=mca_pca[,-"Tissue", with=F], size=1, alpha=0.3, stroke=0, colour="grey") + geom_point_rast(size=1, alpha=0.3, stroke=0) + facet_wrap(~Tissue) + theme_grey() dev.off()
mca_t <- CreateSeuratObject(raw.data = mca.matrix, meta.data = mca.metadata, project = "MouseCellAtlas") # Only keep annotated cells mca_t <- SubsetData(mca, cells.use = rownames(mca@meta.data[!is.na(mca@meta.data$ClusterID),][mca@meta.data$Tissue=="Testis",]), do.clean = TRUE) mca_t mca_t <- NormalizeData(object = mca_t, normalization.method = "LogNormalize", scale.factor = 10000) mca_t <- FindVariableGenes(object = mca_t, mean.function = ExpMean, dispersion.function = LogVMR, do.plot = FALSE) hv.genes_t <- head(rownames(mca_t@hvg.info), 3000) mca_t <- ScaleData(object = mca_t, genes.use = hv.genes_t, display.progress = TRUE, do.par = FALSE, num.cores = 2) mca_t <- RunPCA(object = mca_t, pcs.compute = 10, do.print = TRUE, pcs.print = 1:5, genes.print = 25) pdf() FeaturePlot(mca_t, c("Prm2"), reduction.use = "pca") FeaturePlot(mca_t, c("Prm2"), reduction.use = "pca", dim.1 = 1, dim.2 = 2) FeaturePlot(mca_t, c("Prm2"), reduction.use = "pca", dim.1 = 2, dim.2 = 3) FeaturePlot(mca_t, c("Prm2"), reduction.use = "pca", dim.1 = 1, dim.2 = 4) FeaturePlot(mca_t, c("Prm2"), reduction.use = "pca", dim.1 = 2, dim.2 = 4) FeaturePlot(mca_t, c("Prm2"), reduction.use = "pca", dim.1 = 3, dim.2 = 4) dev.off() pdf() FeaturePlot(mca_t, c("Smcp"), reduction.use = "pca") FeaturePlot(mca_t, c("Insl3"), reduction.use = "pca", dim.1 = 3, dim.2 = 4) FeaturePlot(mca_t, c("Tyrobp"), reduction.use = "pca", dim.1 = 3, dim.2 = 4) FeaturePlot(mca_t, c("Dcn"), reduction.use = "pca", dim.1 = 3, dim.2 = 4) dev.off()
mca_t <- RunTSNE(object = mca_t, reduction.use = "pca", dims.use = 1:10, nthreads = 2, max_iter = 1000, perplexity=60) pdf() DimPlot(object = mca_t, reduction.use = "tsne", do.return = TRUE, group.by ="ClusterID", pt.size = 0.5) + ggtitle("Cluster ID") + scale_color_manual(values = c(RColorBrewer::brewer.pal(9, "Set1"), RColorBrewer::brewer.pal(8, "Set2"),"black","red")) dev.off() tmp <- DimPlot(object = mca_t, reduction.use = "tsne", do.return = TRUE, group.by ="ClusterID")$data ggplot(tmp, aes(tSNE_1, tSNE_2, colour=ident=="Testis_16")) + geom_point() pdf() FeaturePlot(mca_t, c("Prm2"), reduction.use = "tsne", no.legend = FALSE) FeaturePlot(mca_t, c("Aard"), reduction.use = "tsne", no.legend = FALSE) FeaturePlot(mca_t, c("Dcn"), reduction.use = "tsne", no.legend = FALSE) FeaturePlot(mca_t, c("Insl3"), reduction.use = "tsne", no.legend = FALSE) FeaturePlot(mca_t, c("Lyz2"), reduction.use = "tsne", no.legend = FALSE) FeaturePlot(mca_t, c("Apoe"), reduction.use = "tsne", no.legend = FALSE) FeaturePlot(mca_t, c("Tyrobp"), reduction.use = "tsne", no.legend = FALSE) FeaturePlot(mca_t, c("Prdm9"), reduction.use = "tsne", no.legend = FALSE) FeaturePlot(mca_t, c("Ssxb1"), reduction.use = "tsne", no.legend = FALSE) FeaturePlot(mca_t, c("Hmgb4"), reduction.use = "tsne", no.legend = FALSE) dev.off()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.