Reanalysis of http://www.cell.com/cell/fulltext/S0092-8674(18)30116-8

Based on http://satijalab.org/seurat/mca

NB This analysis is run on a large RAM server as the PCA takes 128GB of RAM, to run on a laptop reduce the number of hv.genes as in the seurat tutorial

wget -O ../data/previous_studies/MCA/MCA.zip https://www.dropbox.com/s/dl/8d8t4od38oojs6i/MCA.zip
unzip ../data/previous_studies/MCA/MCA.zip -d ../data/previous_studies/
library(Matrix)
library(Seurat)
mca.matrix <- readRDS(file = "../data/previous_studies/MCA/MCA_merged_mat.rds")
mca.metadata <- read.csv("../data/previous_studies/MCA/MCA_All-batch-removed-assignments.csv",
                         row.names = 1)

mca <- CreateSeuratObject(raw.data = mca.matrix, meta.data = mca.metadata, project = "MouseCellAtlas")
# Only keep annotated cells
mca <- SubsetData(mca, cells.use = rownames(mca@meta.data[!is.na(mca@meta.data$ClusterID), 
    ]), do.clean = TRUE)
# Leaves us with 242k cells
mca
mca <- NormalizeData(object = mca, normalization.method = "LogNormalize", scale.factor = 10000)

mca <- FindVariableGenes(object = mca, mean.function = ExpMean, dispersion.function = LogVMR, 
    do.plot = FALSE)
hv.genes <- head(rownames(mca@hvg.info), 10000)
# mito.genes <- grep(pattern = "^mt-", x = rownames(x = mca@data), value = TRUE)
# percent.mito <- Matrix::colSums(mca@raw.data[mito.genes, ])/Matrix::colSums(mca@raw.data)
# mca <- AddMetaData(object = mca, metadata = percent.mito, col.name = "percent.mito")

mca <- ScaleData(object = mca, genes.use = hv.genes, display.progress = TRUE)

PCA: Warning this step requires 125GB of RAM!

mca <- RunPCA(object = mca, pcs.compute = 3, do.print = TRUE, 
    pcs.print = 1:3, genes.print = 25, pc.genes = hv.genes)

Plot PCA

library(data.table)
mca_pca <- data.table(mca@dr$pca@cell.embeddings, keep.rownames=T)
mca_pca$Tissue <- mca@meta.data$Tissue
saveRDS(mca_pca, "../data/mca_pca2.rds")

tissue_cols <- rep("black", nlevels(mca_pca$Tissue))
names(tissue_cols) <- levels(mca_pca$Tissue)

c1 <- RColorBrewer::brewer.pal(9, "Set1")
tissue_cols["Testis"] <- c1[1]
tissue_cols["Thymus"] <- c1[2]
tissue_cols[grep("Stem-Cell", levels(mca_pca$Tissue))] <- c1[3]
tissue_cols[grep("Lactation", levels(mca_pca$Tissue))] <- c1[4]
tissue_cols[grep("Blood", levels(mca_pca$Tissue))] <- c1[5]
tissue_cols["Prostate"] <- c1[6]
tissue_cols["Small-Intestine"] <- c1[7]
tissue_cols["Fetal_Brain"] <- c1[8]
tissue_cols["Uterus"] <- c1[9]


#devtools::install_github('VPetukhov/ggrastr')
library(ggrastr)
plot <- ggplot(mca_pca, aes(PC1, PC2, colour=Tissue)) +
    geom_point_rast(size=1, alpha=0.3, stroke=0) +
    scale_color_manual(values = tissue_cols) +
    theme(legend.text = element_text(size=8), legend.position = "bottom") +
    guides(colour = guide_legend(override.aes = list(size=4, alpha=1),
                                 title = "Tissue", ncol=6))

pdf("../results/MCA_PCA.pdf", height=6, width = 10.5)
plot
dev.off()

pdf("../results/MCA_PCA_by_tissue.pdf", width = 20)
ggplot(tmp, aes(PC1,PC2, colour=Tissue=="Testis")) +
    geom_point_rast(data=mca_pca[,-"Tissue", with=F], size=1, alpha=0.3, stroke=0, colour="grey") +
    geom_point_rast(size=1, alpha=0.3, stroke=0) +
    facet_wrap(~Tissue) +
    theme_grey()
dev.off()

Analyse only Testis Cells

mca_t <- CreateSeuratObject(raw.data = mca.matrix, meta.data = mca.metadata, project = "MouseCellAtlas")
# Only keep annotated cells
mca_t <- SubsetData(mca, cells.use = rownames(mca@meta.data[!is.na(mca@meta.data$ClusterID),][mca@meta.data$Tissue=="Testis",]), do.clean = TRUE)
mca_t

mca_t <- NormalizeData(object = mca_t, normalization.method = "LogNormalize", scale.factor = 10000)

mca_t <- FindVariableGenes(object = mca_t, mean.function = ExpMean, dispersion.function = LogVMR, 
    do.plot = FALSE)
hv.genes_t <- head(rownames(mca_t@hvg.info), 3000)

mca_t <- ScaleData(object = mca_t, genes.use = hv.genes_t, display.progress = TRUE, do.par = FALSE, num.cores = 2)

mca_t <- RunPCA(object = mca_t, pcs.compute = 10, do.print = TRUE, 
    pcs.print = 1:5, genes.print = 25)

pdf()
FeaturePlot(mca_t, c("Prm2"), reduction.use = "pca")
FeaturePlot(mca_t, c("Prm2"), reduction.use = "pca", dim.1 = 1, dim.2 = 2)
FeaturePlot(mca_t, c("Prm2"), reduction.use = "pca", dim.1 = 2, dim.2 = 3)
FeaturePlot(mca_t, c("Prm2"), reduction.use = "pca", dim.1 = 1, dim.2 = 4)
FeaturePlot(mca_t, c("Prm2"), reduction.use = "pca", dim.1 = 2, dim.2 = 4)
FeaturePlot(mca_t, c("Prm2"), reduction.use = "pca", dim.1 = 3, dim.2 = 4)
dev.off()

pdf()
FeaturePlot(mca_t, c("Smcp"), reduction.use = "pca")
FeaturePlot(mca_t, c("Insl3"), reduction.use = "pca", dim.1 = 3, dim.2 = 4)
FeaturePlot(mca_t, c("Tyrobp"), reduction.use = "pca", dim.1 = 3, dim.2 = 4)
FeaturePlot(mca_t, c("Dcn"), reduction.use = "pca", dim.1 = 3, dim.2 = 4)
dev.off()

t-SNE

mca_t <- RunTSNE(object = mca_t, reduction.use = "pca", dims.use = 1:10,
    nthreads = 2, max_iter = 1000, perplexity=60)

pdf()
DimPlot(object = mca_t, reduction.use = "tsne", do.return = TRUE, group.by ="ClusterID", pt.size = 0.5) + ggtitle("Cluster ID") + scale_color_manual(values = c(RColorBrewer::brewer.pal(9, "Set1"), RColorBrewer::brewer.pal(8, "Set2"),"black","red"))
dev.off()


tmp <- DimPlot(object = mca_t, reduction.use = "tsne", do.return = TRUE, group.by ="ClusterID")$data
ggplot(tmp, aes(tSNE_1, tSNE_2, colour=ident=="Testis_16")) + geom_point()

pdf()
FeaturePlot(mca_t, c("Prm2"), reduction.use = "tsne", no.legend = FALSE)
FeaturePlot(mca_t, c("Aard"), reduction.use = "tsne", no.legend = FALSE)
FeaturePlot(mca_t, c("Dcn"), reduction.use = "tsne", no.legend = FALSE)
FeaturePlot(mca_t, c("Insl3"), reduction.use = "tsne", no.legend = FALSE)
FeaturePlot(mca_t, c("Lyz2"), reduction.use = "tsne", no.legend = FALSE)
FeaturePlot(mca_t, c("Apoe"), reduction.use = "tsne", no.legend = FALSE)
FeaturePlot(mca_t, c("Tyrobp"), reduction.use = "tsne", no.legend = FALSE)
FeaturePlot(mca_t, c("Prdm9"), reduction.use = "tsne", no.legend = FALSE)
FeaturePlot(mca_t, c("Ssxb1"), reduction.use = "tsne", no.legend = FALSE)
FeaturePlot(mca_t, c("Hmgb4"), reduction.use = "tsne", no.legend = FALSE)
dev.off()


MyersGroup/testisAtlas documentation built on Nov. 29, 2020, 9:21 p.m.