library(BiocStyle) knitr::opts_chunk$set(error=FALSE, message=FALSE, warning=FALSE)
We obtain a single-cell RNA sequencing dataset of the mouse brain from @jessa2019stalled.
Counts for endogenous genes are available from ArrayExpress using the accession number E-MTAB-6946.
We download and cache them using the r Biocpkg("BiocFileCache")
package.
library(BiocFileCache) bfc <- BiocFileCache("raw_data", ask = FALSE) raw.path <- bfcrpath(bfc, "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE133531&format=file") tmp <- tempfile() untar(raw.path, exdir=tmp) list.files(tmp)
Creating a SingleCellExperiment
from each entry.
library(DropletUtils) nms <- c("GSM3934450_ET_CT_12", "GSM3934451_ET_CT_15", "GSM3934452_PT_CT_0", "GSM3934453_C57BL6-P3-cortex", "GSM3934454_C57BL6-P6-cortex", "GSM3934455_ET_PO_12", "GSM3934456_ET_PO_15", "GSM3934457_PT_PO_0", "GSM3934458_C57BL6-P3-pons", "GSM3934459_C57BL6-P6-pons") library(BiocParallel) all.prefixes <- file.path(tmp, paste0(nms, "_")) names(all.prefixes) <- nms sce <- read10xCounts(all.prefixes, type="prefix", BPPARAM=MulticoreParam()) sce
Filling in the per-sample metadata.
all.meta <- list() for (i in seq_along(nms)) { all.meta[[i]] <- read.delim(file.path(tmp, paste0(nms[i], ".metadata.tsv.gz"))) all.meta[[i]]$.Sample <- nms[i] } all.meta <- do.call(rbind, all.meta) head(all.meta)
Matching to the columns of sce
.
obs <- DataFrame(Sample=all.meta$.Sample, Barcode=paste0(all.meta$Cell, "-1")) stopifnot(all(obs %in% colData(sce))) m <- match(colData(sce), obs) all.meta <- DataFrame(all.meta[m,], row.names=NULL) # Auto-filling cell and sample information, even for the cells that weren't used. all.meta$Cell <- sub("-1", "", sce$Barcode) stopifnot(all(!grepl("-", all.meta$Cell))) rename <- split(all.meta$Sample, all.meta$.Sample) rename <- lapply(rename, unique) stopifnot(all(lengths(rename)==1L)) all.meta$Sample <- unlist(rename)[sce$Sample] table(all.meta$Sample, useNA="always") colData(sce) <- all.meta colnames(sce) <- NULL sce$retained <- !is.na(m) sce$.Sample <- NULL summary(sce$retained)
Moving the dimensionality reduction results to a better home.
cd <- colData(sce) pc.names <- c("PC1", "PC2") t.names <- c("tSNE_1", "tSNE_2") reducedDims(sce) <- list(Sample_PC=as.matrix(cd[,pc.names]), Sample_tSNE=as.matrix(cd[,t.names])) keep <- colnames(cd) %in% c(pc.names, t.names) cd <- cd[,!keep,drop=FALSE] colnames(cd)[colnames(cd)=="Cluster"] <- "Sample_Cluster" colnames(cd)[colnames(cd)=="Cluster_number"] <- "Sample_Cluster_number" colData(sce) <- cd sce
Adding the joint forebrain data.
fb.path <- bfcrpath(bfc, "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE133531&format=file&file=GSE133531%5FForebrain%5Fjoin%2E2D%2Etsv%2Egz") fb.meta <- DataFrame(read.delim(fb.path)) fb.meta obs <- fb.meta[,c("Cell", "Sample")] obs$Cell <- sub(".*_", "", obs$Cell) ref <- colData(sce)[,c("Cell", "Sample")] stopifnot(all(obs %in% ref)) m <- match(ref, obs) summary(is.na(m)) fb.meta <- fb.meta[m,] u.names <- c("UMAP1", "UMAP2") reducedDim(sce, "Forebrain_PC") <- as.matrix(fb.meta[,pc.names]) reducedDim(sce, "Forebrain_tSNE") <- as.matrix(fb.meta[,t.names]) reducedDim(sce, "Forebrain_UMAP") <- as.matrix(fb.meta[,u.names]) sce$Forebrain_Joint_cluster_number <- fb.meta$Joint_cluster_number table(sce$Forebrain_Joint_cluster_number) sce$Forebrain_Joint_cluster <- fb.meta$Joint_cluster table(sce$Forebrain_Joint_cluster)
Repeating the dose with the Pons data.
pons.path <- bfcrpath(bfc, "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE133531&format=file&file=GSE133531%5FPons%5Fjoin%2E2D%2Etsv%2Egz") pons.meta <- DataFrame(read.delim(pons.path)) pons.meta obs <- pons.meta[,c("Cell", "Sample")] obs$Cell <- sub(".*_", "", obs$Cell) ref <- colData(sce)[,c("Cell", "Sample")] stopifnot(all(obs %in% ref)) m <- match(ref, obs) summary(is.na(m)) pons.meta <- pons.meta[m,] u.names <- c("UMAP1", "UMAP2") reducedDim(sce, "Pons_PC") <- as.matrix(pons.meta[,pc.names]) reducedDim(sce, "Pons_tSNE") <- as.matrix(pons.meta[,t.names]) reducedDim(sce, "Pons_UMAP") <- as.matrix(pons.meta[,u.names]) sce$Pons_Joint_cluster_number <- pons.meta$Joint_cluster_number table(sce$Pons_Joint_cluster_number) sce$Pons_Joint_cluster <- pons.meta$Joint_cluster table(sce$Pons_Joint_cluster)
We now save all of the relevant components to file for upload to r Biocpkg("ExperimentHub")
.
path <- file.path("scRNAseq", "jessa-brain", "2.6.0") dir.create(path, showWarnings=FALSE, recursive=TRUE) saveRDS(assay(sce), file=file.path(path, "counts.rds")) saveRDS(colData(sce), file=file.path(path, "coldata.rds")) saveRDS(rowData(sce), file=file.path(path, "rowdata.rds")) saveRDS(as.list(reducedDims(sce)), file=file.path(path, "reddims.rds"))
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.