library(BiocStyle)
knitr::opts_chunk$set(error=FALSE, message=FALSE, warning=FALSE)

Downloading the data

We obtain a single-cell RNA sequencing dataset of the mouse brain from @jessa2019stalled. Counts for endogenous genes are available from ArrayExpress using the accession number E-MTAB-6946. We download and cache them using the r Biocpkg("BiocFileCache") package.

library(BiocFileCache)
bfc <- BiocFileCache("raw_data", ask = FALSE)
raw.path <- bfcrpath(bfc, "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE133531&format=file")
tmp <- tempfile()
untar(raw.path, exdir=tmp)
list.files(tmp)

Creating a SingleCellExperiment from each entry.

library(DropletUtils)
nms <- c("GSM3934450_ET_CT_12", "GSM3934451_ET_CT_15",
    "GSM3934452_PT_CT_0", "GSM3934453_C57BL6-P3-cortex",
    "GSM3934454_C57BL6-P6-cortex", "GSM3934455_ET_PO_12",
    "GSM3934456_ET_PO_15", "GSM3934457_PT_PO_0",
    "GSM3934458_C57BL6-P3-pons", "GSM3934459_C57BL6-P6-pons")

library(BiocParallel)
all.prefixes <- file.path(tmp, paste0(nms, "_"))
names(all.prefixes) <- nms
sce <- read10xCounts(all.prefixes, type="prefix", BPPARAM=MulticoreParam())
sce

Filling in the metadata

Filling in the per-sample metadata.

all.meta <- list()
for (i in seq_along(nms)) {
    all.meta[[i]] <- read.delim(file.path(tmp, paste0(nms[i], ".metadata.tsv.gz")))
    all.meta[[i]]$.Sample <- nms[i]
}
all.meta <- do.call(rbind, all.meta)
head(all.meta)

Matching to the columns of sce.

obs <- DataFrame(Sample=all.meta$.Sample, Barcode=paste0(all.meta$Cell, "-1"))
stopifnot(all(obs %in% colData(sce)))
m <- match(colData(sce), obs)
all.meta <- DataFrame(all.meta[m,], row.names=NULL)

# Auto-filling cell and sample information, even for the cells that weren't used.
all.meta$Cell <- sub("-1", "", sce$Barcode)
stopifnot(all(!grepl("-", all.meta$Cell)))

rename <- split(all.meta$Sample, all.meta$.Sample)
rename <- lapply(rename, unique)
stopifnot(all(lengths(rename)==1L))
all.meta$Sample <- unlist(rename)[sce$Sample]
table(all.meta$Sample, useNA="always")

colData(sce) <- all.meta
colnames(sce) <- NULL
sce$retained <- !is.na(m)
sce$.Sample <- NULL
summary(sce$retained)

Moving the dimensionality reduction results to a better home.

cd <- colData(sce)
pc.names <- c("PC1", "PC2")
t.names <- c("tSNE_1", "tSNE_2")
reducedDims(sce) <- list(Sample_PC=as.matrix(cd[,pc.names]), Sample_tSNE=as.matrix(cd[,t.names]))

keep <- colnames(cd) %in% c(pc.names, t.names)
cd <- cd[,!keep,drop=FALSE]
colnames(cd)[colnames(cd)=="Cluster"] <- "Sample_Cluster"
colnames(cd)[colnames(cd)=="Cluster_number"] <- "Sample_Cluster_number"
colData(sce) <- cd
sce

Adding joint metadata

Adding the joint forebrain data.

fb.path <- bfcrpath(bfc, 
    "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE133531&format=file&file=GSE133531%5FForebrain%5Fjoin%2E2D%2Etsv%2Egz")
fb.meta <- DataFrame(read.delim(fb.path))
fb.meta

obs <- fb.meta[,c("Cell", "Sample")]
obs$Cell <- sub(".*_", "", obs$Cell)
ref <- colData(sce)[,c("Cell", "Sample")]
stopifnot(all(obs %in% ref))
m <- match(ref, obs)
summary(is.na(m))
fb.meta <- fb.meta[m,]

u.names <- c("UMAP1", "UMAP2")
reducedDim(sce, "Forebrain_PC") <- as.matrix(fb.meta[,pc.names])
reducedDim(sce, "Forebrain_tSNE") <- as.matrix(fb.meta[,t.names])
reducedDim(sce, "Forebrain_UMAP") <- as.matrix(fb.meta[,u.names])

sce$Forebrain_Joint_cluster_number <- fb.meta$Joint_cluster_number
table(sce$Forebrain_Joint_cluster_number)
sce$Forebrain_Joint_cluster <- fb.meta$Joint_cluster
table(sce$Forebrain_Joint_cluster)

Repeating the dose with the Pons data.

pons.path <- bfcrpath(bfc, 
    "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE133531&format=file&file=GSE133531%5FPons%5Fjoin%2E2D%2Etsv%2Egz")
pons.meta <- DataFrame(read.delim(pons.path))
pons.meta

obs <- pons.meta[,c("Cell", "Sample")]
obs$Cell <- sub(".*_", "", obs$Cell)
ref <- colData(sce)[,c("Cell", "Sample")]
stopifnot(all(obs %in% ref))
m <- match(ref, obs)
summary(is.na(m))
pons.meta <- pons.meta[m,]

u.names <- c("UMAP1", "UMAP2")
reducedDim(sce, "Pons_PC") <- as.matrix(pons.meta[,pc.names])
reducedDim(sce, "Pons_tSNE") <- as.matrix(pons.meta[,t.names])
reducedDim(sce, "Pons_UMAP") <- as.matrix(pons.meta[,u.names])

sce$Pons_Joint_cluster_number <- pons.meta$Joint_cluster_number
table(sce$Pons_Joint_cluster_number)
sce$Pons_Joint_cluster <- pons.meta$Joint_cluster
table(sce$Pons_Joint_cluster)

Saving to file

We now save all of the relevant components to file for upload to r Biocpkg("ExperimentHub").

path <- file.path("scRNAseq", "jessa-brain", "2.6.0")
dir.create(path, showWarnings=FALSE, recursive=TRUE)
saveRDS(assay(sce), file=file.path(path, "counts.rds"))
saveRDS(colData(sce), file=file.path(path, "coldata.rds"))
saveRDS(rowData(sce), file=file.path(path, "rowdata.rds"))
saveRDS(as.list(reducedDims(sce)), file=file.path(path, "reddims.rds"))

Session information {-}

sessionInfo()


drisso/scRNAseq documentation built on Feb. 16, 2021, 1:18 a.m.