library(BiocStyle)

# Document-wide chunk defaults: error = FALSE makes rendering stop at the
# first failing chunk, and messages and warnings are kept out of the output.
knitr::opts_chunk$set(
  error = FALSE,
  message = FALSE,
  warning = FALSE
)

Downloading the data

We obtain a single-cell RNA sequencing dataset of mouse spermatogenesis from @ernst2019staged. Counts for endogenous genes are available from ArrayExpress under the accession number E-MTAB-6946. We download and cache them using the `r Biocpkg("BiocFileCache")` package.

library(BiocFileCache)

# Download cache rooted at "raw_data"; ask = FALSE avoids the interactive
# confirmation prompt when the cache directory has to be created.
bfc <- BiocFileCache(cache = "raw_data", ask = FALSE)

# Archive holding "raw_counts.mtx" (read into cr.mat further down).
cr.zpath <- bfcrpath(
  bfc,
  rnames = "https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-6946/E-MTAB-6946.processed.3.zip"
)

# Archive holding "raw_counts_emptyDrops.mtx" (read into ed.mat further down).
ed.zpath <- bfcrpath(
  bfc,
  rnames = "https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-6946/E-MTAB-6946.processed.4.zip"
)

We read each of the count matrices into memory.

library(Matrix)

# Each archive is extracted into the session's temporary directory and the
# Matrix Market file inside it is loaded as a column-compressed sparse matrix.
#
# readMM() returns a triplet-form sparse matrix. Coercing it straight to the
# concrete class "dgCMatrix" is deprecated since Matrix 1.5-0, so the
# conversion goes through the virtual classes instead: numeric ("dMatrix"),
# general ("generalMatrix"), column-compressed ("CsparseMatrix"). The result
# is still a dgCMatrix, and the chain also works on older Matrix versions.

# Counts from the first archive.
unzip(cr.zpath, exdir = tempdir())
cr.mat <- readMM(file.path(tempdir(), "raw_counts.mtx"))
cr.mat <- as(as(as(cr.mat, "dMatrix"), "generalMatrix"), "CsparseMatrix")
dim(cr.mat)

# Counts from the second (emptyDrops) archive.
unzip(ed.zpath, exdir = tempdir())
ed.mat <- readMM(file.path(tempdir(), "raw_counts_emptyDrops.mtx"))
ed.mat <- as(as(as(ed.mat, "dMatrix"), "generalMatrix"), "CsparseMatrix")
dim(ed.mat)

We then wrap each count matrix in its own `SingleCellExperiment` object.

library(SingleCellExperiment)

# One object per count matrix, each storing its matrix as the "counts" assay.
# The bare object names print a summary of what was built.
cellranger <- SingleCellExperiment(assays = list(counts = cr.mat))
cellranger

emptydrops <- SingleCellExperiment(assays = list(counts = ed.mat))
emptydrops

Filling in the metadata

Adding gene-level metadata.

# Fetch (or reuse from the cache) the archive with the gene table for the
# 'cellranger' object and extract it into the temporary directory.
gene.path <- bfcrpath(
  bfc,
  rnames = "https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-6946/E-MTAB-6946.processed.2.zip"
)
unzip(zipfile = gene.path, exdir = tempdir())
gene.info <- read.delim(file = file.path(tempdir(), "genes.tsv"))

# Store the table as row metadata, then use its ID column as the row names.
rowData(cellranger) <- gene.info
rownames(cellranger) <- gene.info$ID
rowData(cellranger)

# Fetch (or reuse from the cache) the archive with the gene table for the
# 'emptydrops' object and extract it into the temporary directory.
gene.path <- bfcrpath(
  bfc,
  rnames = "https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-6946/E-MTAB-6946.processed.6.zip"
)
unzip(zipfile = gene.path, exdir = tempdir())
gene.info <- read.delim(file = file.path(tempdir(), "genes_emptyDrops.tsv"))

# Store the table as row metadata, then use its ID column as the row names.
rowData(emptydrops) <- gene.info
rownames(emptydrops) <- gene.info$ID
rowData(emptydrops)

Adding cell-level metadata.

# Fetch (or reuse from the cache) the archive with the per-cell annotation
# for the 'cellranger' object and extract it into the temporary directory.
cell.path <- bfcrpath(
  bfc,
  rnames = "https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-6946/E-MTAB-6946.processed.1.zip"
)
unzip(zipfile = cell.path, exdir = tempdir())
cell.info <- read.table(file = file.path(tempdir(), "cell_metadata.txt"))

# Replace the column metadata with the table; check.names = FALSE stops
# DataFrame() from rewriting the column names it receives.
colData(cellranger) <- DataFrame(cell.info, check.names = FALSE)
colData(cellranger)

# Fetch (or reuse from the cache) the archive with the per-cell annotation
# for the 'emptydrops' object and extract it into the temporary directory.
cell.path <- bfcrpath(bfc, "https://www.ebi.ac.uk/arrayexpress/files/E-MTAB-6946/E-MTAB-6946.processed.5.zip")
unzip(cell.path, exdir = tempdir())
cell.info <- read.table(file.path(tempdir(), "cell_metadata_emptyDrops.txt"))

# Replace the column metadata with the table. check.names = FALSE matches how
# the 'cellranger' column metadata is built, so that both objects keep the
# column names exactly as read.table() returned them.
colData(emptydrops) <- DataFrame(cell.info, check.names = FALSE)
colData(emptydrops)

Saving to file

We now save all of the relevant components to file for upload to `r Biocpkg("ExperimentHub")`.

# Output directory for the serialized components; created (with any missing
# parents) if needed, and left alone without a warning if it already exists.
path <- file.path("scRNAseq", "ernst-spermatogenesis", "2.6.0")
dir.create(path, recursive = TRUE, showWarnings = FALSE)

# Serialize one component into the output directory under the given file name.
save.component <- function(object, fname) {
  invisible(saveRDS(object, file = file.path(path, fname)))
}

# Counts, column metadata and row metadata of the 'cellranger' object.
save.component(assay(cellranger), "counts-cellranger.rds")
save.component(colData(cellranger), "coldata-cellranger.rds")
save.component(rowData(cellranger), "rowdata-cellranger.rds")

# Counts, column metadata and row metadata of the 'emptydrops' object.
save.component(assay(emptydrops), "counts-emptydrops.rds")
save.component(colData(emptydrops), "coldata-emptydrops.rds")
save.component(rowData(emptydrops), "rowdata-emptydrops.rds")

Session information {-}

# Record the R version and the loaded package versions used for this run.
utils::sessionInfo()

References



drisso/scRNAseq documentation built on Feb. 16, 2021, 1:18 a.m.