# BiocStyle supplies the Biocpkg() helper used in the inline R expressions of
# the text.
library(BiocStyle)
# Chunk defaults for the whole document: error=FALSE asks knitr to stop on the
# first error rather than continuing, while message=FALSE and warning=FALSE
# keep messages and warnings out of the rendered output.
knitr::opts_chunk$set(error=FALSE, message=FALSE, warning=FALSE)

Downloading the count data

We obtain a single-cell RNA sequencing dataset of mouse embryonic stem cells (ESCs) from @buettner2015computational, by downloading the files from ArrayExpress and caching the results with `r Biocpkg("BiocFileCache")`.

library(BiocFileCache)

# Cache kept in the local "raw_data" directory; ask=FALSE skips the
# interactive prompt.
bfc <- BiocFileCache("raw_data", ask=FALSE)

# Location of the processed-data archive for accession E-MTAB-2805.
archive.url <- file.path("https://www.ebi.ac.uk/arrayexpress",
    "files/E-MTAB-2805/E-MTAB-2805.processed.1.zip")

# Resolve the URL to a local path through the cache.
zip.path <- bfcrpath(bfc, archive.url)

# Extract the archive into the session's temporary directory.
exdir <- tempdir()
unzip(zipfile=zip.path, exdir=exdir)

Processing the read counts

We read in the count matrix for cells in each cell cycle phase.

# Load the count table for one cell cycle phase from the unzipped archive.
#
# phase: prefix of the file name, i.e. "<phase>_singlecells_counts.txt".
# dir:   directory holding the extracted files (defaults to 'exdir').
#
# Returns a list with two elements:
#   genes  - the first three columns of the table, kept as a data frame
#            (presumably per-gene annotation, going by how it is used later);
#   counts - every remaining column, coerced to a matrix.
# The first column of the file supplies the row names.
readPhase <- function(phase, dir=exdir) {
    fname <- paste0(phase, "_singlecells_counts.txt")
    tab <- read.delim(file.path(dir, fname), row.names=1)
    leading <- seq_len(3)
    list(genes=tab[,leading], counts=as.matrix(tab[,-leading]))
}

# Each phase is read the same way; the results are unpacked into the
# variables that the remaining sections use.
phase.g1 <- readPhase("G1")
gene.g1 <- phase.g1[["genes"]]
g1 <- phase.g1[["counts"]]
dim(g1)

phase.s <- readPhase("S")
gene.s <- phase.s[["genes"]]
s <- phase.s[["counts"]]
dim(s)

phase.g2m <- readPhase("G2M")
gene.g2m <- phase.g2m[["genes"]]
g2m <- phase.g2m[["counts"]]
dim(g2m)

We apply some sanity checks to ensure that the rows are the same across matrices.

# The leading columns (and row names) read for S and G2M must match those read
# for G1 exactly; otherwise stop before the matrices are combined.
stopifnot(
    identical(gene.g1, gene.s),
    identical(gene.g1, gene.g2m)
)

Combining and storing metadata

# Bind the per-phase matrices side by side, in G1, S, G2M order.
combined <- cbind(g1, s, g2m)
library(S4Vectors)

# One phase label per column of 'combined', following the cbind() order.
cells.per.phase <- c(ncol(g1), ncol(s), ncol(g2m))
phase.labels <- rep(c("G1", "S", "G2M"), times=cells.per.phase)
coldata <- DataFrame(phase=phase.labels)
coldata

# Row-level metadata taken from the leading columns of the G1 table.
rowdata <- DataFrame(gene.g1)
rowdata

Saving for upload

We save these objects to file for upload to `r Biocpkg("ExperimentHub")`.

# Output directory, created (with parents) if it does not exist yet.
repath <- file.path("scRNAseq", "buettner-esc", "2.0.0")
dir.create(repath, showWarnings=FALSE, recursive=TRUE)

# Each object is written to "<name>.rds" inside the output directory.
outputs <- list(counts=combined, coldata=coldata, rowdata=rowdata)
for (nm in names(outputs)) {
    saveRDS(outputs[[nm]], file=file.path(repath, paste0(nm, ".rds")))
}

Session information

# Print the R version, platform and package versions in use when this
# document was built.
sessionInfo()

References



drisso/scRNAseq documentation built on Feb. 16, 2021, 1:18 a.m.