library(BiocStyle) knitr::opts_chunk$set(error=FALSE, message=FALSE, warning=FALSE)
We obtain a single-cell RNA sequencing dataset of the mouse brain from @zeisel2015brain.
Counts for endogenous genes, mitochondrial genes, repeats and spike-in transcripts are available from http://linnarssonlab.org/cortex.
We download and cache them using the r Biocpkg("BiocFileCache")
package.
library(BiocFileCache) bfc <- BiocFileCache("raw_data", ask = FALSE) base.url <- file.path("https://storage.googleapis.com", "linnarsson-lab-www-blobs/blobs/cortex") mRNA.path <- bfcrpath(bfc, file.path(base.url, "expression_mRNA_17-Aug-2014.txt")) rep.path <- bfcrpath(bfc, file.path(base.url, "expression_rep_17-Aug-2014.txt")) mito.path <- bfcrpath(bfc, file.path(base.url, "expression_mito_17-Aug-2014.txt")) spike.path <- bfcrpath(bfc, file.path(base.url, "expression_spikes_17-Aug-2014.txt"))
We define a simple utility function for loading data in from each file.
Each file contains some metadata, so we create a SingleCellExperiment
object to accommodate both the counts and the metadata.
library(SingleCellExperiment) readFormat <- function(infile) { # First column is empty. metadata <- read.delim(infile, stringsAsFactors=FALSE, header=FALSE, nrow=10)[,-1] rownames(metadata) <- metadata[,1] metadata <- metadata[,-1] metadata <- DataFrame(t(metadata), check.names=FALSE) # Cleaning up aspects of coercion to a DataFrame. to.coerce <- c("group #", "total mRNA mol", "well", "sex", "age", "diameter") for (x in colnames(metadata)) { if (x %in% to.coerce) { FUN <- as.numeric } else { FUN <- as.character } metadata[[x]] <- FUN(metadata[[x]]) } rownames(metadata) <- NULL # First column after row names is some useless filler. counts <- read.delim(infile, stringsAsFactors=FALSE, header=FALSE, row.names=1, skip=11)[,-1] counts <- as.matrix(counts) colnames(counts) <- metadata$cell_id SingleCellExperiment(list(counts=counts), colData=metadata) }
Using this function, we read in the counts for the endogenous genes, ERCC spike-in transcripts and mitochondrial genes.
endo.data <- readFormat(mRNA.path) endo.data rep.data <- readFormat(rep.path) rep.data spike.data <- readFormat(spike.path) spike.data mito.data <- readFormat(mito.path) mito.data
We need to rearrange the columns for the mitochondrial data, as the order is not consistent with the other files.
m <- match(endo.data$cell_id, mito.data$cell_id) mito.data <- mito.data[,m]
# Should all be the same. stopifnot(identical(colData(endo.data), colData(spike.data))) stopifnot(identical(colData(endo.data), colData(rep.data))) stopifnot(identical(colData(endo.data), colData(mito.data)))
We combine all components into a single SingleCellExperiment
object.
sce <- rbind(endo.data, mito.data, spike.data, rep.data) rowData(sce)$featureType <- rep(c("endogenous", "mito", "ERCC", "repeat"), c(nrow(endo.data), nrow(mito.data), nrow(spike.data), nrow(rep.data))) sce
We now save all of the relevant components of sce
to file for upload to r Biocpkg("ExperimentHub")
.
path <- file.path("scRNAseq", "zeisel-brain", "2.0.0") dir.create(path, showWarnings=FALSE, recursive=TRUE) saveRDS(counts(sce), file=file.path(path, "counts.rds")) saveRDS(rowData(sce), file=file.path(path, "rowdata.rds")) saveRDS(colData(sce), file=file.path(path, "coldata.rds"))
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.