# Document setup: Bioconductor vignette styling helpers.
library(BiocStyle)

# Chunk defaults for the whole document: messages and warnings are kept out
# of the rendered output, and error = FALSE makes rendering stop on the first
# failing chunk rather than printing the error and carrying on.
knitr::opts_chunk$set(
    error = FALSE,
    message = FALSE,
    warning = FALSE
)
We obtain a single-cell RNA sequencing dataset of the mouse hypothalamus from @romanov2017molecular.
Counts for endogenous genes are available from the Gene Expression Omnibus
using the accession number GSE74672.
We download and cache it using the `r Biocpkg("BiocFileCache")` package.
library(BiocFileCache)

# File cache kept in the local "raw_data" directory; ask = FALSE skips the
# interactive confirmation when the cache directory has to be created.
bfc <- BiocFileCache("raw_data", ask = FALSE)

# GEO supplementary-file location for accession GSE74672.
base.url <- file.path(
    "ftp://ftp.ncbi.nlm.nih.gov/geo/series",
    "GSE74nnn/GSE74672/suppl"
)
count.url <- file.path(base.url, "GSE74672_expressed_mols_with_classes.xlsx.gz")

# Local path to the cached copy of the gzipped workbook (fetched through the
# cache when it is not already present).
count.file <- bfcrpath(bfc, count.url)
We load them into memory.
library(R.utils)

# Decompress the cached download into a temporary .xlsx file. remove = FALSE
# leaves the cached .gz in place so that the cache stays valid.
tmp.file <- tempfile(fileext=".xlsx")
gunzip(count.file, destname=tmp.file, remove=FALSE)

# WARNING: this requires >20 GB of memory!
# (The warning sits on its own line so that it cannot comment out the
# statements that follow it.)
library(readxl)

# col_names = FALSE: the sheet has no header row, so every row (metadata and
# counts alike) is read as data.
all.data <- read_xlsx(tmp.file, col_names=FALSE)

# The decompressed workbook is only needed for the read above; delete it
# rather than leaving a large temporary file around for the rest of the
# session.
unlink(tmp.file)

dim(all.data)
The first 9 rows correspond to metadata and need to be extracted.
# The first nine rows of the sheet hold the metadata; the first column of
# those rows carries the field names.
coldata <- all.data[seq_len(9), ]
colfields <- coldata[, 1, drop = TRUE]

# Drop the field-name column and transpose so that each remaining sheet
# column becomes one row. t() returns a matrix, which is turned back into a
# data frame without automatic row names.
coldata <- data.frame(t(coldata[, -1]), stringsAsFactors = FALSE)
rownames(coldata) <- NULL

library(S4Vectors)
coldata <- DataFrame(coldata)
colnames(coldata) <- colfields

# Fields 4 to 9 are converted to numeric; fields 1 to 3 are left as they are.
for (field in 4:9) {
    coldata[, field] <- as.numeric(coldata[, field])
}

coldata
The next three rows are empty, so we just skip them. We convert the remaining counts into a sparse matrix.
# Drop the nine metadata rows plus the three empty rows that follow them; what
# remains is one row per gene, with the gene name in the first column.
raw.counts <- all.data[-(1:12),]
counts <- as.matrix(raw.counts[,-1])

# The count rows share their columns with the metadata rows read from the
# same sheet, so as.matrix() may hand back a character matrix rather than a
# numeric one. Convert explicitly before building the sparse matrix; an
# already-numeric matrix is left untouched. Dimensions are preserved.
if (!is.numeric(counts)) {
    storage.mode(counts) <- "double"
}

library(Matrix)
counts <- as(counts, "dgCMatrix")

# Genes become the row names; columns are left unnamed.
rownames(counts) <- raw.counts[,1,drop=TRUE]
colnames(counts) <- NULL
dim(counts)
We now save all of the components to file for upload to `r Biocpkg("ExperimentHub")`.
These will be used to construct a `SingleCellExperiment` on the client side when the dataset is requested.
# Output directory: scRNAseq/romanov-brain/2.0.0, created along with any
# missing parents; no warning is shown if it already exists.
path <- file.path("scRNAseq", "romanov-brain", "2.0.0")
dir.create(path, showWarnings = FALSE, recursive = TRUE)

# One RDS file per component.
counts.path <- file.path(path, "counts.rds")
coldata.path <- file.path(path, "coldata.rds")
saveRDS(counts, file = counts.path)
saveRDS(coldata, file = coldata.path)
# Print the R version and the attached/loaded package versions used for this
# run, so that the processing can be reproduced later.
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.