# Document setup: attach BiocStyle and set knitr chunk defaults.
# error = FALSE makes knitr stop on a failing chunk rather than printing the
# error; messages and warnings are left out of the rendered output.
library(BiocStyle)
knitr::opts_chunk$set(
  error = FALSE,
  message = FALSE,
  warning = FALSE
)
We obtain a single-cell RNA sequencing dataset of human pancreas from @lawlor2017singlecell.
Counts for endogenous genes are available from the Gene Expression Omnibus
using the accession number GSE86469.
We download and cache it using the `r Biocpkg("BiocFileCache")` package.
library(BiocFileCache)

# Open the file cache without prompting, then resolve the GEO supplementary
# count table to a path managed by that cache.
bfc <- BiocFileCache(ask = FALSE)
count.tab <- bfcrpath(
  bfc,
  file.path(
    "ftp://ftp.ncbi.nlm.nih.gov/geo/series/GSE86nnn/GSE86469/suppl",
    "GSE86469_GEO.islet.single.cell.processed.data.RSEM.raw.expected.counts.csv.gz"
  )
)
We then load it in as a sparse matrix.
library(scater)

# Read the comma-separated, double-quoted count table as a sparse matrix,
# taking row names from the first column.
# NOTE(review): recent Bioconductor releases appear to provide
# readSparseCounts() from scuttle -- confirm scater still exposes it.
counts <- readSparseCounts(
  count.tab,
  sep = ",",
  quote = '"',
  row.names = 1
)

# Report the matrix dimensions.
dim(counts)
We extract the metadata for this study using the `r Biocpkg("GEOquery")` package.
library(GEOquery)

# Take the first element of what getGEO() returns for this accession and pull
# out its phenotype (per-sample) table.
coldata <- getGEO("GSE86469")[[1]]
coldata <- pData(coldata)

library(S4Vectors)

# Convert to a DataFrame and discard the row names.
coldata <- as(coldata, "DataFrame")
rownames(coldata) <- NULL

# Show which metadata fields are available.
colnames(coldata)
We remove the constant columns, as these are unlikely to be interesting.
# Count the distinct non-missing values held by each metadata column.
nlevels <- vapply(
  coldata,
  FUN = function(x) length(unique(x[!is.na(x)])),
  FUN.VALUE = 1L
)

# Keep only the columns that vary across samples. drop = FALSE ensures the
# result is still a DataFrame even when a single column survives the filter;
# without it the subset would collapse to a bare vector.
coldata <- coldata[, nlevels > 1L, drop = FALSE]
We also remove the columns related to the GEO accession itself, as well as some redundant fields.
# Drop the GEO accession bookkeeping fields.
# drop = FALSE keeps the result a DataFrame even if only one column remains.
coldata <- coldata[
  ,
  !colnames(coldata) %in% c("geo_accession", "relation", "relation.1"),
  drop = FALSE
]

# Drop every column whose name starts with "characteristics_ch".
coldata <- coldata[
  ,
  !grepl("^characteristics_ch", colnames(coldata)),
  drop = FALSE
]
We convert all factors into character vectors, and we clean up the column names.
# Replace every factor column with its character representation; other
# columns are left untouched.
for (field in colnames(coldata)) {
  if (!is.factor(coldata[[field]])) {
    next
  }
  coldata[[field]] <- as.character(coldata[[field]])
}

# The first metadata column must match the count matrix column names exactly
# (same values, same order) before anything is saved.
stopifnot(identical(colnames(counts), coldata[, 1]))

# Strip a trailing ":ch1" or "_ch1" suffix from the column names.
colnames(coldata) <- sub("[:_]ch1$", "", colnames(coldata))

coldata
We now save all of the components to file for upload to `r Biocpkg("ExperimentHub")`.
These will be used to construct a `SingleCellExperiment` on the client side when the dataset is requested.
# Output directory for this dataset; created (with parents) if absent.
path <- file.path("scRNAseq", "lawlor-pancreas", "2.0.0")
dir.create(path, recursive = TRUE, showWarnings = FALSE)

# Serialize the count matrix and the column metadata as separate RDS files.
saveRDS(counts, file = file.path(path, "counts.rds"))
saveRDS(coldata, file = file.path(path, "coldata.rds"))
# Print the R version and the attached/loaded packages for this run.
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.