library(BiocStyle) knitr::opts_chunk$set(error=FALSE, message=FALSE, warning=FALSE)
We obtain a single-cell RNA sequencing dataset of haematopoietic stem-progenitor cells from @bunis2021singlecell.
Counts for endogenous genes are available from the Gene Expression Omnibus
using the accession number GSE158490.
We download and cache them using the r Biocpkg("BiocFileCache")
package.
library(BiocFileCache) bfc <- BiocFileCache("raw_data", ask = FALSE) mat.path <- bfcrpath(bfc, "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE158490&format=file&file=GSE158490%5Fmatrix%2Emtx%2Egz") mat <- Matrix::readMM(mat.path) dim(mat)
Creating a SingleCellExperiment
object:
library(SingleCellExperiment) sce <- SingleCellExperiment(list(counts=mat)) cd.path <- bfcrpath(bfc, "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE158490&format=file&file=GSE158490%5Fbarcodes%2Etsv%2Egz") colnames(sce) <- readLines(cd.path) rd.path <- bfcrpath(bfc, "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE158490&format=file&file=GSE158490%5Fgenes%2Etsv%2Egz") rd <- read.table(rd.path) colnames(rd) <- c("ID", "Symbol") rowData(sce) <- rd rownames(sce) <- rowData(sce)$ID sce
Attaching some metadata.
The SNG.1ST
column specifies the sample for each cell, ranging from adult bone marrow (APB
), fetal bone marrow (FS
) and newborn umbilical cord (UCB
).
Various other demuxlet statistics are also reported here.
We won't expand this out to all 720k columns of mat
to save some space, given that most downstream applications will only care about the observed cells.
We can import this data into our sce
, using the importDemux()
function of r Biocpkg("dittoSeq")
to retain selected metadata.
(Indeed, yes it is an odd placement for this function which should be split off into some other demultiplexing-focused package.)
We can then convert the ABM/UCB/FBM of Samples into age groups of samples.
demux.path <- bfcrpath(bfc, "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE158490&format=file&file=GSE158490%5FHSPC%2Ebest%2Etxt%2Egz") demux <- read.delim(demux.path, check.names=FALSE) stopifnot(all(demux$BARCODE %in% colnames(sce))) dim(demux) sce <- dittoSeq::importDemux(sce, demuxlet.best = demux, verbose = FALSE) # Remove the unnecessary (single-value here) Lane metadata sce$Lane <- NULL # Remove the "CD4_" from start of sample names, and replace "APB" and "FS" # with "ABM" and "FBM" which reflect the cells put through bulk RNAseq to # generate genotyping for Demuxlet, rather than these cells. sce$Sample <- gsub("^CD4_", "", sce$Sample) sce$Sample <- gsub("^FS", "FBM", sce$Sample) sce$Sample <- gsub("^APB", "ABM", sce$Sample) # Add age metadata sce$age <- NA sce$age[grep("^F", sce$Sample)] <- "fetal" sce$age[grep("^U", sce$Sample)] <- "newborn" sce$age[grep("^A", sce$Sample)] <- "adult" colData(sce)
From the fully processed objects shared on figshare, we will obtain cell type annotations and developmental stage scores.
full.path <- bfcrpath(bfc, "https://ndownloader.figshare.com/files/25953740") raw_meta <- readRDS(full.path)@meta.data stopifnot(all(rownames(raw_meta) %in% colnames(sce))) dim(raw_meta) # Add 'retained' metadata representing barcodes retained by authors. sce$retained <- colnames(sce) %in% rownames(raw_meta) summary(sce$retained) # Add celltypes authors_meta <- raw_meta[colnames(sce),] rownames(authors_meta) <- colnames(sce) sce$labels <- authors_meta$trajectory_calls table(sce$labels) # Add Developmental Stage Scoring as a DataFrame # Retrieve and rename a bit better DevStageScores <- DataFrame( HSCMPP_scores = authors_meta$MPP.RFScore, HSCMPP_inTraining = authors_meta$MPP.inTraining, GMP_scores = authors_meta$GMP.RFScore, GMP_inTraining = authors_meta$GMP.inTraining, MEP_scores = authors_meta$MEP.RFScore, MEP_inTraining = authors_meta$MEP.inTraining, row.names = rownames(authors_meta) ) sce$DevStageScoring <- DevStageScores sce$DevStageScoring[sce$retained,]
To save space, we only save the colData
for non-NA
rows.
We will fill these in later.
keep <- colnames(sce) %in% c(demux$BARCODE, rownames(raw_meta)) summary(keep)
We now save all of the relevant components to file for upload to r Biocpkg("ExperimentHub")
.
path <- file.path("scRNAseq", "bunis-hspc", "2.6.0") dir.create(path, showWarnings=FALSE, recursive=TRUE) saveRDS(assay(sce), file=file.path(path, "counts.rds")) saveRDS(colData(sce)[keep,], file=file.path(path, "coldata.rds")) saveRDS(rowData(sce), file=file.path(path, "rowdata.rds"))
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.