library(BiocStyle)

# Chunk defaults for the whole document: stop rendering on errors, and keep
# messages and warnings out of the rendered output.
knitr::opts_chunk$set(
    error = FALSE,
    message = FALSE,
    warning = FALSE
)

Downloading the data

We obtain a single-cell RNA sequencing dataset of liver-resident immune cells from multiple donors, published by @zhao2020singlecell. Counts for endogenous genes are available from the Gene Expression Omnibus under the accession number GSE125188. We download and cache them using the `r Biocpkg("BiocFileCache")` package.

# Set up a local cache in "raw_data" so that files fetched through
# bfcrpath() can be reused on later runs instead of being downloaded again.
library(BiocFileCache)
bfc <- BiocFileCache("raw_data", ask = FALSE)

# Fetch the CD45+ count matrix (MatrixMarket format) and read it in.
mat.path <- bfcrpath(bfc, 
    "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE125188&format=file&file=GSE125188%5FCD45%2B%5Fmatrix%2Emtx%2Egz")
mat <- Matrix::readMM(mat.path)

# Convert to a numeric, general, column-compressed sparse matrix (dgCMatrix).
# Coercing directly to the concrete class with as(mat, "dgCMatrix") is
# deprecated since Matrix 1.5-0; going through the virtual classes gives the
# same result and also works with older Matrix versions.
mat <- as(as(as(mat, "dMatrix"), "generalMatrix"), "CsparseMatrix")

We assemble the count matrix, the feature annotation and the cell barcodes into a `SingleCellExperiment` object.

# Gene annotation: a headerless two-column table that we label as ID and
# Symbol, using the ID column as the row names as well.
gene.path <- bfcrpath(bfc,
    "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE125188&format=file&file=GSE125188%5FCD45%2B%5Fgenes%2Etsv%2Egz")
gene.info <- setNames(read.table(gene.path, header=FALSE), c("ID", "Symbol"))
row.names(gene.info) <- gene.info[["ID"]]

# Cell barcodes: one per line, used as the column names of the count matrix.
bc.path <- bfcrpath(bfc,
    "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE125188&format=file&file=GSE125188%5FCD45%2B%5Fbarcodes%2Etsv%2Egz")
colnames(mat) <- readLines(bc.path)

# Bundle the counts and the gene annotation into a single object, then
# print it as a quick sanity check.
library(SingleCellExperiment)
sce <- SingleCellExperiment(assays=list(counts=mat), rowData=gene.info)
sce

Downloading the metadata

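We download the cell-level metadata. Not all cells are present in the metadata, so we expand it to one row per column of `sce` (filling in `NA` for cells without an entry) and record which cells have a metadata entry in the `retained` field.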

# Fetch the cell identity table for the CD45+ dataset.
meta.path <- bfcrpath(bfc, 
    "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE125188&format=file&file=GSE125188%5FCD45%2B%5Fcell%5Fidentities%2Etsv%2Egz")
meta <- read.table(meta.path, header=TRUE)

# For every column of 'sce', find its row in the metadata; cells without a
# metadata entry get NA here and therefore an all-NA row in 'expanded'.
idx <- match(colnames(sce), meta[["Barcode"]])
expanded <- meta[idx,]
colData(sce) <- cbind(colData(sce), expanded)

# Overwrite the Barcode field so that it is filled in for every cell, and
# flag the cells that did have a metadata entry.
sce$Barcode <- colnames(sce)
sce$retained <- !is.na(idx)

colData(sce)

We also need to collate the fine-grained annotations, which are provided in a separate file for each broad cell type (ASC, B, Mo and NKT).

# The per-type annotation files share the same URL layout; only the cell type
# label in the middle of the file name differs.
pattern1 <- "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE125188&format=file&file=GSE125188%5F" 
pattern2 <- "%5Fcell%5Fidentities%2Etsv%2Egz"

# One entry per cell in 'sce'. Cells that do not appear in any of the
# per-type files keep NA in both vectors.
broad <- fine <- rep(NA_character_, ncol(sce))
for (type in c("ASC", "B", "Mo", "NKT")) {
    sub.path <- bfcrpath(bfc, paste0(pattern1, type, pattern2))

    # Read strings as character regardless of the R version: before R 4.0.0
    # the default was to create factors.
    out <- read.delim(sub.path, stringsAsFactors=FALSE)

    # Every annotated barcode must correspond to a cell in 'sce'.
    m <- match(out$Barcode, colnames(sce))
    stopifnot(!anyNA(m))

    broad[m] <- type
    # as.character() guards against 'Group' being a factor, in which case
    # the assignment would store the integer level codes rather than labels.
    fine[m] <- as.character(out$Group)
}

# Summarize the assignments, including the number of unannotated cells.
table(broad, useNA="always")
table(fine, useNA="always")

sce$broad <- broad
sce$fine <- fine

Saving to file

We now save all of the relevant components to file for upload to `r Biocpkg("ExperimentHub")`.

# Output directory for this dataset; created (with parents) if needed.
out.dir <- file.path("scRNAseq", "zhao-immune-liver", "2.6.0")
dir.create(out.dir, recursive=TRUE, showWarnings=FALSE)

# Each component of 'sce' is serialized to its own RDS file, in the order
# counts, column data, row data.
components <- list(
    "counts.rds"=assay(sce),
    "coldata.rds"=colData(sce),
    "rowdata.rds"=rowData(sce)
)
for (fname in names(components)) {
    saveRDS(components[[fname]], file=file.path(out.dir, fname))
}

Session information

# Print the R version and the loaded/attached packages used for this run.
sessionInfo()

References



drisso/scRNAseq documentation built on Feb. 16, 2021, 1:18 a.m.