# BiocStyle supplies the Biocpkg() helper used in the text further down.
library(BiocStyle)

# Chunk defaults for the whole document: error = FALSE makes a failing chunk
# stop the build instead of embedding the error in the output, while messages
# and warnings are kept out of the rendered report.
knitr::opts_chunk$set(
    error = FALSE,
    message = FALSE,
    warning = FALSE
)

Download the data

We obtain a single-nucleus RNA sequencing dataset of mouse kidneys from @wu2019advantages. Gene-level count matrices and per-cell annotations are available from the Gene Expression Omnibus under the accession number GSE119531.

library(BiocFileCache)

# File cache rooted at "raw_data"; ask = FALSE avoids the interactive prompt
# when the cache directory has to be created.
bfc <- BiocFileCache("raw_data", ask = FALSE)
base.url <- "https://ftp.ncbi.nlm.nih.gov/geo/series/GSE119nnn/GSE119531/suppl"

# Map the name of a GEO supplementary file to its path in the local cache.
fetchSupp <- function(fname) {
    bfcrpath(bfc, file.path(base.url, fname))
}

# Healthy samples: count matrix and per-cell annotation.
healthy.raw <- fetchSupp("GSE119531_Healthy.combined.dge.txt.gz")
healthy.ann <- fetchSupp("GSE119531_Healthy.combined.cell.annotation.txt.gz")

# UUO (disease) samples: count matrix and per-cell annotation.
disease.raw <- fetchSupp("GSE119531_UUO.dge.txt.gz")
disease.ann <- fetchSupp("GSE119531_UUO.cell.annotation.txt.gz")

Processing the data

We load in each of the two sets of files. The cell barcodes in the count matrix differ slightly from those in the annotation table, so we adjust the matrix column names and check that they match the annotated barcodes exactly.

library(scater)

# Count matrix for the UUO (disease) samples.
disease.mat <- readSparseCounts(disease.raw)
dim(disease.mat)

# Tab-separated per-cell annotation; keep strings as character vectors.
disease.tab <- read.table(
    disease.ann,
    header = TRUE,
    sep = "\t",
    stringsAsFactors = FALSE
)
head(disease.tab)

# Replace the first "1_" in each matrix column name with "_", then require
# the names to agree exactly (values and order) with the annotated barcodes.
colnames(disease.mat) <- sub(
    pattern = "1_",
    replacement = "_",
    x = colnames(disease.mat)
)
stopifnot(identical(colnames(disease.mat), disease.tab$CellBarcode))

We repeat the same procedure for the healthy samples.

# Count matrix for the healthy samples.
healthy.mat <- readSparseCounts(healthy.raw)
dim(healthy.mat)

# Tab-separated per-cell annotation; keep strings as character vectors.
healthy.tab <- read.table(
    healthy.ann,
    header = TRUE,
    sep = "\t",
    stringsAsFactors = FALSE
)
head(healthy.tab)

# Rewrite the "sNuc.10x" prefix in the matrix column names as "sNuc-10x".
# fixed = TRUE makes the "." a literal dot; as a regular expression it would
# match any single character.
colnames(healthy.mat) <- sub(
    "sNuc.10x",
    "sNuc-10x",
    colnames(healthy.mat),
    fixed = TRUE
)

# The column names must now agree exactly (values and order) with the
# annotated barcodes.
stopifnot(identical(colnames(healthy.mat), healthy.tab$CellBarcode))

Save for upload

We now save all of the relevant components to file for upload to `r Biocpkg("ExperimentHub")`.

# Versioned output directory, relative to the current working directory.
repath <- file.path("scRNAseq", "wu-kidney", "2.4.0")
dir.create(repath, showWarnings = FALSE, recursive = TRUE)

# Objects to serialize, keyed by the base name of the RDS file each one is
# written to.
outputs <- list(
    `coldata-healthy` = healthy.tab,
    `counts-healthy` = healthy.mat,
    `coldata-disease` = disease.tab,
    `counts-disease` = disease.mat
)

for (stub in names(outputs)) {
    saveRDS(outputs[[stub]], file = file.path(repath, paste0(stub, ".rds")))
}

Session info

# Print the R version, platform and attached/loaded package versions used to
# build this document.
sessionInfo()

References



drisso/scRNAseq documentation built on Feb. 16, 2021, 1:18 a.m.