library(BiocStyle) knitr::opts_chunk$set(error=FALSE, message=FALSE, warning=FALSE)
We obtain a single-cell RNA sequencing dataset of the mouse mammary gland from Bach et al. (2017). we define all of the samples to be pulled down.
accessions <- c( "GSM2834498", "GSM2834499", "GSM2834500", "GSM2834501", "GSM2834502", "GSM2834503", "GSM2834504", "GSM2834505") samples <- c( "NP_1", "NP_2", "G_1", "G_2", "L_1", "L_2", "PI_1", "PI_2") conditions <- c(NP="Nulliparous", G="Gestation", L="Lactation", PI="Post-involution")[sub("_.*", "", samples)] data.frame(accessions, samples, conditions)
We then download and cache the assorted count matrices using the r Biocpkg("BiocFileCache")
package.
library(BiocFileCache) bfc <- BiocFileCache("raw_data", ask = FALSE) library(Matrix) library(S4Vectors) library(DelayedArray) template.path <- "ftp://ftp.ncbi.nlm.nih.gov/geo/samples/%snnn/%s/suppl" collected.counts <- collected.rowdata <- collected.coldata <- list() for (i in seq_along(accessions)) { curacc <- accessions[i] subacc <- substr(curacc, 1, 7) base.path <- sprintf(template.path, subacc, curacc) header <- paste0(curacc, "%5F", sub("_", "%5F", samples[i]), "%5F") barcode.fname <- bfcrpath(bfc, file.path(base.path, paste0(header, "barcodes%2Etsv%2Egz"))) gene.fname <- bfcrpath(bfc, file.path(base.path, paste0(header, "genes%2Etsv%2Egz"))) counts.fname <- bfcrpath(bfc, file.path(base.path, paste0(header, "matrix%2Emtx%2Egz"))) current.counts <- as(readMM(counts.fname), "dgCMatrix") collected.counts[[i]] <- current.counts gene.info <- read.table(gene.fname, stringsAsFactors=FALSE) colnames(gene.info) <- c("Ensembl", "Symbol") collected.rowdata[[i]] <- DataFrame(gene.info) collected.coldata[[i]] <- DataFrame( Barcode=readLines(barcode.fname), Sample=samples[i], Condition=conditions[i]) }
We verify that all of the row data matches up, and that all the dimensions are consistent.
stopifnot(length(unique(collected.rowdata))==1L) X <- vapply(collected.coldata, nrow, 0L) Y <- vapply(collected.counts, ncol, 0L) stopifnot(identical(X, Y)) X
We bind them all into one SingleCellExperiment
:
final.counts <- do.call(cbind, collected.counts) final.coldata <- do.call(rbind, collected.coldata) library(SingleCellExperiment) sce <- SingleCellExperiment( list(counts=final.counts), rowData=collected.rowdata[[1]], colData=final.coldata ) # Moving the Ensembl identifiers to the row names. rownames(sce) <- rowData(sce)$Ensembl rowData(sce)$Ensembl <- NULL # Polishing up the object. library(scRNAseq) sce <- polishDataset(sce) sce
And we save them to disk in preparation for upload:
meta <- list( title="Differentiation dynamics of mammary epithelial cells revealed by single-cell RNA sequencing", description="Characterising the hierarchy of mammary epithelial cells (MECs) and how they are regulated during adult development is important for understanding how breast cancer arises. Here we report the use of single-cell RNA sequencing to determine the gene expression profile of MECs across four developmental stages; nulliparous, mid gestation, lactation and post involution. Our analysis of 23,184 cells identifies 15 clusters, few of which could be fully characterised by a single marker gene. We argue instead that the epithelial cells—especially in the luminal compartment—should rather be conceptualised as being part of a continuous spectrum of differentiation. Furthermore, our data support the existence of a common luminal progenitor cell giving rise to intermediate, restricted alveolar and hormone-sensing progenitors. This luminal progenitor compartment undergoes transcriptional changes in response to a full pregnancy, lactation and involution. In summary, our results provide a global, unbiased view of adult mammary gland development.", taxonomy_id="10090", genome="GRCm38", sources=list( list(provider="GEO", id="GSE106273"), list(provider="PubMed", id="29225342") ), maintainer_name="Aaron Lun", maintainer_email="infinite.monkeys.with.keyboards@gmail.com" ) saveDataset(sce, "2023-12-14_output", meta)
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.