library(BiocStyle) knitr::opts_chunk$set(error=FALSE, message=FALSE, warning=FALSE)
We obtain a single-nucleus RNA sequencing dataset of mouse brains from Hu et al. (2017). Counts for endogenous genes and antibody-derived tags are available from the Gene Expression Omnibus using the accession number GSE106678.
library(BiocFileCache) bfc <- BiocFileCache("raw_data", ask = FALSE) tarred <- bfcrpath(bfc, "https://www.ncbi.nlm.nih.gov/geo/download/?acc=GSE106678&format=file") temp <- tempfile() dir.create(temp) untar(tarred, exdir=temp) list.files(temp)
We also create a metadata template:
meta <- list( title="Dissecting cell-type composition and activity-dependent transcriptional state in mammalian brains by massively parallel single-nucleus RNA-Seq [%s only]", description="Massively parallel single-cell RNA sequencing can precisely resolve cellular diversity in a high-throughput manner at low cost, but unbiased isolation of intact single cells from complex tissues, such as adult mammalian brains, is challenging. Here, we integrate sucrose-gradient assisted purification of nuclei with droplet microfluidics to develop a highly scalable single-nucleus RNA-Seq approach (sNucDrop-Seq), which is free of enzymatic dissociation and nuclei sorting. By profiling ~18,000 nuclei isolated from cortical tissues of adult mice, we demonstrate that sNucDrop-Seq not only accurately reveals neuronal and non-neuronal subtype composition with high sensitivity, but also enables in-depth analysis of transient transcriptional states driven by neuronal activity, at single-cell resolution, in vivo. Maintainer note: this dataset contains only the %s cells from the study.", taxonomy_id="10090", genome="GRCm38", sources=list( list(provider="GEO", id="GSE106678"), list(provider="PubMed", id="29220646") ), maintainer_name="Aaron Lun", maintainer_email="infinite.monkeys.with.keyboards@gmail.com" )
We clear out any existing output directory:
output.dir <- "2023-12-20_output" unlink(output.dir, recursive=TRUE) dir.create(output.dir)
We load in each of the files.
library(scuttle) counts <- list() for (x in list.files(temp, full.names=TRUE)) { prefix <- sub("^[^_]*_", "", x) prefix <- sub("_.*", "", prefix) counts[[prefix]] <- readSparseCounts(x) } do.call(rbind, lapply(counts, dim))
For some unknown reason, each matrix has its own set of features! Crazy. At least the intersection is of a reasonable size.
length(Reduce(intersect, lapply(counts, rownames)))
I can't be sure that the missing features in a given matrix have all-zero expression values.
So, I will make the executive decision of taking the intersection across matrices and cbind
'ing them together into a single matrix for each system (i.e., cortex or 3T3).
If you don't like that, take it up with the authors.
Combining the 3T3 matrices for cell and nuclei:
counts.3T3 <- counts[c("cell-3T3", "nuclei-3T3")] common.3T3 <- Reduce(intersect, lapply(counts.3T3, rownames)) combined.3T3 <- do.call(cbind, lapply(counts.3T3, function(x) x[common.3T3,])) dim(combined.3T3)
Slapping together a SingleCellExperiment
:
library(SingleCellExperiment) sce <- SingleCellExperiment(list(counts=combined.3T3)) sce$protocol <- rep(sub("-3T3$", "", names(counts.3T3)), vapply(counts.3T3, ncol, 0L)) table(sce$protocol)
Adding some polish to optimize for space:
library(scRNAseq) sce <- polishDataset(sce) sce
And saving it:
copy <- meta copy$title <- sprintf(copy$title, "3T3") copy$description <- sprintf(copy$description, "3T3") saveDataset(sce, file.path(output.dir, "3T3"), copy)
Defining the sample identities:
samples <- c( "nuclei-ctx-1", "nuclei-ctx-2", "nuclei-ctx-3", "nuclei-ctx-4", "nuclei-ctx-5", "nuclei-ctx-6", "nuclei-ctx-7", "nuclei-ctx-8", "nuclei-ctx-9", "nuclei-ctx-10", "nuclei-ctx-11", "nuclei-ctx-12", "nuclei-ctx-13", "nuclei-ctx-saline1", "nuclei-ctx-PTZ1", "nuclei-ctx-saline2", "nuclei-ctx-PTZ2" ) # Checking that everyone is accounted for: cortex.samples <- sort(names(counts)[grep("^nuclei-ctx-", names(counts))]) stopifnot(identical(cortex.samples, sort(samples)))
Combining the cortex matrices across samples:
counts.cortex <- counts[samples] common.cortex <- Reduce(intersect, lapply(counts.cortex, rownames)) combined.cortex <- do.call(cbind, lapply(counts.cortex, function(x) x[common.cortex,])) dim(combined.cortex)
Slapping together a SingleCellExperiment
:
library(SingleCellExperiment) sce <- SingleCellExperiment(list(counts=combined.cortex))
Adding some more information about each sample.
replicator <- rep(seq_along(samples), vapply(counts.cortex, ncol, 0L)) treatment <- rep(NA_character_, length(samples)) treatment[grep("^nuclei-ctx-saline[0-9]+$", samples)] <- "saline" treatment[grep("^nuclei-ctx-PTZ[0-9]+$", samples)] <- "PTZ" sce$treatment <- treatment[replicator] table(sce$treatment, useNA="always") animal <- as.integer(sub("[^0-9]+([0-9]+)$", "\\1", samples)) stopifnot(!anyNA(animal)) sce$animal <- animal[replicator] table(sce$animal)
Polishing it up:
sce <- polishDataset(sce) sce
And saving it:
copy <- meta copy$title <- sprintf(copy$title, "cortex") copy$description <- sprintf(copy$description, "cortex") saveDataset(sce, file.path(output.dir, "cortex"), copy)
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.