# Document setup: load the Bioconductor vignette styling helpers and set the
# default knitr chunk options for the whole report.
library(BiocStyle)

knitr::opts_chunk$set(
  error = FALSE,    # do not carry on past a failing chunk
  message = FALSE,  # keep package start-up messages out of the report
  warning = FALSE   # keep warnings out of the report
)
We obtain a single-cell RNA sequencing dataset of mouse ESCs from Buettner et al. (2015),
by downloading the files from ArrayExpress and caching the results with `r Biocpkg("BiocFileCache")`.
library(BiocFileCache)

# Local cache directory for the downloaded count tables; 'ask = FALSE' creates
# it without an interactive prompt.
bfc <- BiocFileCache("raw_data", ask = FALSE)

# One count table per cell cycle phase (S, G2M and G1), each resolved to a
# path inside the cache.
s.path <- bfcrpath(
  bfc,
  "https://www.ebi.ac.uk/biostudies/files/E-MTAB-2805/S_singlecells_counts.txt"
)
g2m.path <- bfcrpath(
  bfc,
  "https://www.ebi.ac.uk/biostudies/files/E-MTAB-2805/G2M_singlecells_counts.txt"
)
g1.path <- bfcrpath(
  bfc,
  "https://www.ebi.ac.uk/biostudies/files/E-MTAB-2805/G1_singlecells_counts.txt"
)
We read in the count matrix for cells in each cell cycle phase.
# Read one phase-specific count table. The first column supplies the row
# names; the next three columns are set aside as per-row annotation and the
# remaining columns are returned as the count matrix.
read_phase_table <- function(path) {
  tab <- read.delim(path, row.names = 1)
  annot.cols <- 1:3
  list(
    genes = tab[, annot.cols],
    counts = as.matrix(tab[, -annot.cols])
  )
}

# G1 phase.
g1.tab <- read_phase_table(g1.path)
gene.g1 <- g1.tab$genes
g1 <- g1.tab$counts
dim(g1)

# S phase.
s.tab <- read_phase_table(s.path)
gene.s <- s.tab$genes
s <- s.tab$counts
dim(s)

# G2M phase.
g2m.tab <- read_phase_table(g2m.path)
gene.g2m <- g2m.tab$genes
g2m <- g2m.tab$counts
dim(g2m)
Applying some sanity checks to ensure that the rows are the same across matrices.
# The per-row annotation must match exactly between the three tables, otherwise
# the count matrices cannot be combined column-wise.
stopifnot(
  identical(gene.g1, gene.s),
  identical(gene.g1, gene.g2m)
)
# Stack the cells of all three phases side by side (G1, then S, then G2M).
combined <- cbind(g1, s, g2m)

library(S4Vectors)

# Per-cell annotation: the phase label is repeated once for every column
# contributed by the corresponding matrix, in the same order as in 'combined'.
coldata <- DataFrame(
  phase = rep(
    c("G1", "S", "G2M"),
    times = c(ncol(g1), ncol(s), ncol(g2m))
  )
)
coldata

# Per-row annotation, taken from the G1 table.
rowdata <- DataFrame(gene.g1)
rowdata
Assembling a `SingleCellExperiment` consisting of the genes (rows whose names start with `ENSMUSG`):
library(SingleCellExperiment)

# Rows whose names start with "ENSMUSG" are treated as genes; all other rows
# are dealt with separately.
is.gene <- grepl("^ENSMUSG", rownames(combined))

sce <- SingleCellExperiment(
  assays = list(counts = combined[is.gene, ]),
  colData = coldata,
  rowData = rowdata[is.gene, ]
)

# Label the main experiment so it can be told apart from alternative
# experiments stored in the same object.
mainExpName(sce) <- "gene"
Splitting out the spike-in counts:
library(scRNAseq)

# Spike-in rows are recognised by their "ERCC-" row-name prefix.
is.ercc <- grepl("^ERCC-", rownames(combined))

# Spike-in annotation computed for a volume of 1 and a dilution of 1000
# (the dataset description states these as 1 nL per cell and 1:1000).
spikedata <- countErccMolecules(volume = 1, dilution = 1000)

# We don't use 'rowdata' at all as this is non-informative for spike-ins.
# NOTE: this comment must sit on its own line; when it shared a line with the
# statements below, everything after the '#' was commented out and the ERCC
# alternative experiment was never created.
# 'drop = FALSE' keeps the counts as a matrix even if only one row matches.
spikes <- SummarizedExperiment(
  list(counts = combined[is.ercc, , drop = FALSE])
)

# Attach the spike-in annotation, reordered to match the rows of 'spikes'.
rowData(spikes) <- spikedata[rownames(spikes), ]

# Store the spike-ins alongside the genes as an alternative experiment.
altExp(sce, "ERCC") <- spikes
spikes
Shifting the diagnostics into the column data.
# Everything that is neither a gene nor an ERCC spike-in is treated as a
# per-cell diagnostic row.
is.other <- !(is.gene | is.ercc)

# Transpose so that each cell becomes a row, matching the layout of the
# column data.
diagnostics <- as.matrix(combined[is.other, ])
diagnostics <- t(diagnostics)

# Remove the row names of the transposed matrix before it is stored.
rownames(diagnostics) <- NULL

# Store the diagnostics as a nested data frame in a single 'metrics' column.
colData(sce)$metrics <- DataFrame(diagnostics)
Now polishing the object to save some space.
library(scRNAseq)

# Replace the object with its polished version, then display the result.
sce <- polishDataset(sce)

sce
We save the `SingleCellExperiment` to disk in preparation for upload.
# Metadata saved alongside the dataset: publication title and abstract (with a
# maintainer note appended), species, genome build, data sources and
# maintainer contact details.
meta <- list(
  title = "Computational analysis of cell-to-cell heterogeneity in single-cell RNA-sequencing data reveals hidden subpopulations of cells",
  description = "Recent technical developments have enabled the transcriptomes of hundreds of cells to be assayed in an unbiased manner, opening up the possibility that new subpopulations of cells can be found. However, the effects of potential confounding factors, such as the cell cycle, on the heterogeneity of gene expression and therefore on the ability to robustly identify subpopulations remain unclear. We present and validate a computational approach that uses latent variable models to account for such hidden factors. We show that our single-cell latent variable model (scLVM) allows the identification of otherwise undetectable subpopulations of cells that correspond to different stages during the differentiation of naive T cells into T helper 2 cells. Our approach can be used not only to identify cellular subpopulations but also to tease apart different sources of gene expression heterogeneity in single-cell transcriptomes. Maintainer note: alignment and quantification metrics are stored in a nested `metrics` data frame in the column data. Molecule counts for ERCC spike-ins are computed based on a volume of 1 nL per cell and a dilution of 1:1000.",
  taxonomy_id = "10090",
  genome = "GRCm38",
  sources = list(
    list(provider = "ArrayExpress", id = "E-MTAB-2805"),
    list(provider = "PubMed", id = "25599176")
  ),
  maintainer_name = "Aaron Lun",
  maintainer_email = "infinite.monkeys.with.keyboards@gmail.com"
)

# Write the object and its metadata to the dated output directory.
saveDataset(sce, "2023-12-14_output", meta)
# Record the R version and the loaded package versions for reproducibility.
utils::sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.