# Vignette setup: attach BiocStyle and set the default knitr chunk options
# (errors, messages and warnings are all switched off in the rendered output).
library(BiocStyle)
knitr::opts_chunk$set(
  error = FALSE,
  message = FALSE,
  warning = FALSE
)
We obtain a single-cell RNA sequencing dataset of 416B cells and trophoblasts from @lun2017assessing.
Counts for endogenous genes and spike-in transcripts are available from ArrayExpress
using the accession number E-MTAB-5522.
We download and cache it using the `r Biocpkg("BiocFileCache")` package.
# Fetch the processed-count archive through a BiocFileCache rooted at
# "raw_data", then unpack it into the session's temporary directory.
library(BiocFileCache)
bfc <- BiocFileCache("raw_data", ask = FALSE)
lun.zip <- bfcrpath(
  bfc,
  file.path(
    "https://www.ebi.ac.uk/arrayexpress/files",
    "E-MTAB-5522/E-MTAB-5522.processed.1.zip"
  )
)
unzip(lun.zip, exdir = tempdir())
We read in the metadata from the SDRF file:
library(S4Vectors)

# Fetch the SDRF sample sheet through the same cache and read it as a
# tab-delimited table, keeping the column headers verbatim and strings as
# character vectors.
lun.sdrf <- bfcrpath(
  bfc,
  file.path(
    "https://www.ebi.ac.uk/arrayexpress/files",
    "E-MTAB-5522/E-MTAB-5522.sdrf.txt"
  )
)
coldata <- read.delim(
  lun.sdrf,
  check.names = FALSE,
  stringsAsFactors = FALSE
)

# Convert to an S4Vectors DataFrame and list the available columns.
coldata <- as(coldata, "DataFrame")
colnames(coldata)
We keep only the experimentally interesting metadata, discarding columns that are duplicated or only have one level.
# Restrict the sample sheet to informative metadata columns.
#
# Every column subset below uses drop = FALSE so that `coldata` stays a
# two-dimensional table even if only one column survives a filter; later code
# indexes it as coldata[, 1] and coldata[m, ].

# Keep the first column (the cell IDs) plus every column whose header
# mentions "Characteristics" or "Factor".
keep <- grep("Characteristics|Factor", colnames(coldata))
coldata <- coldata[, c(1, keep), drop = FALSE]

# Reduce each header to the text inside its square brackets; headers without
# brackets are left unchanged by sub(). Only the first column for each
# resulting name is retained.
new.colnames <- sub(".*\\[(.*)\\]", "\\1", colnames(coldata))
u <- !duplicated(new.colnames)
coldata <- coldata[, u, drop = FALSE]
colnames(coldata) <- new.colnames[u]

# Discard columns holding a single distinct value across all rows.
has.multi.levels <- vapply(
  coldata,
  FUN = function(x) length(unique(x)) > 1L,
  FUN.VALUE = logical(1)
)
coldata <- coldata[, has.multi.levels, drop = FALSE]
head(coldata)
We load the counts into memory for the 416B cells.
# Read the two 416B count tables from the unpacked archive. The first column
# of each file supplies the row names, and column headers are kept verbatim.
plate1.416b <- read.delim(
  file.path(tempdir(), "counts_Calero_20160113.tsv"),
  header = TRUE,
  row.names = 1,
  check.names = FALSE
)
plate2.416b <- read.delim(
  file.path(tempdir(), "counts_Calero_20160325.tsv"),
  header = TRUE,
  row.names = 1,
  check.names = FALSE
)

# Both plates must list the same features in the same order before they can
# be combined column-wise.
stopifnot(identical(rownames(plate1.416b), rownames(plate2.416b)))
We extract the gene lengths and combine the matrices together:
# Take the gene lengths from the first plate as the row annotation and check
# that the second plate reports exactly the same values.
gene.lengths <- plate1.416b$Length
rowdata.416b <- DataFrame(Length = gene.lengths)
stopifnot(identical(gene.lengths, plate2.416b$Length))

# Drop the first column of each table (presumably the Length column - TODO
# confirm against the file layout) and convert the remainder to a matrix.
plate1.416b <- as.matrix(plate1.416b[, -1])
plate2.416b <- as.matrix(plate2.416b[, -1])

# Join the two plates side by side and report the combined dimensions.
counts.416b <- cbind(plate1.416b, plate2.416b)
dim(counts.416b)
We extract the relevant column data.
# Reorder the sample sheet so that its rows line up with the count matrix
# columns, matching on the first metadata column.
m <- match(colnames(counts.416b), coldata[, 1])
coldata.416b <- coldata[m, ]

# Guard against unmatched or misordered cells.
stopifnot(
  identical(colnames(counts.416b), coldata.416b[, 1])
)
And we save these to file for upload to `r Biocpkg("ExperimentHub")`.
# Write the 416B counts, row annotation and column annotation as RDS files
# under scRNAseq/lun-spikein/2.0.0, creating the directory tree if needed.
path <- file.path("scRNAseq", "lun-spikein", "2.0.0")
dir.create(path, showWarnings = FALSE, recursive = TRUE)

saveRDS(counts.416b, file = file.path(path, "counts-416b.rds"))
saveRDS(rowdata.416b, file = file.path(path, "rowdata-416b.rds"))
saveRDS(coldata.416b, file = file.path(path, "coldata-416b.rds"))
We repeat this process for the trophoblasts.
# Read the two trophoblast count tables from the unpacked archive. The first
# column of each file supplies the row names, and column headers are kept
# verbatim.
plate1.tropho <- read.delim(
  file.path(tempdir(), "counts_Liora_20160906.tsv"),
  header = TRUE,
  row.names = 1,
  check.names = FALSE
)
plate2.tropho <- read.delim(
  file.path(tempdir(), "counts_Liora_20170201.tsv"),
  header = TRUE,
  row.names = 1,
  check.names = FALSE
)

# Both plates must list the same features in the same order before they can
# be combined column-wise.
stopifnot(identical(rownames(plate1.tropho), rownames(plate2.tropho)))
We extract the gene lengths and combine the matrices together:
# Take the gene lengths from the first plate as the row annotation and check
# that the second plate reports exactly the same values.
gene.lengths <- plate1.tropho$Length
rowdata.tropho <- DataFrame(Length = gene.lengths)
stopifnot(identical(gene.lengths, plate2.tropho$Length))

# Drop the first column of each table (presumably the Length column - TODO
# confirm against the file layout) and convert the remainder to a matrix.
plate1.tropho <- as.matrix(plate1.tropho[, -1])
plate2.tropho <- as.matrix(plate2.tropho[, -1])

# Join the two plates side by side and report the combined dimensions.
counts.tropho <- cbind(plate1.tropho, plate2.tropho)
dim(counts.tropho)
We extract the relevant column data.
# Reorder the sample sheet so that its rows line up with the trophoblast
# count matrix columns, matching on the first metadata column.
m <- match(colnames(counts.tropho), coldata[, 1])
coldata.tropho <- coldata[m, ]

# Guard against unmatched or misordered cells.
stopifnot(
  identical(colnames(counts.tropho), coldata.tropho[, 1])
)
And we save these to file for upload to `r Biocpkg("ExperimentHub")`.
# Write the trophoblast counts, row annotation and column annotation as RDS
# files under scRNAseq/lun-spikein/2.0.0, creating the directory tree if
# needed.
path <- file.path("scRNAseq", "lun-spikein", "2.0.0")
dir.create(path, showWarnings = FALSE, recursive = TRUE)

saveRDS(counts.tropho, file = file.path(path, "counts-tropho.rds"))
saveRDS(rowdata.tropho, file = file.path(path, "rowdata-tropho.rds"))
saveRDS(coldata.tropho, file = file.path(path, "coldata-tropho.rds"))
# Print the R session details (R version, platform and loaded packages) so
# the environment that produced these files is recorded in the output.
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.