library(BiocStyle) knitr::opts_chunk$set(error=FALSE, message=FALSE, warning=FALSE)
We obtain a single-cell RNA sequencing dataset of the mouse nervous system from @zeisel2018molecular.
Counts for endogenous genes are available from http://mousebrain.org/downloads.html as a loom
file.
We download and cache it using the r Biocpkg("BiocFileCache")
package.
library(BiocFileCache) bfc <- BiocFileCache("raw_data", ask = FALSE) loom.path <- bfcrpath(bfc, "https://storage.googleapis.com/linnarsson-lab-loom/l5_all.loom")
We load this into our R session using the r Biocpkg("LoomExperiment")
package.
library(LoomExperiment) scle <- import(loom.path, type="SingleCellLoomExperiment") scle
Then it's just a matter of peeling apart the bits that we need.
We do need to clean up the rowData
:
rd <- rowData(scle) colnames(rd) <- sub("^X_", "", colnames(rd)) rownames(rd) <- rd$Accession rd
We also need to clean up the colData
.
This is, sadly, a gargantuan effort - so much for loom
being a ready-to-use file format!
cd <- colData(scle) # Useless pieces of information, or pieces that are obvious from the experimental design. cd$Bucket <- NULL cd$Species <- NULL cd$AnalysisProject <- NULL cd$Transcriptome <- NULL cd$TimepointPool <- NULL cd$NGI_PlateWell <- NULL cd$PlugDate <- NULL cd$Plug_Date <- NULL cd$Project <- NULL # Redundant fields. cd$Cell_Conc <- NULL cd$ngperul_cDNA <- NULL cd$cDNA_Lib_Ok <- NULL cd$Target_Num_Cells <- NULL cd$Date_Captured <- NULL cd$Num_Pooled_Animals <- NULL cd$PCR_Cycles <- NULL cd$Sample_Index <- NULL cd$Seq_Comment <- NULL cd$Seq_Lib_Date <- NULL cd$Seq_Lib_Ok <- NULL # Converting various character fields into numbers, where applicable. # On occassion, we have to get rid of some nonsense fields with quotation marks; # something probably got corrupted when the file was saved. options(warn=2) for (i in colnames(cd)) { current <- cd[[i]] if (is.character(current)) { converted <- current is.bad <- converted=="" | grepl('"', converted) converted[is.bad] <- NA changed <- TRUE if (any(has.pct <- grepl("%$", current))) { converted[!has.pct] <- NA converted <- sub("%$", "", converted) converted <- as.numeric(converted)/100 cd[[i]] <- converted } else if (any(has.comma <- grepl(",[0-9]{3}", current))) { converted[!has.comma] <- NA # these are probably corruptions of some sort. converted <- gsub(",([0-9]{3})", "\\1", converted) converted <- as.numeric(converted) cd[[i]] <- converted } else if (any(grepl("^[0-9]+,[0-9]+$", current))){ converted <- sub(",", ".", converted) converted <- as.numeric(converted) cd[[i]] <- converted } else if (any(grepl("^[0-9]+(\\.[0-9]+)?$", current))) { cd[[i]] <- as.numeric(converted) } else { changed <- FALSE } # # For debugging purposes, to check that the corruptions are purged correctly. # if (changed) { # print(i) # print(names(table(current))) # print(names(table(converted))) # } } } options(warn=1) # Replacing the 'nan's with NA's. for (i in colnames(cd)) { current <- cd[[i]] if (is.character(current) && any(lost <- current=="nan")) { current[lost] <- NA_character_ cd[[i]] <- current } } # Stripping out the class probabilities into a nested matrix. clcols <- grep("ClassProbability", colnames(cd)) clprobs <- cd[,clcols] clprobs <- as.matrix(clprobs) colnames(clprobs) <- sub("ClassProbability_", "", colnames(clprobs)) cd <- cd[,-clcols] cd$ClassProbability <- clprobs # Hacking out the reduced dimensions. pca.cols <- c("X_PC1", "X_PC2") tsne.cols <- c("X_tSNE1", "X_tSNE2") other.cols <- c("X_X", "X_Y") reddim <- list( PCA=as.matrix(cd[,pca.cols]), tSNE=as.matrix(cd[,tsne.cols]), unnamed=as.matrix(cd[,other.cols]) ) cd <- cd[,!colnames(cd) %in% c(pca.cols, tsne.cols, other.cols)] # Why the prefixing with X_? Well, whatever. colnames(cd) <- sub("X_", "", colnames(cd)) cd
We now save all of the relevant components to file for upload to r Biocpkg("ExperimentHub")
.
path <- file.path("scRNAseq", "zeisel-nervous", "2.6.0") dir.create(path, showWarnings=FALSE, recursive=TRUE) saveRDS(rd, file=file.path(path, "rowdata.rds")) saveRDS(cd, file=file.path(path, "coldata.rds")) saveRDS(reddim, file=file.path(path, "reddims.rds"))
The column pairs are a bit tricky, but we just coerce them into SelfHits
objects and save that.
colpairs <- lapply(colGraphs(scle), function(x) as(x, "SelfHits")) saveRDS(colpairs, file=file.path(path, "colpairs.rds"))
The trickiest part is resaving the HDF5 file as a sparse matrix.
saveRDS(as(assay(scle), 'dgCMatrix'), file=file.path(path, "counts.rds"))
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.