# Chunk defaults for the rendered document: show code (echo = TRUE),
# and widen the console so wide data frames print on one line.
knitr::opts_chunk$set(echo = TRUE)
options(width = 100)
Here we will download and process the Seurat objects associated with the tonsil cell atlas to prepare them for submission to ExperimentHub. We will follow a similar style and structure as the one used in the TabulaMurisSenisData package.
In particular, here we will focus on the Visium data from our tonsil atlas.
# Dependencies: one library() call per line, in the original attach order
# (attach order matters for name masking on the search path).
library(here)
library(glue)
library(Seurat)
library(HDF5Array)
library(stringr)
library(SpatialExperiment)
library(tools)
library(dplyr)
library(readr)
library(purrr)
library(tibble)
The chunk above was already run in another notebook to download the scRNA-seq data, so here we can simply read the "tonsil-atlas-zenodo-get-output_v2.csv" file and extract the URL of the Visium data:
# Read the headerless CSV written by zenodo-get; it lists one file per row
# as (download URL, MD5 checksum, file name). Column names are supplied
# directly via `col.names=` rather than assigned afterwards.
path_to_csv1 <- here("inst/scripts/tonsil-atlas-zenodo-get-output_v2.csv")
files_df <- read.csv(
  file = path_to_csv1,
  header = FALSE,
  col.names = c("url", "file_id", "file_name")
)
We proceed to download the Visium dataset:
# Create data directories: one for the raw tarball, one for the processed
# ExperimentHub artifacts.
data_dir <- here("inst/scripts/spatial_raw_data")
out_dir <- here("inst/scripts/HCATonsilData/2.0/Spatial")
dir.create(data_dir, recursive = TRUE, showWarnings = TRUE)
dir.create(out_dir, recursive = TRUE, showWarnings = TRUE)

# Get URL and download. The tarball is large, so raise the download timeout
# well above the 60 s default.
options(timeout = 1000000)
filt <- str_detect(files_df$file_name, "Spatial")
spatial_url <- files_df[filt, "url"]
out_file <- file.path(
  data_dir,
  files_df[filt, "file_name"]
)
download.file(url = spatial_url, destfile = out_file)

# Check MD5. tools::md5sum() returns a *named* character vector (named by
# file path), so drop the name and use identical() for an explicit,
# type-safe comparison instead of `!x == y`, which silently relies on
# `!` having lower precedence than `==` and on named-vector recycling.
md5_original <- files_df[filt, "file_id"]
md5_download <- unname(md5sum(out_file))
if (!identical(md5_original, md5_download)) {
  stop("The file is corrupted!", call. = FALSE)
}

# Uncompress the archive next to where it was downloaded.
untar(out_file, exdir = data_dir)
# Read data: the full Visium Seurat object shipped in the Zenodo tarball.
file_path <- here("inst/scripts/spatial_raw_data/spatial_transcriptomics/20220527_tonsil_atlas_spatial_seurat_obj.rds")
so <- readRDS(file_path)

# 'assays': write the raw counts and the log-normalized data matrices to
# HDF5, renaming Seurat's "data" slot to the SCE-conventional "logcounts".
for (i in c("counts", "data")) {
  j <- switch(i, counts="counts", data="logcounts")
  fnm <- file.path(out_dir, glue("Spatial_assay_{j}.h5"))
  mtx <- GetAssayData(so, i)
  # chunkdim is derived from the matrix dimensions so HDF5 chunking
  # matches what writeHDF5Array would pick by default for this size.
  h5 <- writeHDF5Array(mtx, filepath=fnm, name=j, chunkdim=getHDF5DumpChunkDim(dim(mtx)))
}

# 'reducedDims': one RDS per dimensionality reduction. Row names are
# stripped here; cell identity is recovered later from the colData order.
for (dr in Reductions(so)) {
  obj <- Embeddings(so, dr)
  rownames(obj) <- NULL
  fnm <- file.path(out_dir, glue("Spatial_dimred_{dr}.rds"))
  saveRDS(obj, fnm)
}

# 'colData': combine per-image spot coordinates with the cell metadata.
# NOTE(review): map_df() row-binds the per-image coordinate data frames;
# this assumes spot barcodes survive as row names after binding and that
# so@meta.data carries a "barcode" column to join on — confirm both hold
# for this object.
spatial_coordinates <- map_df(Images(so), \(.) so@images[[.]]@coordinates)
spatial_coordinates <- rownames_to_column(spatial_coordinates, "barcode")
cell_metadata <- left_join(so@meta.data, spatial_coordinates, by = "barcode")
cd <- DataFrame(cell_metadata)
cd$sample_id <- cd$gem_id # needed for SPE
saveRDS(cd, file.path(out_dir, "Spatial_coldata.rds"))

# 'rowData': map gene symbols back to Ensembl IDs using the 10x
# features.tsv.gz reference, keeping only "Gene Expression" features,
# and flag Seurat's highly variable genes.
genes_df <- read_tsv(
  here("inst/extdata/features.tsv.gz"),
  col_names = c("ensembl_id", "symbol", "type")
) %>%
  filter(type == "Gene Expression") %>%
  as.data.frame()
gene_ids <- genes_df$ensembl_id
names(gene_ids) <- genes_df$symbol
gs <- rownames(so)
rd <- so[["Spatial"]]@meta.features
rd <- DataFrame(row.names=gs, gene_name=gs, rd)
# Symbol -> Ensembl lookup by name; unname afterwards so gene_id is a
# plain character column.
rd$gene_id <- gene_ids[rd$gene_name]
names(rd$gene_id) <- NULL
rd$highly_variable <- gs %in% VariableFeatures(so)
saveRDS(rd, file.path(out_dir, "Spatial_rowdata.rds"))

# 'imgData': save each sample's low-resolution tissue image and its
# scale factors as separate RDS files (side effects only; the lapply()
# return value is not used).
id <- lapply(Images(so), \(id) {
  sfs <- so@images[[id]]@scale.factors
  img <- GetImage(so, image=id, mode="raster")
  saveRDS(img, file.path(out_dir, glue("Spatial_image_{id}.rds")))
  saveRDS(sfs, file.path(out_dir, glue("Spatial_scale_{id}.rds")))
})
As a sanity check, let us ensure that we can create successfully each SingleCellExperiment object from its individual slots:
# metadata: reload the row/column annotations serialized above.
rd <- readRDS(file.path(out_dir, "Spatial_rowdata.rds"))
cd <- readRDS(file.path(out_dir, "Spatial_coldata.rds"))

# assays: find the HDF5 assay files, name them by assay ("counts",
# "logcounts"), read each back into memory and restore the dimnames that
# writeHDF5Array did not keep, then coerce to sparse.
# NOTE(review): h5read() is exported by rhdf5, not HDF5Array — confirm it
# is on the search path when only library(HDF5Array) is attached.
as <- list.files(out_dir, "^Spatial_assay_", full.names=TRUE)
names(as) <- gsub("^Spatial_assay_(.*)\\.h5$", "\\1", basename(as))
as <- mapply(
  \(h5, nm) {
    mtx <- h5read(h5, name=nm)
    rownames(mtx) <- rd$gene_name
    colnames(mtx) <- cd$barcode
    as(mtx, "dgCMatrix")
  },
  h5=as, nm=names(as), SIMPLIFY=FALSE)

# reductions: reload each embedding matrix; names are upper-cased file
# stems (e.g. "pca" -> "PCA") to match SCE reducedDim conventions.
dr <- list.files(out_dir, "^Spatial_dimred_", full.names=TRUE)
names(dr) <- toupper(gsub("^Spatial_dimred_(.*)\\.rds", "\\1", basename(dr)))
dr <- lapply(dr, readRDS)

# images: rebuild the imgData DataFrame, one row per sample, wrapping each
# raster in a SpatialImage and attaching its low-resolution scale factor.
id <- lapply(unique(cd$sample_id), \(id) {
  img <- readRDS(file.path(out_dir, glue("Spatial_image_{id}.rds")))
  sfs <- readRDS(file.path(out_dir, glue("Spatial_scale_{id}.rds")))
  DataFrame(
    sample_id=id, image_id="lowres",
    data=I(list(SpatialImage(img))),
    scaleFactor=sfs$lowres)
}) |> do.call(what="rbind")
Construct SpatialExperiment object:
# Assemble the SpatialExperiment from the reloaded slots. The outer
# parentheses force the freshly assigned object to print for inspection.
(spe <- SpatialExperiment(
  assays = as,
  rowData = rd,
  colData = cd,
  reducedDims = dr,
  imgData = id,
  mainExpName = "Spatial"
)) # metadata=md
# Round-trip check: coerce both the original Seurat object and the rebuilt
# SpatialExperiment to SingleCellExperiment and verify they are identical
# after stripping class-specific bookkeeping.
sce_old <- as.SingleCellExperiment(so)
sce_new <- as(spe, "SingleCellExperiment")
# drop class-specific identifiers (Seurat adds 'ident', SPE adds 'sample_id')
sce_old$ident <- sce_new$sample_id <- NULL
# drop 'spatialCoords' (SPE-only internal column, absent from sce_old)
int_colData(sce_new)$spatialCoords <- NULL
# match ordering of reduced dimensions
drs <- reducedDimNames(sce_old)
reducedDims(sce_new) <- reducedDims(sce_new)[drs]
# drop irrelevant internals & dimension names: keep only the internal
# metadata fields both objects share, and clear colData row names
imd <- c("version", "mainExpName")
int_metadata(sce_old) <- int_metadata(sce_old)[imd]
int_metadata(sce_new) <- int_metadata(sce_new)[imd]
rownames(colData(sce_old)) <- rownames(colData(sce_new)) <- NULL
# this is necessary, too... align the rowRanges element metadata so the
# final identical() call compares only the substantive slots
elementMetadata(rowRanges(sce_old)) <- elementMetadata(rowRanges(sce_new))
# check saved & re-loaded objects match
identical(sce_old, sce_new)
# Record package versions used to produce the serialized artifacts.
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.