library(BiocStyle) knitr::opts_chunk$set(error = FALSE, message = FALSE, warning = FALSE) knitr::opts_knit$set(root.dir = file.path("..", "extdata"))
This script downloads a hundred images (one image per patient), as well as the associated single-cell data and cell segmentation masks from the breast tumour Imaging Mass Cytometry (IMC) dataset described in the following publication:
All data are openly available from zenodo.
Here, we will download single cell data and metadata, and process them to create a SingleCellExperiment object. We will then download the corresponding multichannel IMC images and cell segmentation masks and format them into CytoImageList objects using the cytomapper package.
library(data.table) library(S4Vectors) library(SingleCellExperiment) library(cytomapper)
dataset_name <- "JacksonFischer_2020_BreastCancer" dataset_version <- "v2" cat("Dataset version:", dataset_version)
Setting the working and output directories
# Temporary directory to unzip files workdir <- tempdir() Sys.setenv(workdir = workdir) # Output directory dataset_dir <- file.path(".", dataset_name) if(!(dir.exists(dataset_dir))) dir.create(dataset_dir) outdir <- file.path(dataset_dir, dataset_version) if(!(dir.exists(outdir))) dir.create(outdir) # Increase timeout period so that large files can be downloaded timeout <- getOption('timeout') options(timeout = 1000)
We will download single-cell data corresponding from [@JacksonFischer-2020-BreastCancer] from zenodo.
Function to download and unzip files.
importData <- function(url, output_dir, filename) { # Download download.file(url, destfile = file.path(output_dir, filename)) # Unzip system2("unzip", args = c("-o", file.path(output_dir, filename), "-d", output_dir), stdout = TRUE) # Remove zipped folder file.remove(file.path(output_dir, filename)) }
The first zip folder contains the main single-cell, sample and patient metadata. The single-cell locations and cluster labels are downloaded as separate zip folders because they were uploaded to zenodo separately at a later time point.
# Download dataset url_cells <- ("https://zenodo.org/record/4607374/files/SingleCell_and_Metadata.zip?download=1") zip_name <- "SingleCell_and_Metadata.zip" download.file(url_cells, destfile = file.path(workdir, zip_name)) # Unzip required files - Basel Cohort system2("unzip", args = c( "-o", file.path(workdir, zip_name), "Data_publication/BaselTMA/SC_dat.csv", "-d", workdir)) system2("unzip", args = c( "-o", file.path(workdir, zip_name), "Data_publication/BaselTMA/Basel_PatientMetadata.csv", "-d", workdir)) # Unzip required files - Zurich Cohort system2("unzip", args = c( "-o", file.path(workdir, zip_name), "Data_publication/ZurichTMA/SC_dat.csv", "-d", workdir)) system2("unzip", args = c( "-o", file.path(workdir, zip_name), "Data_publication/ZurichTMA/Zuri_PatientMetadata.csv", "-d", workdir)) # Unzip panel file system2("unzip", args = c( "-o", file.path(workdir, zip_name), "Data_publication/Basel_Zuri_StainingPanel.csv", "-d", workdir)) file.remove(file.path(workdir, zip_name))
Download single cell labels and locations
# Clusters url_cluster <- ("https://zenodo.org/record/4607374/files/singlecell_cluster_labels.zip?download=1") importData(url_cluster, workdir, "singlecell_cluster_labels.zip") # Cell locations url_locations <- ("https://zenodo.org/record/4607374/files/singlecell_locations.zip?download=1") importData(url_locations, workdir, "singlecell_locations.zip")
We read in single cell data linked to the Basel and Zurich cohorts, including single cell data, cell metadata, clinical data, antibody panel information, and cell clusters.
# Single cell expressions and spatial features cells <- rbind( fread(file.path(workdir, "Data_publication/BaselTMA/SC_dat.csv")), fread(file.path(workdir, "Data_publication/ZurichTMA/SC_dat.csv")) ) # Sample and clinical metadata cell_meta_basel <- fread(file.path(workdir, "Data_publication/BaselTMA/Basel_PatientMetadata.csv")) cell_meta_zuri <- fread(file.path(workdir, "Data_publication/ZurichTMA/Zuri_PatientMetadata.csv")) cell_meta_zuri[, HER2Status := HER2] cell_meta_zuri[ ,':=' (HER2 = NULL, ER = NULL, PR = NULL)] cell_meta <- as.data.table(rbind( data.frame(c(cell_meta_basel, sapply( setdiff(names(cell_meta_zuri),names(cell_meta_basel)), function(x) NA))), data.frame(c(cell_meta_zuri, sapply( setdiff(names(cell_meta_basel),names(cell_meta_zuri)), function(x) NA))) )) # Panel information panel <- fread(file.path( workdir, "Data_publication/Basel_Zuri_StainingPanel.csv")) # Cluster labels (merge PhenoGraph cluster labels and metacluster labels) pg_clusters_basel <- fread(file.path( workdir, "Cluster_labels/PG_basel.csv"), header = TRUE) setnames(pg_clusters_basel, "PhenoGraphBasel", "PhenoGraph") pg_clusters_zuri <- fread(file.path( workdir, "Cluster_labels/PG_zurich.csv"), header = TRUE) pg_clusters <- rbind(pg_clusters_basel, pg_clusters_zuri) meta_clusters_basel <- fread(file.path( workdir, "Cluster_labels/Basel_metaclusters.csv"), header = TRUE) meta_clusters_zuri <- fread(file.path( workdir, "Cluster_labels/Zurich_matched_metaclusters.csv"), header = TRUE) meta_clusters <- rbind(meta_clusters_basel, meta_clusters_zuri) clusters <- merge(pg_clusters, meta_clusters, by = "id", all.x = TRUE) # Single-cell locations locations_basel <- fread(file.path( workdir, "Basel_SC_locations.csv"), header = TRUE) locations_zuri <- fread(file.path( workdir, "Zurich_SC_locations.csv"), header = TRUE) locations <- rbind(locations_basel, locations_zuri) # Remove data frames that are not needed anymore remove(cell_meta_basel, cell_meta_zuri) remove(locations_basel, locations_zuri) remove(pg_clusters_basel, pg_clusters_zuri, pg_clusters) remove(meta_clusters_basel, meta_clusters_zuri, meta_clusters)
Filter out non-breast images
img_to_remove <- paste(c("Liver", "control", "non-breast"), collapse = "|") cells <- cells[grep(img_to_remove, cells$core, invert = TRUE), ]
We first extract channels related to spatial information, such as cell area or number of neighbors.
spatial_channels <- c( "Area", "Eccentricity", "Solidity", "Extent", "EulerNumber", "Perimeter", "MajorAxisLength","MinorAxisLength", "Orientation", "Percent_Touching", "Number_Neighbors" ) # Spatial channels will go to the colData slot of the SCE spatial <- cells[channel %in% spatial_channels, ] # Marker expression levels will go to the assay slot of the SCE cells <- cells[!channel %in% spatial_channels,]
Here, we will collect all cell-specific metadata in a single DataFrame, which will constitute the colData entry of the final SingleCellExperiment object.
Columns are renamed for consistency with the other datasets.
# Wide format spatial single-cell info cell_metadata <- dcast.data.table( spatial, "core + CellId + id ~ channel", value.var = "mc_counts") # Subset and rename columns cell_metadata <- cell_metadata[, .( image_name = core, cell_id = id, neighbors_number = Number_Neighbors, neighbors_percent_touching = Percent_Touching, cell_area = Area, cell_perimeter = Perimeter, cell_eccentricity = Eccentricity, cell_euler_number = EulerNumber, cell_extent = Extent, cell_major_axis_length = MajorAxisLength, cell_minor_axis_length = MinorAxisLength, cell_orientation = Orientation, cell_solidity = Solidity )] # Add single-cell locations locations <- locations[, .( cell_id = id, cell_x = Location_Center_X, cell_y = Location_Center_Y )] cell_metadata <- merge(cell_metadata, locations, by = "cell_id") # Add clusters clusters <- clusters[, .( cell_id = id, cell_cluster_phenograph = PhenoGraph, cell_metacluster = cluster )] cell_metadata <- merge(cell_metadata, clusters, by = "cell_id") # Add sample and patient metadata cell_meta <- cell_meta[, image_number := 1:.N] # Rename columns cell_meta <- cell_meta[, .( image_name = core, image_number = image_number, cells_per_image = Count_Cells, image_width = Width_FullStack, image_height = Height_FullStack, image_area = area, image_filename = FileName_FullStack, image_sum_area_cells = sum_area_cells, image_percent_tumor_cells = X.tumorcells, image_percent_normal_epithelial_cells = X.normalepithelialcells, image_percent_stroma = X.stroma, image_percent_inflammatory_cells = X.inflammatorycells, patient_id = PID, patient_age = age, patient_gender = gender, patient_status = Patientstatus, patient_disease_status = diseasestatus, pateint_menopausal = menopausal, patient_DFS_months = DFSmonth, patient_OS_months = OSmonth, patient_year_sample_collection = Yearofsamplecollection, TMA_location = TMALocation, TMA_x = TMAxlocation, TMA_y = yLocation, TMA_block_label = TMABlocklabel, TMA_UBTMA_location = UBTMAlocation, tumor_grade = grade, tumor_type = tumor_type, tumor_subtype = Subtype, tumor_clinical_type = clinical_type, tumor_location = location, tumor_size = tumor_size, tumor_primary_site = PrimarySite, tumor_primary_diagnosis = Ptdiagnosis, tumor_ER_status = ERStatus, tumor_HER2_status = HER2Status, tumor_HR_status = HR, tumor_PR_status = PRStatus, tumor_ERpos_ductal_ca = ER.DuctalCa, tumor_triple_neg_ductal = TripleNegDuctal, tumor_hormone_sensitive = hormonesensitive, tumor_hormone_resistant_after_sensitive = hormoneresistantaftersenstive, tumor_I_plus_neg = I_plus_neg, tumor_PTNM_M = PTNM_M, tumor_PTNM_N = PTNM_N, tumor_PTNM_T = PTNM_T, tumor_PTNM_radicality = PTNM_Radicality, tumor_microinvasion = microinvasion, tumor_lymphatic_invation = Lymphaticinvasion, tumor_venous_invasion = Venousinvasion, tumor_SN = SN, tumor_histology = histology, tumor_pre_surgery_Tx_type = Pre.surgeryTx, tumor_post_surgery_Tx_type = Post.surgeryTx, tumor_post_surgery_Tx = Post_surgeryTx, tumor_response = response )] cell_meta[tumor_HER2_status == "+", ]$tumor_HER2_status <- "positive" cell_meta[tumor_HER2_status == "-", ]$tumor_HER2_status <- "negative" cell_meta[tumor_HER2_status == "", ]$tumor_HER2_status <- NA
We specify the cohort (Zurich or Basel), and merge the two cell metadata data frames.
# Specify the cohort of origin cell_meta$patient_cohort <- "" cell_meta[grep("BaselTMA", image_name), ]$patient_cohort <- "Basel" cell_meta[grep("ZTMA", image_name), ]$patient_cohort <- "Zurich" # Merg the data frames cell_metadata <- merge(cell_metadata, cell_meta, by = "image_name", all.x = TRUE) cell_metadata <- DataFrame(cell_metadata)
Finally, we add unique cell ids as row names, add cell numbers, and order the cell metadata object based on image_number and cell_number.
# Cell ids are used as row names cell_metadata$cell_number <- as.integer(sub(".*_", "", cell_metadata$cell_id)) cell_metadata$cell_id <- paste(cell_metadata$image_name, cell_metadata$cell_number, sep = "_") rownames(cell_metadata) <- cell_metadata$cell_id # Rows are ordered by image and cell numbers cell_metadata <- cell_metadata[order(cell_metadata$image_number, cell_metadata$cell_number), ]
Here, we will collect all marker-related information and collect it in a DataFrame that will constitute the rowData slot of the SingleCellExperiment object.
We first rename markers for consistency with other datasets.
# Exclude gas channels gas_channels <- c("Hg", "In115", "I127", "Pb", "Xe", "Ar") cells <- cells[!grepl(paste(gas_channels, collapse = "|"), cells$channel), .(image_name = core, cell_id = id, channel, counts = mc_counts)] # Fix marker and metal names cells[, full_name := sub(".*Di ", "", channel)] cells[, metal := sub("Di .*", "", channel)] cells[, weight := sub(".*[A-Za-z ]", "", metal)] cells[, metal := gsub("[0-9]+", "", metal)] cells[, metal := paste0(metal, weight)] cells[full_name == "Rutheni", `:=` (short_name = metal, full_name = metal)] cells[full_name == "Iridium", `:=` (short_name = metal, full_name = metal)] # Add missing metal info cells[full_name == "phospho Histone", `:=` ( metal = "Eu153", full_name = "phospho-Histone H3 [S28]", short_name = "p_H3")] cells[full_name == "phospho S6", `:=` ( metal = "Er170", full_name = "phospho-S6 [S235/S236]", short_name = "p_S6")] cells[full_name == "phospho mTOR", `:=` ( metal = "Yb173", full_name = "phospho-mTOR [S2448]", short_name = "p_mTOR")] # Clarify unclear names cells[full_name == "cleaved", `:=` ( full_name = "cleaved-PARP + cleaved-Caspase3", short_name = "cPARP_cCASP3")] cells[full_name == "cerbB", `:=` ( full_name = "Epidermal growth factor receptor-2", short_name = "HER2")] cells[full_name == "Carboni", `:=` ( full_name = "Carbonic anhydrase IX", short_name = "CA9")] cells[metal == "In113", `:=` (full_name = "Histone H3", short_name = "H3")] cells[metal == "La139", `:=` (full_name = "H3K27me3", short_name = "H3K27me3")] cells[metal == "Pr141", `:=` (full_name = "Cytokeratin 5", short_name = "KRT5")] cells[metal == "Nd142", `:=` (full_name = "Fibronectin", short_name = "FN1")] cells[metal == "Nd143", `:=` (full_name = "Cytokeratin 19", short_name = "KRT19")] cells[metal == "Nd144", `:=` (full_name = "Cytokeratin 8/18", short_name = "KRT8_18")] cells[metal == "Nd145", `:=` (full_name = "Twist", short_name = "TWIST1")] cells[metal == "Sm147", `:=` (full_name = "Cytokeratin 14", short_name = "KRT14")] cells[metal == "Nd148", `:=` (full_name = "Smooth muscle actin", short_name = "SMA")] cells[metal == "Sm149", `:=` (full_name = "Vimentin", short_name = "VIM")] cells[metal == "Nd150", `:=` (full_name = "c-Myc", short_name = "c_Myc")] cells[metal == "Sm152", `:=` (full_name = "CD3 epsilon", short_name = "CD3e")] cells[metal == "Gd155", `:=` (full_name = "Slug", short_name = "SNAI2")] cells[metal == "Tb159", `:=` (full_name = "p53", short_name = "p53")] cells[metal == "Gd156", `:=` (full_name = "Estrogen receptor alpha", short_name = "ERa")] cells[metal == "Gd158", `:=` (full_name = "Progesterone receptor A/B", short_name = "PGR")] cells[metal == "Ho165", `:=` (full_name = "Beta-catenin", short_name = "CTNNB")] cells[metal == "Er167", `:=` (full_name = "E-Cadherin", short_name = "CDH1")] cells[metal == "Er168", `:=` (full_name = "Ki-67", short_name = "Ki67")] cells[metal == "Tm169", `:=` (full_name = "Epidermal growth factor receptor", short_name = "EGFR")] cells[metal == "Yb171", `:=` (full_name = "Transcription factor SOX-9", short_name = "SOX9")] cells[metal == "Yb172", `:=` (full_name = "von Willebrand factor", short_name = "vWF")] cells[metal == "Yb174", `:=` (full_name = "Cytokeratin 7", short_name = "KRT7")] cells[metal == "Lu175", `:=` (full_name = "Pan-cytokeratin", short_name = "PanCK")] cells[metal == "Ir191", `:=` (full_name = "Iridium 191", short_name = "DNA1")] cells[metal == "Ir193", `:=` (full_name = "Iridium 193", short_name = "DNA2")] # Add missing short names cells[full_name %in% c("CD68", "p53", "CD44", "CD45", "GATA3", "CD20", "EpCAM"), short_name := full_name] # Ruthenium channels cells[startsWith(full_name, "Ru"), full_name := gsub("Ru", "Ruthenium ", full_name)]
We then import the panel and fix antibody clone names.
cells$name <- cells$channel # Select columns panel <- panel[, .( channel = FullStack, metal = `Metal Tag`, antibody_clone = `Antibody Clone` )] # Fix antibody clones names panel[metal == "Gd158", antibody_clone := "EP2 + SP2"] panel[metal == "Yb176", antibody_clone := "F21-852 + C92-605"] panel <- panel[!duplicated(metal), ] panel <- panel[metal %in% unique(cells$metal)] panel[antibody_clone == "", antibody_clone := NA] panel[metal == "Sm147", `:=` (antibody_clone = "polyclonal_CK14")] panel[metal == "Eu153", `:=` (antibody_clone = "HTA28")] panel[metal == "Gd156", `:=` (antibody_clone = "polyclonal_anti_rabbit_IgG")] panel[metal == "Er167", `:=` (antibody_clone = "36/E-Cadherin")] panel[metal == "Yb172", `:=` (antibody_clone = "polyclonal_vWF")] # Merge all panel information panel <- merge(unique(cells[, .(metal, name, full_name, short_name)]), panel, by = "metal")
Finally, we convert the panel table to a DataFrame and add target short_names as row names.
panel <- panel[order(panel$channel), ] panel <- as(panel, "DataFrame") rownames(panel) <- panel$short_name
Here, we will prepare the counts matrix that will be stored in the assay slot of the SingleCellExperiment object.
We extract marker expression values from the cells table and convert them to a matrix. We then order the counts matrix by cell_id and short_name.
We then convert cell_id to the {image_number _ cell_number} format for consistency with other datasets.
# Filter "cells" data frame cells <- cells[!is.na(cells$short_name), ] cells <- cells[cells$image_name %in% cell_metadata$image_name, ] # Create count matrix counts <- dcast.data.table(cells, "cell_id ~ short_name", value.var = "counts") row_names <- counts$cell_id counts[, cell_id := NULL] counts <- as.matrix(counts, rownames = NULL) rownames(counts) <- row_names counts <- counts[order(match(rownames(counts), rownames(cell_metadata))), order(match(colnames(counts), rownames(panel)))] # Add cell_id cell_metadata$cell_id <- paste(cell_metadata$image_number, cell_metadata$cell_number, sep = "_") rownames(cell_metadata) <- cell_metadata$cell_id rownames(counts) <- rownames(cell_metadata)
We have now obtained all metadata and feature data to create the SingleCellExperiment object.
sce <- SingleCellExperiment( assays = list(counts = t(counts)), rowData = panel, colData = cell_metadata )
We apply two different counts transformations:
- exprs: arcsinh-transformed counts (cofactor = 1).
- quant_norm: censored + quantile-normalized counts.
assay(sce, "exprs") <- asinh(counts(sce) / 1) quant <- apply(assay(sce, "counts"), 1, quantile, probs = 0.99, na.rm = TRUE) assay(sce, "quant_norm") <- apply(assay(sce, "counts"), 2, function(x) x / quant) assay(sce, "quant_norm")[assay(sce, "quant_norm") > 1] <- 1 assay(sce, "quant_norm")[assay(sce, "quant_norm") < 0] <- 0
We first subset the Zurich cohort to one image per patient.
sce_zurich <- sce[, sce$patient_cohort == "Zurich"] mainExpName(sce_zurich) <- paste(dataset_name, "Zurich", dataset_version, sep = "_") # Randomly select one image per patient coldata_zurich <- as.data.table(colData(sce_zurich)) set.seed(2) image_sub_zurich <- coldata_zurich[ coldata_zurich[, .I[sample(.N, 1)], by = patient_id]$V1]$image_name # Select 100 patients from the Zurich cohort sce_zurich <- sce_zurich[, sce_zurich$image_name %in% image_sub_zurich] remove(coldata_zurich)
For the Basel cohort, we select a set of a hundred patients as an example
dataset. For consistency, the selection is based on a subsetting vector
obtained from the first imcdatasets version.
image_sub_basel <- c("BaselTMA_SP41_257_X3Y1", "BaselTMA_SP41_166_X15Y4", "BaselTMA_SP41_153_X7Y5", "BaselTMA_SP41_58_X15Y1", "BaselTMA_SP41_135_X8Y5", "BaselTMA_SP41_220_X10Y5", "BaselTMA_SP41_284_X7Y2", "BaselTMA_SP41_18_X13Y5", "BaselTMA_SP41_42_X14Y5", "BaselTMA_SP41_141_X11Y2", "BaselTMA_SP41_177_X16Y5", "BaselTMA_SP41_45_X3Y3", "BaselTMA_SP41_117_X13Y3", "BaselTMA_SP41_57_X8Y6", "BaselTMA_SP41_227_X9Y6", "BaselTMA_SP41_234_X10Y6", "BaselTMA_SP41_86_X15Y3", "BaselTMA_SP41_214_X15Y6", "BaselTMA_SP41_237_X16Y6", "BaselTMA_SP41_61_X3Y4", "BaselTMA_SP41_186_X5Y4", "BaselTMA_SP41_38_X4Y7", "BaselTMA_SP41_114_X13Y4", "BaselTMA_SP41_11_X13Y7", "BaselTMA_SP41_112_X5Y8", "BaselTMA_SP41_53_X6Y8", "BaselTMA_SP41_129_X7Y8", "BaselTMA_SP41_203_X8Y8", "BaselTMA_SP41_101_X10Y8", "BaselTMA_SP41_255_X12Y8", "BaselTMA_SP41_267_X13Y8", "BaselTMA_SP41_48_X14Y8", "BaselTMA_SP41_212_X1Y9", "BaselTMA_SP41_63_X4Y9", "BaselTMA_SP42_13_X5Y8", "BaselTMA_SP42_51_X1Y2", "BaselTMA_SP42_120_X5Y2", "BaselTMA_SP42_217_X1Y3", "BaselTMA_SP42_10_X1Y5", "BaselTMA_SP42_160_X13Y5", "BaselTMA_SP42_98_X11Y5", "BaselTMA_SP42_91_X11Y3", "BaselTMA_SP42_192_X8Y5", "BaselTMA_SP42_185_X7Y5", "BaselTMA_SP42_145_X1Y4", "BaselTMA_SP42_16_X3Y4", "BaselTMA_SP42_275_X5Y4", "BaselTMA_SP42_89_X4Y6", "BaselTMA_SP42_56_X2Y6", "BaselTMA_SP42_130_X1Y6", "BaselTMA_SP42_136_X9Y4", "BaselTMA_SP42_158_X12Y6", "BaselTMA_SP42_261_X11Y6", "BaselTMA_SP42_174_X9Y6", "BaselTMA_SP42_279_X14Y6", "BaselTMA_SP42_152_X15Y6", "BaselTMA_SP42_176_X3Y7", "BaselTMA_SP42_99_X5Y7", "BaselTMA_SP42_273_X6Y7", "BaselTMA_SP42_5_X8Y7", "BaselTMA_SP42_236_X11Y7", "BaselTMA_SP42_169_X3Y8", "BaselTMA_SP42_184_X6Y8", "BaselTMA_SP42_71_X8Y8", "BaselTMA_SP42_179_X13Y8", "BaselTMA_SP42_163_X1Y9", "BaselTMA_SP42_59_X3Y9", "BaselTMA_SP43_87_X15Y4", "BaselTMA_SP43_142_X5Y1", "BaselTMA_SP43_17_X11Y4", "BaselTMA_SP43_233_X13Y7", "BaselTMA_SP43_243_X13Y3", "BaselTMA_SP43_278_X3Y2", "BaselTMA_SP43_124_X10Y8", "BaselTMA_SP43_180_X5Y2", "BaselTMA_SP43_4_X15Y3", "BaselTMA_SP43_118_X13Y5", "BaselTMA_SP43_119_X9Y8", "BaselTMA_SP43_266_X7Y8", "BaselTMA_SP43_122_X12Y8", "BaselTMA_SP43_49_X1Y5", "BaselTMA_SP43_244_X3Y1", "BaselTMA_SP43_47_X16Y4", "BaselTMA_SP43_200_X9Y6", "BaselTMA_SP43_90_X7Y6", "BaselTMA_SP43_147_X6Y6", "BaselTMA_SP43_207_X1Y6", "BaselTMA_SP43_93_X15Y5", "BaselTMA_SP43_97_X16Y6", "BaselTMA_SP43_66_X15Y6", "BaselTMA_SP43_235_X11Y2", "BaselTMA_SP43_107_X4Y7", "BaselTMA_SP43_170_X3Y3", "BaselTMA_SP43_209_X11Y1", "BaselTMA_SP43_50_X7Y1", "BaselTMA_SP43_43_X5Y5", "BaselTMA_SP43_199_X8Y5", "BaselTMA_SP43_115_X4Y8", "BaselTMA_SP43_226_X6Y9", "BaselTMA_SP43_269_X5Y9")
# Subset the Basel cohort sce_basel <- sce[, sce$patient_cohort == "Basel"] mainExpName(sce_basel) <- paste(dataset_name, "Basel", dataset_version, sep = "_") # Select 100 patients based on the subsetting vector above sce_basel <- sce_basel[, sce_basel$image_name %in% image_sub_basel]
We save the SingleCellExperiment object for upload to r Biocpkg("ExperimentHub").
# Full dataset mainExpName(sce) <- paste(dataset_name, "FULL", dataset_version, sep = "_") saveRDS(sce, file.path(outdir, "sce_full.rds")) print(sce) # Subampled Basel cohort saveRDS(sce_basel, file.path(outdir, "sce_basel.rds")) print(sce_basel) # Zurich cohort saveRDS(sce_zurich, file.path(outdir, "sce_zurich.rds")) print(sce_zurich)
Finally, we remove the downloaded files and generated objects to save storage space.
remove(cells, cell_metadata, spatial, counts, locations, clusters, row_names) file.remove(file.path(workdir, "Basel_SC_locations.csv"), file.path(workdir, "Zurich_SC_locations.csv")) unlink(file.path(workdir, "Data_publication"), recursive = TRUE) unlink(file.path(workdir, "Cluster_labels"), recursive = TRUE)
Here, we will download multichannel images from [@JacksonFischer-2020-BreastCancer], as well as the corresponding cell segmentation masks. Images and masks correspond to the data in the SingleCellExperiment object and will be formatted into CytoImageList objects.
We download the zip file containing images and masks.
# Download images and masks url_images <- ("https://zenodo.org/record/4607374/files/OMEandSingleCellMasks.zip?download=1") importData(url_images, workdir, "OMEandSingleCellMasks.zip")
We will import and process cell segmentation masks to make them compatible with the cytomapper package.
We unzip cell segmentation masks and read them into a CytoImageList object.
# Unzip fn <- file.path("OMEnMasks", "Basel_Zuri_masks.zip") system2("unzip", args = c("-o", file.path(workdir, fn), "*full_maks.tiff", "-d", workdir), stdout = TRUE) # Load all masks masks <- loadImages(file.path(workdir, "Basel_Zuri_masks"), pattern = "_full_maks.tiff") # Clean-up tiffs_delete <- list.files(file.path(workdir, "Basel_Zuri_masks")) file.remove(file.path(workdir, "Basel_Zuri_masks", tiffs_delete)) file.remove(file.path(workdir, fn))
The masks are 16-bit images and need to be re-scaled in order to obtain integer cell ids.
# Before scaling range(masks[[1]]) masks <- scaleImages(masks, value = (2 ^ 16) - 1) # After scaling range(masks[[1]])
Occasionally a single-cell ID is skipped in the masks but in the single-cell data the cell numbers were renamed sequentially. Therefore, the single-cell IDs in the masks also have to renamed sequentially in order to correspond to the cell numbers from the single-cell data.
# Rename single-cell IDs sequentially in each mask for (n in names(masks)){ imageData(masks[[n]]) = plyr::mapvalues( imageData(masks[[n]]), sort(unique(as.integer(imageData(masks[[n]])))), 0:(length(unique(as.integer(imageData(masks[[n]]))))-1) ) }
Next, we add image names to masks objects, these names correspond to the
image_name column in colData(sce). This information is stored in the
metadata columns of the CytoImageList objects and is used by cytomapper to
match single cell data, images and masks.
names(masks) <- gsub("_full_maks", "_full.tiff", names(masks)) mcols(masks)$image_filename <- names(masks) masks <- masks[names(masks) %in% sce$image_filename] # Add image metadata mcols(masks) <- merge( mcols(masks), unique(colData(sce)[, c( "image_filename", "image_name", "image_number")]), by = "image_filename")
We create two objects containing the masks from the Basel and Zurich cohorts.
We subset the masks to the hundred images contained in the
SingleCellExperiment objects.
# Basel cohort masks_basel <- masks[grep("BaselTMA", names(masks))] masks_basel <- masks_basel[names(masks_basel) %in% unique( sce_basel$image_filename)] names(masks_basel) <- mcols(masks_basel)$image_name # Zurich cohort masks_zurich <- masks[grep("ZTMA", names(masks))] masks_zurich <- masks_zurich[names(masks_zurich) %in% unique( sce_zurich$image_filename)] names(masks_zurich) <- mcols(masks_zurich)$image_name # All masks names(masks) <- mcols(masks)$image_name
Finally, we will save the generated CytoImageList objects for uploading to r Biocpkg("ExperimentHub").
# All masks saveRDS(masks, file.path(outdir, "masks_full.rds")) print(head(masks)) remove(masks) # Basel cohort saveRDS(masks_basel, file.path(outdir, "masks_basel.rds")) print(head(masks_basel)) # Zurich cohort saveRDS(masks_zurich, file.path(outdir, "masks_zurich.rds")) print(head(masks_zurich))
We will import and process multichannel images to make them compatible with the cytomapper package.
For memory space reasons, we will generate one image subset for the Basel cohort and one for the Zurich cohort. The images will be subsetted to correspond to the single cell data in the sce_basel and sce_zurich objects. We will first process images from the Basel cohort and then repeat the same steps for the Zurich cohort.
We unzip images and read them into a CytoImageList object.
fn <- file.path("OMEnMasks", "ome.zip") system2("unzip", args = c("-o", file.path(workdir, fn), "*BaselTMA_*.tiff", "-d", workdir), stdout = TRUE)
Loading all tiffs would require too much memory, so we delete all the tiff files that we do not want to keep. We then use the loadImages function of the cytomapper package to read the images into a CytoImageList object.
tiffs_all <- list.files(file.path(workdir, "ome/")) tiff_delete <- tiffs_all[!tiffs_all %in% sce_basel$image_filename] file.remove(file.path(workdir, "ome/", tiff_delete)) # Load the images as a CytoImageList object images_basel <- loadImages(file.path(workdir, "ome/"), pattern = "_full.tiff") file.remove(file.path(workdir, "ome/", tiffs_all))
Next, we add image names to image objects, these names correspond to the
image_name column in colData(sce). This information is stored in the
metadata columns of the CytoImageList objects and is used by cytomapper
to match single cell data, images and masks.
mcols(images_basel)$image_filename <- paste0(names(images_basel), ".tiff") images_basel <- images_basel[ mcols(images_basel)$image_filename %in% sce_basel$image_filename] mcols(images_basel) <- merge( mcols(images_basel), unique(colData(sce_basel)[, c( "image_filename", "image_name", "image_number")]), by = "image_filename")
Here, we exclude image channels that are not present in the SingleCellExperiment object.
panel <- rowData(sce) panel <- panel[order(panel$channel), ] images_basel <- getChannels(images_basel, panel$channel) gc()
Finally, we will add protein short names as channel names of the images
object. These short names correspond to the row names of the
SingleCellExperiment object and to the short_name column of rowData(sce).
channelNames(images_basel) <- rownames(panel)
We make sure that image names and masks names are the same so that they can be matched with cytomapper.
print(identical(mcols(masks_basel)$image_name, mcols(images_basel)$image_name)) names(images_basel) <- mcols(images_basel)$image_name
Finally, we save the generated CytoImageList images for uploading to r Biocpkg("ExperimentHub").
saveRDS(images_basel, file.path(outdir, "images_basel.rds")) print(head(images_basel)) remove(images_basel) gc()
To import and format images from the Zurich cohort, we perform the exact same steps as we just did for the Basel cohort.
Unzip images
system2("unzip", args = c("-o", file.path(workdir, fn), "*ZTMA*.tiff", "-d", workdir), stdout = TRUE)
Loading all tiffs would require too much memory, so we delete all the tiff files that we do not want to keep. We then use the loadImages function of the cytomapper package to read the images into a CytoImageList object.
tiffs_all <- list.files(file.path(workdir, "ome/")) tiff_delete <- tiffs_all[!tiffs_all %in% sce_zurich$image_filename] file.remove(file.path(workdir, "ome/", tiff_delete)) # Load the images as a CytoImageList object images_zurich <- loadImages(file.path(workdir, "ome/"), pattern = "_full.tiff") file.remove(file.path(workdir, "ome/", tiffs_all))
Next, we add image names to image objects, these names correspond to the
image_name column in colData(sce). This information is stored in the
metadata columns of the CytoImageList objects and is used by cytomapper
to match single cell data, images and masks.
mcols(images_zurich)$image_filename <- paste0(names(images_zurich), ".tiff") images_zurich <- images_zurich[ mcols(images_zurich)$image_filename %in% sce_zurich$image_filename] mcols(images_zurich) <- merge( mcols(images_zurich), unique(colData(sce_zurich)[, c( "image_filename", "image_name", "image_number")]), by = "image_filename")
Here, we exclude image channels that are not present in the SingleCellExperiment object.
panel <- rowData(sce) panel <- panel[order(panel$channel), ] images_zurich <- getChannels(images_zurich, panel$channel) gc()
We add protein short names as channel names of the images
object. These short names correspond to the row names of the
SingleCellExperiment object and to the short_name column of rowData(sce).
channelNames(images_zurich) <- rownames(panel)
We make sure that image names and masks names are the same so that they can be matched with cytomapper.
print(identical(mcols(images_zurich)$image_name, mcols(masks_zurich)$image_name)) names(images_zurich) <- mcols(images_zurich)$image_name
Finally, we save the generated CytoImageList images and masks objects for uploading to r Biocpkg("ExperimentHub").
saveRDS(images_zurich, file.path(outdir, "images_zurich.rds")) print(head(images_zurich)) remove(images_zurich)
Remove all files from the temporary working directory.
downloaded_files <- list.files(workdir) downloaded_files <- downloaded_files[!downloaded_files %in% "BiocStyle"] unlink(file.path(workdir, downloaded_files), recursive = TRUE) # Reset original timeout value options(timeout = timeout)
sessionInfo()
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.