Subsetting to generate a toy dataset

knitr::opts_chunk$set(echo = TRUE)
knitr::opts_knit$set(root.dir = file.path("..", "extdata"))

Script to generate toy data for the cytomapper package

Running the 1_LoadPancreasData.Rmd script generates a SingleCellExperiment object:

Runing the 2_LoadPancreasImages.Rmd generates two CytoImageList objects:

These three object form a dataset comprising 100 multiplexed images that each contain 38 channels and the associated single-cell data.

For space reasons, this dataset needs to be subsetted in order to generate the small toy dataset used to illustrate the cytomapper package.

In this script, we will crop the images to 100 x 100 pixels and limit the number of channels to five. We will subset the single-cell dataset accordingly, to retain only the cells and channels that are present on the toy images.

library(SingleCellExperiment)
library(cytomapper)
library(ijtiff)

Read in single-cell data

pancreasSCE <- readRDS("pancreas_sce.rds")
pancreasSCE

Load images

pancreasImages <- readRDS("pancreas_images.rds")
pancreasImages

Load masks

pancreasMasks <- readRDS("pancreas_masks.rds")
pancreasMasks

Subset the images and masks

We first rename the images and the masks to have shorter names.
We next select three images to subset by providing three image names that are in names(images) and use the cytomapper::getImages function to subset them.

# Rename the images
names(pancreasImages) <- gsub('a0_full_clean', 'imc', names(pancreasImages))
names(pancreasMasks) <- gsub('a0_full_mask', 'mask', names(pancreasMasks))

# Name of the images to subset
image.list <- c("E34", "G01", "J02")

# Subset the three selected images
pancreasImages <- getImages(pancreasImages, paste(image.list, 'imc', sep='_'))
pancreasMasks <- getImages(pancreasMasks, paste(image.list, 'mask', sep='_'))

Subset the channels

We will subset five channels from the subsetted images. We provide five channel names (that are in channelNames(pancreasImages)) and use the cytomapper::getChannels function to subset them.

# Name of the channel to subset
channel.list <- c("H3", "CD99", "PIN",  "CD8a", "CDH")

# Subset the five selected channels
pancreasImages <- getChannels(pancreasImages, channel.list)

Crop the images and the masks

For each image, we provide specific coordinates.
The same coordinates are used for the images and the masks.

pancreasImages$E34_imc <- CytoImageList(pancreasImages$E34_imc[95:194, 201:300,])
pancreasImages$G01_imc <- CytoImageList(pancreasImages$G01_imc[121:220, 291:390,])
pancreasImages$J02_imc <- CytoImageList(pancreasImages$J02_imc[246:345, 501:600,])

pancreasMasks$E34_mask <- CytoImageList(pancreasMasks$E34_mask[95:194, 201:300])
pancreasMasks$G01_mask <- CytoImageList(pancreasMasks$G01_mask[121:220, 291:390])
pancreasMasks$J02_mask <- CytoImageList(pancreasMasks$J02_mask[246:345, 501:600])

Add image numbers to the images and masks objects

mcols(pancreasImages)$ImageNb <- c(1:3)
mcols(pancreasMasks)$ImageNb <- c(1:3)

Subset the SingleCellExperiment Object (1/2)

We first subset the selected images from the SingleCellExperiment object. We then subset the selected channels

# Subset the selected images
pancreasSCE <- pancreasSCE[, pancreasSCE$ImageName %in% image.list]

# Subset the selected channels
pancreasSCE <- pancreasSCE[rownames(pancreasSCE) %in% channel.list, ]

Subset the SingleCellExperiment Object (2/2)

We first extract the numbers of the cells present on the cropped masks. We then subset these cells from the SCE object.

# Subset the selected channels
E34.cells <- unique(as.vector(pancreasMasks$E34))
G01.cells <- unique(as.vector(pancreasMasks$G01))
J02.cells <- unique(as.vector(pancreasMasks$J02))

# Subset the cells from the SCE
pancreasSCE <- cbind(pancreasSCE[,pancreasSCE$ImageName == "E34" & pancreasSCE$CellNumber %in% E34.cells],
                     pancreasSCE[,pancreasSCE$ImageName == "G01" & pancreasSCE$CellNumber %in% G01.cells],
                     pancreasSCE[,pancreasSCE$ImageName == "J02" & pancreasSCE$CellNumber %in% J02.cells])

Remove unnecessary metadata

To save additional space, we will remove the unnecessary metadata from the SCE object.

# Metadata to retain from colData(pancreasSCE) and rowData(pancreasSCE)
keep.cols <- c("CellNumber", "Pos_X", "Pos_Y", "Area", "ImageName", "CellType", "CellCat")
keep.rows <- c("MetalTag", "Target", "clean_Target")

# Subset the SCE object
rowData(pancreasSCE) <- rowData(pancreasSCE)[, colnames(rowData(pancreasSCE)) %in% keep.rows]
colData(pancreasSCE) <- colData(pancreasSCE)[, colnames(colData(pancreasSCE)) %in% keep.cols]

Add metadata

For convenience, we will add image numbers (ImageNb) and a channel numbers (frame) to the SCE object.
We will also add some columns containing the names of the associated images and masks and add a column capturing three cell types. We will also generate a logical entry for testing purposes.

# Add channel numbers
rowData(pancreasSCE)$frame <- c(1:5)

# Add image numbers
image.numbers <- data.frame(ImageName = image.list,
                            ImageNb = c(1:3),
                            stringsAsFactors = FALSE)

colData(pancreasSCE) <- transform(merge(colData(pancreasSCE), image.numbers,
                                by="ImageName", all=TRUE),
                          rownames=rownames(colData(pancreasSCE)))

rownames(colData(pancreasSCE)) <- colData(pancreasSCE)$rownames
colData(pancreasSCE)$rownames <- NULL

# Rename the cell number column
colData(pancreasSCE)$CellNb <- colData(pancreasSCE)$CellNumber
colData(pancreasSCE)$CellNumber <- NULL

# Add image and mask names
colData(pancreasSCE)$MaskName <- paste(colData(pancreasSCE)$ImageName,
                               'mask.tiff', sep='_')

colData(pancreasSCE)$ImageName <- paste(colData(pancreasSCE)$ImageName,
                               'imc.tiff', sep='_')

# Add cell types
cur_celltypes <- pancreasSCE$CellType
cur_celltypes[cur_celltypes == "beta"] <- "celltype_A"
cur_celltypes[cur_celltypes == "alpha"] <- "celltype_B"
cur_celltypes[!(cur_celltypes %in% c("celltype_A", "celltype_B"))] <- "celltype_C"

pancreasSCE$CellType <- cur_celltypes

# Save logical entry
pancreasSCE$Pattern <- pancreasSCE$CellCat == "exocrine"
pancreasSCE$CellCat <- NULL

Save images as tiff files

We now save the images and masks as tiff files. These files are used to illustrate the cytomapper package.
The EBImage and tiff packages do not support 32-bit encodings for images. However, after compensation, the pixels contain floats rather than integers.
We have to use the ijtiff package to write out the images. Of Note: The file names need to be changed to '.tiff' after saving since the ijtiff package saves every image as '.tif'.

# Save multiplexed images
write_tif(imageData(transpose(pancreasImages$E34)), 
          path = "E34_imc.tiff",
          bits_per_sample = 32L, overwrite = TRUE)
file.rename("E34_imc.tif", "E34_imc.tiff")

write_tif(imageData(transpose(pancreasImages$G01)), 
          path = "G01_imc.tiff",
          bits_per_sample = 32L, overwrite = TRUE)
file.rename("G01_imc.tif", "G01_imc.tiff")

write_tif(imageData(transpose(pancreasImages$J02)), 
          path = "J02_imc.tiff",
          bits_per_sample = 32L, overwrite = TRUE)
file.rename("J02_imc.tif", "J02_imc.tiff")

# Save masks
write_tif(imageData(transpose(pancreasMasks$E34)), 
          path = "E34_mask.tiff",
          bits_per_sample = 16L, overwrite = TRUE)
file.rename("E34_mask.tif", "E34_mask.tiff")

write_tif(imageData(transpose(pancreasMasks$G01)), 
          path = "G01_mask.tiff",
          bits_per_sample = 16L, overwrite = TRUE)
file.rename("G01_mask.tif", "G01_mask.tiff")

write_tif(imageData(transpose(pancreasMasks$J02)), 
          path = "J02_mask.tiff",
          bits_per_sample = 16L, overwrite = TRUE)
file.rename("J02_mask.tif", "J02_mask.tiff")

Save the SCE object and the images and masks CytoImageList objects.

save(pancreasSCE,
     file = file.path("..", "..", "data", "pancreasSCE.RData"), compress = "xz")
save(pancreasMasks,
     file = file.path("..", "..", "data", "pancreasMasks.RData"), compress = "xz")
save(pancreasImages,
     file = file.path("..", "..", "data", "pancreasImages.RData"), compress = "xz")


Try the cytomapper package in your browser

Any scripts or data that you put into this service are public.

cytomapper documentation built on Jan. 30, 2021, 2:01 a.m.