knitr::opts_chunk$set(echo = TRUE)
First, let's get some practice installing and loading packages:
# from CRAN install.packages(c("ggplot2", "tibble", "devtools")) # from Bioconductor if (!require("BiocManager", quietly = TRUE)) install.packages("BiocManager") BiocManager::install(version = "3.14") BiocManager::install("EBImage") # from Github devtools::install_github("https://github.com/livnatje/DIALOGUE") devtools::install_github("https://github.com/Jerby-Lab/opipes")
To load packages, call library(YOURPACKAGENAME)
# from CRAN library(ggplot2) library(tibble) library(pbapply) # from Bioconductor library(EBImage) # from Github library(opipes) library(DIALOGUE)
For the purposes of this tutorial, we will load one sample from the ovarian cancer project: TMA-13, FOV-1
PATH <- "/Volumes/ljerby/HGSC_Profiling/Data/SMI/5 Raw data/SMI-0028_TMA_13/CellComposite/CellComposite_F001.jpg" comp <- readImage(PATH) display(comp, method = "raster")
Wait, what did you just do?
We just ran two functions from EBImage
. A neat trick for helping you fetch the
documentation on any function, is by putting a ?
in front of the function name
?readImage
What's in an Image
object in R?
comp
One of the things I'm working on whilst pre-processing the data, is to extract a quantitative measure of the IF intensity. These images are LARGE. Even loading them will take a bit.
From the documentation we obtained from Nanostring
, we know that each of the
frames in the raw images corresponds to a particular immunofluorescent stain.
PATH <- "/Volumes/ljerby/HGSC_Profiling/Data/SMI/5 Raw data/SMI-0028_TMA_13/RawMorphologyImages/20220222_094630_S2_C902_P99_N99_F001_Z005.TIF" img <- readImage(PATH) display(img, all = T)
In order to extract IF intensities, you want to be able to assign each pixel to
a cell. Lucky for me, Livnat has run cell-segmentation on our data. The DAPI
(frame 5) and Membrane (CD298/B2M)
was used to do the cell-segmentation.
# Reading and cleaning matrix names cellseg <- read.csv("/Volumes/ljerby/HGSC_Profiling/Results/Segmentation/SMI/Seg/SMI_T13_F001_whole-cell.csv") colnames(cellseg) <- unlist(lapply(colnames(cellseg), function(x) {strsplit(x , split = "X0.")[[1]][2]})) colnames(cellseg)[1] <- "0" cellseg_raw <- cellseg cellseg_raw[1000:1020, 1000:1009]
Plotting the cell segmentation map
# Plot the cell segmentation map cellseg[cellseg == 1001] <- 0.9 # if a pixel is part of the cell, set the image intensity to 0.9 display(1- Image(t(as.matrix(cellseg))), method = "raster")
Reading and plotting the nuclear-segmentation only map:
# Reading and cleaning matrix names nuclearseg <- read.csv("/Volumes/ljerby/HGSC_Profiling/Results/Segmentation/SMI/Seg/SMI_T13_F001_nuclear.csv") colnames(nuclearseg) <- unlist(lapply(colnames(nuclearseg), function(x) {strsplit(x , split = "X0.")[[1]][2]})) colnames(nuclearseg)[1] <- "0" nuclearseg_raw <- nuclearseg # Plot the nuclear cell segmentation map nuclearseg[nuclearseg != 0] <- 0.9 display(1- Image(t(as.matrix(nuclearseg))), method = "raster")
PanCK
, CD45
and CD3
immuno-flurescent intensity would be useful for
cross-checking with computationally derived cell-type assignments downstream,
so we will extract that now!
frameID <- data.frame(stain = c("Membrane", "PanCK", "CD45", "CD3", "DAPI"), frame_id = c(1:5)) tibble(frameID)
In the code block below, we loop over the 3 frames that contain
PanCK
, CD45
and CD3
intensity, and every cell, to extract the IF intensity
of the cytoplasm. It is very slow (hours for one FOV), so we will not run it.
Instead, I am running this in parallel on the cluster (topic for another time).
if_list <- lapply(2:4, function(fid){ print(fid) intensities <- data.frame(t(pbsapply(c(1:max(cellseg_raw)), simplify = T, function(cid){ if (sum(t(cellseg_raw) == cid & t(nuclearseg) == 0) == 0) { # this is the case where the cell segmentation is pretty much just the nucleus... return(c(NA, NA)) } else { mean <- mean(img@.Data[,,fid][t(cellseg_raw) == cid & t(nuclearseg) == 0]) max <- max(img@.Data[,,fid][t(cellseg_raw) == cid & t(nuclearseg) == 0]) return(c(max, mean)) } }))) colnames(intensities) <- paste0(frameID[fid,]$stain, c("_max", "_mean")) return(intensities) }) if_frame <- do.call("cbind", if_list) row.names(if_frame) <- c(1:max(cellseg_raw)) saveRDS(if_frame, file = "/Volumes/ljerby/HGSC_Profiling/Intermediate/20220410_TMA13_FOV1_intensities.rds")
There are some cell-types we obviously didn't stain for (e.g endothelial marker, fibroblast marker etc). We will use the RNA quantifications for each cell to assign cell-types.
First, we load the RNA data:
tma13 <- readRDS("/Volumes/ljerby/HGSC_Profiling/Data/R_data/SMI_HGSC_TMA13_mesmer.rds") r <- tma13$SMI_T13_F001 remove(tma13) names(r)
First, we process the data by filtering out cells with too few overall counts
tpm <- r$tpm[,unlist(apply(r$cd, 2, sum)) > 50] coor <- r$coor[unlist(apply(r$cd, 2, sum)) > 50,]
Now we use the wrappers in opipes
to cluster the data. By "wrappers" I mean I
wrote code that "wraps" around other code. In this case, the other code is
Seurat
set.seed(123) so <- seuratify(tpm, prj = "tma13_fov1") so <- std_preprocess(so, nfeatures = 350) so <- run_pca(so, n_pcs = 20, pca.assay = "RNA") so <- umap_tsne(so, umap.flag = T, n_dims = 20) so <- opipes::cluster(so, n_dims = 20)
Let's plot our clustering:
plot_df <- data.frame(so@reductions$umap@cell.embeddings, cluster = so$RNA_snn_res.0.4) p <- ggplot(plot_df, aes(x = UMAP_1, y = UMAP_2, col = cluster)) + geom_point(size = 0.5) + theme_bw() + coord_fixed() + guides(colour = guide_legend("cluster", override.aes = list(size=3, shape = 15))) print(p)
Here, we call the cell type assignments, using a cell-type assignment
code that I've wrapped into opipes
# create a list object q <- c() q$tpm <- tpm q$genes <- row.names(tpm) q$cells <- colnames(tpm) q$clusters <- paste0("C", so$RNA_snn_res.0.4) # read in your preferred cell type signatures cell.sig <- readRDS("/Volumes/ljerby/SharedResources/Data/Signatures/cell.type.sig.rds") cell.sig <- cell.sig$HGSC[-c(3,4,10)] # removing CD8.T, CD4.T, and cell cycle genes for now # execute to cell-type assignment code q <- scRNA_markers_assign(q, cell.clusters = q$clusters, cell.sig = cell.sig, non.immune.cell.types = c('Endothelial','CAF','Malignant'), immune.markers = c("CD3E","PTPRC","CD8A","CD4")) install.packages("assertthat")
What do the cell-type assignments look like in in space?
plot_df <- data.frame(coor, so@reductions$umap@cell.embeddings, cluster = q$clusters, cell_type = q$cell.types, so@reductions$pca@cell.embeddings) tibble(plot_df)
And now we use this data.frame to plot our cell-type assignments in physical space
p <- ggplot(plot_df, aes(x = x, y = y, col = cell_type)) + geom_point(size = 0.5) + scale_color_manual(values = c("#FF5B18", "#6580f0", "#c4a360", "#bc31cc", "#3ec25f", "#fce728", "#b5b5b5")) + theme(plot.title = element_text(hjust = 0.5, size = 16, face = "bold"), panel.background = element_rect(fill = "black", colour = "black", size = 0.5, linetype = "solid"), panel.grid.major = element_line(size = 0.25, linetype = 'dotted'), panel.grid.minor = element_line(size = 0.1, linetype = 'dotted')) + guides(colour = guide_legend("cluster", override.aes = list(size=3, shape = 15))) + coord_fixed() print(p)
Isn't the comparison to the composite image uncanny?
display(comp, method = "raster")
We will now run a tool developed by Livnat, for identifying "multi-cellular programs". Let's do an analysis to see if there are multi-cullular programs between malignant, fibroblasts, and macrophages
The first step is to make a DIALOGUE "cell type" object for each cell type of interest and store it in a list.
celltypes_of_interest <-c("Malignant", "Macrophage", "CAF") cell.types <- c() for (celltype in celltypes_of_interest) { cat(paste0("\nProcessing ", celltype, "...\n")) idx <- which(plot_df$cell_type == celltype) cat(paste0("\tSubsetting ", celltype, " Data...\n")) cell_tpm <- q$tpm[,idx] cell_meta <- so@meta.data[idx,] samples <- unlist(apply(coor[idx,], 1, function(row){ if(row[1] < 2000 & row [2] <2000) { return("niche1") } else if (row[1] >= 2000 & row[2] < 2000) { return("niche2") } else if (row[1] >= 2000 & row[2] >= 2000) { return("niche3") } else { return("niche4") }})) cell_q <- r$comp.reads[cell_meta$cellid] cat(paste0("\tComputing PCA for ", celltype, " Data...\n")) sub <- seuratify(cell_tpm) sub <- std_preprocess(sub, nfeatures = 350) sub <- run_pca(sub, n_pcs = 20, pca.assay = "RNA", verbose = F) cat(paste0("\tCompute cell type object for ", celltype, "...\n")) cell.types[[celltype]] <- DIALOGUE::make.cell.type( name = celltype, # the cell types tpm = cell_tpm, # tpm samples = samples, #sample X = sub@reductions$pca@cell.embeddings, # pcs metadata = cell_meta, #metadata cellQ = cell_q # cell quality ) }
Now we can run DIALOGUE!
library(DIALOGUE) R<-DIALOGUE.run(rA =cell.types, main = "20220412_Tutorial", k = 3, results.dir = "~/Desktop/tutorial", n.genes = 20) # conf = "cellQ",covar = c("cellQ"))
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.