knitr::opts_chunk$set(tidy = TRUE) knitr::knit_hooks$set(small.mar = function(before, options, envir) { if (before) par(mar = c(0, 0, 0, 0)) # no margin })
cytometree
cytometree
is a package that implements a binary tree algorithm for the analysis of cytometry data.
Its core algorithm is based on the construction of a binary tree, whose nodes represents cell subpopulations.
At each node, observed cells (or "events") and markers are modeled by both a normal distribution (so unimodal), and a mixture of 2 normal distributions (so bimodal).
Splitting of the events at each node is done according to a normalized difference of AIC between the two distributional fit (unimodal or bimodal), allowing to pick the marker that best splits those data.
When AIC normalized differences $D$ are not significant anymore, the tree has been constructed, and the cells have been automatically gated (i.e. partitioned).
Given the unsupervised nature of the binary tree, some of the available markers may not be used to find the different cell populations present in a given sample. To recover a complete annotation, we defined, as a post processing procedure, an annotation method which allows the user to distinguish two (or three) expression levels per marker.
cytometree
In this example, we will use a diffuse large B-cell lymphoma dataset (from the FlowCAP-I challenge, with only 3 markers.
First, we need to load the package cytometree
and we can have a look at the data:
library(cytometree) data(DLBCL) dim(DLBCL) head(DLBCL)
We have 3 markers measured FL1
, FL2
, FL4
as well as the label
obtained from manual gating, and r nrow(DLBCL)
cells.
# getting the only the cell events with the 3 markers measured: cellevents <- DLBCL[,c("FL1", "FL2", "FL4")] # storing the maanual gating reference from FlowCAP-I: manual_labels <- DLBCL[,"label"]
The function CytomeTree
builds the binary tree according to our algorithm, that gives the automatic gating (given in the resulting labels
attribute):
# Build the binary tree: Tree <- CytomeTree(cellevents, minleaf = 1, t = 0.1) # Retreive the resulting partition (i.e. automatic gating): Tree_Partition <- Tree$labels
One can fiddle with the t
threshold to change the desired depth of the tree.
The function plot_graph
plots a graph representing this binary tree, showing which markers were used in which order to splits the events:
# Plot a graph of the tree (with specific graphical parameters): plot_graph(Tree, edge.arrow.size = 0.3, Vcex = 0.8, vertex.size = 30)
The function plot_nodes
allows to graphically evaluate the fit of the gaussian family distribution at each node. It can also be used to investigate a particular node:
# Plot the distribution fit for each node: plot_nodes(Tree)
# Plot the distribution fit for a particular node: plot_nodes(Tree, "FL4.1")
The function Annotation
annotates the subpopulations partitioned by the binary tree:
# Run the annotation algorithm: Annot <- Annotation(Tree,plot=FALSE) Annot$combinations
The post-processing annotation can be compared to the incomplete annotation obtained from the tree alone:
# Annotation gotten from the tree: Tree$annotation
This completed annotation eases the search for specific cell types of interest, where 1
represents a positive measurement of a marker, while 0
corresponds to its absence:
# seeked cell type: FL2+FL4+ pheno_ex1 <- RetrievePops(Annot, phenotypes = list(rbind(c("FL2", 1), c("FL4", 1)))) pheno_ex1$phenotypesinfo #One can look for several cell types at once: phenotypes <- list() # FL2+FL4- phenotypes[[1]] <- rbind(c("FL2", 1), c("FL4", 0)) # FL2-FL4+ phenotypes[[2]] <- rbind(c("FL2", 0), c("FL4", 1)) # Retreive corresponding cell populations: PhenoInfos <- RetrievePops(Annot, phenotypes) PhenoInfos$phenotypesinfo
Finally, we can compare the automatic gating obtained using cytometree
to the original manual gating (which had r sum(manual_labels==0)
events inconsistently gated across different operators performing the manual gating on those very same data - so those r sum(manual_labels==0)
cells were identified as outliers in the reference label and were kept out of the evaluation criteria in the FlowCAP-I challenge):
# F-measure ignoring cells labeled 0 as in FlowCAP-I. # Use FmeasureC() in any other case. Fmeas <- cytometree::FmeasureC_no0(ref=manual_labels, pred=Tree_Partition) # Scatterplots. library(ggplot2) library(viridis) # Ignoring cells labeled 0 as in FlowCAP-I. # Building the data frame to scatter plot the data. FL1 <- cellevents[, "FL1"] FL2 <- cellevents[, "FL2"] FL4 <- cellevents[, "FL4"] n <- length(FL1) man_lab <- factor(as.character(manual_labels)) levels(man_lab) <- c("outliers (manual gating)", "pop1", "pop2") FL4pos <- RetrievePops(Annot, phenotypes = list(cbind("FL4", 1))) auto_lab <- factor(as.character(FL4pos$Mergedleaves)) levels(auto_lab) <- c("pop2", "pop1") Labels <- factor(c(as.character(man_lab), as.character(auto_lab))) method <- as.factor(c(rep("FlowCap-I manual gating", n), rep("CytomeTree auto gating", n))) scatter_df <- data.frame("FL2" = FL2, "FL4" = FL4, "Label" = Labels, "method" = method) p <- ggplot2::ggplot(scatter_df, ggplot2::aes_string(x = "FL2", y="FL4",colour="Label"))+ ggplot2::geom_point(alpha = 1,cex = 1)+ ggplot2::scale_colour_manual(values = c("grey", viridis::viridis(4))) + ggplot2::facet_wrap(~ method) + ggplot2::theme_bw() + ggplot2::theme(legend.position="bottom") + ggplot2::guides(colour = ggplot2::guide_legend(override.aes = list(size=3)))+ ggplot2::ggtitle(paste("F-measure (manual gating as reference - removing the outliers) =", round(Fmeas, 3))) p
In this example, we will use a dataset from the HIPC (Human Immunology Project Consortium program where a PBMC sample from patient 12828 was analyzed by Stanford.
First, we need to load the package cytometree
and we can have a look at the data:
data(HIPC) dim(HIPC) head(HIPC)
We have 6 markers measured CCR7
, CD4
, CD45RA
, HLADR
, CD38
, CD8
as well as the label
obtained from manual gating, and r nrow(HIPC)
cell.
# getting the only the cell events with the 3 markers measured: cellevents <- HIPC[,c("CCR7", "CD4", "CD45RA", "HLADR" ,"CD38" ,"CD8")] # storing the maanual gating reference from FlowCAP-I: manual_labels <- HIPC[,"label"]
The function CytomeTree
builds the binary tree according to our algorithm, that gives the automatic gating (given in the resulting labels
attribute):
# Build the binary tree: Tree <- CytomeTree(cellevents, minleaf = 1, t = 0.2) # Retreive the resulting partition (i.e. automatic gating): Tree_Partition <- Tree$labels
One can fiddle with the t
threshold to change the desired depth of the tree.
The function plot_graph
plots a graph representing this binary tree, showing which markers were used in which order to splits the events:
# Plot a graph of the tree (with specific graphical parameters): plot_graph(Tree, edge.arrow.size = 0.4, Vcex = 0.45)
The function plot_nodes
allows to graphically evaluate the fit of the gaussian family distribution at each node. It can also be used to investigate a particular node:
# Plot the fit for 2 specific nodes: plot_nodes(Tree, list("CD4.1", "CD45RA.7"))
The function Annotation
annotates the subpopulations partitioned by the binary tree:
# Run the annotation algorithm: Annot <- Annotation(Tree,plot=FALSE) Annot$combinations
This completed annotation eases the search for specific cell types of interest, where 1
represents a positive measurement of a marker, while 0
corresponds to its absence:
#One can look for several cell types at once: phenotypes <- list() # CD8-CD4+CCR7-CD45RA+ CD4effector <- rbind(c("CD8", 0), c("CD4", 1), c("CCR7", 0), c("CD45RA", 1)) phenotypes[[1]] <- CD4effector # CD8-CD4+CCR7+CD45RA+ CD4naive <- rbind(c("CD8", 0), c("CD4", 1), c("CCR7", 1), c("CD45RA", 1)) phenotypes[[2]] <- CD4naive # CD8-CD4+CCR7+CD45RA- CD4CM <- rbind(c("CD8", 0), c("CD4", 1), c("CCR7", 1), c("CD45RA", 0)) phenotypes[[3]] <- CD4CM # CD8-CD4+CCR7+CD45RA+ CD4effectorM <- rbind(c("CD8", 0), c("CD4", 1), c("CCR7", 0), c("CD45RA", 0)) phenotypes[[4]] <- CD4effectorM # Retreive corresponding cell populations: PhenoInfos <- RetrievePops(Annot, phenotypes) # One can find informations about the seeked phenotypes in PhenoInfos$phenotypesinfo # According to PhenoInfos$phenotypesinfo, CD8-CD4+CCR7-CD45RA+ population is labeled 12 # According to PhenoInfos$phenotypesinfo, CD8-CD4+CCR7+CD45RA+ population is labeled 5 # According to PhenoInfos$phenotypesinfo, CD8-CD4+CCR7+CD45RA- population is labeled 4 # According to PhenoInfos$phenotypesinfo, CD8-CD4+CCR7+CD45RA+ population is comprised # of 4 clusters labeled 8,9,10, 11. They were merged into a new cluster labeled 13. # The new partition, with merged clusters is to be found in PhenoInfos$Mergedleaves. auto_labels_merged <- PhenoInfos$Mergedleaves # Get the indices of the subpopulation comprised of classes labeled 12, 5, 4,13. subpopulation_indices <- auto_labels_merged %in% c(12, 5, 4, 13) # Scatter plot the data. CD45RA <- cellevents[subpopulation_indices, "CD45RA"] CCR7 <- cellevents[subpopulation_indices, "CCR7"] auto_lab <- factor(auto_labels_merged[subpopulation_indices]) levels(auto_lab) <- viridis::viridis(length(levels(auto_lab))) auto_lab <- as.character(auto_lab) plot(CD45RA, CCR7, col = auto_lab, main = "Automatic gating")
It is possible to force the markers which will be used to gate the data first,
using the force_first_markers
argument of the CytomeTree()
function.
Once those forced markers are used, automatic behavior of the algorithm is
resumed for the remaining markers.
# Build the binary tree, forcing the first node to be CD4, and the second ones HLADR Tree <- CytomeTree(cellevents, minleaf = 1, t = 0.2, force_first_markers = c("CD4", "HLADR"))
# Plot a graph of the tree (with specific graphical parameters): plot_graph(Tree, edge.arrow.size = 0.4, Vcex = 0.45)
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.