In saeyslab/FlowSOM_workshop: How to get started with FlowSOM

Computational cytometry tools

Getting started

Load libraries

suppressPackageStartupMessages({
  library(ggplot2) # For nice plots
  library(pheatmap) # For pretty heatmap plots
  library(readxl) # For reading excel files
  library(flowCore) # For basic cytometry operations
  library(flowDensity) # For density visualizations and density based gating
  library(Rtsne) # For tSNE approximations
  library(flowAI) # For quality control over time
  library(FlowSOM) # For FlowSOM
  library(FlowSOMworkshop) # Some extra functions to make life easier in this workshop
  library(flowStats) # For normalization
  library(CytoNorm) # For normalization
  library(PeacoQC) # For QC over time, replacement for flowAI
})

Identify the fcs files of interest on the system

dir <- system.file("extdata", package="FlowSOMworkshop")
files <- list.files(dir, pattern = "Live.fcs")
files

Load an fcs file into a flowFrame

ff <- read.FCS(file.path(dir, files[4]))
ff

Check the expression matrix stored in the flowframe

head(ff@exprs)

Check the metadata stored in the flowframe

head(ff@description)

Plot two markers against each other in the traditional dot plot

plotDens(ff, get_channels(ff, c("CD3", "CD19")))

Compensate the data with the spillover matrix acquired at the machine and transform with the logicle transformation

comp <- ff@description$`$SPILLOVER`
comp

ff <- compensate(ff, comp)
ff <- transform(ff, estimateLogicle(ff, colnames(ff)[7:19]))
plotDens(ff, get_channels(ff, c("CD3", "CD19")))

Store the markers of interest for later use

channels_of_interest <-  colnames(ff)[c(7, 9:13, 15:19)]
print(get_markers(ff, channels_of_interest))

Exercises

How many events are measured in Tube 30?

Plot CD64 against FcERI for Tube 30

Preprocessing

Quality control - Individual samples

Provide a directory for the quality control results

QC_dir <- "Workshop_QC"
preprocessed_dir <- "Workshop_preprocessed"
if(!dir.exists(QC_dir)){
  dir.create(QC_dir)
  dir.create(preprocessed_dir)
  dir.create(file.path(QC_dir, "flowAI"))
  dir.create(file.path(QC_dir, "PeacoQC"))
}

Preprocess the samples

cellCount <- rep(NA, length(files))
names(cellCount) <- files
for (file in files){
  # Read the FCS file
  ff <- read.FCS(file.path(dir, file))

  cellCount[file] <- nrow(ff)

  # Compensate with the acquisition defined matrix
  # Alternatively, export the adapted matrix from FlowJo into a csv and read that in R as a matrix
  ff <- compensate(ff, ff@description$`$SPILLOVER`)

  # Transform using a logicle transform on all fluorochrome channels
  ff <- transform(ff, estimateLogicle(ff, colnames(ff)[7:19]))

  # Adaptation to the time because we started from FlowJO exported files,
  # which saves the time in seconds instead of hundredths of seconds
  ff@exprs[,"Time"] <- ff@exprs[,"Time"]*100

  # head(ff@exprs[,"Time"])
  # tail(ff@exprs[,"Time"])
  # ff@description$`$BTIM`
  # ff@description$`$ETIM`

  # Run flowAI on the samples
  resQC <- flow_auto_qc(fcsfiles = ff,
                        folder_results = file.path(QC_dir, "flowAI"),
                        output = 1)

  # Run PeacoQC, the QC algorithm which Annelies is currently developping
  resQC <- PeacoQC(ff = ff,
                   determine_good_cells = "all",
                   channels = c(1,4,7:19),
                   plot = TRUE,
                   output_folder = file.path(QC_dir, "PeacoQC"),
                   compensation_matrix = ff@description$`$SPILLOVER`)
  ff <- ff[resQC$GoodCells,]
  saveRDS(resQC, file.path(QC_dir, "PeacoQC", gsub(".fcs", "_QC.RDS", file)))

  write.FCS(ff, 
            file.path(preprocessed_dir, file))
}

ggplot(data.frame(file = gsub(".*_(Tube_[0-9]*)_.*", "\\1", names(cellCount)),
                  count = cellCount)) +
  geom_bar(aes(x = file, y = count), stat = "identity") +
  theme_minimal() +
  coord_flip()

Make an aggregate file to combine all files

agg_dir <- "Workshop_aggregate"
if(!dir.exists(agg_dir)) dir.create(agg_dir)

set.seed(1)
ff_agg <- AggregateFlowFrames(file.path(preprocessed_dir, files),
                              cTotal = 3000000,
                              writeMeta = TRUE,
                              outputFile = file.path(agg_dir, "aggregate.fcs"))

table(ff_agg@exprs[,"File"])

Plot a comparison of the expression between all files

plot_aggregate(input = ff_agg, 
               labels = c("1","1","1","2","2","2"),
               sample_names = gsub(".*_(Tube_[0-9]*)_.*", "\\1", files),
               channels = channels_of_interest,
               output_image = file.path(QC_dir, "aggregate.png"))

Normalization

Create dir for results, load the problematic data and extract dates

batch_dir <- "Workshop_batch"
if(!dir.exists(batch_dir)) dir.create(batch_dir)

files <- list.files(preprocessed_dir, pattern = "Live.fcs")
files_batch2 <- list.files(preprocessed_dir, pattern = "Batch.fcs") # already preprocessed and QC filtered

Comparison plot between all the files

fs_batch <- read.flowSet(file.path(preprocessed_dir, c(files, files_batch2))) # read in files in order of the date

plot_aggregate(input = fs_batch, 
               labels = rep(c("1", "2"), each = 6),
               channels = channels_of_interest,
               sample_names = gsub(".*_(Tube_[0-9]*)_.*", "\\1", names(fs_batch@frames)),
               output_image = file.path(batch_dir, "aggregate.png"))

We have 2 batches, normalization is needed

Normalization per file

Min max percentile normalization (alignment of 1st and 99th percentile)

fs_minmax <- fsApply(fs_batch, function(ff){
  for (col in channels_of_interest){
    c <- ff@exprs[,col]
    c <- (c - quantile(c, 0.01)) / (quantile(c, 0.99) - quantile(c, 0.01))
    ff@exprs[,col] <- c
  }  
  ff
})

plot_aggregate(input = fs_minmax, 
               labels = rep(c("1", "2"), each = 6),
               channels = channels_of_interest,
               sample_names = gsub(".*_(Tube_[0-9]*)_.*", "\\1", names(fs_minmax@frames)),
               output_image = file.path(batch_dir, "aggregate_minmax_perFile.png"))

gaussNorm normalization (aligment of peaks in the density distribution)

fs_gaussNorm <- gaussNorm(flowset = fs_batch,
                          channel.names = channels_of_interest,
                          peak.density.thr=0.01)

plot_aggregate(input = fs_gaussNorm[[1]], 
               labels = rep(c("1", "2"), each = 6),
               channels = channels_of_interest,
               sample_names = gsub(".*_(Tube_[0-9]*)_.*", "\\1", names(fs_gaussNorm[[1]]@frames)),
               output_image = file.path(batch_dir, "aggregate_gaussNorm_perFile.png"))

CytoNorm (first FlowSOM clustering, then quantile normalization on metacluster level)

## Train model (ideally trained on control/reference samples)
model <- CytoNorm.train(files = file.path(preprocessed_dir, c(files, files_batch2)),
                        labels = 1:12, # 1 label per file
                        channels = channels_of_interest,
                        transformList = NULL,
                        FlowSOM.params = list(xdim = 10,
                                              ydim = 10,
                                              nClus = 10,
                                              nCells = 1000000,
                                              scale = TRUE),
                        normMethod.train = QuantileNorm.train,
                        plot = T,
                        seed = 1,
                        outputDir = file.path(batch_dir, "CytoNorm_tmp"))

## Apply model
CytoNorm.normalize(model = model,
                   files = file.path(preprocessed_dir, c(files, files_batch2)),
                   labels = 1:12,
                   transformList = NULL,
                   transformList.reverse = NULL,
                   outputDir = file.path(batch_dir, "CytoNorm"),
                   normMethod.normalize = QuantileNorm.normalize)

fs_CytoNorm <- read.flowSet(list.files("Workshop_batch/CytoNorm",
                                       pattern = ".fcs",
                                       full.names = T))

plot_aggregate(input = fs_CytoNorm, 
               labels = rep(c("1", "2"), each = 6),
               channels = channels_of_interest,
               sample_names = gsub(".*_(Tube_[0-9]*)_.*", "\\1", names(fs_minmax@frames)),
               output_image = file.path(batch_dir, "aggregate_CytoNorm_perFile.png"))

Normalization per batch

Create a flow frame per batch

batch1_ff <- AggregateFlowFrames(file.path(preprocessed_dir, files),
                                 cTotal = max(fsApply(fs_batch, nrow)) * 6,
                                 keepOrder = TRUE)
batch2_ff <- AggregateFlowFrames(file.path(preprocessed_dir, files_batch2),
                                 cTotal = max(fsApply(fs_batch, nrow)) * 6,
                                 keepOrder = TRUE)

splitFlowframe <- function(ff, labels = exprs(ff)[,"File"]){
  lapply(unique(labels), 
         function(label){ ff[labels == label, ] })
}

Min max norm

fs_minmax <- fsApply(flowSet(batch1_ff,
                             batch2_ff), 
                     function(ff){
                       for (col in channels_of_interest){
                         c <- ff@exprs[,col]
                         c <- (c - quantile(c, 0.01)) / (quantile(c, 0.99) - quantile(c, 0.01))
                         ff@exprs[,col] <- c
                       }  
                       ff
                     })

fs_minmax <- flowSet(unlist(fsApply(fs_minmax, splitFlowframe)))

plot_aggregate(input = fs_minmax, 
               labels = rep(c("1", "2"), each = 6),
               channels = channels_of_interest,
               sample_names = gsub(".*_(Tube_[0-9]*)_.*", "\\1", names(fs_minmax@frames)),
               output_image = file.path(batch_dir, "aggregate_minmax_batch.png"))

gaussNorm normalization (aligment of peaks in the density distribution)

fs_gaussNorm <- gaussNorm(flowset = flowSet(batch1_ff,
                                            batch2_ff),
                          channel.names = channels_of_interest,
                          peak.density.thr=0.01)
fs_gaussNorm <- flowSet(unlist(fsApply(fs_gaussNorm$flowset, splitFlowframe)))


plot_aggregate(input = fs_gaussNorm, 
               labels = rep(c("1", "2"), each = 6),
               channels = channels_of_interest,
               sample_names = gsub(".*_(Tube_[0-9]*)_.*", "\\1", names(fs_gaussNorm@frames)),
               output_image = file.path(batch_dir, "aggregate_gaussNorm_batch.png"))

CytoNorm (first FlowSOM clustering, then quantile normalization on metacluster level)

## Train model (ideally trained on control/reference samples)
model <- CytoNorm.train(files = file.path(preprocessed_dir, c(files, files_batch2)),
                        labels = rep(c("1", "2"), each = 6), # 1 label per batch
                        channels = channels_of_interest,
                        transformList = NULL,
                        FlowSOM.params = list(xdim = 10,
                                              ydim = 10,
                                              nClus = 10,
                                              nCells = 1000000,
                                              scale = TRUE),
                        normMethod.train = QuantileNorm.train,
                        plot = T,
                        seed = 1,
                        outputDir = file.path(batch_dir, "CytoNorm_tmp"))

## Apply model
CytoNorm.normalize(model = model,
                   files = file.path(preprocessed_dir, c(files, files_batch2)),
                   labels = rep(c("1", "2"), each = 6),
                   transformList = NULL,
                   transformList.reverse = NULL,
                   outputDir = file.path(batch_dir, "CytoNorm"),
                   normMethod.normalize = QuantileNorm.normalize)

fs_CytoNorm <- read.flowSet(list.files("Workshop_batch/CytoNorm",
                                       pattern = ".fcs",
                                       full.names = T))

plot_aggregate(input = fs_CytoNorm, 
               labels = rep(c("1", "2"), each = 6),
               channels = channels_of_interest,
               sample_names = gsub(".*_(Tube_[0-9]*)_.*", "\\1", names(fs_minmax@frames)),
               output_image = file.path(batch_dir, "aggregate_CytoNorm_batch.png"))

Reproducing a manual gating

Store channel names into variables for ease of use

ff <- read.FCS(file.path(preprocessed_dir, files[4])) # Get back to original dataset

cd3_channel <- get_channels(ff, c("CD3"))
cd19_channel <- get_channels(ff, c("CD19"))

Use the deGate function to find a split for a specific channel

cd3_threshold <- deGate(ff, cd3_channel)
cd19_threshold <- deGate(ff, cd19_channel)

Create a flowframe with only the CD3+ CD19- cells

selection <- ff@exprs[,cd3_channel] > cd3_threshold & ff@exprs[,cd19_channel] < cd19_threshold 
ff_T <- ff[selection,]

Plot the result

plotDens(ff, c(cd3_channel, cd19_channel))
abline(v = cd3_threshold)
abline(h = cd19_threshold)
points(ff_T@exprs[,c(cd3_channel, cd19_channel)], col = "red", pch = ".")

Exercises

Create a selection of CD161+ CD3- cells and show them on a scatterplot

Loading a manual gating

Identify the flowjo workspace on the drive

wsp_file <- "Data/manualGating_Live.wsp"
cell_types <- c("Macrophages",
                "B cells",
                "NK cells",
                "NK T cells",
                "T cells",
                "DCs",
                "Neutrophils",
                "Basophils")
gating <- FlowSOM::GetFlowJoLabels(files,
                                   wsp_file, 
                                   cell_types = cell_types)
str(gating)

colSums(gating[[files[4]]]$matrix)

gating_labels <- c()
for(i in seq_len(length(gating))){
  labels <- as.character(gating[[files[i]]]$manual)
  selection_qc <- readRDS(file.path(QC_dir, "PeacoQC",gsub(".fcs", "_QC.RDS", files[i])))
  selection_agg <- read.table(file.path(agg_dir, gsub(".fcs", "_selected_aggregate.txt", files[i])))$x
  gating_labels <- c(gating_labels, labels[selection_qc$GoodCells][selection_agg])
}

gating_labels <- factor(gating_labels, levels = levels(gating[[1]]$manual))

Running tSNE on a subset of the data

We use a seed to make sure the results are reproducable later. This does not remove the intrinsic random decisions made by the algorithm!
We select 1000 cells from the flowframe for quicker computation time.
We compute tSNE with the default perplexity of 30.

set.seed(1)
subset <- sample(seq_len(nrow(ff)), 1000)
tsne <- Rtsne(ff@exprs[subset, channels_of_interest])

Plot the result

plotSNE_marker(flowframe = ff[subset,],
               tsne_result = tsne,
               marker = "CD19") +
scale_color_distiller(palette = "RdYlBu")

selection <- readRDS(file.path(QC_dir, "PeacoQC",gsub(".fcs", "_QC.RDS", files[4])))
plotSNE_manual_gating(tsne, gating[[files[4]]]$manual[selection$GoodCells][subset])

Exercises

Run tSNE on another 1000 randomly selected cells from the flowframe

Show the CD3 expression on this tSNE

Running FlowSOM

Run the FlowSOM algorithm on the whole flowset (all cells from the 6 files). When you have more data, it might be recommended to create an aggregate flowframe first with a subset from all files (e.g. 3 million cells for most laptops).
We create a 10 by 10 grid for the initial clustering layer, and than 10 final meta-clusters.
Scale should be TRUE if the input columns have varying ranges which are not biologically relevant.
In the last line, we remove the channel name for further visualizations, only showing the marker name.

fsom <- FlowSOM(ff_agg,
                colsToUse = channels_of_interest,
                scale = FALSE,
                xdim = 10, ydim = 10,
                nClus = 10,
                seed = 1)
fsom$FlowSOM$prettyColnames <- gsub(" <.*", "", fsom$FlowSOM$prettyColnames)

Plot the result

PlotStars(fsom$FlowSOM,
          backgroundValues = fsom$metaclustering)

Plot the result were all circles have the same size

PlotStars(UpdateNodeSize(fsom$FlowSOM, reset = TRUE, maxNodeSize = 8),
          backgroundValues = fsom$metaclustering)

Exercises

Make a fsom49 object with 49 clusters instead of 100 and 8 metaclusters, and without using the AmCyan channel

Plot your fsom49 result with equal node sizes

Plot only CD3, CD19 and CD11b, by making use of the "markers" argument, which takes a vector of channel names as input.
You could make use of the get_channels() function to map marker names to channel names

Plot with the argument view = "grid"

Identify populations

pop_mark <- readxl::read_xlsx("Data/Populations and markers.xlsx")
cellTypes <- parse_markertable(pop_mark)

pop_mark

cluster_names <- query_multiple(fsom = fsom,
                                ff = ff,
                                cell_types = cellTypes,
                                pdf_name = "identify_clusters.pdf")

metacluster_names <- label_metaclusters(fsom, cluster_names[GetClusters(fsom$FlowSOM)])
metacluster_names

Exercises

Compute metacluster_names8 for your second FlowSOM object

Additional visualisations

Show a heatmap with the MFI values

plot_metacluster_MFIs(fsom, 
                      metacluster_names)

Code that produces the same result, without the helper function

MFIs <- MetaclusterMFIs(fsom)[,channels_of_interest]
colnames(MFIs) <- get_markers(ff, colnames(MFIs))
rownames(MFIs) <- paste0("Metacluster ",rownames(MFIs)," (", metacluster_names, ")")
pheatmap::pheatmap(MFIs)

Plot the cluster numbers, to refer to later

PlotNumbers(UpdateNodeSize(fsom$FlowSOM, reset = TRUE, maxNodeSize = 0.0001), 
            fontSize = 0.5)

Plot the metacluster numbers

PlotLabels(UpdateNodeSize(fsom$FlowSOM, reset = TRUE, maxNodeSize = 0.0001),
           labels = fsom$metaclustering,
           fontSize = 0.6)

Plot the cluster labels

PlotLabels(UpdateNodeSize(fsom$FlowSOM, maxNodeSize = 0.0001), 
           cluster_names, 
           fontSize = 0.5)

Plot a scatter plot of CD3/CD19 and highlight cluster 7 in red

PlotClusters2D(fsom$FlowSOM,
               get_channels(ff, "CD3"),
               get_channels(ff, "CD19"),
               7)

Plot a scatter plot of CD3/CD19 and hightlight all clusters of metacluster 3 in red

PlotClusters2D(fsom$FlowSOM,
               get_channels(ff, "CD3"),
               get_channels(ff, "CD19"),
               which(fsom$metaclustering == 3))

Plot an overview of multiple scatter plots for multiple metaclusters

markerpairs <- list(c("CD64", "AmCyan-A"),
                    c("CD3", "CD19"),
                    c("CD3", "CD161"),
                    c("CD11c", "MHCII"),
                    c("Ly-6G", "CD11b")
)
metaclusters_of_interest <- metacluster_names
levels(fsom$metaclustering) <- metacluster_names
png("metacluster_scatters.png",
    width = 500 * length(markerpairs),
    height = 400 * length(metaclusters_of_interest))
PlotOverview2D(fsom,
               markerlist = markerpairs,
               metaclusters = metaclusters_of_interest,
               ff = ff)
dev.off()

Visualize the manual labels from FlowJo

PlotPies(UpdateNodeSize(fsom$FlowSOM, maxNodeSize = 8, reset = TRUE),
         gating_labels)

Exercises

Plot some figures for your own fsom object

Mapping individual samples

Create an empty matrix, and count the number of cells assigned to each cluster for every file

mat_counts <- matrix(rep(0, length(files)*fsom$FlowSOM$map$nNodes),
                     nrow = length(files),
                     dimnames = list(files,
                                     paste0("Cl", seq_len(fsom$FlowSOM$map$nNodes))))

for (file in files){
  ff <- read.FCS(file.path(preprocessed_dir, file))
  fsom_tmp <- NewData(fsom, ff)
  t <- table(GetClusters(fsom_tmp))
  mat_counts[file, as.numeric(names(t))] <- t
}

rownames(mat_counts) <- gsub(".*_Tube_([0-9]*)_Live.*", "Tube \\1", rownames(mat_counts) )

Visualize in a heatmap

pheatmap(mat_counts)

Alternatively, compute the percentages for all files

pctgs <- apply(mat_counts, 2, function(x) x / rowSums(mat_counts))
pctgs <- list(pctgs = pctgs,
              pctgs_meta = t(apply(pctgs, 1, function(x) tapply(x, fsom$metaclustering, sum))))
pctgs

Plot the percentages in a heatmap

pheatmap::pheatmap(pctgs$pctgs_meta)
pheatmap::pheatmap(pctgs$pctgs_meta, scale = "column")

The FlowSOMWorkshop package also provides a function to plot the individual values

plot_pctgs(pctgs$pctgs_meta)

Plot trees with node sizes adapted to the individual files

pdf("individual_files.pdf", 
    useDingbats = FALSE)
for(file in files){
  fsom_mapped <- FlowSOM_subset(fsom, 
                                file.path(dir, file))
  PlotStars(UpdateNodeSize(fsom_mapped$FlowSOM))
}
dev.off()

Comparing groups

Given the cell counts we computed earlier, FlowSOM provides a function to make a plot per group, with the node size adapted to the average of the group, and the background color indicating statistically significant differences in comparison to the first group.

groupRes <- CountGroups(fsom$FlowSOM, 
                        groups=list("WT" = mat_counts[4:6,],
                                    "NK_KO" = mat_counts[1:3,]))
PlotGroups(fsom[[1]], groupRes, p_tresh = 0.3)

Compute some more statistics

statistics_res <- compute_wilcox(pctgs$pctgs_meta,
                                 group1 = rownames(pctgs$pctgs_meta)[grep("028|030|031", files)],
                                 group2 = rownames(pctgs$pctgs_meta)[grep("011|012|013", files)])

statistics_res

saeyslab/FlowSOM_workshop documentation built on Sept. 3, 2021, 9:21 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

saeyslab/FlowSOM_workshop
How to get started with FlowSOM

In saeyslab/FlowSOM_workshop: How to get started with FlowSOM

Computational cytometry tools

Getting started

Exercises

Preprocessing

Quality control - Individual samples

Normalization

Normalization per file

Normalization per batch

Reproducing a manual gating

Exercises

Loading a manual gating

Running tSNE on a subset of the data

Exercises

Running FlowSOM

Exercises

Identify populations

Exercises

Additional visualisations

Exercises

Mapping individual samples

Comparing groups

R Package Documentation

Browse R Packages

We want your feedback!

saeyslab/FlowSOM_workshop How to get started with FlowSOM

In saeyslab/FlowSOM_workshop: How to get started with FlowSOM

Computational cytometry tools

Getting started

Exercises

Preprocessing

Quality control - Individual samples

Normalization

Normalization per file

Normalization per batch

Reproducing a manual gating

Exercises

Loading a manual gating

Running tSNE on a subset of the data

Exercises

Running FlowSOM

Exercises

Identify populations

Exercises

Additional visualisations

Exercises

Mapping individual samples

Comparing groups

R Package Documentation

Browse R Packages

We want your feedback!

saeyslab/FlowSOM_workshop
How to get started with FlowSOM