#' getDensities
#'
#' Returns a dataframe with 4 columns, containing for every file and every
#' channel of interest the x/y values computed by density.
#'
#' @param files Full paths of to the fcs files of the samples
#' @param channels Names of the channels to compute the densities for
#' @param transformList Transformation list to pass to the flowCore
#' \code{transform} function
#' @param selection List with indexation vector for every file.
#' @param quantileValues Quantiles to extract as well
#' @param ... Extra parameters to pass to density
#'
#' @examples
#'
#' dir <- system.file("extdata", package = "CytoNorm")
#' files <- list.files(dir, pattern = "fcs$")
#'
#' ff <- flowCore::read.FCS(file.path(dir, files[1]))
#' channels <- grep("Di$", flowCore::colnames(ff), value = TRUE)
#' transformList <- flowCore::transformList(channels,
#' cytofTransform)
#'
#' densities <- getDensities(files = file.path(dir, files),
#' channels = channels,
#' transformList = transformList,
#' quantileValues = c(0.25, 0.5, 0.75))
#'
#' densities$densities$Marker <- paste0(
#' FlowSOM::GetMarkers(ff, densities$densities$Channel),
#' " (", densities$densities$Channel, ")")
#'
#' library(ggplot2)
#' ggplot(dplyr::filter(densities$densities,
#' File == file.path(dir, files[1]))) +
#' geom_line(aes(x = x, y = y)) +
#' facet_wrap(~ Marker, scales = "free") +
#' theme_minimal()
#'
#' ggplot(dplyr::filter(densities$quantiles,
#' Channel == FlowSOM::GetChannels(ff, "CD66"))) +
#' geom_vline(aes(xintercept = `0.25`), col = "grey") +
#' geom_vline(aes(xintercept = `0.5`), col = "grey", lwd = 1) +
#' geom_vline(aes(xintercept = `0.75`), col = "grey") +
#' geom_line(aes(x = x, y = y),
#' data = dplyr::filter(densities$densities,
#' Channel == FlowSOM::GetChannels(ff, "CD66"))) +
#' facet_wrap(~ File) +
#' theme_minimal()
#'
#' @export
getDensities <- function(files,
channels,
quantileValues = 0.5,
transformList = NULL,
selection = NULL,
...){
densities <- data.frame(matrix(ncol = 4, nrow = 0,
dimnames = list(NULL,
c("File",
"Channel",
"x",
"y"))))
quantiles <- data.frame(matrix(ncol = 2+length(quantileValues),
nrow = 0,
dimnames = list(NULL,
c("File",
"Channel",
quantileValues))))
for(file in files){
o <- utils::capture.output(ff <- flowCore::read.FCS(file))
if (!is.null(ff) && !is.null(transformList)) {
ff <- flowCore::transform(ff, transformList)
}
if (!is.null(ff) & !is.null(selection)){
ff <- ff[selection[[file]], ]
}
for (channel in channels) {
dens <- stats::density(ff@exprs[ , channel], ...)
densities <- rbind(densities,
data.frame(File = file,
Channel = channel,
x = dens$x,
y = dens$y))
quant <- stats::quantile(ff@exprs[, channel],
quantileValues)
quantiles <- rbind(quantiles,
cbind(data.frame(File = file,
Channel = channel),
matrix(quant,
nrow = 1,
dimnames = list(NULL,
quantileValues))))
}
}
return(named.list(densities,
quantiles))
}
#' test_cv
#'
#' Function to inspect whether all control samples contain a similar percentage
#' of cells in all FlowSOM clusters
#'
#' @param fsom FlowSOM list, as generated by prepareFlowSOM
#' @param cluster_values Vector with all amounts of clusters to test, can not be
#' smaller than 3.
#' @param plot If TRUE, a plot of the CV values is generated
#' @param verbose If TRUE, extra progress updates are printed
#' @param seed Seed for reproducible results. Default = 1.
#'
#' @examples
#'
#' dir <- system.file("extdata", package = "CytoNorm")
#' files <- list.files(dir, pattern = "fcs$")
#' data <- data.frame(File = files,
#' Path = file.path(dir, files),
#' Type = stringr::str_match(files, "_([12]).fcs")[,2],
#' Batch = stringr::str_match(files, "PTLG[0-9]*")[,1],
#' stringsAsFactors = FALSE)
#' data$Type <- c("1" = "Train", "2" = "Validation")[data$Type]
#' train_data <- dplyr::filter(data, Type == "Train")
#'
#' ff <- flowCore::read.FCS(data$Path[1])
#' channels <- grep("Di$", flowCore::colnames(ff), value = TRUE)
#' transformList <- flowCore::transformList(channels,
#' cytofTransform)
#'
#' fsom <- prepareFlowSOM(train_data$Path,
#' channels,
#' nCells = 10000, #1000000
#' FlowSOM.params = list(xdim = 15,
#' ydim = 15,
#' nClus = 25,
#' scale = FALSE),
#' transformList = transformList,
#' seed = 1)
#'
#' cvs <- testCV(fsom,
#' cluster_values = c(5,15,25,35,45)) # 3:50
#'
#' @export
testCV <- function(fsom,
cluster_values = 3:50,
plot = TRUE,
verbose = FALSE,
seed = 1) {
nClus <- fsom$map$nNodes
cluster_labels <- FlowSOM::GetClusters(fsom)
if (any(cluster_values < 3)) {
stop("Number of clusters to test cannot be smaller than 3.")
}
if (any(cluster_values > nClus)) {
stop(paste0("Number of clusters to test cannot be higher than number of
FlowSOM nodes (", nClus, ")."))
}
if (length(unique(fsom$data[,"File"])) == 1){
warning("Calculating the CV is not possible when only 1 file is used.")
}
# Determine metacluster labels
meta_cl <- list()
for(mc in cluster_values){
if(verbose) message("Computing ", mc, " metaclusters")
meta_cl[[as.character(mc)]] <-
FlowSOM::metaClustering_consensus(fsom$map$codes,
mc,
seed = seed)
}
meta_cl[[as.character(nClus)]] <- seq_len(nClus)
# Percentages assigned to each of the clusters per file
pctgs <- list()
for(mc in as.character(c(cluster_values, nClus))){
counts <- matrix(0,
nrow = length(unique(fsom$data[,"File"])),
ncol = as.numeric(mc),
dimnames = list(unique(fsom$data[,"File"]),
as.character(seq_len(as.numeric(mc)))))
tmp <- table(fsom$data[,"File"],
meta_cl[[mc]][cluster_labels])
counts[rownames(tmp), colnames(tmp)] <- tmp
pctgs[[mc]] <- t(apply(counts, 1,
function(x){ 100 * x/sum(x) }))
}
# Coefficient of variation for each of the percentages
cvs <- list()
for(mc in as.character(c(cluster_values, nClus))){
cvs[[mc]] <- apply(pctgs[[mc]],
2,
function(x){ stats::sd(x) / mean(x)})
}
res <- named.list(pctgs, cvs, meta_cl)
if(plot){
PlotOverviewCV(fsom, res)
}
return(res)
}
#' Plot overview of CV values for a flowSOM model
#'
#' @param fsom FlowSOM model
#' @param cv_res As generated by the testCV function
#' @param max_cv All values higher than this value will get the same color
#' @param show_cv All values higher than this one will be shown numerically
#'
#' dir <- system.file("extdata", package = "CytoNorm")
#' files <- list.files(dir, pattern = "fcs$")
#' data <- data.frame(File = files,
#' Path = file.path(dir, files),
#' Type = stringr::str_match(files, "_([12]).fcs")[,2],
#' Batch = stringr::str_match(files, "PTLG[0-9]*")[,1],
#' stringsAsFactors = FALSE)
#' data$Type <- c("1" = "Train", "2" = "Validation")[data$Type]
#' train_data <- dplyr::filter(data, Type == "Train")
#'
#' ff <- flowCore::read.FCS(data$Path[1])
#' channels <- grep("Di$", flowCore::colnames(ff), value = TRUE)
#' transformList <- flowCore::transformList(channels,
#' cytofTransform)
#'
#' fsom <- prepareFlowSOM(train_data$Path,
#' channels,
#' nCells = 10000, #1000000
#' FlowSOM.params = list(xdim = 15,
#' ydim = 15,
#' nClus = 25,
#' scale = FALSE),
#' transformList = transformList,
#' seed = 1)
#'
#' cvs <- testCV(fsom,
#' cluster_values = c(5,15,25,35,45)) # 3:50
#'
#' PlotOverviewCV(fsom, cvs)
#' @export
PlotOverviewCV <- function(fsom, cv_res, max_cv = 2.5, show_cv = 1.5){
cvs <- cv_res$cvs
pctgs <- cv_res$pctgs
nMetaClus <- length(levels(fsom$metaclustering))
nClus <- fsom$map$nNodes
cluster_values <- as.numeric(names(cvs))[-length(cvs)]
width <- max(cluster_values)
chosen <- which(cluster_values == nMetaClus)
cv_matrix <- do.call(rbind,
lapply(cvs[as.character(cluster_values)],
function (x) {
c(x,
rep(NA,
(width - length(x))))
}))
cv_matrix <- rbind(cv_matrix,
matrix(c(cvs[[as.character(nClus)]],
rep(NA, (width - (nClus %% width)))),
ncol = width,
byrow = TRUE))
rownames(cv_matrix)[length(cluster_values) + 1] <- "Original\nclustering"
colnames(cv_matrix) <- NULL
disp <- apply(cv_matrix, 2, function(x) as.character(round(x, 2)))
disp[cv_matrix < show_cv] <- ""
disp[is.na(cv_matrix)] <- ""
p_cvs <- pheatmap::pheatmap(cv_matrix,
cluster_cols = FALSE,
cluster_rows = FALSE,
na_col = "white",
border_color = "white",
gaps_row = c(chosen-1, chosen,
length(cluster_values)),
breaks = seq(0, max_cv, length.out = 100),
display_numbers = disp)
scaling <- ifelse(nrow(pctgs[[as.character(nMetaClus)]]) == 1, "none", "column")
p_pctgs_selected <- pheatmap::pheatmap(pctgs[[as.character(nMetaClus)]],
cluster_cols = FALSE,
cluster_rows = FALSE,
na_col = "white",
border_color = "white",
display_numbers = round(pctgs[[as.character(nMetaClus)]],2),
#number_color = "white",
scale = scaling)
gridExtra::grid.arrange(p_cvs[[4]], p_pctgs_selected[[4]])
}
#' emdEvaluation
#'
#' Evaluate how much files differ by computing the maximum Earth Movers Distance
#' for all markers and cellTypes.
#'
#' @param files Full paths of to the fcs files of the control samples.
#' @param channels Channels to evaluate (corresponding with the column
#' names of the flow frame)
#' @param transformList Transformation list to pass to the flowCore
#' \code{transform} function. Default NULL
#' @param prefix Prefix present in the files, which will be removed to match
#' the manual list.
#' @param manual A list which contains for every file a factor array. These
#' arrays contain a cell label for every cell in the files. All
#' arrays should have the same levels. Default = NULL, all
#' cells are evaluated together.
#' @param binSize Binsize to approximate distribution. Default = 0.1.
#' @param minRange Minimum to approximate distribution. Default = -100.
#' @param maxRange Maximum to approximate distribution. Default = 100.
#' @param return_all If TRUE, distributions and pairwise distances are returned
#' as well. Default = FALSE.
#' @param manualThreshold Minimum number of cells to calculate the EMD.
#' Default = 2
#'
#' @return A matrix in which the rows represent the cell types, the columns
#' reprents the markers and the values represent the maximal earth movers
#' distances for the distributions between all files
#'
#' @examples
#' # Describe file names
#' dir <- system.file("extdata",package="CytoNorm")
#' fileNames <- c("Gates_PTLG021_Unstim_Control_1.fcs",
#' "Gates_PTLG028_Unstim_Control_1.fcs")
#' labels <- c("PTLG021","PTLG028")
#' ff <- flowCore::read.FCS(file.path(dir,fileNames[1]))
#' channelsToNormalize <- flowCore::colnames(ff)[c(10, 11, 14, 16:35, 37, 39:51)]
#'
#' # Build transform list
#' transformList <- flowCore::transformList(channelsToNormalize,
#' cytofTransform)
#' emdEvaluation(file.path(dir,fileNames),
#' transformList = transformList,
#' channelsToNormalize)
#'
#' @export
emdEvaluation <- function(files,
channels,
transformList = NULL,
prefix = "^Norm_",
manual = NULL,
binSize = 0.1,
minRange = -100,
maxRange = 100,
return_all = FALSE,
manualThreshold = 2){
if (is.null(manual)) {
cellTypes <- c("AllCells")
} else {
cellTypes <- c("AllCells", levels(manual[[1]]))
}
prettyColnames <- NULL
distr <- list()
for (file in files) {
distr[[file]] <- list()
ff <- flowCore::read.FCS(file)
if (is.null(prettyColnames)){
prettyColnames <- paste0(FlowSOM::GetMarkers(ff, channels = channels),
" <", channels, ">")
}
if (!is.null(transformList)) {
ff <- flowCore::transform(ff, transformList)
}
for (cellType in cellTypes) {
if (is.null(manual) | cellType == "AllCells") {
selection <- seq_len(flowCore::nrow(ff))
} else {
selection <- manual[[gsub(prefix, "", gsub(".*/",
"", file))]] == cellType
}
if (sum(selection) >= manualThreshold){
distr[[file]][[cellType]] <-
apply(flowCore::exprs(ff)[selection, channels],
2,
function(x) {
graphics::hist(x,
breaks = seq(minRange, maxRange, by = binSize),
plot = FALSE)$counts
})
colnames(distr[[file]][[cellType]]) <- prettyColnames
distr[[file]][[cellType]] <- distr[[file]][[cellType]]/sum(selection)
} else {
distr[[file]][[cellType]] <- matrix(data = NA,
nrow = length(seq(minRange, maxRange, by = binSize))-1,
ncol = length(channels),
dimnames = list(NULL, prettyColnames))
}
any(distr[[file]][[cellType]] != 0)
}
}
distances <- list()
for (cellType in cellTypes) {
distances[[cellType]] <- list()
for (name in prettyColnames) {
distances[[cellType]][[name]] <- matrix(NA, nrow = length(files),
ncol = length(files), dimnames = list(files,
files))
for (i in seq_along(files)[-length(files)]) {
file1 <- files[i]
for (j in seq(i + 1, length(files))) {
file2 <- files[j]
distances[[cellType]][[name]][file1, file2] <-
emdist::emd2d(matrix(distr[[file1]][[cellType]][, name]),
matrix(distr[[file2]][[cellType]][, name]))
}
}
}
}
comparison <- matrix(NA, nrow = length(cellTypes), ncol = length(channels),
dimnames = list(cellTypes, prettyColnames))
for (cellType in cellTypes) {
for (name in prettyColnames) {
comparison[cellType, name] <- max(distances[[cellType]][[name]],
na.rm = TRUE)
}
}
if (return_all) {
return(named.list(distr, distances, comparison))
} else {
return(comparison)
}
}
#' madEvaluation
#'
#' Evaluate whether you lose biological information by checking whether the
#' MAD stays similar before and after normalization.
#'
#' @param files Full paths of to the fcs files of the control samples.
#' @param channels Channels to evaluate (corresponding with the column
#' names of the flow frame)
#' @param transformList Transformation list to pass to the flowCore
#' \code{transform} function. Default NULL
#' @param prefix Prefix present in the files, which will be removed to match
#' the manual list.
#' @param manual A list which contains for every file a factor array. These
#' arrays contain a cell label for every cell in the files. All
#' arrays should have the same levels. Default = NULL, all
#' cells are evaluated together.
#' @param return_all If TRUE, individual MAD values are returned
#' as well. Default = FALSE.
#'
#' @return A matrix in which the rows represent the cell types, the columns
#' reprents the markers and the values represent the median MAD values for
#' the distributions of all files
#'
#' @examples
#' # Describe file names
#' dir <- system.file("extdata",package="CytoNorm")
#' fileNames <- c("Gates_PTLG021_Unstim_Control_1.fcs",
#' "Gates_PTLG028_Unstim_Control_1.fcs")
#' labels <- c("PTLG021","PTLG028")
#' ff <- flowCore::read.FCS(file.path(dir,fileNames[1]))
#' channelsToNormalize <- flowCore::colnames(ff)[c(10, 11, 14, 16:35, 37, 39:51)]
#'
#' # Build transform list
#' transformList <- flowCore::transformList(channelsToNormalize,
#' cytofTransform)
#' res <- madEvaluation(file.path(dir,fileNames),
#' transformList = transformList,
#' channelsToNormalize)
#'
#' @importFrom stats mad median
#'
#' @export
madEvaluation <- function (files,
channels,
transformList = NULL,
prefix = "^Norm_",
manual = NULL,
return_all = FALSE)
{
if (is.null(manual)) {
cellTypes <- c("AllCells")
} else {
cellTypes <- c("AllCells", levels(manual[[1]]))
}
mad_values <- list()
for (file in files) {
mad_values[[file]] <- list()
ff <- flowCore::read.FCS(file)
if (!is.null(transformList))
ff <- flowCore::transform(ff, transformList)
for (cellType in cellTypes) {
if (is.null(manual) | cellType == "AllCells") {
selection <- seq_len(flowCore::nrow(ff))
} else {
selection <- manual[[gsub(prefix, "", gsub(".*/",
"", file))]] == cellType
}
if (sum(selection) > 1){
mad_values[[file]][[cellType]] <- apply(flowCore::exprs(ff)[selection,
channels], 2, function(x) {
mad(x)
})
} else {
mad_values[[file]][[cellType]] <- NA
}
any(mad_values[[file]][[cellType]] != 0)
}
}
mad_per_celltype <- list()
for (cellType in cellTypes){
cell_matr <- matrix(NA, nrow = length(files),
ncol = length(channels), dimnames = list(files,
channels))
for (x in seq_along(mad_values)){
cell_matr[x,] <-
mad_values[[x]][[grep(cellType,names(mad_values[[x]]),
fixed = TRUE)]]
}
mad_per_celltype[[cellType]] <- cell_matr
}
comparison <- t(sapply(mad_per_celltype, function(x){
apply(x, 2, median, na.rm = FALSE)
}))
if (return_all) {
return(named.list(mad_values, mad_per_celltype, comparison))
}
else {
return(comparison)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.