#' getQuantiles
#'
#' @param files Full paths to the fcs files of the samples
#' @param channels Names of the channels to compute the quantiles for
#' @param transformList Transformation list to pass to the flowCore
#' \code{transform} function
#' @param nQ Number of quantiles to compute. Default = 99, which
#' results in quantiles for every percent of the data.
#' Ignored if quantileValues is given.
#' @param minCells Minimum number of cells required to compute trustworthy
#' quantiles. Otherwise NA is returned. Default = 50.
#' @param quantileValues Numeric vector with values between 0 and 1, giving
#'                       the percentages at which the quantiles should be
#'                       computed. Overrides nQ. If NULL (default), nQ evenly
#'                       spaced quantiles strictly between 0 and 1 are used.
#' @param labels A label for every file, indicating to which group it
#' belongs. If multiple files have the same label, they
#' get aggregated. If NULL, all files are handled separately.
#' @param selection Named list with an indexing vector per label (or per file
#'                  if labels is NULL), used to select a subset of the cells.
#' @param verbose If TRUE, extra output is printed. Default = FALSE
#' @param plot If TRUE, plots are generated showing all quantiles.
#' Default = FALSE.
#' @param ... Additional arguments to pass to read.FCS
#'
#' @examples
#' dir <- system.file("extdata", package = "CytoNorm")
#' files <- list.files(dir, pattern = "fcs$")
#'
#' ff <- flowCore::read.FCS(file.path(dir, files[1]))
#' channels <- grep("Di$", flowCore::colnames(ff), value = TRUE)
#' transformList <- flowCore::transformList(channels,
#' cytofTransform)
#'
#' quantiles <- getQuantiles(files = file.path(dir, files),
#' channels = channels,
#' transformList = transformList)
#'
#' pheatmap::pheatmap(quantiles[[1]],
#' cluster_rows = FALSE,
#' cluster_cols = FALSE,
#' labels_col =
#' paste0(FlowSOM::GetMarkers(ff, colnames(quantiles[[1]])),
#' " (", colnames(quantiles[[1]]), ")"),
#' main = files[1])
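#'
#' # Illustrative sketch (not part of the original examples): pooling all
#' # files under a single, made-up label and requesting only the quartiles
#' batchLabels <- rep("Batch1", length(files))
#' quantiles_pooled <- getQuantiles(files = file.path(dir, files),
#'                                  channels = channels,
#'                                  transformList = transformList,
#'                                  quantileValues = c(0.25, 0.5, 0.75),
#'                                  labels = batchLabels)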
#'
#' @export
getQuantiles <- function(files,
channels,
nQ = 99,
minCells = 50,
quantileValues = NULL,
transformList = NULL,
labels = NULL,
selection = NULL,
verbose = FALSE,
plot = FALSE,
...){
if (is.null(labels)) labels <- files
# Compute quantiles for each label
if (verbose) message("Computing Quantiles")
quantiles <- list()
if (is.null(quantileValues)) {
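        # Default: nQ evenly spaced probabilities strictly between 0 and 1,
        # e.g. 0.01, 0.02, ..., 0.99 for nQ = 99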
        quantileValues <- (1:nQ) / (nQ + 1)
} else {
nQ <- length(quantileValues)
}
for(label in unique(labels)){
ids <- which(labels == label & file.exists(files))
if(verbose) message(" ", label, " (FileID ", paste(ids, collapse = "," ), ")")
# Read the file(s) and transform if necessary
if (length(ids) > 1) {
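            # Several files share this label: pool all their events (the cap
            # of 1e12 effectively keeps every cell) before computing quantiles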
ff <- FlowSOM::AggregateFlowFrames(files[ids], 1e12,
keepOrder = TRUE,
channels = channels,
...)
} else if(length(ids) == 1) {
            o <- utils::capture.output(ff <- flowCore::read.FCS(files[ids], ...))
if (verbose) message(o)
} else {
ff <- NULL
}
if (!is.null(ff) && !is.null(transformList)) {
ff <- flowCore::transform(ff, transformList)
}
        if (!is.null(ff) && !is.null(selection)){
            ff <- ff[selection[[label]], ]
}
# Compute quantiles for all channels to normalize
if (!is.null(ff) && flowCore::nrow(ff) > minCells) {
quantiles[[label]] <- apply(flowCore::exprs(ff)[, channels, drop = FALSE],
2,
function(x){
stats::quantile(x,
quantileValues)
})
if(plot){
textPlot(label)
for(channel in channels){
dens <- stats::density(flowCore::exprs(ff)[, channel],
bw = 0.1)
graphics::plot(dens,
bty = "n", xaxt = "n", yaxt = "n",
xlab = "", ylab = "", main = "",
xlim = c(0,
max(flowCore::exprs(ff)[, channel],
8)))
graphics::abline(v = quantiles[[label]][, channel],
col = "grey")
graphics::lines(dens, lwd = 2)
}
}
} else {
if(is.null(ff)){
message(" Could not find ", paste(files[ids], collapse = ", "))
} else {
message(" Less then ", minCells, " cells in ", label,
" (", flowCore::nrow(ff), "). No quantiles computed.")
}
quantiles[[label]] <- matrix(NA,
nrow = nQ,
ncol = length(channels),
dimnames =
list(as.character(quantileValues),
channels))
if(plot){
textPlot(label)
for(channel in channels){
textPlot("NA")
}
}
}
}
return(quantiles)
}
#' QuantileNorm.train
#'
#' Learn the batch effects from control samples.
#' This function computes quantiles to describe the distribution of the data,
#' and infers spline functions to equalize these distributions over the files.
#' Typically, you will use the function \code{\link{QuantileNorm.normalize}}
#' after this function to remove the learned batch effects from other files.
#'
#' @param files Full paths to the fcs files of the control samples
#' @param labels A label for every file, indicating to which batch it
#' belongs, e.g. the plate ID.
#' @param channels Names of the channels to normalize
#' @param transformList Transformation list to pass to the flowCore
#' \code{transform} function
#' @param nQ Number of quantiles to use. Default = 99, which results in
#' quantiles for every percent of the data.
#' @param quantileValues If specified, it should be a vector of length nQ with
#' values between 0 and 1, giving the percentages at
#' which the quantiles should be computed. If NULL
#'                       (default), nQ evenly spaced quantiles strictly
#'                       between 0 and 1 are used.
#' @param limit Values that are added as fixed points which the splines map
#'              onto themselves (e.g. c(0, 8)), so that they stay unchanged.
#' @param goal Goal distribution. Default "mean". Can also be one of the batch
#'             labels, a numeric vector of length nQ, or a matrix with nQ rows
#'             and one column per channel.
#' @param plot If TRUE, a plot is generated (using the \code{layout}
#' function) showing all quantiles. Default = FALSE.
#' @param plotTitle Title to use in the plot. Default = "Quantiles".
#' @param verbose If TRUE, progress updates are printed. Default = FALSE.
#' @param ... Additional arguments to pass to read.FCS
#'
#' @return A list with the channels, the splines per batch and channel, the
#'         quantiles per batch, the quantile values and the goal quantiles
#'         (refQuantiles). This can be used as input for the
#'         \code{\link{QuantileNorm.normalize}} function.
#' @seealso \code{\link{CytoNorm.train}}, \code{\link{QuantileNorm.normalize}}
#'
#' @examples
#'
#' dir <- system.file("extdata", package = "CytoNorm")
#' files <- list.files(dir, pattern = "fcs$")
#' data <- data.frame(File = files,
#' Path = file.path(dir, files),
#' Type = stringr::str_match(files, "_([12]).fcs")[,2],
#' Batch = stringr::str_match(files, "PTLG[0-9]*")[,1],
#' stringsAsFactors = FALSE)
#' data$Type <- c("1" = "Train", "2" = "Validation")[data$Type]
#' train_data <- dplyr::filter(data, Type == "Train")
#'
#' ff <- flowCore::read.FCS(data$Path[1])
#' channels <- grep("Di$", flowCore::colnames(ff), value = TRUE)
#' transformList <- flowCore::transformList(channels,
#' cytofTransform)
#'
#' png("nQ99.png",
#' width = length(channels) * 300,
#' height = (nrow(train_data) * 2 + 1) * 300)
#' model_nQ_99 <- QuantileNorm.train(
#' files = train_data$Path,
#' labels = train_data$Batch,
#' channels = channels,
#' transformList = transformList,
#' nQ = 99,
#' plot = TRUE)
#' dev.off()
#'
#' png("nQ99_limited.png",
#' width = length(channels) * 300,
#' height = (nrow(train_data) * 2 + 1) * 300)
#' model_nQ_99 <- QuantileNorm.train(
#' files = train_data$Path,
#' labels = train_data$Batch,
#' channels = channels,
#' transformList = transformList,
#' nQ = 99,
#' limit = c(0,8),
#' plot = TRUE)
#' dev.off()
#'
#' png("nQ_2.png",
#' width = length(channels) * 300,
#' height = (nrow(train_data) * 2 + 1) * 300)
#' model_nQ_2 <- QuantileNorm.train(
#' files = train_data$Path,
#' labels = train_data$Batch,
#' channels = channels,
#' transformList = transformList,
#' nQ = 2,
#' quantileValues = c(0.001, 0.999),
#' plot = TRUE)
#' dev.off()
#'
#' model_goal_mean <- QuantileNorm.train(
#' files = train_data$Path,
#' labels = train_data$Batch,
#' channels = channels,
#' transformList = transformList)
#'
#' model_goal_batch1 <- QuantileNorm.train(
#' files = train_data$Path,
#' labels = train_data$Batch,
#' channels = channels,
#' transformList = transformList,
#' goal = "PTLG021")
#'
#' model_goal_fixed <- QuantileNorm.train(
#' files = train_data$Path,
#' labels = train_data$Batch,
#' channels = channels,
#' transformList = transformList,
#' nQ = 99,
#' goal = seq(0.01, 0.99, by = 0.01))
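#'
#' # Illustrative sketch: reusing the goal quantiles of a previously trained
#' # model as a fixed goal matrix (nQ rows, one column per channel)
#' model_goal_matrix <- QuantileNorm.train(
#'                    files = train_data$Path,
#'                    labels = train_data$Batch,
#'                    channels = channels,
#'                    transformList = transformList,
#'                    nQ = 99,
#'                    goal = model_goal_batch1$refQuantiles)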
#'
#' @export
QuantileNorm.train <- function(files,
labels,
channels,
transformList,
nQ = 99,
limit = NULL,
quantileValues = NULL,
goal = "mean",
verbose = FALSE,
plot = FALSE,
plotTitle = "Quantiles",
...){
if(length(labels) != length(files)){
stop("Input parameters 'labels' and 'files'",
" should have the same length")
}
labels <- as.character(labels)
if(plot){
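        # Plot layout: one column per channel plus a leading label column;
        # rows are a header, one density row per batch (drawn in getQuantiles),
        # the goal distribution and one spline row per batch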
        xdim <- 1 + length(channels)
        ydim <- 2 + 2 * length(unique(labels))
graphics::layout(matrix(1:(xdim*ydim), ncol = xdim, byrow = TRUE))
graphics::par(mar=c(1, 1, 0, 0))
textPlot(plotTitle)
ff_tmp <- flowCore::read.FCS(files[file.exists(files)][1],...)
for(channel in channels){
marker <- flowCore::getChannelMarker(ff_tmp, channel)[,2]
if(is.na(marker)){
textPlot(channel)
} else {
textPlot(paste0(marker, " (", channel, ")"))
}
}
}
quantiles <- getQuantiles(files = files,
labels = labels,
channels = channels,
transformList = transformList,
nQ = nQ,
quantileValues = quantileValues,
verbose = verbose,
plot = plot,
...)
if(!is.null(limit)){
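        # Append the limit values as extra fixed points, so that the splines
        # are trained to map these values onto themselves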
quantiles <- lapply(quantiles, function(quantile_matrix){
rbind(quantile_matrix,
matrix(limit,
nrow = length(limit),
ncol = ncol(quantile_matrix),
byrow = FALSE,
dimnames = list(paste0("Fixed_",limit), NULL)))
})
}
# Get the goal distributions
if(is.character(goal) && goal == "mean"){
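        # Average every quantile over all batches (per channel), ignoring NA
        # entries from batches with too few cells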
refQuantiles <- matrix(apply(matrix(unlist(quantiles),
ncol = length(unique(labels))),
1, mean, na.rm = TRUE),
nrow = nQ+length(limit),
dimnames = list(rownames(quantiles[[1]]),
channels))
} else if (is.numeric(goal)) {
if(length(goal) == nQ){
refQuantiles <- matrix(goal,
nrow = nQ,
ncol = length(channels),
dimnames = list(quantileValues,
channels))
} else if (is.matrix(goal)){
if ((nrow(goal) == nQ) & (ncol(goal) == length(channels))){
refQuantiles <- goal
} else if ((nrow(goal) != nQ)){
            stop(nrow(goal), " quantiles in the goal distribution and ",
                 nQ, " quantiles in the CytoNorm call. ",
                 "These should be the same.")
} else {
            stop(ncol(goal), " channels in the goal distribution and ",
                 length(channels), " channels in the CytoNorm call. ",
                 "These should be the same.")
}
} else {
stop("Goal should be 'mean', a batch label, ",
"a numeric vector of length nQ, or a matrix with nQ rows
and length(channels) columns.")
}
} else if (goal %in% unique(labels)) {
refQuantiles <- quantiles[[goal]]
} else {
stop("Goal should be 'mean', a batch label, ",
"a numeric vector of length nQ, or a matrix with nQ rows
and length(channels) columns.")
}
if(plot){
textPlot("Goal distribution")
for(channel in channels){
graphics::plot(0, type = "n", xlim = c(0, 8),
bty = "n", xaxt = "n", yaxt = "n",
xlab = "", ylab = "", main = "")
graphics::abline(v = refQuantiles[,channel])
}
}
# Compute splines for each label
splines <- list()
if(verbose) message("Computing Splines")
for(label in unique(labels)){
if(verbose) message(" ",label)
if(plot){ textPlot(label) }
splines[[label]] <- list()
        if(!is.null(quantiles[[label]]) && !any(is.na(quantiles[[label]])) &&
           !any(is.na(refQuantiles))){
for(channel in channels){
refQ <- refQuantiles[, channel]
labelQ <- quantiles[[label]][, channel]
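                # Fit a monotone Hermite spline mapping this batch's quantiles
                # onto the goal quantiles, so the ordering of expression values
                # is preserved after normalization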
if(length(unique(labelQ)) > 1){
suppressWarnings(spl <- stats::splinefun(labelQ,
refQ,
method="monoH.FC"))
} else {
spl <- identityFunction
warning("Not enough unique quantiles for ", label, " in ",
channel, ". The identity function will be used.")
}
splines[[label]][[channel]] <- spl
if(plot){
graphics::plot(labelQ, refQ, xlim = c(0, 8), ylim = c(0, 8),
pch = 19, bty = "n", xaxt = "n", yaxt = "n",
xlab = "", ylab = "", main = "")
graphics::lines(c(-0.5, 8), c(-0.5, 8), col="#999999")
x <- seq(-0.5, 8, 0.1)
graphics::lines(x,
splines[[label]][[channel]](x),
col = "#b30000")
}
}
} else {
warning("Not enough cells for ", label,
"\nThe identity function will be used.")
for(channel in channels){
splines[[label]][[channel]] <- identityFunction
if(plot){
graphics::plot(c(0,8), c(0,8), col = "#999999", type = "l",
xlim = c(0,8), ylim = c(0,8),
pch = 19, bty = "n", xaxt = "n", yaxt = "n",
xlab = "", ylab = "", main = "")
}
}
}
}
named.list(channels, splines, quantiles, quantileValues, refQuantiles)
}
#' QuantileNorm.normalize
#'
#' Normalize data, given the batch effects learned from control samples per
#' cell type/cluster (output from \code{\link{QuantileNorm.train}}). New fcs
#' files are written to the given output directory.
#'
#' @param model Model of the batch effects, as computed by
#' \code{\link{QuantileNorm.train}}
#' @param files Full paths to the fcs files of the samples to normalize
#' @param labels A label for every file, indicating to which batch it
#' belongs, e.g. the plate ID.
#' @param transformList Transformation list to pass to the flowCore
#' \code{transform} function
#' @param transformList.reverse Transformation list with the reverse functions,
#' so the normalized files can be saved in the untransformed
#' space
#' @param outputDir Directory to write the normalized fcs files to. Default = "."
#' @param prefix Prefix to put in front of the normalized file names.
#' Default = "Norm_"
#' @param removeOriginal Should the original fcs files be removed? Default = FALSE.
#' @param verbose If TRUE, progress updates are printed. Default = FALSE.
#' @param ... Additional arguments to pass to read.FCS
#'
#' @return Nothing is returned, but the new FCS files are written to the output
#' directory
#' @seealso \code{\link{QuantileNorm.train}}
#'
#' @examples
#'
#'
#' dir <- system.file("extdata", package = "CytoNorm")
#' files <- list.files(dir, pattern = "fcs$")
#' data <- data.frame(File = files,
#' Path = file.path(dir, files),
#' Type = stringr::str_match(files, "_([12]).fcs")[,2],
#' Batch = stringr::str_match(files, "PTLG[0-9]*")[,1],
#' stringsAsFactors = FALSE)
#' data$Type <- c("1" = "Train", "2" = "Validation")[data$Type]
#' train_data <- dplyr::filter(data, Type == "Train")
#' validation_data <- dplyr::filter(data, Type == "Validation")
#'
#' ff <- flowCore::read.FCS(data$Path[1])
#' channels <- grep("Di$", flowCore::colnames(ff), value = TRUE)
#' transformList <- flowCore::transformList(channels,
#' cytofTransform)
#' transformList.reverse <- flowCore::transformList(channels,
#' cytofTransform.reverse)
#'
#' model_nQ_99 <- QuantileNorm.train(
#' files = train_data$Path,
#' labels = train_data$Batch,
#' channels = channels,
#' transformList = transformList,
#' nQ = 99)
#'
#' QuantileNorm.normalize(model_nQ_99,
#' validation_data$Path,
#' validation_data$Batch,
#' transformList = transformList,
#' transformList.reverse = transformList.reverse)
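#'
#' # Illustrative variation (the output directory and prefix are arbitrary):
#' QuantileNorm.normalize(model_nQ_99,
#'                        validation_data$Path,
#'                        validation_data$Batch,
#'                        transformList = transformList,
#'                        transformList.reverse = transformList.reverse,
#'                        outputDir = "Normalized",
#'                        prefix = "QN_")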
#' @export
QuantileNorm.normalize <- function(model,
files,
labels,
transformList,
transformList.reverse,
outputDir = ".",
prefix = "Norm_",
removeOriginal = FALSE,
verbose = FALSE,
...){
    if(is.null(model$channels) ||
       is.null(model$splines) ||
       is.null(model$quantiles)){
stop("The 'model' parameter should be the result of using the ",
"QuantileNorm.train function.")
}
if(length(labels) != length(files)){
stop("Input parameters 'labels' and 'files' ",
"should have the same length")
}
    # Create the output directory if it does not exist yet
    if (!dir.exists(outputDir)) dir.create(outputDir)
channels <- model$channels
# Normalize each file based on label
for(i in seq_along(files)){
file <- files[i]
if(file.exists(file)){
label <- as.character(labels[i])
if(verbose) message(" ",file," (",label,")")
if(label %in% names(model$splines)){
# Read the file
ff <- flowCore::read.FCS(file,...)
# Transform if necessary
if(!is.null(transformList)){
#description_original <- ff@description
#parameters_original <- ff@parameters
ff <- flowCore::transform(ff, transformList)
}
# Overwrite the values with the normalized values
if (verbose) message("Normalizing ",label)
for (channel in channels) {
                    flowCore::exprs(ff)[, channel] <-
                        model$splines[[label]][[channel]](
                            flowCore::exprs(ff)[, channel])
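                    # Guard against infinite values (e.g. from extreme spline
                    # extrapolation): cap them at the largest finite value
                    # observed in this channel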
infinities <- is.infinite(flowCore::exprs(ff)[, channel])
if(any(infinities)){
warning(paste0(label, " ", channel,
": Replacing ", sum(infinities),
" infinity values by max value."))
                        flowCore::exprs(ff)[infinities, channel] <-
                            sign(flowCore::exprs(ff)[infinities, channel]) *
                            max(abs(flowCore::exprs(ff)[!infinities, channel]))
}
}
if (!is.null(transformList.reverse)) {
ff <- flowCore::transform(ff, transformList.reverse)
} else if (!is.null(transformList)) {
warning("Please provide a reverse transformation list if
you want the files to be saved in the original
untransformed space.")
}
if (removeOriginal) {
file.remove(file)
}
suppressWarnings(flowCore::write.FCS(ff,
filename = file.path(outputDir,
paste0(prefix,
gsub(".*/","",file)))))
} else {
warning("The model was not trained for ", label, ".")
}
} else {
warning(file, " does not exist, skipped.")
}
}
}