#' Read an FCS file
#'
#' Read an FCS file while suppressing warnings.
#' @param filePath The full path to the FCS files.
#' @param ... Additional arguments to flowCore::read.FCS.
#' @return A flowCore::flowFrame object.
#' @export
ReadFCS <- function (filePath, ...) {
addtlArgs = list(...)
dataset = 1
if ("dataset" %in% names(addtlArgs))
dataset = addtlArgs[["dataset"]]
which.lines = NULL
if ("which.lines" %in% names(addtlArgs))
which.lines = addtlArgs[["which.lines"]]
fcs = tryCatch({
suppressWarnings(flowCore::read.FCS(filePath, dataset = dataset,
which.lines = which.lines,
truncate_max_range = F))
}, error = function(e) {
if (grepl("Please set argument 'emptyValue' as", e$message)) {
suppressWarnings(flowCore::read.FCS(filePath, dataset = dataset,
which.lines = which.lines, emptyValue = F,
truncate_max_range = F))
}
else {
stop(e$message)
}
})
return(fcs)
}
#' @inherit citrus::citrus.readFCSSet
#' @export
ReadFCSSet <- function (dataDirectory, fileList, fileSampleSize = 1000,
transformColumns = NULL, transformCofactor = 5, scaleColumns = NULL,
useChannelDescriptions = F, readParameters = NULL, verbose = TRUE, ...) {
data = list()
fileCounter = 1
fileNames = c()
fileChannelNames = list()
fileReagentNames = list()
addtlArgs = list(...)
conditions = colnames(fileList)
for (i in 1:length(conditions)) {
if (verbose) cat(paste("Reading Condition ", conditions[i], "\n"))
conditionData = list()
fileChannelNames[[conditions[i]]] = list()
fileReagentNames[[conditions[i]]] = list()
p <- dplyr::progress_estimated(length(fileList[, conditions[i]]))
if (verbose) cat("Reading in FCS files.\n")
for (fileName in fileList[, conditions[i]]) {
fileNames[fileCounter] = fileName
filePath = file.path(fileName)
if (!file.exists(filePath)) {
stop(paste("File", filePath, "not found."))
}
if(verbose) cat(paste("\tReading file ", fileName, "\n"))
fcsFile = ReadFCS(filePath)
fcsData = fcsFile@exprs
parameterDescriptions = as.vector(flowCore::pData(flowCore::parameters(fcsFile))$desc)
parameterNames = flowCore::colnames(fcsFile)
invalidDescriptions = unname(which(sapply(parameterDescriptions,
nchar) < 3 | is.na(parameterDescriptions)))
parameterDescriptions[invalidDescriptions] = parameterNames[invalidDescriptions]
if (useChannelDescriptions) {
colnames(fcsData) = parameterDescriptions
}
if (!is.null(readParameters)) {
fcsData = fcsData[, readParameters]
}
fileChannelNames[[conditions[i]]][[fileName]] = parameterNames
fileReagentNames[[conditions[i]]][[fileName]] = parameterDescriptions
fcsData = cbind(fcsData, fileEventNumber = 1:nrow(fcsData),
fileId = fileCounter)
fileCounter = fileCounter + 1
if ((!is.null(fileSampleSize)) && (fileSampleSize <
nrow(fcsData))) {
if (verbose) cat(paste("\tSampling", fileSampleSize, "events.\n"))
fcsData = fcsData[sort(sample(1:nrow(fcsData),
fileSampleSize)), ]
}
conditionData[[fileName]] = fcsData
if (verbose) cat(paste("\t FCS file size is", str(dim(fcsData)), '.\n'))
if (useChannelDescriptions) {
channelDescriptions = as.vector(flowCore::pData(flowCore::parameters(fcsFile))$desc)
nchar(channelDescriptions) > 2
}
cat(capture.output(p$tick()))
}
cat('\n')
data[[conditions[i]]] = do.call("rbind", conditionData)
rm(conditionData)
gc()
}
data = do.call("rbind", data)
results = list(fileIds = matrix(1:(fileCounter - 1), ncol = length(conditions),
dimnames = list(c(), conditions)),
fileNames = fileNames, fileChannelNames = fileChannelNames,
fileReagentNames = fileReagentNames)
if (!is.null(transformColumns)) {
if (any(!is.numeric(transformColumns))) {
containedCols = setdiff(transformColumns, colnames(data))
if (length(containedCols) > 0) {
stop(paste("Transform cols", paste(containedCols, collapse = ", "),
"not found. Valid channel names:",
paste(colnames(data), collapse = ", ")))
}
}
data[, transformColumns] = asinh(data[, transformColumns]/transformCofactor)
results$transformColumns = transformColumns
results$transformCofactor = transformCofactor
}
if (!is.null(scaleColumns)) {
if (any(!is.numeric(scaleColumns))) {
containedCols = setdiff(scaleColumns, colnames(data))
if (length(containedCols) > 0) {
stop(paste("Scale cols", paste(containedCols, collapse = ", "),
"not found. Valid channel names:",
paste(colnames(data), collapse = ", ")))
}
}
results$scaleColumns = scaleColumns
results$scaleColumns.mean = apply(data[, scaleColumns], 2, mean)
results$scaleColumns.SD = apply(data[, scaleColumns], 2, sd)
data[, scaleColumns] = apply(data[, scaleColumns], 2, scale)
}
results$data = data
class(results) = "citrus.combinedFCSSet"
return(results)
}
#' Run CITRUS
#'
#' Run CITRUS on selected channels
#' @param dataDir Full path to the folder containing FCS files.
#' @param selected.desc A character vector of selected channel description.
#' @param fileList A list of full path to FCS files.
#' @param family A character scalar of model type.
#' @param transformCofactor An integer for arcsinh transform channel; default 5
#' @param minimumClusterSizePercent Minimal size of cluster in proportion.
#' @param fileSampleSize The number of cells subsampled from each file.
#' @param nFolds Number of folds used for clustering
#' @param featureType A character vector of feature type (abundances, proportion).
#' @param seed A seed to reproduce the run.
#' @param pattern Regular expression used to select FCS files in the folder.
#' @param labels Optional vector of label used for balancing clustering folds
#' @return A list consisting of feature matrix, citrus clustering object and
#' combinedFCSSet object.
#' @import citrus
#' @export
runCITRUS = function(dataDir, selected.desc, fileList=NULL,
family = "classification", transformCofactor = 5,
minClusterSizePercent = 0.05,
fileSampleSize = 1000, nFolds = 1,
featureType = c("abundances"), seed=1,
pattern='fcs', labels=NULL) {
set.seed(seed)
outputDir = file.path(dataDir, "citrusOutput")
if (is.null(fileList)) {
fileList = list.files(dataDir, pattern=pattern, full=T)
}
fileList = data.frame(defaultCondition=fileList)
combinedFCSSet <- ReadFCSSet(dataDir, fileList,
fileSampleSize = fileSampleSize,
transformColumns = selected.desc,
transformCofactor = transformCofactor,
scaleColumns = selected.desc,
useChannelDescriptions=T,
readParameters = selected.desc,
verbose=FALSE)
citrus.foldClustering <- citrus.clusterAndMapFolds(combinedFCSSet,
clusteringColumns = selected.desc,
labels = labels,
nFolds = nFolds)
conditions <- colnames(fileList)[1]
features <- citrus.calculateFoldFeatureSet(citrus.foldClustering,
combinedFCSSet,
featureType=featureType,
conditions=conditions,
minimumClusterSizePercent=minClusterSizePercent)
return(list(features=features,
foldClustering=citrus.foldClustering,
combinedFCS=combinedFCSSet))
}
#' CITRUS regression
#'
#' Run CITRUS regression function.
#'
#' @param clustering A list object returned by RunCITRUS.
#' @param outcome A vector of response.
#' @return A regression result object.
#' @export
CITRUSRegression <- function(clustering, outcome, seed = 1) {
set.seed(seed)
suppressWarnings({
citrus.regressionResults <- mclapply(c("glmnet"),
citrus.endpointRegress,
citrus.foldFeatureSet=clustering$features,
labels=outcome,
family="classification")
})
return(citrus.regressionResults)
}
#' Plot CITRUS hierarchy
#'
#' Plot the hierarchy used in CITRUS colored by stratefying clusters.
#'
#' @param clustering A list object returned by RunCITRUS.
#' @param regressionRes An object returned by CITRUSRegression.
#' @param lambda A character to indicate lambda choice (min or 1se)
#' @param seed An optional seed for reproducibility.
#' @return A ggplot object of the hierarchy.
#' @export
PlotHierarchy <- function(clustering, regressionRes, lambda = 'min', seed=1) {
set.seed(seed)
diff_cluster <- data.frame(cluster=integer(), selected=logical())
if (lambda == '1se') {
cluster_min <- regressionRes[[1]]$differentialFeatures$cv.1se$clusters
} else {
cluster_min <- regressionRes[[1]]$differentialFeatures$cv.min$clusters
}
if (length(cluster_min)) {
diff_cluster <- rbind(data.frame(cluster = cluster_min, selected = T))
}
graph.list <- citrus::citrus.createHierarchyGraph(clustering$foldClustering$allClustering,
clustering$features$allLargeEnoughClusters)
g <- graph.list$graph
cluster.attributes <- data.frame(cluster=as.numeric(as.character(vertex_attr(g, name='label')))) %>%
dplyr::left_join(diff_cluster, by = "cluster") %>%
tidyr::replace_na(list(selected=F))
assignment <- clustering$foldClustering$allClustering$clusterMembership
n <- c()
for (i in seq(nrow(cluster.attributes))) {
cluster.data <- clustering$combinedFCS$data[assignment[[cluster.attributes$cluster[i]]],]
n <- c(n, nrow(cluster.data))
}
cluster.attributes$size <- n
g <- set.vertex.attribute(g, "alpha", value=sapply(cluster.attributes$selected,
function(x) ifelse(x, 1, 0.3)))
g <- set.vertex.attribute(g, "size", value=log2(cluster.attributes$size))
net <- intergraph::asNetwork(g)
p <- GGally::ggnet2(net, label='label', alpha="alpha", size='size',
edge.size=0.1, label.size=2.5, label.color='#616568',
palette = "Set2") +
guides(size=FALSE, color=guide_legend(title=NULL)) +
theme(legend.position='none')
return(list(p=p, cluster.attributes = cluster.attributes))
}
#' Plot CITRUS Cluster MFI Heatmap
#'
#' Plot heatmap of mean fluroresence intensity of CITRUS clusters.
#' @param clustering A list object returned by RunCITRUS.
#' @param cluster.attributes A data frame returned by PlotHierarchy.
#' @param name Name of selected channels.
#' @param desc Description of selected channels.
#' @return A pheatmap object.
#' @export
PlotClusterMFI = function(clustering, cluster.attributes, name, desc, ...) {
cluster.mean <- c()
assignment = clustering$foldClustering$allClustering$clusterMembership
clusters = cluster.attributes$cluster %>% as.character %>% as.numeric
for (i in seq(clusters)) {
cluster.data = clustering$combinedFCS$data[assignment[[clusters[i]]],]
cluster.mean <- rbind(cluster.mean,
unlist(apply(cluster.data[,name], 2, mean)))
}
colnames(cluster.mean) = desc
rownames(cluster.mean) = unlist(clusters)
head(cluster.mean)
cluster.attributes %<>% dplyr::mutate(label=ifelse(selected, '*', '')) %>%
tidyr::unite(clusterlabeled, c('cluster', 'label'), sep='', remove=F)
p.data <- data.matrix(cluster.mean)
#for(i in seq(10)) {
# p.data = t(apply(p.data, 1, scale))
# p.data = apply(p.data, 2, scale)
#}
rownames(p.data) = rownames(cluster.mean)
colnames(p.data) = colnames(cluster.mean)
annotation = data.frame(selected=cluster.attributes$selected %>%
as.character)
rownames(annotation) = cluster.attributes$cluster
options(repr.plot.height=5.5, repr.plot.width=5)
p = pheatmap::pheatmap(p.data, annotation_row=annotation, annotation_colors = list(
selected = c('FALSE' = "white", 'TRUE' = "firebrick")), ...)
return(p)
}
#' Plot Results of CITRUS regression
#'
#' Plot results of CITRUS regression including effect size of the
#' difference in abundance between groups, boxplot of abundance
#' between groups and distribution of differential clusters.
#' @param clustering A list object returned by RunCITRUS.
#' @param cluster.attributes A object returned by PlotHierarchy.
#' @param outcome A vector of binary outcome.
#' @param name Name of selected channels.
#' @param desc Description of selected channels.
#' @return A ggplot object of effect size.
#' @export
PlotRes = function(clustering, cluster.attributes, outcomes, name, desc = NULL, ylab='ES') {
abundance = data.frame(clustering$features$allFeatures)
colnames(abundance) = sapply(colnames(abundance),
function(x) strsplit(x, '\\.')[[1]][2])
abundance$ID = seq(nrow(abundance))
abundance$outcome = outcomes
diff_clusters = cluster.attributes[cluster.attributes$selected,]$cluster
abundance %<>%
tidyr::gather(cluster, level, -c(ID, outcome)) %>%
dplyr::mutate(cluster = as.numeric(cluster))
p.effsize = abundance %>%
dplyr::filter(cluster %in% diff_clusters) %>%
dplyr::group_by(cluster) %>%
dplyr::mutate(level=as.numeric(level)) %>%
dplyr::summarize(d = -effsize::cohen.d(level, outcome)$estimate,
lwr = -effsize::cohen.d(level, outcome)$conf.int[1],
upr = -effsize::cohen.d(level, outcome)$conf.int[2]) %>%
dplyr::ungroup() %>%
dplyr::left_join(cluster.attributes)
ordering.clusters = p.effsize$cluster[order(p.effsize$d)]
p.effsize = p.effsize %>%
dplyr::mutate(cluster=factor(cluster, levels=ordering.clusters)) %>%
na.omit() %>%
ggplot(data=.) +
geom_point(aes(x=cluster, y=d)) +
geom_segment(aes(x=cluster, xend=cluster,
y=lwr, yend=upr), size=0.2) +
coord_flip() +
geom_hline(aes(yintercept=0), linetype='dashed', color='gray', size=0.5) +
theme_classic() + ylab(ylab)
p.boxplot = abundance %>%
dplyr::filter(cluster %in% diff_clusters) %>%
dplyr::mutate(cluster = factor(cluster, levels=rev(ordering.clusters))) %>%
dplyr::mutate(level = as.numeric(level)) %>%
ggplot(data=.) +
geom_boxplot(aes(x=outcome, y=level), width=0.3) +
geom_jitter(aes(x=outcome, y=level), color='steelblue', size=0.3)+
ylab('abundance') +
facet_wrap(~cluster, ncol=1) +
theme_classic() +
theme(strip.background=element_blank(), axis.title.y=element_blank()) +
coord_flip()
p.dist = PlotDist(clustering, name, rev(ordering.clusters), desc = desc)
return(list(p.effsize=p.effsize, p.boxplot=p.boxplot, p.dist=p.dist))
}
#' Plot Marker Distribution
#'
#' Plot marker distribution of each cluster.
#'
#' @param clustering A list object from RunCITRUS
#' @param name Name of selected channels.
#' @param desc Description of selected channels.
#' @param mincluster Clusters
PlotDist = function(clustering, name, diff_cluster, desc = NULL) {
marker.num = length(name)
signif.feat = as.numeric(as.character(diff_cluster))
assignment = clustering$foldClustering$allClustering$clusterMembership
background = data.frame(clustering$combinedFCS$data[,name])
if(!is.null(desc)) colnames(background) = desc
background = background %>%
tidyr::gather(marker, level)
p = list()
for (i in seq(length(diff_cluster))) {
cluster.df = data.frame(clustering$combinedFCS$data[assignment[[diff_cluster[i]]], name])
if(!is.null(desc)) colnames(cluster.df) = desc
p[[i]] = cluster.df %>%
tidyr::gather(marker, level) %>%
ggplot(data=.) +
geom_density(data=background, aes(x=level), fill='gray', alpha=0.5, size=0.) +
geom_density(aes(x=level), fill='blue', alpha=0.5, size=0.1) +
facet_wrap(~marker, ncol=marker.num, scale='free_x') +
theme_classic() +
theme(strip.background=element_blank(),
axis.title.x=element_blank(),
axis.text=element_blank(),
axis.ticks=element_blank()) +
ylab(diff_cluster[i])
}
p = cowplot::plot_grid(plotlist=p, ncol=1)
return(p)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.