#' Load stuff into analysis object
#'
#' @examples
#' ds <- Load(c("1.fcs", "2.fcs"), transform = T, toTransform = c(7:14), cells = 100000)
#' @export
Load <- function(files, scale = T, transform = F, toTransform = NULL, compensate = F, cells = 1e5, ...) {
fs <- FlowSOM::ReadInput(
FlowSOM::AggregateFlowFrames(files, cTotal = cells),
transform = transform,
toTransform = toTransform,
compensate = compensate,
scale = scale,
...
)
# unscale fs$data[,'File'] (the rough way; FlowSOM does not export the scaling information anymore)
fs$data[, "File"] <- as.numeric(factor(fs$data[, "File"]))
list(fs = fs, files = files)
}
#' Hierarchical clustering for Embed()
#'
#' @export
ds_hclust <- function(codes, data, importance = NULL, mapping, k = 7) {
# This code was proudly adapted from FlowSOM
d <- stats::dist(codes, method = "euclidean")
fit <- stats::hclust(d, method = "ward.D2")
stats::cutree(fit, k = k)
# end of FlowSOM copy
}
#' k-means clustering for Embed()
#'
#' @export
ds_kmeans <- function(codes, data, importance = NULL, mapping, k = 7) {
kmeans(x = codes, centers = k)$cluster
}
#' consensus clustering for Embed()
#'
#' warning: the internally called ConsensusClusterPlus may break the state of
#' random number generator; use caution if the results should be reproducible.
#'
#' @export
ds_consensus <- function(codes, data, importance = NULL, mapping, k = 7) {
seed <- sample(1000000000, 1) # force "normal" behavior
# another FlowSOM copy begins
results <- suppressMessages(ConsensusClusterPlus::ConsensusClusterPlus(
t(codes),
maxK = k, reps = 100, pItem = 0.9, pFeature = 1,
title = tempdir(), plot = "pdf", verbose = FALSE,
clusterAlg = "hc",
distance = "euclidean",
seed = seed
))
results[[k]]$consensusClass
# FlowSOM copy ends
}
#' Mahalanobis-linked-average clustering for Embed()
#'
#' @export
ds_mhca <- function(codes, data, importance = NULL, mapping, k = 7) {
cl <- mhca::cutreeApriori(mhca::mhclust(x = data, g = mapping, quick = T, gIntra = F))
stats::cutree(cl, k = k)
}
#' Embed the analysis object
#'
#' This is the computationally intensive part. You may want to provide
#' a clustering seed.
#'
#' @examples
#' ds <- Embed(ds, colsToUse = c(1, 4, 7:14), nclust = 5, importance = c(1, 1, 2, 1, 3, 1, 1, 1, 1, 1))
#' @export
Embed <- function(ds, importance = NULL, colsToUse = NULL,
xdim = 16, ydim = 16, map = NULL, rlen = 10,
distf = 2, nhbr.method = "manhattan",
nclust = NULL, clustf = NULL,
esmooth = NULL, ek = NULL, eadjust = NULL,
emcoords = "flat", emcoords.pow = 1) {
if (is.null(colsToUse)) colsToUse <- colnames(ds$fs$data)
r <- ds
if (!is.null(colsToUse)) r$colsUsed <- colsToUse
if (!is.null(importance)) r$importance <- importance
if (!is.null(map)) {
if (is.character(map)) {
load(map)
map <- DiffSOM_map$map
}
r$importance <- map$importance
r$colsUsed <- map$colsUsed
map$importance <- NULL
map$colsUsed <- NULL
r$map <- FlowSOM::SOM(r$fs$data[, r$colsUsed],
xdim = map$xdim, ydim = map$ydim,
codes = map$codes,
importance = r$importance,
rlen = 0, distf = distf, nhbr.method = nhbr.method
)
} else {
r$map <- FlowSOM::SOM(r$fs$data[, r$colsUsed],
xdim = xdim, ydim = ydim,
importance = r$importance,
rlen = rlen
)
}
if (!is.null(emcoords)) { # sometimes this helps saving computation time
r$e <- EmbedSOM::EmbedSOM(
fsom = r$fs,
data = r$fs$data[, r$colsUsed], map = r$map,
importance = r$importance,
smooth = esmooth, k = ek, adjust = eadjust,
emcoords = emcoords, emcoords.pow = emcoords.pow
)
}
if (!is.null(map) && is.null(clustf) && is.null(nclust)) {
r$nclust <- map$nclust
r$clust <- map$clust
} else {
if (is.null(nclust)) nclust <- 10
if (is.null(clustf)) clustf <- ds_hclust
r$clust <- clustf(
codes = r$map$codes,
data = r$fs$data[, colsToUse],
importance = r$importance,
mapping = r$map$mapping[, 1],
k = nclust
)
r$nclust <- max(r$clust)
}
r
}
#' Export the FlowSOM map object from the analysis
#'
#' @param filename File name for saving the map (the same can be used for loading the map in Embed()).
#' @return the map object, if filename is not supplied
#'
#' @example ds <- Embed(ds, map=ExportMap(ds), ...) #re-embed without recomputing SOM
#'
#' @export
ExportMap <- function(ds, filename = NULL) {
map <- ds$map
map$importance <- ds$importance
map$colsUsed <- ds$colsUsed
map$mapping <- NULL
map$nclust <- ds$nclust
map$clust <- ds$clust
if (!is.null(filename)) {
DiffSOM_map <- new.env()
DiffSOM_map$map <- map
save(DiffSOM_map, file = filename)
} else {
map
}
}
#' Guess a good square-ish mfrow for multiplots
#'
#' @export
NiceMfrow <- function(n) {
y <- as.integer(sqrt(n))
x <- as.integer((n + y - 1) / y)
c(y, x)
}
#' Guess good marker names (ie. just take the first whitesep-word)
#'
#' @export
NicePrettyColnames <- function(fs, sel = T) {
r <- unlist(lapply(strsplit(split = " ", fs$prettyColnames[sel]), function(x) {
x[1]
}))
names(r) <- names(fs$prettyColnames[sel])
r
}
#' Plot a scatter plot in markers 'x' and (optionally) 'y'.
#' @param alpha used as plot color
#' @param pch forwarded to plot
#' @param cex forwarded to plot
#'
#' @examples
#' PlotScatter(ds, 1, 4)
#' @export
PlotScatter <- function(ds,
fs = 1:(length(ds$files)),
x = NULL,
y = NULL,
alpha = 0.5,
...) {
fid <- CellFile(ds)
par(mar = c(0, 0, 0, 0), mfrow = NiceMfrow(length(fs)))
for (i in fs) {
xa <- if (is.null(x)) {
NULL
} else {
ds$fs$data[fid == i, x]
}
ya <- if (is.null(y)) {
NULL
} else {
ds$fs$data[fid == i, y]
}
EmbedSOM::PlotEmbed(cbind(xa, ya), alpha = alpha, ...)
mtext(ds$files[i], line = -2, cex = 1, col = "red")
}
}
#' Plot aligned sample density plots. Params are forwarded to PlotEmbed.
#'
#' @examples
#' PlotDiff(ds)
#' @export
PlotDiff <- function(ds, ...) {
fid <- CellFile(ds)
fs <- 1:(length(ds$files))
par(mar = c(0, 0, 0, 0), mfrow = NiceMfrow(length(fs)))
for (i in fs) {
EmbedSOM::PlotEmbed(ds$e[fid == i, ], data = ds$fs$data[fid == i, ], ...)
mtext(ds$files[i], cex = 1, col = "#008000", line = -1.3, side = 1, adj = 0.01)
}
}
#' Plot aggregate sample information
#'
#' This plots out aggregate density, all marker expression levels, and
#' clustering with cluster numbers. Extra params are forwarded to PlotEmbed.
#'
#' @param colNames column names to be used for plotting
#'
#' @examples
#' Plot(ds)
#' @export
Plot <- function(ds,
colNames = NULL,
alpha = 0.5,
mar = c(0, 0, 0, 0),
density = T,
clusters = T,
cols = ds$colsUsed,
mfrow = NiceMfrow(length(cols) + density + clusters),
...) {
if (!is.null(mfrow) && !is.null(mar)) par(mar = mar, mfrow = mfrow)
if (density) EmbedSOM::PlotEmbed(ds$e, fsom = ds$fs, alpha = 1, ...)
if (is.null(colNames)) colNames <- NicePrettyColnames(ds$fs)
for (i in cols) {
EmbedSOM::PlotEmbed(ds$e, fsom = ds$fs, value = i, alpha = alpha, ...)
mtext(colNames[i], cex = 2, line = -1.3, side = 1, adj = 0.01)
}
if (clusters) PlotAnnotated(ds, pos = 1, size.text = 2, alpha = alpha, ...)
}
#' Plot annotated version of the plot with clusters
#'
#' This plots out aggregate density, all marker expression levels, and
#' clustering with cluster numbers.
#'
#' @param clusterMap mapping of metaclusters to a new clustering used to join some of them
#' @param annotation vector of strings with annotation
#' @param pos position of the annotation
#'
#' @examples
#' PlotAnnotated(ds)
#' @export
PlotAnnotated <- function(ds,
clusterMap = c(1:ds$nclust),
annotation = c(1:nMapClust),
pos = NULL,
size.point = 1,
size.text = 1,
col.text = "black",
cluster.colors = EmbedSOM::ClusterPalette,
...) {
nMapClust <- max(clusterMap)
EmbedSOM::PlotEmbed(ds$e,
fsom = ds$fs,
clust = clusterMap[ds$clust[ds$map$mapping[, 1]]],
cluster.colors = cluster.colors,
...
)
for (i in c(1:(nMapClust))) {
ex <- ds$e[clusterMap[ds$clust[ds$map$mapping[, 1]]] == i, ]
loc <- c(median(ex[, 1]), median(ex[, 2]))
if (!is.null(pos) || size.text == 0) {
points(loc[1], loc[2], pch = 20, cex = 3 * size.point, col = "white")
points(loc[1], loc[2], pch = 20, cex = 2 * size.point, col = cluster.colors(nMapClust)[i])
}
if (size.text > 0) {
text(loc[1], loc[2], annotation[i], pos = pos, cex = size.text, col = col.text)
}
}
}
#' Plot separated clusters
#'
#' @param alpha alpha for the expression plots
#'
#' @export
PlotClusters <- function(ds, alpha = 1, bgcol = rgb(.75, .75, .75, alpha / 3), ...) {
fgcols <- EmbedSOM::ClusterPalette(ds$nclust, alpha = alpha)
par(mar = c(0, 0, 0, 0), mfrow = NiceMfrow(ds$nclust))
for (i in 1:ds$nclust) {
col <- rep(bgcol, dim(ds$e)[1])
col[ds$clust[ds$map$mapping[, 1]] == i] <- fgcols[i]
EmbedSOM::PlotEmbed(ds$e, fsom = ds$fs, col = col, ...)
mtext(paste0("Cluster ", i), cex = 2, line = -1.3, side = 1, adj = 0.01)
}
}
#' Get the number of input file for each cell
#'
#' @export
CellFile <- function(ds) {
ds$fs$data[, "File"]
}
#' Plot marker expressions for histogram-style comparison of multiple samples/clusters.
#'
#' @param cols Which columns to plot
#' @param names Which names to use for the columns
#' @param files T/F -- plot separate histograms for files
#' @param clusters T/F -- plot separate histograms for clusters
#' @param ref Reference-distribution DiffSOM object
#' @param rdist Scatter distribution generator
#' @param pch,cex,alpha Used for plotting as usual
#'
#' @examples
#' PlotExprs(ds2, ref = ds, clusters = T)
#' @export
PlotExprs <- function(ds,
cols = ds$colsUsed,
names = NicePrettyColnames(ds$fs, sel = cols),
files = F, clusters = F,
ref = NULL,
alpha = 0.1,
rdist = function(n) (0.1 * rnorm(n)),
...) {
if (is.null(cols)) cols <- colnames(ds$fs$data)
nplots <- length(cols)
get.clust <- function(ds) {
ds$clust[ds$map$mapping[, 1]]
}
mrref <- 1
nrref <- 1
rref <- rep(0, length(ds$fs$data[, 1]))
if (!is.null(ref)) {
rref <- c(rref, rep(1, length(ref$fs$data[, 1])))
} else if (clusters || files) {
rref <- c(rref, rep(1, length(rref)))
}
col <- rep(rgb(0, 0, 0, alpha), length(rref))
file <- 0
nfiles <- 1
if (files) {
nfiles <- length(ds$files)
file <- CellFile(ds)
if (!is.null(ref)) {
file <- c(file, CellFile(ref))
nrref <- 2
} else {
if (clusters) {
file <- c(file, file)
} else {
file <- c(file, rep(nfiles + 1, length(file)))
}
mrref <- 0
}
col <- EmbedSOM::ClusterPalette(nfiles, alpha = alpha, vcycle = .7, scycle = .7)[file]
file <- file - 1
}
clust <- 0
nclust <- 1
if (clusters) {
clust <- get.clust(ds)
if (!is.null(ref)) {
clust <- c(clust, rep(ds$nclust + 1, length(ref$fs$data[, 1])))
mrref <- 0
} else {
clust <- c(clust, rep(ds$nclust + 1, length(clust)))
mrref <- 0
}
c2 <- clust
c2[c2 > ds$nclust] <- NA
col <- EmbedSOM::ClusterPalette(ds$nclust, alpha = alpha)[c2]
clust <- clust - 1
nclust <- ds$nclust
}
col[rref > 0] <- rgb(.5, .5, .5, alpha)
par(mar = c(0, 0, 2, 0), mfrow = NiceMfrow(length(cols)))
for (i in 1:length(cols)) {
cc <- cols[i]
d <- ds$fs$data[, cc]
if (!is.null(ref)) {
d <- c(d, ref$fs$data[, cc])
}
else if (clusters || files) {
d <- c(d, d)
}
EmbedSOM::PlotEmbed(
cbind(d, mrref * rref + nrref * (file + nfiles * clust) + rdist(length(d))),
col = col, ...
)
mtext(names[i], outer = F)
}
}
#' Apply gating and output a new diff-som object (to be embedded)
#'
#' @param clusters The cluster numbers to be selected
#' @param negative Invert the selection
#'
#' @examples
#' ds2 <- Gate(ds, clusters = c(5), negative = T) # removes cluster 5
#' @export
Gate <- function(ds, clusters = NULL, negative = F) {
flt <- ds$clust[ds$map$mapping[, 1]] %in% clusters
if (negative) flt <- !flt
r <- list(fs = ds$fs, files = ds$files)
r$fs$data <- r$fs$data[flt, ]
r
}
#' Get a matrix with relative population abundances in clusters (cols) vs files (rows)
#'
#' Useful for exporting the stats and working with them elsewhere.
#'
#' @export
GetProbs <- function(ds, meta = T) {
pad.zero <- function(v, l) {
if (length(v) == l) {
v
} else if (length(v) > l) {
v[1:l]
} else {
c(v, rep(0, times = l - length(v)))
}
}
fid <- CellFile(ds)
nf <- length(ds$files)
ncl <- if (meta) {
ds$nclust
} else {
ds$map$xdim * ds$map$ydim
}
cl <- if (meta) {
ds$clust[ds$map$mapping[, 1]]
}
else {
ds$map$mapping[, 1]
}
probs <- matrix(0, ncol = ncl, nrow = nf)
for (i in (1:nf)) probs[i, ] <- pad.zero(tabulate(cl[fid == i]), ncl) / sum(fid == i)
list(probs = probs, cl = cl)
}
#' Produce a list of colors for plotting significance with each cell.
#'
#' @param control Numbers of files with the base-line samples
#' @param experiment Numbers of files that are expected to differ
#' @param meta Use metacluster. Set to F for lower granularity
#' @param pow Adjustment to p-vals before plotting. Use a lower value for seeing even insignificant stuff.
#' @param alpha default alpha
#' @param col.inconclusive color for inconclusive outcome
#' @param col.less color for the 'less' test significance
#' @param col.greater the other color
#'
#' @export
SignificanceColors <- function(ds, control, experiment,
meta = T, paired = F, pow = 10, alpha = .5,
probs = GetProbs(ds, meta),
col.inconclusive = rgb(.75, .75, .75, alpha / 2),
col.greater = rgb(1, .5, 0, alpha),
col.less = rgb(0, .5, 1, alpha)) {
cl <- probs$cl
probs <- probs$probs
ncl <- ncol(probs)
clusters <- 1:ncl
p_less <- 0 * clusters
p_greater <- 0 * clusters
for (i in clusters) {
p_less[i] <- (1 - wilcox.test(
probs[experiment, i],
probs[control, i],
alternative = "less",
paired = paired
)$p.value)^pow
p_greater[i] <- (1 - wilcox.test(
probs[experiment, i],
probs[control, i],
alternative = "greater",
paired = paired
)$p.value)^pow
}
colv.greater <- col2rgb(col.greater, alpha = T)
colv.less <- col2rgb(col.less, alpha = T)
colv.inconclusive <- col2rgb(col.inconclusive, alpha = T)
colv <- outer(p_greater, colv.greater) +
outer(p_less, colv.less) +
outer(1 - p_greater - p_less, colv.inconclusive)
colv <- colv[, , 1] / 255 # TODO explain the 3rd dimension... :]
colv[colv > 1] <- 1 # fixup rounding errors that sometimes kill rgb()
colv[colv < 0] <- 0
rgb(colv[cl, 1], colv[cl, 2], colv[cl, 3], colv[cl, 4])
}
#' Produce the plot with change significances
#'
#' Extra parameters are forwarded to SignificanceColors function.
#' @param pch,cex,plotf forwarded to PlotEmbed
#' @param filter T/F remove the cells that do not belong in any tested group
#'
#' @examples
#' PlotSignificance(ds, control = c(1, 2, 3), experiment = c(4, 5, 6))
#' @export
PlotSignificance <- function(ds,
control,
experiment,
pch = ".",
cex = 1,
plotf = EmbedSOM::PlotDefault,
filter = F, ...) {
mask <- T
if (filter) {
mask <- CellFile(ds) %in% c(control, experiment)
}
EmbedSOM::PlotEmbed(ds$e[mask, ],
col = SignificanceColors(ds, control, experiment, ...)[mask],
cex = cex, pch = pch, plotf = plotf
)
}
#' Plot correlations between cluster populations in samples
#'
#' @param meta use metaclusters (default TRUE)
#' @param samples limit the choice of samples (default: all)
#' @param clusterMap a map for grouping clusters (default: identity)
#' @param annotation names of clusters
#' @param filter remove cells from samples that are not present (default FALSE)
#'
#' @export
PlotCorrelations <- function(ds,
samples = 1:length(ds$files),
meta = T,
mfrow = if (meta) {
(NiceMfrow(nMapClust))
} else {
c(ds$map$ydim, ds$map$xdim)
},
clusterMap = if (meta) {
(ds$clust)
} else {
(1:(ds$map$xdim * ds$map$ydim))
},
nMapClust = max(clusterMap),
plotOrder = if (meta) {
(
1:nMapClust
)
} else {
(
as.vector(matrix(
ncol = ds$map$ydim,
1:(ds$map$xdim * ds$map$ydim)
)
[, ds$map$ydim:1])
)
},
annotation = if (meta) paste("Cluster", c(1:nMapClust)) else c(),
filter = F, alpha = .5, ...) {
mask <- T
if (filter) {
mask <- CellFile(ds) %in% samples
}
probs <- GetProbs(ds, meta = F)$probs[samples, ]
probs <- sapply(1:nMapClust, function(x) {
apply(matrix(nrow = dim(probs)[1], probs[, clusterMap == x]), 1, sum)
})
par(mar = c(0, 0, 0, 0), mfrow = NiceMfrow(nMapClust))
for (i in plotOrder) {
cors <- as.vector(cor(probs[, i], probs))
EmbedSOM::PlotEmbed(
ds$e[mask, ],
col = EmbedSOM::ExpressionPalette(101, alpha = alpha)
[1 + 50 * (1 + cors[clusterMap[ds$map$mapping[mask, 1]]])],
...
)
mtext(annotation[i], cex = 2, line = -1.3, side = 1, adj = 0.01)
}
}
#' Export a data frame with stuff from the DiffSOM object
#'
#' @param colNames column names to use instead of NicePrettyColnames
#'
#' @export
ExportData <- function(ds, colNames = NULL) {
df <- data.frame(ds$fs$data)
df <- df[, 1:(length(df) - 2)] # strip off the file+file scatter cols from AggregateFlowFrames
if (is.null(colNames)) colNames <- NicePrettyColnames(ds$fs)[1:length(df)]
colnames(df) <- colNames
df <- data.frame(df, File = CellFile(ds))
if (is.null(ds$e)) {
df
} else {
data.frame(
df,
EmbedSOM1 = ds$e[, 1],
EmbedSOM2 = ds$e[, 2],
Precluster = ds$map$mapping[, 1],
Precluster1 = 1 + ((ds$map$mapping[, 1] - 1) %% ds$map$xdim),
Precluster2 = 1 + ((ds$map$mapping[, 1] - 1) %/% ds$map$xdim),
Metacluster = ds$clust[ds$map$mapping[, 1]]
)
}
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.