#' @importFrom sccore checkPackageInstalled
NULL
#' Estimate cell density in a given embedding; the density is estimated separately for each sample
#'
#' @param emb cell embedding matrix
#' @param sample.per.cell Named sample factor with cell names (default: stored vector)
#' @param sample.groups A two-level factor on the sample names describing the conditions being compared (default: stored vector)
#' @param bins number of bins for density estimation (default: 400)
#' @param bandwidth kernel bandwidth, given as a fraction of the 10%-90% range of each embedding dimension (default: 0.05); if NULL, the bandwidth is estimated with MASS::bandwidth.nrd
#' @param expansion.mult multiplicative expansion of the embedding limits used for the density grid (default: 0.05)
#' @keywords internal
estimateCellDensityKde <- function(emb, sample.per.cell, sample.groups, bins, bandwidth=0.05, expansion.mult=0.05) {
if (is.null(bandwidth)) {
bandwidth <- apply(emb, 2, MASS::bandwidth.nrd)
} else {
bandwidth <- apply(emb, 2, quantile, c(0.1, 0.9)) %>% diff() %>% {. * bandwidth} %>% .[1,]
}
expand_limits_continuous <- utils::getFromNamespace("expand_limits_continuous", "ggplot2")
lims <- emb %>%
apply(2, function(x) expand_limits_continuous(range(x), expansion(mult=expansion.mult))) %>%
as.numeric()
cname <- intersect(names(sample.per.cell), rownames(emb))
sample.per.cell <- sample.per.cell[cname]
emb <- emb[cname, ]
cells.per.samp <- split(names(sample.per.cell), sample.per.cell)
density.mat <- lapply(cells.per.samp, function(nn) {
MASS::kde2d(emb[nn, 1], emb[nn, 2], h=bandwidth, n=bins, lims=lims)$z %>%
as.numeric() %>% {. / sum(.)}
}) %>% do.call("cbind", .) %>%
set_colnames(names(cells.per.samp)) %>%
set_rownames(1:nrow(.)) # needed for indexing in diffCellDensity
cond.densities <- split(names(sample.groups), sample.groups) %>%
lapply(function(ns) rowMeans(density.mat[,ns]))
# coordinates of the binned embedding space
mat <- matrix(cond.densities[[1]], ncol = bins, byrow = FALSE)
x1 <- seq(lims[1], lims[2], length.out=bins) %>% setNames(seq(bins))
y1 <- seq(lims[3], lims[4], length.out=bins) %>% setNames(seq(bins))
d1 <- setNames(melt(mat), c('x', 'y', 'z'))
emb2 <- data.frame(x=x1[d1$x], y=y1[d1$y])
# count the number of cells in each bin
s1 <- seq(from = lims[1], to = lims[2], length.out=bins + 1)
s2 <- seq(from = lims[3], to = lims[4], length.out=bins + 1)
dcounts <- table(cut(emb[,1], breaks = s1), cut(emb[,2], breaks = s2))
emb2$counts <- as.numeric(dcounts)
return(list(density.mat=density.mat, cond.densities=cond.densities, density.emb=emb2, bins=bins,
method='kde', cell.emb=emb))
}
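# Usage sketch (illustrative, not part of the original code; all object names below are
# hypothetical, and the call assumes the package dependencies -- magrittr, ggplot2,
# reshape2, MASS -- are available in the namespace):
#
#   set.seed(1)
#   cells <- paste0("cell", 1:400)
#   emb <- matrix(rnorm(800), ncol=2, dimnames=list(cells, c("x", "y")))
#   sample.per.cell <- setNames(sample(paste0("s", 1:4), 400, replace=TRUE), cells)
#   sample.groups <- setNames(rep(c("ctrl", "case"), each=2), paste0("s", 1:4))
#   dens <- estimateCellDensityKde(emb, sample.per.cell, sample.groups,
#                                  bins=100, bandwidth=0.05)
#   str(dens$cond.densities)  # per-condition densities over the 100x100 grid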
#' Estimate cell density using graph smoothing
#'
#' @param graph input graph
#' @param sample.per.cell Named sample factor with cell names (default: stored vector)
#' @param sample.groups A two-level factor on the sample names describing the conditions being compared (default: stored vector)
#' @param n.cores number of cores
#' @param beta numeric Parameter of sccore::heatFilter controlling the extent of smoothing (default=30)
#' @param m numeric Maximum order of Chebyshev coefficients to compute (default=50)
#' @param verbose boolean Whether to show progress (default=TRUE)
#' @keywords internal
estimateCellDensityGraph <- function(graph, sample.per.cell, sample.groups, n.cores=1, beta=30, m=50, verbose=TRUE) {
sig.mat <- unique(sample.per.cell) %>% sapply(function(s) as.numeric(sample.per.cell == s)) %>%
set_rownames(names(sample.per.cell)) %>% set_colnames(unique(sample.per.cell))
score.mat <- sccore::smoothSignalOnGraph(
sig.mat, filter=function(...) sccore::heatFilter(..., beta=beta), graph=graph,
m=m, n.cores=n.cores, progress.chunk=(verbose + 1), progress=verbose
) %>% as.matrix()
score.mat %<>% {t(.) / colSums(.)} %>% t() # Normalize by columns to adjust on the number of cells per sample
cond.densities <- split(names(sample.groups), sample.groups) %>%
sapply(function(samps) apply(score.mat[,samps,drop=FALSE], 1, mean, trim=0.2)) # Trimmed mean across samples as a robust estimate of group density
cond.densities %<>% {lapply(1:ncol(.), function(i) .[,i])} %>% setNames(colnames(cond.densities))
return(list(density.mat=score.mat, cond.densities=cond.densities, method='graph'))
}
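# Usage sketch (illustrative, not part of the original code). `cell.graph` is a
# hypothetical igraph cell-cell graph (e.g., a joint alignment graph) whose vertex
# names match the names of `sample.per.cell` from the sketch above:
#
#   dens <- estimateCellDensityGraph(cell.graph, sample.per.cell, sample.groups,
#                                    n.cores=4, beta=30, m=50)
#   head(dens$cond.densities$case)  # smoothed per-cell density for the 'case' group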
#' Extract contour from embedding
#'
#' @param emb cell embedding matrix
#' @param cell.groups named factor of cell type annotations per cell
#' @param group cell type(s) for which the contour is computed; multiple cell types are supported
#' @param conf contour level name from the ks::kde contour estimates (default: "10%")
#' @param color contour line color (default: "black")
#' @param linetype contour line type (default: 2)
#' @param bandwidth optional scalar bandwidth used for the diagonal of the ks::kde bandwidth matrix; if NULL, the ks::kde default is used
#' @keywords internal
getDensityContour <- function(emb, cell.groups, group, color='black', linetype=2, conf="10%", bandwidth=NULL, ...) {
emb %<>% .[rownames(.) %in% names(cell.groups)[cell.groups %in% group], ]
if (is.null(bandwidth)) {
kd <- ks::kde(emb, compute.cont=TRUE, ...)
} else {
h <- matrix(c(bandwidth, 0, 0, bandwidth), ncol=2)
kd <- ks::kde(emb, compute.cont=TRUE, H=h, ...)
}
lcn <- kd %$% contourLines(x=eval.points[[1]], y=eval.points[[2]], z=estimate, levels=cont[conf]) %>%
.[[1]] %>% data.frame() %>% cbind(z=1)
cn <- geom_path(aes(x, y), data=lcn, linetype=linetype, color=color)
return(cn)
}
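# Usage sketch (illustrative, not part of the original code; `annotation` and the cell
# type label are hypothetical). The returned layer can be added on top of an existing
# embedding or density plot:
#
#   cont <- getDensityContour(emb, cell.groups=annotation, group="Microglia",
#                             conf="10%", color="black", linetype=2)
#   ggplot2::ggplot() + cont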
#' Plot cell density
#'
#' @param mat data frame with bin coordinates `x`, `y` and density values `z`
#' @param bins number of bins for density estimation; must be consistent with the `bins` used in `estimateCellDensity`
#' @param cell.emb cell embedding matrix, as returned in the `cell.emb` field of `estimateCellDensityKde`
#' @param palette color palette function (default: `YlOrRd`)
#' @keywords internal
plotDensityKde <- function(mat, bins, cell.emb, show.grid=TRUE, lims=NULL, show.labels=FALSE, show.ticks=FALSE,
palette=NULL, legend.title=NULL, ...) {
if (is.null(lims)) {
lims <- c(min(mat$z), max(mat$z)*1.1)
}
breaks <- lapply(mat[c('x', 'y')], function(m) {
seq(quantile(m, 0.1), quantile(m, 0.9), length.out=6) %>%
signif(digits=3)
})
p <- ggplot(mat, aes(x, y, fill = z)) +
geom_raster() +
scale_x_continuous(breaks=breaks$x, expand = c(0,0)) +
scale_y_continuous(breaks=breaks$y, expand = c(0,0)) +
val2ggcol(mat$z, palette=palette, color.range=lims, return.fill=TRUE)
if (!is.null(legend.title)) p <- p + guides(fill=guide_colorbar(title=legend.title))
p %<>% sccore::styleEmbeddingPlot(show.labels=show.labels, show.ticks=show.ticks, ...)
if (show.grid) { # add grid manually
p <- p +
geom_vline(xintercept=breaks$x, col='grey', alpha=0.1) +
geom_hline(yintercept=breaks$y, col='grey', alpha=0.1)
}
return(p)
}
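# Usage sketch (illustrative, not part of the original code), assuming `dens` is the
# hypothetical list returned by estimateCellDensityKde above. The grid data frame
# `density.emb` carries only coordinates and counts, so the density of one condition
# is attached as the `z` column before plotting:
#
#   d <- dens$density.emb
#   d$z <- dens$cond.densities$case
#   plotDensityKde(d, bins=dens$bins, cell.emb=dens$cell.emb, legend.title="density")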
#' Estimate differential cell density
#'
#' @param density.mat estimated cell density matrix with estimateCellDensity
#' @param sample.groups A two-level factor on the sample names describing the conditions being compared (default: stored vector)
#' @param ref.level Reference sample group, e.g., ctrl, healthy, or untreated. (default: stored value)
#' @param target.level Target/disease level of the `sample.groups` factor
#' @param type method used to estimate the differential cell density of each bin: "subtract" - difference between mean target and mean reference density, normalized by the maximum density;
#'   "t.test" - Welch t-test statistic between sample groups; "wilcox" - z-score derived from the two-sample Wilcoxon test p-value, signed by the direction of the density change
#' @keywords internal
diffCellDensity <- function(density.mat, sample.groups, ref.level, target.level, type='subtract'){
nt <- names(sample.groups[sample.groups == target.level]) # sample name of target
nr <- names(sample.groups[sample.groups == ref.level]) # sample name of reference
if (type == 'subtract') {
rmt <- rowMeans(density.mat[, nt])
rmr <- rowMeans(density.mat[, nr])
score <- (rmt - rmr) / max(max(rmt), max(rmr))
} else if (type=='t.test'){
score <- matrixTests::row_t_welch(density.mat[,nt], density.mat[,nr])$statistic %>%
setNames(rownames(density.mat))
} else if (type == 'wilcox') {
pvalue <- matrixTests::row_wilcoxon_twosample(density.mat[,nt], density.mat[,nr])$pvalue
zstat <- abs(qnorm(pvalue / 2))
fc <- rowMeans(density.mat[,nt]) - rowMeans(density.mat[,nr])
score <- zstat * sign(fc)
} else stop("Unknown method: ", type)
return(score)
}
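# Usage sketch (illustrative, not part of the original code), continuing with the
# hypothetical `dens` and `sample.groups` objects from the sketches above:
#
#   diff.score <- diffCellDensity(dens$density.mat, sample.groups,
#                                 ref.level="ctrl", target.level="case",
#                                 type="subtract")
#   summary(diff.score)  # positive values: higher density in the target group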
#' Estimate differential cell density, returning both observed and permutation scores
#'
#' @inheritParams diffCellDensity
#' @param n.permutations number of sample-group permutations (default: 200)
#' @keywords internal
diffCellDensityPermutations <- function(density.mat, sample.groups, ref.level, target.level, type='permutation',
verbose=TRUE, n.permutations=200, n.cores=1) {
nt <- names(sample.groups[sample.groups == target.level]) # sample name of target
nr <- names(sample.groups[sample.groups == ref.level]) # sample name of reference
if (type %in% c('subtract', 't.test', 'wilcox')) {
score <- diffCellDensity(
density.mat, sample.groups=sample.groups, ref.level=ref.level, target.level=target.level, type=type
)
permut.scores <- plapply(1:n.permutations, function(i) {
sg.shuff <- setNames(sample(sample.groups), names(sample.groups))
diffCellDensity(density.mat, sample.groups=sg.shuff, ref.level=ref.level, target.level=target.level, type=type)
}, progress=verbose, n.cores=n.cores, mc.preschedule=TRUE, fail.on.error=TRUE) %>% do.call(cbind, .)
return(list(score=score, permut.scores=permut.scores))
}
if (type == 'permutation') {
checkPackageInstalled("matrixStats", details="for `type='permutation'`", cran=TRUE)
colRed <- matrixStats::colMedians
} else if (type == 'permutation.mean') {
colRed <- Matrix::colMeans
} else stop("Unknown method: ", type)
density.mat <- t(density.mat)
dm.shuffled <- density.mat
permut.diffs <- plapply(1:n.permutations, function(i) { # Null distribution looks normal, so we don't need a lot of samples
rownames(dm.shuffled) %<>% sample()
colRed(dm.shuffled[nt,]) - colRed(dm.shuffled[nr,])
}, progress=verbose, n.cores=1, fail.on.error=TRUE) %>% # according to tests, n.cores>1 here does not speed up calculations
do.call(cbind, .) %>% set_rownames(colnames(density.mat))
sds <- apply(permut.diffs, 1, sd)
score <- (colRed(density.mat[nt,]) - colRed(density.mat[nr,])) / sds
permut.diffs <- apply(permut.diffs, 2, `/`, sds)
score[sds < 1e-10] <- 0
permut.diffs[sds < 1e-10,] <- 0
return(list(score=score, permut.scores=permut.diffs))
}
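# Usage sketch (illustrative, not part of the original code; type="permutation"
# requires the matrixStats package), continuing with the hypothetical objects above:
#
#   perm <- diffCellDensityPermutations(dens$density.mat, sample.groups,
#                                       ref.level="ctrl", target.level="case",
#                                       type="permutation", n.permutations=200)
#   str(perm)  # list with the observed `score` and the permuted `permut.scores`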
#' Adjust scores to z-scores using a permutation-based null distribution
#'
#' @keywords internal
adjustZScoresByPermutations <- function(score, scores.shuffled, wins=0.01, smooth=FALSE, graph=NULL, beta=30,
l.max=NULL, n.cores=1, verbose=TRUE) {
checkPackageInstalled("matrixStats", details="for adjusting p-values", cran=TRUE)
if (smooth) {
if (is.null(graph)) stop("graph has to be provided if smooth is TRUE")
g.filt <- function(...) sccore::heatFilter(..., beta=beta)
score %<>% sccore::smoothSignalOnGraph(filter=g.filt, graph=graph, l.max=l.max, progress=FALSE)
scores.shuffled %<>%
sccore::smoothSignalOnGraph(filter=g.filt, graph=graph, n.cores=n.cores, l.max=l.max, progress=verbose) %>%
as.matrix()
}
if (wins > 0) {
uq <- 1 - wins
lq <- wins
score %<>% pmin(quantile(., uq, na.rm=TRUE)) %>% pmax(quantile(., lq, na.rm=TRUE))
scores.shuffled.ranges <- matrixStats::colQuantiles(scores.shuffled, probs=c(lq, uq), na.rm=TRUE)
} else {
scores.shuffled.ranges <- matrixStats::colRanges(scores.shuffled, na.rm=TRUE)
}
p.vals <- c()
sign.mask <- (score >= 0)
if (any(sign.mask)) {
p.vals %<>% c((sapply(score[sign.mask], function(x) sum((x - 1e-10) < scores.shuffled.ranges[,2])) + 1) / (ncol(scores.shuffled) + 1))
}
if (any(!sign.mask)) {
p.vals %<>% c((sapply(score[!sign.mask], function(x) sum((x + 1e-10) > scores.shuffled.ranges[,1])) + 1) / (ncol(scores.shuffled) + 1))
}
z.scores <- pmax(1 - p.vals[names(score)], 0.5) %>% qnorm(lower.tail=TRUE)
z.scores[!sign.mask] %<>% {. * -1}
return(z.scores)
}
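# Usage sketch (illustrative, not part of the original code; requires matrixStats),
# converting the permutation results from the sketch above into adjusted z-scores:
#
#   z.adj <- adjustZScoresByPermutations(perm$score, perm$permut.scores, wins=0.01)
#   range(z.adj)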
#' @keywords internal
findScoreGroupsGraph <- function(scores, min.score, graph) { # TODO: deprecated?
graph %>% igraph::induced_subgraph(names(scores)[scores > min.score]) %>%
igraph::components() %>% .$membership
}
#' @keywords internal
graphFromGrid <- function(grid.ids, n.bins) {
graph <- lapply(grid.ids, function (i) c(i+1, i-1, i+n.bins, i-n.bins)) %>%
mapIds(grid.ids) %>% igraph::graph_from_adj_list() %>% igraph::as.undirected()
igraph::V(graph)$name <- grid.ids
return(graph)
}