Nothing
#' @importFrom magrittr %>% %<>% %$%
#' @import igraph
#' @importFrom methods as is
#' @importFrom rlang .data
NULL
## for magrittr and dplyr functions below
if(getRversion() >= "2.15.1"){
utils::globalVariables(c("."))
}
#' Collapse graph using PAGA 1.2 algorithm, Wolf et al 2019, Genome Biology (2019) <https://genomebiology.biomedcentral.com/articles/10.1186/s13059-019-1663-x>
#'
#' @param graph igraph graph object Graph to be collapsed
#' @param groups factor on vertices describing cluster assignment (can specify integer vertex ids, or character vertex names which will be matched)
#' @param linearize should normally be always TRUE (default=TRUE)
#' @param winsorize winsorize final connectivity statistics value (default=FALSE) Note: Original PAGA has it as always TRUE,
#' but in this case there is no way to distinguish level of connectivity for highly connected groups.
#' @return collapsed graph
#'
#' @export
collapseGraphPaga <- function(graph, groups, linearize=TRUE, winsorize=FALSE) {
if ((!(is(graph, "Matrix") || is(graph, "matrix")) || ncol(graph) != nrow(graph)) && !igraph::is.igraph(graph)) {
stop("Unknown graph format. Only adjacency matrix or igraph are supported.")
}
if (!igraph::is.igraph(graph)) {
ones <- graph %>% as('dgTMatrix') %>% as('symmetricMatrix')
ones@x <- rep(1, length(ones@x))
graph <- igraph::graph_from_adjacency_matrix(ones, mode='directed', weighted=TRUE)
} else {
graph %<>% igraph::as.directed()
igraph::edge.attributes(graph)$weight <- rep(1, igraph::gsize(graph))
}
if (!is.null(igraph::V(graph)$name)) {
groups %<>% .[igraph::V(graph)$name]
graph %<>% igraph::remove.vertex.attribute("name")
}
groups %<>% .[!is.na(.)] %>% as.factor() %>% droplevels()
if (length(groups) != length(igraph::V(graph))){
stop("Groups must be provided for all graph vertices.")
}
cluster.names <- levels(groups)
groups %<>% as.integer()
vc <- igraph::make_clusters(graph, membership = groups, algorithm = 'conos', merges = NULL, modularity = FALSE)
ns <- igraph::sizes(vc)
n <- sum(ns)
es.inner.cluster <- rep(0, length(vc))
es.inner.cluster[as.integer(names(igraph::groups(vc)))] <- sapply(igraph::groups(vc), function(s)
igraph::induced_subgraph(graph, s, impl = 'copy_and_delete') %>% igraph::gsize() %>% `*`(2))
inter.es <- igraph::contract(graph, vc$membership) %>%
igraph::simplify(remove.multiple=FALSE, remove.loops=TRUE, edge.attr.comb=igraph::igraph_opt("weight")) %>%
igraph::as_adj(attr="weight") %>% as('dgTMatrix')
es <- es.inner.cluster + Matrix::rowSums(inter.es)
inter.es %<>% `+`(Matrix::t(.))
expected.random.null.vals <- (es[inter.es@i + 1]*ns[inter.es@j + 1] + es[inter.es@j + 1]*ns[inter.es@i + 1])/(n - 1)
if (!linearize) {
sd.random.null.vals <- (es[inter.es@i + 1]*ns[inter.es@j + 1]*(n-ns[inter.es@j + 1]-1) +
es[inter.es@j + 1]*ns[inter.es@i + 1])*(n-ns[inter.es@i + 1]-1) / ((n - 1)^2)
scaled.values <- ifelse(sd.random.null.vals < 1e-10, 0, (inter.es@x - expected.random.null.vals)/sd.random.null.vals) %>% as.numeric()
} else {
scaled.values <- ifelse(expected.random.null.vals < 1e-10, 1, inter.es@x / expected.random.null.vals) %>% as.numeric()
}
if (!is.logical(winsorize) || winsorize) {
scaled.values %<>% pmin(winsorize)
}
inter.es@x <- scaled.values
dimnames(inter.es) <- list(cluster.names, cluster.names)
return(inter.es)
}
#' Collapse Graph By Sum
#'
#' @inheritParams collapseGraphPaga
#' @param normalize boolean Whether to recalculate edge weight as observed/expected (default=TRUE)
#' @return collapsed graph
#' @examples
#' \donttest{
#' collapsed = collapseGraphPaga(conosGraph, igraph::V(conosGraph), linearize=TRUE, winsorize=FALSE)
#' }
#'
#' @export
collapseGraphSum <- function(graph, groups, normalize=TRUE) {
gcon <- contract.vertices(graph, groups, vertex.attr.comb=list('num'='sum',"ignore")) %>%
simplify(edge.attr.comb=list(weight="sum","ignore"))
if(normalize) {
ex <- outer(V(gcon)$num,V(gcon)$num)/(sum(V(gcon)$num)*(sum(V(gcon)$num)-1)/2)*sum(E(graph)$weight)
gcon2 <- graph_from_adjacency_matrix(as(as_adjacency_matrix(gcon, attr = "weight", sparse = FALSE)/ex,'dgCMatrix'), mode = "undirected", weighted=TRUE)
V(gcon2)$num <- V(gcon)$num
gcon <- gcon2;
}
if(is.factor(groups)) {
V(gcon)$name <- levels(groups)
} else {
# not sure when this was actually needed
gcon <- induced.subgraph(gcon, unique(groups))
}
return(gcon)
}
#' Collapse vertices belonging to each cluster in a graph
#'
#' @inheritParams collapseGraphPaga
#' @param method string Method to be used, either "sum" or "paga" (default="sum")
#' @param plot boolean Whether to show collapsed graph plot (default=FALSE)
#' @param node.scale numeric Scaling to control value of 'vertex.size' in plot.igraph() (default=50)
#' @param edge.scale numeric Scaling to control value of 'edge.width' in plot.igraph() (default=50)
#' @param edge.alpha numeric Scaling to control value of 'alpha.f' in adjustcolor() within plot.igraph() (default=0.3)
#' @param seed numeric Set seed via set.seed() for plotting (default=1)
#' @param ... arguments passed to collapseGraphSum()
#' @return collapsed graph
#' @examples
#' \donttest{
#' cluster.graph = getClusterGraph(conosGraph, igraph::V(conosGraph))
#' }
#'
#' @export
getClusterGraph <- function(graph, groups, method="sum", plot=FALSE, node.scale=50, edge.scale=50, edge.alpha=0.3, seed=1,...) {
V(graph)$num <- 1
if (is.integer(groups) && is.null(names(groups))) {
nv <- vcount(graph)
if (length(groups)!=nv) {
stop('Length of groups should be equal to the number of vertices')
}
if (max(groups)>nv) {
stop('Groups specifies ids that are larger than the number of vertices in the graph')
}
if (any(is.na(groups))) {
# remove vertices that are not part of the groups
vi <- which(!is.na(groups))
g <- induced.subgraph(graph,vi)
groups <- groups[vi]
} else {
g <- graph
}
} else {
gn <- V(graph)$name
groups <- stats::na.omit(groups[names(groups) %in% gn])
if (length(groups)<2) {
stop('Valid names of groups elements include too few cells')
}
if (length(groups)<length(gn)) {
g <- induced.subgraph(graph,names(groups))
} else {
g <- graph;
}
if(is.factor(groups)) {
groups <- groups[V(g)$name]
} else {
groups <- as.factor(stats::setNames(as.character(groups[V(g)$name]),V(g)$name))
}
}
if (method == "sum") {
gcon <- collapseGraphSum(g, groups, ...)
} else if (method == "paga") {
gcon <- collapseGraphPaga(g, groups, ...)
} else {
stop("Unknown method: ", method)
}
if (plot) {
set.seed(seed)
withr::with_par(mar = rep(0.1, 4),
plot.igraph(gcon, layout=layout_with_fr(gcon), vertex.size=V(gcon)$num/(sum(V(gcon)$num)/node.scale),
edge.width=E(gcon)$weight/sum(E(gcon)$weight/edge.scale), adjustcolor('black', alpha.f=edge.alpha))
)
}
return(invisible(gcon))
}
#' Estimate labeling distribution for each vertex, based on provided labels.
#'
#' @param graph igraph graph object
#' @param labels vector of factor or character labels, named by cell names, used in propagateLabelsSolver() and propagateLabelsDiffusion()
#' @param method string Type of propagation. Either 'diffusion' or 'solver'. (default='diffusion') 'solver' gives better result
#' but has bad asymptotics, so it is inappropriate for datasets > 20k cells.
#' @param ... additional arguments passed to either propagateLabelsSolver() or propagateLabelsDiffusion()
#' @return matrix with distribution of label probabilities for each vertex by rows.
#' @examples
#' propagateLabels(conosGraph, labels=cellAnnotations)
#'
#' @export
propagateLabels = function(graph, labels, method="diffusion", ...) {
if (method == "solver") {
label.dist <- propagateLabelsSolver(graph, labels, ...)
} else if (method == "diffusion") {
label.dist <- propagateLabelsDiffusion(graph, labels, ...)
} else {
stop("Unknown method: ", method, ". Only 'solver' and 'diffusion' are supported.")
}
labels <- colnames(label.dist)[apply(label.dist, 1, which.max)] %>%
stats::setNames(rownames(label.dist))
confidence <- apply(label.dist, 1, max) %>% stats::setNames(rownames(label.dist))
return(list(labels=labels, uncertainty=(1 - confidence), label.distribution=label.dist))
}
#' Propagate labels using Zhu, Ghahramani, Lafferty (2003) algorithm, "Semi-Supervised Learning Using Gaussian Fields and Harmonic Functions" <http://mlg.eng.cam.ac.uk/zoubin/papers/zgl.pdf>
#'
#' @param graph igraph graph object Graph input
#' @param labels vector of factor or character labels, named by cell names
#' @param solver Method of solver to use (default="mumps"), either "Matrix" or "mumps" (i.e. "rmumps::Rmumps")
#' @return result from Matrix::solve() or rmumps::Rmumps
#' @examples
#' \donttest{
#' propagateLabelsSolver(conosGraph, labels=cellAnnotations)
#' }
#' @export
propagateLabelsSolver <- function(graph, labels, solver="mumps") {
if (!solver %in% c("mumps", "Matrix")){
stop("Unknown solver: ", solver, ". Only 'mumps' and 'Matrix' are currently supported")
}
if (!requireNamespace("rmumps", quietly=TRUE)) {
warning("Package 'rmumps' is required to use 'mumps' solver. Fall back to 'Matrix'")
solver <- "Matrix"
}
adj.mat <- igraph::as_adjacency_matrix(graph, attr="weight")
labeled.cbs <- intersect(colnames(adj.mat), names(labels))
unlabeled.cbs <- setdiff(colnames(adj.mat), names(labels))
labels <- as.factor(labels[labeled.cbs])
weight.sum.mat <- Matrix::Diagonal(x=Matrix::colSums(adj.mat)) %>%
`dimnames<-`(dimnames(adj.mat))
laplasian.uu <- (weight.sum.mat[unlabeled.cbs, unlabeled.cbs] - adj.mat[unlabeled.cbs, unlabeled.cbs])
type.scores <- Matrix::sparseMatrix(i=1:length(labels), j=as.integer(labels), x=1.0) %>%
`colnames<-`(levels(labels)) %>% `rownames<-`(labeled.cbs)
right.side <- Matrix::drop0(adj.mat[unlabeled.cbs, labeled.cbs] %*% type.scores)
if (solver == "Matrix") {
res <- Matrix::solve(laplasian.uu, right.side)
} else {
res <- rmumps::Rmumps$new(laplasian.uu, copy=FALSE)$solve(right.side)
}
colnames(res) <- levels(labels)
rownames(res) <- unlabeled.cbs
return(rbind(res, type.scores))
}
#' Estimate labeling distribution for each vertex, based on provided labels using a Random Walk on graph
#'
#' @param graph igraph graph object Graph input
#' @param labels vector of factor or character labels, named by cell names
#' @param max.iters integer Maximal number of iterations (default=100)
#' @param diffusion.fading numeric Constant used for diffusion on the graph, exp(-diffusion.fading * (edge_length + diffusion.fading.const)) (default=10.0)
#' @param diffusion.fading.const numeric Another constant used for diffusion on the graph, exp(-diffusion.fading * (edge_length + diffusion.fading.const)) (default=0.1)
#' @param tol numeric Absolute tolerance as a stopping criteria (default=0.025)
#' @param fixed.initial.labels boolean Prohibit changes of initial labels during diffusion (default=TRUE)
#' @param verbose boolean Verbose mode (default=TRUE)
#' @return matrix from input graph, with labels propagated
#' @examples
#' propagateLabelsDiffusion(conosGraph, labels=cellAnnotations)
#'
#' @export
propagateLabelsDiffusion <- function(graph, labels, max.iters=100, diffusion.fading=10.0, diffusion.fading.const=0.1, tol=0.025, fixed.initial.labels=TRUE, verbose=TRUE) {
if (is.factor(labels)) {
labels <- as.character(labels) %>% stats::setNames(names(labels))
}
edges <- igraph::as_edgelist(graph)
edge.weights <- igraph::edge.attributes(graph)$weight
labels <- labels[intersect(names(labels), igraph::vertex.attributes(graph)$name)]
label.distribution <- propagate_labels(edges, edge.weights, vert_labels=labels, max_n_iters=max.iters, verbose=verbose,
diffusion_fading=diffusion.fading, diffusion_fading_const=diffusion.fading.const,
tol=tol, fixed_initial_labels=fixed.initial.labels)
return(label.distribution)
}
### Graph Smoothing (re-implementation of the pygsp package)
### https://github.com/epfl-lts2/pygsp/
#' Graph filter with the heat kernel: \eqn{f(x) = exp(-\beta |x / \lambda_m - a|^b)}
#'
#' @param x numeric Values to be filtered. Normally, these are graph laplacian engenvalues.
#' @param l.max numeric Maximum eigenvalue on the graph (\eqn{\lambda_m} in the equation)
#' @param offset numeric Mean kernel value (\eqn{a} in the equation), must be in [0:1] (default=0)
#' @param order numeric Parameter \eqn{b} in the equation. Larger values correspond to the sharper kernel form (default=1). The values should be positive.
#' @param beta numeric Parameter \eqn{\beta} in the equation. Larger values provide stronger smoothing. \eqn{\beta=0} corresponds to no smoothing (default=30).
#' @return smoothed values for 'x'
#' @family graph smoothing
#'
#' @export
heatFilter <- function(x, l.max, order=1, offset=0, beta=30) {
exp(-beta * abs(x / l.max - offset) ** order)
}
#' Compute Chebyshev Coefficients
#'
#' @param filt graph filter function
#' @param l.max Maximum eigenvalue of the graph
#' @param m numeric Maximum order of Chebyshev coeff to compute (default=30)
#' @param n numeric grid order used to compute quadrature (default=m+1)
#' @return vector of Chebyshev coefficients
#' @family graph smoothing
#'
#' @keywords internal
computeChebyshevCoeffs <- function(filt, l.max, m=30, n=m+1) {
a <- l.max / 2
tmp.n <- 0:(n-1)
num <- cos(pi * (tmp.n + 0.5) / n)
coeffs <- sapply(0:m, function(i) 2 / n * (filt(a * (num + 1)) %*% cos(pi * i * (tmp.n + 0.5) / n)))
return(coeffs)
}
#' Smooth with Chebyshev Polynomials
#'
#' @param lap graph laplacian
#' @param coeffs numeric vector Chebyshev coefficients for a filter
#' @param signal Matrix or vector Signal to smooth
#' @param l.max numeric maximal eigenvalue of the graph
#' @param n.cores numeric Number of cores for parallel run (default=1)
#' @param progress.chunks numeric Number of chunks per core for estimating progress (default=5). Large values are not suggested, as it may bring overhead.
#' @param progress boolean Flag on whether progress must be shown (default=TRUE, i.e. 'progress.chunks > 1')
#' @return smoothed signal
#' @family graph smoothing
#'
#' @keywords internal
smoothChebyshev <- function(lap, coeffs, signal, l.max, n.cores=1, progress.chunks=5, progress=(progress.chunks > 1)) {
m <- length(coeffs)
if (m < 2){
stop("Parameter 'coeffs' has an invalid length")
}
lap.norm <- lap / (l.max / 2)
fac <- 2 * (lap.norm - Matrix::Diagonal(ncol(lap)))
smoothChebyshevInner <- function(lap.norm, fac, signal, coeffs) {
twf.cur <- (lap.norm %*% signal) - signal
twf.old <- signal
r <- 0.5 * coeffs[1] * twf.old + coeffs[2] * twf.cur
for (k in 3:length(coeffs)) {
twf.new <- fac %*% twf.cur - twf.old
r <- r + coeffs[k] * twf.new
twf.old <- twf.cur
twf.cur <- twf.new
}
return(r)
}
if ((!is.null(ncol(signal))) && (ncol(signal) > 1) && ((n.cores > 1) || (progress.chunks > 1))) {
n.chunks <- min(progress.chunks * n.cores, ncol(signal))
r <- 1:ncol(signal) %>% split(ceiling(seq_along(.) / (length(.) / n.chunks))) %>%
plapply(function(ids) smoothChebyshevInner(lap.norm, fac, signal[,ids,drop=FALSE], coeffs),
n.cores=n.cores, mc.preschedule=TRUE, progress=progress) %>%
Reduce(cbind, .)
} else {
r <- smoothChebyshevInner(lap.norm, fac, signal, coeffs)
}
if (is.null(ncol(signal))){
return(r[,1])
}
return(r)
}
#' Smooth Signal on Graph
#'
#' @inheritParams computeChebyshevCoeffs
#' @inheritDotParams smoothChebyshev n.cores progress.chunks progress
#' @param signal signal to be smoothed
#' @param filter function that accepts signal `x` and the maximal Laplacian eigenvalue `l.max`. See \code{\link{heatFilter}} as an example.
#' @param graph igraph object with the graph (default=NULL)
#' @param lap graph laplacian (default=NULL). If NULL, `lap` estimated from graph.
#' @param l.max maximal eigenvalue of `lap` (default=NULL). If NULL, estimated from `lap`.
#' @param m numeric Maximum order of Chebyshev coeff to compute (default=50)
#' @family graph smoothing
#'
#' @export
smoothSignalOnGraph <- function(signal, filter, graph=NULL, lap=NULL, l.max=NULL, m=50, ...) {
if (is.null(lap)) {
if (is.null(graph)){
stop("Either graph or lap must be provided")
}
lap <- igraph::laplacian_matrix(graph, sparse=TRUE)
}
if (is.null(colnames(lap)) || (is.null(names(signal)) && is.null(rownames(signal)))) {
warning("Either graph vertices or signal have no names. Using the existing order.")
if ((length(signal) != ncol(lap)) && (length(signal) != length(lap)))
stop("signal must have the same length as the number of nodes in graph")
} else {
if (is.null(dim(signal))) {
signal <- signal[colnames(lap)]
} else {
signal <- signal[colnames(lap),,drop=FALSE]
}
}
if (is.null(l.max)) {
if (!igraph::is_connected(graph))
stop("The provided graph is not connected. It has to be connected to estimate l.max.")
l.max <- irlba::partial_eigen(lap, n=1)$values
}
coeffs <- computeChebyshevCoeffs(function(x) filter(x, l.max), m=m, l.max)
sig.smoothed <- smoothChebyshev(lap, coeffs, signal, l.max, ...)
return(sig.smoothed)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.