#' @include seurat.R
NULL
#' SNN Graph Construction
#'
#' Constructs a Shared Nearest Neighbor (SNN) Graph for a given dataset. We
#' first determine the k-nearest neighbors of each cell (defined by k.param *
#' k.scale). We use this knn graph to construct the SNN graph by calculating the
#' neighborhood overlap (Jaccard distance) between every cell and its k.param *
#' k.scale nearest neighbors (defining the neighborhood for each cell as the
#' k.param nearest neighbors).
#'
#' @param object Seurat object
#' @param genes.use A vector of gene names to use in construction of SNN graph
#' if building directly based on expression data rather than a dimensionally
#' reduced representation (i.e. PCs).
#' @param reduction.type Name of dimensional reduction technique to use in
#' construction of SNN graph. (e.g. "pca", "ica")
#' @param dims.use A vector of the dimensions to use in construction of the SNN
#' graph (e.g. To use the first 10 PCs, pass 1:10)
#' @param k.param Defines k for the k-nearest neighbor algorithm
#' @param k.scale Granularity option for k.param
#' @param plot.SNN Plot the SNN graph
#' @param prune.SNN Sets the cutoff for acceptable Jaccard distances when
#' computing the neighborhood overlap for the SNN construction. Any edges with
#' values less than or equal to this will be set to 0 and removed from the SNN
#' graph. Essentially sets the strigency of pruning (0 --- no pruning, 1 ---
#' prune everything).
#' @param print.output Whether or not to print output to the console
#' @param distance.matrix Build SNN from distance matrix (experimental)
#' @param force.recalc Force recalculation of SNN.
#' @importFrom FNN get.knn
#' @importFrom igraph plot.igraph graph.adjlist graph.adjacency E
#' @importFrom Matrix sparseMatrix
#' @return Returns the object with object@@snn filled
#' @export
BuildSNN <- function(
object,
genes.use = NULL,
reduction.type = "pca",
dims.use = NULL,
k.param = 10,
k.scale = 10,
plot.SNN = FALSE,
prune.SNN = 1/15,
print.output = TRUE,
distance.matrix = NULL,
force.recalc = FALSE
) {
if (! is.null(x = distance.matrix)) {
data.use <- distance.matrix
} else if (is.null(x = genes.use) && is.null(x = dims.use)) {
genes.use <- object@var.genes
data.use <- t(x = as.matrix(x = object@data[genes.use, ]))
} else if (! is.null(x = dims.use)) {
data.use <- GetCellEmbeddings(object, reduction.type = reduction.type,
dims.use = dims.use)
} else if (!is.null(genes.use) && is.null(dims.use)) {
data.use <- t(x = as.matrix(x = object@data[genes.use, ]))
} else {
stop("Data error!")
}
parameters.to.store <- as.list(environment(), all = TRUE)[names(formals("BuildSNN"))]
if (CalcInfoExists(object, "BuildSNN")){
parameters.to.store$object <- NULL
old.parameters <- GetAllCalcParam(object, "BuildSNN")
old.parameters$time <- NULL
if(all(old.parameters %in% parameters.to.store)){
warning("Build parameters exactly match those of already computed and stored SNN. To force recalculation, set force.recalc to TRUE.")
return(object)
}
}
object <- SetCalcParams(object = object,
calculation = "BuildSNN",
... = parameters.to.store)
n.cells <- nrow(x = data.use)
if (n.cells < k.param) {
warning("k.param set larger than number of cells. Setting k.param to number of cells - 1.")
k.param <- n.cells - 1
}
# find the k-nearest neighbors for each single cell
if (is.null(x = distance.matrix)) {
my.knn <- get.knn(
data <- as.matrix(x = data.use),
k = min(k.scale * k.param, n.cells - 1)
)
nn.ranked <- cbind(1:n.cells, my.knn$nn.index[, 1:(k.param-1)])
nn.large <- my.knn$nn.index
} else {
warning("Building SNN based on a provided distance matrix")
n <- nrow(x = distance.matrix)
k.for.nn <- k.param * k.scale
knn.mat <- matrix(data = 0, ncol = k.for.nn, nrow = n)
knd.mat <- knn.mat
for (i in 1:n){
knn.mat[i, ] <- order(data.use[i, ])[1:k.for.nn]
knd.mat[i, ] <- data.use[i, knn.mat[i, ]]
}
nn.large <- knn.mat
nn.ranked <- knn.mat[, 2:k.param]
}
w <- CalcSNNSparse(
cell.names = object@cell.names,
k.param = k.param,
nn.large = nn.large,
nn.ranked = nn.ranked,
prune.SNN = prune.SNN,
print.output = print.output
)
object@snn <- w
if (plot.SNN) {
if (length(x = object@dr$tsne@cell.embeddings) < 1) {
warning("Please compute a tSNE for SNN visualization. See RunTSNE().")
} else {
net <- graph.adjacency(
adjmatrix = w,
mode = "undirected",
weighted = TRUE,
diag = FALSE
)
plot.igraph(
x = net,
layout = as.matrix(x = object@dr$tsne@cell.embeddings),
edge.width = E(graph = net)$weight,
vertex.label = NA,
vertex.size = 0
)
}
}
return(object)
}
# Function to convert the knn graph into the snn graph. Stored in a sparse
# representation.
# @param cell.names A vector of cell names which will correspond to the row/
# column names of the SNN
# @param k.param Defines nearest neighborhood when computing NN graph
# @param nn.large Full KNN graph (computed with get.knn with k set to
# k.param * k.scale)
# @param nn.ranked Subset of full KNN graph that only contains the first
# k.param nearest neighbors. Used to define Jaccard
# distances between any two cells
# @param prune.snn Sets the cutoff for acceptable Jaccard distances when
# computing the neighborhood overlap for the SNN
# construction. Any edges with values less than or equal to
# this will be set to 0 and removed from the SNN graph.
# Essentially sets the strigency of pruning (0 --- no
# pruning, 1 --- prune everything).
# @param print.output Whether or not to print output to the console
# @return Returns an adjacency matrix representation of the SNN
# graph
#
#' @importFrom utils txtProgressBar setTxtProgressBar
#
CalcSNNSparse <- function(
cell.names,
k.param,
nn.large,
nn.ranked,
prune.SNN,
print.output
) {
n.cells <- length(cell.names)
counter <- 1
idx1 <- vector(mode = "integer", length = n.cells ^ 2 / k.param)
idx2 <- vector(mode = "integer", length = n.cells ^ 2 / k.param)
edge.weight <- vector(mode = "double", length = n.cells ^ 2 / k.param)
id <- 1
# fill out the adjacency matrix w with edge weights only between your target
# cell and its k.scale*k.param-nearest neighbors
# speed things up (don't have to calculate all pairwise distances)
# define the edge weights with Jaccard distance
if (print.output) {
print("Constructing SNN")
pb <- txtProgressBar(min = 0, max = n.cells, style = 3)
}
for (i in 1:n.cells) {
for (j in 1:ncol(x = nn.large)) {
s <- intersect(x = nn.ranked[i, ], y = nn.ranked[nn.large[i, j], ])
u <- union(nn.ranked[i, ], nn.ranked[nn.large[i, j], ])
e <- length(x = s) / length(x = u)
if (e > prune.SNN) {
idx1[id] <- i
idx2[id] <- nn.large[i, j]
edge.weight[id] <- e
id <- id + 1
}
}
if (print.output) {
setTxtProgressBar(pb = pb, value = i)
}
}
if (print.output) {
close(con = pb)
}
idx1 <- idx1[! is.na(x = idx1) & idx1 != 0]
idx2 <- idx2[! is.na(x = idx2) & idx2 != 0]
edge.weight <- edge.weight[! is.na(x = edge.weight) & edge.weight != 0]
w <- sparseMatrix(
i = idx1,
j = idx2,
x = edge.weight,
dims = c(n.cells, n.cells)
)
diag(x = w) <- 1
rownames(x = w) <- cell.names
colnames(x = w) <- cell.names
return(w)
}
# This function calculates the pairwise connectivity of clusters.
# @param object Seurat object containing the snn graph and cluster assignments
# @return matrix with all pairwise connectivities calculated
CalcConnectivity <- function(object) {
SNN <- object@snn
cluster.names <- unique(x = object@ident)
num.clusters <- length(x = cluster.names)
connectivity <- matrix(data = 0, nrow = num.clusters, ncol = num.clusters)
rownames(x = connectivity) <- cluster.names
colnames(x = connectivity) <- cluster.names
n <- 1
for (i in cluster.names) {
for (j in cluster.names[-(1:n)]) {
subSNN <- SNN[
match(x = WhichCells(object = object, ident = i), colnames(x = SNN)),
match(x = WhichCells(object = object, ident = j), rownames(x = SNN))
]
if (is.object(x = subSNN)) {
connectivity[i, j] <- sum(subSNN) / (nrow(x = subSNN) * ncol(x = subSNN))
} else {
connectivity[i, j] <- mean(x = subSNN)
}
}
n <- n + 1
}
return(connectivity)
}
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.