Nothing
#' Build kNN graph
#'
#' Build kNN graph either from distance (from == "dist") or from coordinates (from == "coordinates")
#'
#' @param X either distance or matrix of coordinates (rows are samples and cols are coordinates)
#' @param k kNN parameter
#' @param from from which data type to build kNN network: "dist" if X is a distance (dissimilarity) or "coordinates" if X is a matrix with coordinates as cols and cells as rows
#' @param use.nn2 whether use \link[RANN]{nn2} method to buid kNN network faster (available only for "coordinates" option)
#' @param return_neighbors_order whether return order of neighbors (not available for nn2 option)
#' @param dist_method method to compute dist (if X is a matrix of coordinates) available: c("cor", "euclidean", "maximum", "manhattan", "canberra", "binary", "minkowski")
#' @param cor_method if distance is computed as correlation (dist_method == "cor), which type of correlation to use (available: "pearson", "kendall", "spearman")
#' @param p p param in \code{"dist"} function
#' @param directed whether to build a directed graph
#' @param DoSNN whether to apply shared nearest neighbors (default is \code{FALSE})
#' @param which.snn whether to use \link[bluster]{neighborsToSNNGraph} or \link[dbscan]{sNN} for sNN graph construction
#' @param pruning quantile to perform edge pruning (default is \code{NULL} - no pruning applied) based on PCA distance distribution
#' @param kmin keep at least \code{kmin} edges in single-cell graph when pruning applied (idnored if \code{is.null(pruning)})
#' @param ... other parameters of \link[bluster]{neighborsToSNNGraph} or \link[dbscan]{sNN}
#'
#' @return a list with components
#' \itemize{
#' \item graph.knn - igraph object
#' \item order - Nxk matrix with indices of k nearest neighbors ordered by relevance (from 1st to k-th)
#' }
#'
#' @export
#'
build_knn_graph <- function(
X,
k = 5,
from = c("dist", "coordinates"),
use.nn2 = TRUE,
return_neighbors_order = FALSE,
dist_method = "euclidean",
cor_method = "pearson",
p = 2,
directed = FALSE,
DoSNN = FALSE,
which.snn = c("bluster", "dbscan"),
pruning = NULL,
kmin = 0,
...
)
{
av.methods <- c("dist", "coordinates")
method <- pmatch(from[1], av.methods)
if(is.na(method)){
stop(paste("Unknown method", from, "Available methods are", paste(av.methods, collapse = ", ")))
}
if (method == 2){ # from coordinates
if(use.nn2){
if(dist_method != "euclidean"){
stop(paste0("Fast nn2 function from RANN package is used, so",
dist_method, "distnce is not acceptable.
To use nn2 method, please, choose eucleadian distance.
If you want to use", dist_method, "distance, please set parameter use.nn2 to FALSE"))}
mode <- ifelse(directed, 'out', 'all')
return(build_knn_graph_nn2(
X = X,
k = k,
mode = mode,
DoSNN = DoSNN,
pruning = pruning,
which.snn = which.snn,
kmin = kmin, ...))
} else {
av.dist <- c("cor", "euclidean", "maximum", "manhattan", "canberra", "binary", "minkowski")
dist_method_ <- pmatch(dist_method, av.dist)
if(is.na(dist_method_)){
stop(paste("Unknown distance method:", dist_method, "Available dist methods are", paste(av.dist, collapse = ", ") ))
}
if(dist_method_ == 1){
#print("cor")
#print(cor_method)
av.cor_methods <- c("pearson", "kendall", "spearman")
cor_method_ <- pmatch(cor_method, av.cor_methods)
if(is.na(cor_method_)){
stop(paste("Unknown cor method:", cor_method, "Available cor methods are", paste(av.cor_methods)))
}
X <- stats::as.dist(as.matrix(1 - stats::cor(t(X), method = cor_method)))
} else {
X <- stats::dist(X, method = dist_method)
}
}
} else {
if(use.nn2 == TRUE){
stop("Method nn2 cannot be applied to distance, to use fast nn2 method, please provide coordinates rather than distance
and set parameter from to coordinates")
}
return(knn_graph_from_dist(D = X, k = k, return_neighbors_order = return_neighbors_order))
}
### now X is distance in any case
return(knn_graph_from_dist(D = X, k = k, return_neighbors_order = return_neighbors_order))
}
#' Build kNN graph using RANN::nn2
#' (used in \code{"build_knn_graph"})
#'
#' @param X matrix of coordinates (rows are samples and cols are coordinates)
#' @param k kNN parameter
#' @param mode mode of \link[igraph]{graph_from_adj_list} ('all' -- undirected graph, 'out' -- directed graph)
#' @param DoSNN whether to apply shared nearest neighbors (default is \code{FALSE})
#' @param which.snn whether to use \link[bluster]{neighborsToSNNGraph} or \link[dbscan]{sNN} for sNN graph construction
#' @param pruning quantile to perform edge pruning (default is \code{NULL} - no pruning applied) based on PCA distance distribution
#' @param kmin keep at least \code{kmin} edges in single-cell graph when pruning applied (idnored if \code{is.null(pruning)})
#' @param ... other parameters of \link[bluster]{neighborsToSNNGraph} or \link[dbscan]{sNN}
#'
#' @return a list with components
#' \itemize{
#' \item graph.knn - igraph object
#' }
#'
build_knn_graph_nn2 <- function(
X,
k = min(5, ncol(X)),
mode = 'all',
DoSNN = FALSE,
which.snn = c("bluster", "dbscan"),
pruning = NULL,
kmin = 0,
...
){
if(is.numeric(pruning)){
if(pruning < 0 | pruning > 1){
stop(paste("Pruning has to be numeric from 0 to 1 or NULL"))
}
} else {
if(!is.null(pruning)){
warning("Pruning has to be numeric from 0 to 1 or NULL. pruning was set to NULL")
}
pruning <- NULL
}
if(DoSNN){
which.snn <- which.snn[1]
if(!(which.snn %in% c("bluster", "dbscan"))){
stop(paste("Which SNN:", which.snn, "is unknown, available parameters are:", paste(c("bluster", "dbscan"), collapse = ", ")))
}
if(which.snn == "bluster"){
nn2.res <- RANN::nn2(data = X, k = k) # it is faster and memory more efficietn to run RANN::nn2() + bluster::neighborsToSNNGraph() instead of bluster::makeSNNGraph()
snn <- bluster::neighborsToSNNGraph(nn2.res$nn.idx, ...)
if(!is.null(pruning)){
#pca_edje_dist <- apply(igraph::get.edgelist(snn), 1, function(i){stats::dist(X[i,])})
pca_edje_dist <- apply(igraph::as_edgelist(snn), 1, function(i){stats::dist(X[i,])})
igraph::E(snn)$pca_edje_dist <- pca_edje_dist
pruning_cutoff <- stats::quantile(pca_edje_dist, pruning)
edges_to_remove <- which(pca_edje_dist > pruning_cutoff)
snn <- igraph::delete_edges(snn, edges_to_remove)
}
graph.knn <- igraph::simplify(snn, remove.multiple = TRUE)
#igraph::E(snn)$weight <- 1
return(res <- list(graph.knn = graph.knn))
} else if (which.snn == "dbscan"){
kt <- max(1, round(k/3))
dbsnn <- dbscan::sNN(x = X, k = 2*(k-1), ...) # does not create self-loop, so k = k-1
if(!is.null(pruning)){
pca_edje_dist <- as.vector(dbsnn$dist)
pruning_cutoff <- stats::quantile(pca_edje_dist, pruning)
remove_edges <- dbsnn$dist > pruning_cutoff
# keep at least (kmin) edges
if(kmin > 0){
remove_edges[,1:kmin] <- FALSE
}
nn.idx <- dbsnn$id
dbsnn$id[remove_edges] <- NA
dbsnn$dist[remove_edges] <- NA
dbsnn$shared[remove_edges] <- NA
}
adj.knn <- split(dbsnn$id, rep(1:nrow(dbsnn$id), times = ncol(dbsnn$id))) # get adj list
adj.knn <- lapply(adj.knn, function(x){x[!is.na(x)]}) # remove NA
edge.dist <- split(dbsnn$dist, rep(1:nrow(dbsnn$dist), times = ncol(dbsnn$dist))) # get edje weight in a format of adj list
edge.dist <- lapply(edge.dist, function(x){x[!is.na(x)]}) # remove NA
edge.weight <- split(dbsnn$shared, rep(1:nrow(dbsnn$shared), times = ncol(dbsnn$shared))) # get edje weight in a format of adj list
edge.weight <- lapply(edge.weight, function(x){x[!is.na(x)]}) # remove NA
graph.knn <- igraph::graph_from_adj_list(adj.knn, duplicate = F, mode = mode)
message(paste("N edges =", igraph::ecount(graph.knn)))
message(paste("Length of edge.weight =", length(unlist(edge.weight))))
message(paste("Length of edge.weight =", length(unlist(edge.weight))))
igraph::E(graph.knn)$weight <- unlist(edge.weight)
igraph::E(graph.knn)$pca_edje_dist <- unlist(edge.dist)
return(res <- list(graph.knn = graph.knn))
} else {
stop(paste("Which SNN:", which.snn, "is unknown, available parameters are:", paste(c("bluster", "dbscan"), collapse = ", ")))
}
}
# run just kNN
nn2.res <- RANN::nn2(data = X, k = k)
if(!is.null(pruning)){
nn2.res$nn.dists <- nn2.res$nn.dists[,-1] #remove self loops
nn2.res$nn.idx <- nn2.res$nn.idx[,-1]
pca_edje_dist <- as.vector(nn2.res$nn.dists)
pruning_cutoff <- stats::quantile(pca_edje_dist, pruning)
remove_edges <- nn2.res$nn.dists > pruning_cutoff
# keep at least (kmin) edges
if(kmin > 0){
remove_edges[,1:kmin] <- FALSE
}
nn.idx <- nn2.res$nn.idx
nn.idx[remove_edges] <- NA
} else {
nn.idx <- nn2.res$nn.idx
}
adj.knn <- split(nn.idx, rep(1:nrow(nn.idx), times = ncol(nn.idx))) # get adj list
adj.knn <- lapply(adj.knn, function(x){x[!is.na(x)]}) # remove NA
graph.knn <- igraph::graph_from_adj_list(adj.knn, duplicate = FALSE, mode = mode)
graph.knn <- igraph::simplify(graph.knn, remove.multiple = TRUE)
igraph::E(graph.knn)$weight <- 1
#pca_edje_dist <- apply(igraph::get.edgelist(graph.knn), 1, function(i){stats::dist(X[i,])})
pca_edje_dist <- apply(igraph::as_edgelist(graph.knn), 1, function(i){stats::dist(X[i,])})
igraph::E(graph.knn)$pca_edje_dist <- pca_edje_dist
return(res <- list(graph.knn = graph.knn))
}
#' Build kNN graph from distance
#' (used in \code{"build_knn_graph"})
#'
#' @param D dist matrix or dist object (preferentially)
#' @param k kNN parameter
#' @param return_neighbors_order whether return order of neighbors (not available for nn2 option)
#' @param mode mode of \link[igraph]{graph_from_adj_list} ('all' -- undirected graph, 'out' -- directed graph)
#'
#' @return a list with components
#' \itemize{
#' \item graph.knn - igraph object
#' \item order - Nxk matrix with indices of k nearest neighbors ordered by relevance (from 1st to k-th)
#' }
#'
knn_graph_from_dist <- function(D, k = 5, return_neighbors_order = TRUE, mode = 'all'){
##print("Start knn_graph_from_dist")
if(!is.matrix(D) & !inherits(D, "dist")){
stop("D mast be a matrix or dist!")
}
if(!inherits(D, "dist")){
D <- stats::as.dist(D)
}
N <- (1 + sqrt(1+8*length(D)))/2 # number of cells
if (k >= N)
stop("Not enought neighbors in data set!")
if (k < 1)
stop("Invalid number of nearest neighbors, k must be >= 1!")
row <- function(i, N){
return(c(if(i>1) D[(i-1)+c(0:(i-2))*(N - 1 - c(1:(i-1))/2)],
NA,
if(i < N) D[((i-1)*(N-1) - ((i-1)*(i-2)/2) + 1) : (((i-1)*(N-1) - ((i-1)*(i-2)/2) + 1) + N-i-1)]))
}
neighbors <- t(sapply(1:N, function(i) {order(row(i,N))[1:k]}))
adj.knn <- split(neighbors, rep(1:nrow(neighbors), times = ncol(neighbors)))
graph.knn <- igraph::graph_from_adj_list(adj.knn, duplicate = FALSE, mode = mode)
graph.knn <- igraph::simplify(graph.knn, remove.multiple = TRUE)
igraph::E(graph.knn)$weight <- 1
if(return_neighbors_order){
res <- list(graph.knn = graph.knn,
order = neighbors)
} else {res <- list(graph.knn = graph.knn)}
return(res)
}
Any scripts or data that you put into this service are public.
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.