#' Isolation forest based KNN graph
#'
#' Calculate a KNN graph from a feature matrix and
#' prune the graph using isolation forest.
#'
#' @param feature_mat A feature matrix based on which KNN graph is computed
#' @param k Number of neighbours in KNN graph.
#' @param hardwareSetup Hardware setup for parallelising isolation forest
#'
#' @return edgeList
#'
#' @importFrom stats sd predict
#' @importFrom isotree isolation.forest
calculate_knn_graph <- function(feature_mat, k, hardwareSetup) {
cell_list <- rownames(feature_mat)
## For each cell, calculate the distance to save memory
start_time <- Sys.time()
edgeList <- list()
for (i in seq(dim(feature_mat)[1])) {
#if ((i %% 10000) == 0)
# message(sprintf("Start pruning %i th cell. Cost %f seconds...", i, Sys.time() - start_time))
# Calculate Euclidean distance for i-th sample
dist_array <- array(
apply(feature_mat, 1, function(x)
sqrt(sum((feature_mat[i, ] - x)**2))
)
)
nn <- order(dist_array)[1:(k + 1)]
nn <- nn[nn != i] ## Excluding self in nearest neighbours
iso <- isotree::isolation.forest(data = feature_mat[nn, ],
ntrees = 100, ## as in original isolation forest paper
nthreads = hardwareSetup$coresUage) ## parallelising
preds <- stats::predict(iso, feature_mat[nn, ])
pruned <- nn[which(preds <= 0.5)] ## Use 0.5 cutoff for outliers
new_edges <- sapply(pruned, simplify = FALSE, function(x) return(c(i, x)))
## check if repetitively adding existing edges
if (length(edgeList) > 0) {
exist_edges <- vapply(new_edges, FUN.VALUE = logical(1), function(x) {
hasEdge <- vapply(edgeList, FUN.VALUE = logical(1), function(y) {
return(setequal(x, y)) ## compare a fixed new edge with all existing edges
})
if (any(hasEdge)){
return(TRUE)
} else {
return(FALSE)
}
})
## Drop existing edges from new edges
if (any(exist_edges))
new_edges <- new_edges[-which(exist_edges)]
}
edgeList <- c(edgeList, new_edges)
## Old KNN method consider mean + 1 std away as outliers
#k_dist <- dist_array[nn]
#for (j in seq(k)) {
# if (dist_array[nn[j]] <= boundary) {
# weight <- 1.0
# } else {
# weight <- 0.0
# }
# edgeList[[i]] <- c(i, nn[j], weight)
#}
}
edgeList <- do.call(rbind, edgeList)
edgeList <- as.data.frame(edgeList)
edgeList[, 1] <- cell_list[edgeList[, 1]]
edgeList[, 2] <- cell_list[edgeList[, 2]]
#colnames(edgeList) <- c("V1", "V2", "weight") # removed 0 weighted edges
return(edgeList)
}
#' Infer a cell-cell network from the encoded space using one-stat KNN
#'
#' Compute a cell network from a feature matrix using KNN.
#'
#' @param feature_mat An encoded matrix generated by the feature auto-encoder.
#' @param k Number of neighbours in KNN graph. If k is not provided,
#' then the best heuristic k = floor(sqrt(N)) will be used,
#' where N is the sample size.
#' @param hardwareSetup Hardware setup for running isolation forest
#'
#' @return An igraph object to plot the cell network.
#'
#' @references
#' \insertRef{scGNN}{scRGNet}
#' \insertRef{isotree}{scRGNet}
#' \insertRef{isolationForest}{scRGNet}
#' \insertRef{stats}{scRGNet}
#' \insertRef{defaultK}{scRGNet}
#' \insertRef{igraph}{scRGNet}
#'
#' @examples
#' # Example 1:
#' # Tested examples. Not run for fast package compiling
#' \dontrun{
#' # Accessing the demo gene_counts_small dataset available with the package
#' inputCountsPath <- system.file("extdata", "GSE138852_small.csv", package = "scRGNet")
#' # Preprocess the raw counts
#' counts <- preprocessCSV(path = inputCountsPath)
#' ltmg <- runLTMG(counts)
#' hyperParams <- setHyperParams(regu_epochs = 5L)
#' hardwareSetup <- setHardware(coresUsage = 1L)
#' z <- runFeatureAE(scDataset = counts, LTMG_mat = ltmg, hyperParams, hardwareSetup)
#' net <- generateNetwork(z)
#'}
#'
#' @export
#' @importFrom Rdpack reprompt
#' @importFrom igraph graph_from_data_frame
generateNetwork <- function(feature_mat, k = NULL, hardwareSetup = NULL) {
N <- dim(feature_mat)[1]
if (is.null(k)) {
k <- floor(sqrt(N)) ## Best heuristic for k
} else {
if ((!is.numeric(k)) | k %% 1 != 0 | k < 1 | k > N) ## Lazy evaluation :)
stop("Invalid argument for k: Must be a positive integer greater than 0.")
}
if (is.null(hardwareSetup))
hardwareSetup <- setHardware()
cell_list <- rownames(feature_mat)
edgeList <- calculate_knn_graph(feature_mat, k, hardwareSetup)
graph <- igraph::graph_from_data_frame(edgeList, directed = FALSE)
#adj <- igraph::as_adj(graph, attr = "weight")
return(graph)
}
# [END]
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.