R/affinity.R In jlmelville/uwot: The Uniform Manifold Approximation and Projection (UMAP) Method for Dimensionality Reduction

Defines functions order_sparsenn_graph_tnng_to_sparsenn_to_sparseperplexity_similaritiessymmetrizefuzzy_simplicial_setsmooth_knn_matrixsmooth_knnfuzzy_set_union

```# set_op_mix_ratio = between 0 and 1 mixes in fuzzy set intersection
# set to 0 for intersection only
#' @import Matrix
fuzzy_set_union <- function(X, set_op_mix_ratio = 1) {
XX <- X * Matrix::t(X)
if (set_op_mix_ratio == 0) {
Matrix::drop0(XX)
}
else if (set_op_mix_ratio == 1) {
Matrix::drop0(X + Matrix::t(X) - XX)
}
else {
Matrix::drop0(
set_op_mix_ratio * (X + Matrix::t(X) - XX) + (1 - set_op_mix_ratio) * XX
)
}
}

# Calculate the (asymmetric) affinity matrix based on the nearest neighborhoods
# default target for calibration is the sum of affinities = log2(n_nbrs)
# nn distances should be stored column-wise
smooth_knn <- function(nn_dist,
nn_ptr = NULL,
skip_first = TRUE,
target = NULL,
local_connectivity = 1.0,
grain_size = 1,
ret_sigma = FALSE,
verbose = FALSE) {
}
tsmessage(
"Commencing smooth kNN distance calibration",
)
if (length(target) == 1) {
tsmessage(" with target n_neighbors = ", formatC(2 ^ target), time_stamp = FALSE)
}
else {
tsmessage(time_stamp = FALSE)
}
affinity_matrix_res <- smooth_knn_distances_parallel(
nn_dist = nn_dist,
nn_ptr = nn_ptr,
skip_first = skip_first,
target = target,
n_iter = 64,
local_connectivity = local_connectivity,
tol = 1e-5,
min_k_dist_scale = 1e-3,
grain_size = grain_size,
ret_sigma = ret_sigma
)
if (verbose && affinity_matrix_res\$n_failures > 0) {
tsmessage(affinity_matrix_res\$n_failures, " smooth knn distance failures")
}
affinity_matrix_res
}

smooth_knn_matrix <- function(nn,
target = NULL,
local_connectivity = 1.0,
bandwidth = 1.0,
ret_sigma = FALSE,
grain_size = 1,
verbose = FALSE) {
}

osparse <- NULL
if (is_sparse_matrix(nn)) {
nn <- Matrix::drop0(nn)
osparse <- order_sparse(nn)
nn_dist <- osparse\$x
nn_ptr <- osparse\$p
n_nbrs <- diff(nn_ptr)
if (any(n_nbrs < 1)) {
stop("All observations need at least one neighbor")
}
if (is.null(target)) {
# add 1 to n_nbrs to account for implicit self neighbor
target <- log2(n_nbrs + 1) * bandwidth
}
skip_first <- FALSE
}
else {
nnt <- nn_graph_t(nn)
n_nbrs <- nrow(nnt\$dist)
if (is.null(target)) {
target <- log2(n_nbrs) * bandwidth
}
nn_ptr <- n_nbrs
nn_dist <- as.vector(nnt\$dist)
skip_first <- TRUE
}
affinity_matrix_res <- smooth_knn(
nn_dist = nn_dist,
nn_ptr = nn_ptr,
skip_first = skip_first,
target = target,
local_connectivity = local_connectivity,
ret_sigma = ret_sigma,
grain_size = grain_size,
verbose = verbose
)

v <- affinity_matrix_res\$matrix
if (is_sparse_matrix(nn)) {
# use j instead of i to transpose it
v <- Matrix::sparseMatrix(j = osparse\$i, p = osparse\$p, x = v,
dims = osparse\$dims, index1 = FALSE)
Matrix::diag(v) <- 0.0
v <- Matrix::drop0(v)
}
else {
v <- nng_to_sparse(nnt\$idx, v, self_nbr = TRUE, by_row = FALSE)
}
affinity_matrix_res\$matrix <- v
affinity_matrix_res
}

# Given nearest neighbor data and a measure of distance compute
# the fuzzy simplicial set (here represented as a fuzzy graph in the form of a
# sparse matrix) associated to the data. This is done by locally approximating
# geodesic distance at each point, creating a fuzzy simplicial set for each such
# point, and then combining all the local fuzzy simplicial sets into a global
# one via a fuzzy union
fuzzy_simplicial_set <- function(nn,
target = NULL,
set_op_mix_ratio = 1.0,
local_connectivity = 1.0, bandwidth = 1.0,
ret_sigma = FALSE,
grain_size = 1,
verbose = FALSE) {
affinity_matrix_res <- smooth_knn_matrix(nn = nn,
target = target,
local_connectivity = local_connectivity,
bandwidth = bandwidth,
ret_sigma = ret_sigma,
grain_size = grain_size,
verbose = verbose)
res <- fuzzy_set_union(affinity_matrix_res\$matrix, set_op_mix_ratio = set_op_mix_ratio)
if (ret_sigma) {
res <- list(matrix = res)
res\$sigma <- affinity_matrix_res\$sigma
res\$rho <- affinity_matrix_res\$rho
}
res
}

symmetrize <- function(P) {
0.5 * (P + Matrix::t(P))
}

perplexity_similarities <- function(nn, perplexity = NULL, ret_sigma = FALSE,
grain_size = 1,
kernel = "gauss",
verbose = FALSE) {
}
if (is.null(perplexity) && kernel != "knn") {
stop("Must provide perplexity")
}

sigma <- NULL
if (kernel == "gauss") {
tsmessage(
"Commencing calibration for perplexity = ", formatC(perplexity),
)

nnt <- nn_graph_t(nn)
n_vertices <- ncol(nnt\$dist)
affinity_matrix_res <- calc_row_probabilities_parallel(
nn_dist = as.vector(nnt\$dist),
n_vertices = n_vertices,
perplexity = perplexity,
ret_sigma = ret_sigma,
grain_size = grain_size
)
if (verbose && affinity_matrix_res\$n_failures > 0) {
tsmessage(affinity_matrix_res\$n_failures, " perplexity failures")
}

dint <- NULL
if (ret_sigma && !is.null(affinity_matrix_res\$sigma)) {
# An analytical version of the "soft" correlation dimension estimate of
# intrinsic dimensionality from multi-scale SNE by Lee et al (2015).
# http://jlmelville.github.io/sneer/dimensionality.html
d <- nnt\$dist
p <- affinity_matrix_res\$matrix
logp <- log(p + .Machine\$double.eps)
s <- affinity_matrix_res\$sigma
h <- -colSums(p * logp)
lph <- sweep(logp, 2, h, `+`)
dhdb <- colSums(d * d * p * lph)
dint <- -2 * dhdb / (s * s)
}

affinity_matrix <- nng_to_sparse(nnt\$idx, as.vector(affinity_matrix_res\$matrix),
self_nbr = TRUE, by_row = FALSE
)
if (!is.null(affinity_matrix_res\$sigma)) {
sigma <- affinity_matrix_res\$sigma
}
}
else {
# knn kernel
tsmessage("Using knn graph for input weights with k = ", ncol(nn\$idx))
# Make each row sum to 1, ignoring the self-index
# i.e. diagonal will be zero
affinity_matrix <- nng_to_sparse(nn\$idx, val = 1 / (ncol(nn\$idx) - 1))
Matrix::diag(affinity_matrix) <- 0
affinity_matrix <- Matrix::drop0(affinity_matrix)
}
res <- list(matrix = symmetrize(affinity_matrix))
if (ret_sigma && !is.null(sigma)) {
res\$sigma <- sigma
if (!is.null(dint)) {
res\$dint <- dint
}
}
res
}

# Convert the matrix of NN indices to a sparse asymmetric matrix where each
# edge has a weight of val (scalar or vector)
# return a sparse matrix with dimensions of nrow(nn_idx) x max_nbr_id
nn_to_sparse <- function(nn_idxv, n_obs, val = 1, self_nbr = FALSE,
max_nbr_id = NULL, by_row = TRUE) {
n_nbrs <- length(nn_idxv) / n_obs

if (is.null(max_nbr_id)) {
max_nbr_id <- ifelse(self_nbr, n_obs, max(nn_idxv))
}

if (length(val) == 1) {
xs <- rep(val, n_obs * n_nbrs)
}
else {
xs <- val
}
if (by_row) {
is <- rep(1:n_obs, times = n_nbrs)
}
else {
is <- rep(1:n_obs, each = n_nbrs)
}

dims <- c(n_obs, max_nbr_id)
res <- Matrix::sparseMatrix(i = is, j = nn_idxv, x = xs, dims = dims)

if (self_nbr) {
Matrix::diag(res) <- 0
res <- Matrix::drop0(res)
}
res
}

nng_to_sparse <- function(nn_idx, val = 1, self_nbr = FALSE,
max_nbr_id = NULL, by_row = TRUE) {
if (by_row) {
n_obs <- nrow(nn_idx)
}
else {
n_obs <- ncol(nn_idx)
}

nn_to_sparse(as.vector(nn_idx), n_obs, val = val, self_nbr = self_nbr,
max_nbr_id = max_nbr_id, by_row = by_row)
}

# transpose the index and distance matrix
nn_graph_t <- function(nn_graph) {
list(idx = t(nn_graph\$idx), dist = t(nn_graph\$dist))
}

order_sparse <- function(spm) {
x <- spm@x
i <- spm@i
p <- spm@p

x_sort <- rep(0, length(x))
i_sort <- rep(0, length(i))

n_vertices <- length(p) - 1

for (v in 1:n_vertices) {
p_begin <- p[v]
p_end <- p[v + 1]
if (p_end - p_begin == 0) {
next
}
pb1 <- p_begin + 1
x_order <- order(x[pb1:p_end])
x_sort[pb1:p_end] <- x[x_order + p_begin]
i_sort[pb1:p_end] <- i[x_order + p_begin]
}

list(i = i_sort, p = p, x = x_sort, order = x_order, dims = spm@Dim)
}
```
jlmelville/uwot documentation built on March 5, 2023, 6:58 p.m.