R/affinity.R
In uwot: The Uniform Manifold Approximation and Projection (UMAP) Method for Dimensionality Reduction

# set_op_mix_ratio = between 0 and 1 mixes in fuzzy set intersection
# set to 0 for intersection only
#' @import Matrix
fuzzy_set_union <- function(X, set_op_mix_ratio = 1) {
  XX <- X * Matrix::t(X)
  if (set_op_mix_ratio == 0) {
    Matrix::drop0(XX)
  } else if (set_op_mix_ratio == 1) {
    Matrix::drop0(X + Matrix::t(X) - XX)
  } else {
    Matrix::drop0(
      set_op_mix_ratio * (X + Matrix::t(X) - XX) + (1 - set_op_mix_ratio) * XX
    )
  }
}

# Calculate the (asymmetric) affinity matrix based on the nearest neighborhoods
# default target for calibration is the sum of affinities = log2(n_nbrs)
# nn distances should be stored column-wise
smooth_knn <- function(nn_dist,
                       nn_ptr = NULL,
                       skip_first = TRUE,
                       target = NULL,
                       local_connectivity = 1.0,
                       n_threads = NULL,
                       grain_size = 1,
                       ret_sigma = FALSE,
                       verbose = FALSE) {
  if (is.null(n_threads)) {
    n_threads <- default_num_threads()
  }
  tsmessage(
    "Commencing smooth kNN distance calibration",
    pluralize("thread", n_threads, " using"),
    appendLF = FALSE
  )
  if (length(target) == 1) {
    tsmessage(" with target n_neighbors = ", formatC(2^target), time_stamp = FALSE)
  } else {
    tsmessage(time_stamp = FALSE)
  }
  affinity_matrix_res <- smooth_knn_distances_parallel(
    nn_dist = nn_dist,
    nn_ptr = nn_ptr,
    skip_first = skip_first,
    target = target,
    n_iter = 64,
    local_connectivity = local_connectivity,
    tol = 1e-5,
    min_k_dist_scale = 1e-3,
    n_threads = n_threads,
    grain_size = grain_size,
    ret_sigma = ret_sigma
  )
  if (verbose && affinity_matrix_res$n_failures > 0) {
    tsmessage(affinity_matrix_res$n_failures, " smooth knn distance failures")
  }
  affinity_matrix_res
}

smooth_knn_matrix <- function(nn,
                              target = NULL,
                              local_connectivity = 1.0,
                              bandwidth = 1.0,
                              ret_sigma = FALSE,
                              n_threads = NULL,
                              grain_size = 1,
                              verbose = FALSE) {
  if (is.null(n_threads)) {
    n_threads <- default_num_threads()
  }

  osparse <- NULL
  if (is_sparse_matrix(nn)) {
    nn <- Matrix::drop0(nn)
    osparse <- order_sparse(nn)
    nn_dist <- osparse$x
    nn_ptr <- osparse$p
    n_nbrs <- diff(nn_ptr)
    if (any(n_nbrs < 1)) {
      stop("All observations need at least one neighbor")
    }
    if (is.null(target)) {
      # add 1 to n_nbrs to account for implicit self neighbor
      target <- log2(n_nbrs + 1) * bandwidth
    }
    skip_first <- FALSE
  } else {
    nnt <- nn_graph_t(nn)
    n_nbrs <- nrow(nnt$dist)
    if (is.null(target)) {
      target <- log2(n_nbrs) * bandwidth
    }
    nn_ptr <- n_nbrs
    nn_dist <- as.vector(nnt$dist)
    skip_first <- TRUE
  }
  affinity_matrix_res <- smooth_knn(
    nn_dist = nn_dist,
    nn_ptr = nn_ptr,
    skip_first = skip_first,
    target = target,
    local_connectivity = local_connectivity,
    ret_sigma = ret_sigma,
    n_threads = n_threads,
    grain_size = grain_size,
    verbose = verbose
  )

  v <- affinity_matrix_res$matrix
  if (is_sparse_matrix(nn)) {
    # use j instead of i to transpose it
    v <- Matrix::sparseMatrix(
      j = osparse$i, p = osparse$p, x = v,
      dims = osparse$dims, index1 = FALSE
    )
    Matrix::diag(v) <- 0.0
    v <- Matrix::drop0(v)
  } else {
    v <- nng_to_sparse(nnt$idx, v, self_nbr = TRUE, by_row = FALSE)
  }
  affinity_matrix_res$matrix <- v
  affinity_matrix_res
}

# Given nearest neighbor data and a measure of distance compute
# the fuzzy simplicial set (here represented as a fuzzy graph in the form of a
# sparse matrix) associated to the data. This is done by locally approximating
# geodesic distance at each point, creating a fuzzy simplicial set for each such
# point, and then combining all the local fuzzy simplicial sets into a global
# one via a fuzzy union
fuzzy_simplicial_set <- function(nn,
                                 target = NULL,
                                 set_op_mix_ratio = 1.0,
                                 local_connectivity = 1.0, bandwidth = 1.0,
                                 ret_sigma = FALSE,
                                 n_threads = NULL,
                                 grain_size = 1,
                                 verbose = FALSE) {
  affinity_matrix_res <- smooth_knn_matrix(
    nn = nn,
    target = target,
    local_connectivity = local_connectivity,
    bandwidth = bandwidth,
    ret_sigma = ret_sigma,
    n_threads = n_threads,
    grain_size = grain_size,
    verbose = verbose
  )
  res <- fuzzy_set_union(affinity_matrix_res$matrix, set_op_mix_ratio = set_op_mix_ratio)
  if (ret_sigma) {
    res <- list(matrix = res)
    res$sigma <- affinity_matrix_res$sigma
    res$rho <- affinity_matrix_res$rho
  }
  res
}

symmetrize <- function(P) {
  0.5 * (P + Matrix::t(P))
}

perplexity_similarities <- function(nn, perplexity = NULL, ret_sigma = FALSE,
                                    n_threads = NULL,
                                    grain_size = 1,
                                    kernel = "gauss",
                                    verbose = FALSE) {
  if (is.null(n_threads)) {
    n_threads <- default_num_threads()
  }
  if (is.null(perplexity) && kernel != "knn") {
    stop("Must provide perplexity")
  }

  sigma <- NULL
  if (kernel == "gauss") {
    tsmessage(
      "Commencing calibration for perplexity = ", formatC(perplexity),
      pluralize("thread", n_threads, " using")
    )

    nnt <- nn_graph_t(nn)
    n_vertices <- ncol(nnt$dist)
    affinity_matrix_res <- calc_row_probabilities_parallel(
      nn_dist = as.vector(nnt$dist),
      n_vertices = n_vertices,
      perplexity = perplexity,
      ret_sigma = ret_sigma,
      n_threads = n_threads,
      grain_size = grain_size
    )
    if (verbose && affinity_matrix_res$n_failures > 0) {
      tsmessage(affinity_matrix_res$n_failures, " perplexity failures")
    }

    dint <- NULL
    if (ret_sigma && !is.null(affinity_matrix_res$sigma)) {
      # An analytical version of the "soft" correlation dimension estimate of
      # intrinsic dimensionality from multi-scale SNE by Lee et al (2015).
      # http://jlmelville.github.io/sneer/dimensionality.html
      d <- nnt$dist
      p <- affinity_matrix_res$matrix
      logp <- log(p + .Machine$double.eps)
      s <- affinity_matrix_res$sigma
      h <- -colSums(p * logp)
      lph <- sweep(logp, 2, h, `+`)
      dhdb <- colSums(d * d * p * lph)
      dint <- -2 * dhdb / (s * s)
    }

    affinity_matrix <- nng_to_sparse(nnt$idx, as.vector(affinity_matrix_res$matrix),
      self_nbr = TRUE, by_row = FALSE
    )
    if (!is.null(affinity_matrix_res$sigma)) {
      sigma <- affinity_matrix_res$sigma
    }
  } else {
    # knn kernel
    tsmessage("Using knn graph for input weights with k = ", ncol(nn$idx))
    # Make each row sum to 1, ignoring the self-index
    # i.e. diagonal will be zero
    affinity_matrix <- nng_to_sparse(nn$idx, val = 1 / (ncol(nn$idx) - 1))
    Matrix::diag(affinity_matrix) <- 0
    affinity_matrix <- Matrix::drop0(affinity_matrix)
  }
  res <- list(matrix = symmetrize(affinity_matrix))
  if (ret_sigma && !is.null(sigma)) {
    res$sigma <- sigma
    if (!is.null(dint)) {
      res$dint <- dint
    }
  }
  res
}

# Convert the matrix of NN indices to a sparse asymmetric matrix where each
# edge has a weight of val (scalar or vector)
# return a sparse matrix with dimensions of nrow(nn_idx) x max_nbr_id
nn_to_sparse <- function(nn_idxv, n_obs, val = 1, self_nbr = FALSE,
                         max_nbr_id = NULL, by_row = TRUE) {
  n_nbrs <- length(nn_idxv) / n_obs


  if (is.null(max_nbr_id)) {
    max_nbr_id <- ifelse(self_nbr, n_obs, max(nn_idxv))
  }

  if (length(val) == 1) {
    xs <- rep(val, n_obs * n_nbrs)
  } else {
    xs <- val
  }
  if (by_row) {
    is <- rep(1:n_obs, times = n_nbrs)
  } else {
    is <- rep(1:n_obs, each = n_nbrs)
  }

  dims <- c(n_obs, max_nbr_id)
  res <- Matrix::sparseMatrix(i = is, j = nn_idxv, x = xs, dims = dims)

  if (self_nbr) {
    Matrix::diag(res) <- 0
    res <- Matrix::drop0(res)
  }
  res
}

nng_to_sparse <- function(nn_idx, val = 1, self_nbr = FALSE,
                          max_nbr_id = NULL, by_row = TRUE) {
  if (by_row) {
    n_obs <- nrow(nn_idx)
  } else {
    n_obs <- ncol(nn_idx)
  }

  nn_to_sparse(as.vector(nn_idx), n_obs,
    val = val, self_nbr = self_nbr,
    max_nbr_id = max_nbr_id, by_row = by_row
  )
}

# transpose the index and distance matrix
nn_graph_t <- function(nn_graph) {
  list(idx = t(nn_graph$idx), dist = t(nn_graph$dist))
}

order_sparse <- function(spm) {
  x <- spm@x
  i <- spm@i
  p <- spm@p

  x_sort <- rep(0, length(x))
  i_sort <- rep(0, length(i))

  n_vertices <- length(p) - 1

  for (v in 1:n_vertices) {
    p_begin <- p[v]
    p_end <- p[v + 1]
    if (p_end - p_begin == 0) {
      next
    }
    pb1 <- p_begin + 1
    x_order <- order(x[pb1:p_end])
    x_sort[pb1:p_end] <- x[x_order + p_begin]
    i_sort[pb1:p_end] <- i[x_order + p_begin]
  }

  list(i = i_sort, p = p, x = x_sort, order = x_order, dims = spm@Dim)
}

Any scripts or data that you put into this service are public.

uwot documentation built on April 12, 2025, 1:50 a.m.

rdrr.io home R language documentation Run R code online

CRAN packages Bioconductor packages R-Forge packages GitHub packages

Note that we can't provide technical support on individual packages. You should contact the package authors for that.

uwot
The Uniform Manifold Approximation and Projection (UMAP) Method for Dimensionality Reduction

R/affinity.R
In uwot: The Uniform Manifold Approximation and Projection (UMAP) Method for Dimensionality Reduction

Defines functions order_sparse nn_graph_t nng_to_sparse nn_to_sparse perplexity_similarities symmetrize fuzzy_simplicial_set smooth_knn_matrix smooth_knn fuzzy_set_union

Try the uwot package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

uwot The Uniform Manifold Approximation and Projection (UMAP) Method for Dimensionality Reduction

R/affinity.R In uwot: The Uniform Manifold Approximation and Projection (UMAP) Method for Dimensionality Reduction

Defines functions order_sparse nn_graph_t nng_to_sparse nn_to_sparse perplexity_similarities symmetrize fuzzy_simplicial_set smooth_knn_matrix smooth_knn fuzzy_set_union

Try the uwot package in your browser

R Package Documentation

Browse R Packages

We want your feedback!

uwot
The Uniform Manifold Approximation and Projection (UMAP) Method for Dimensionality Reduction

R/affinity.R
In uwot: The Uniform Manifold Approximation and Projection (UMAP) Method for Dimensionality Reduction