R/cmdscale_landmarks.R

Defines functions cmdscale_landmarks

Documented in cmdscale_landmarks

#' Perform MDS on landmarks and project other samples to the same space
#'
#' @param dist_2lm Distance matrix between the landmarks and all the samples in original dataset
#' @param ndim The number of dimensions
#' @param rescale Whether or not to rescale the final dimensionality reduction (recommended)
#' @param ... Extra params to pass to [irlba::irlba()]
#'
#' @importFrom irlba partial_eigen
#' @importFrom dynutils scale_uniform
#'
#' @return The dimensionality reduction in the form of a `ncol(dist_2lm)` by `ndim` matrix.
#'
#' @export
#'
#' @examples
#' library(Matrix)
#' x <- as.matrix(iris[,1:4])
#' dist_2lm <- select_landmarks(x)
#' cmdscale_landmarks(dist_2lm)
cmdscale_landmarks <- function(dist_2lm, ndim = 3, rescale = TRUE, ...) {
  assert_that(
    !is.null(attr(dist_2lm, "landmark_ix")) || (!is.null(rownames(dist_2lm)) && !is.null(colnames(dist_2lm))),
    is.numeric(ndim), length(ndim) == 1, ndim >= 1,
    is.logical(rescale), length(rescale) == 1, !is.na(rescale)
  )

  ix_lm <- attr(dist_2lm, "landmark_ix")
  if (is.null(ix_lm)) {
    ix_lm <- match(rownames(dist_2lm), colnames(dist_2lm))
  }

  # short hand notations
  x <- dist_2lm[, ix_lm, drop = FALSE]^2
  n <- as.integer(nrow(x))
  N <- as.integer(ncol(dist_2lm))

  # double center data
  mu_n <- rowMeans(x)
  mu <- mean(x)
  x_c <- sweep(x, 1, mu_n, "-")
  x_dc <- sweep(x_c, 2, mu_n, "-") + mu

  # classical MDS on landmarks
  if (ndim > 0.5 * min(nrow(x_dc), ncol(x_dc))) {
    e <- eigen(-x_dc / 2, symmetric = TRUE)
    e$values <- e$values[seq_len(ndim)]
    e$vectors <- e$vectors[, seq_len(ndim), drop = FALSE]
  } else {
    e <- irlba::partial_eigen(-x_dc / 2, symmetric = TRUE, n = ndim, ...)
  }
  ev <- e$values
  evec <- e$vectors
  ndim1 <- sum(ev > 0)
  if (ndim1 < ndim) {
    warning(gettextf("only %d of the first %d eigenvalues are > 0", ndim1, ndim), domain = NA)
    evec <- evec[, ev > 0, drop = FALSE]
    ev <- ev[ev > 0]
    ndim <- ndim1
  }

  # distance-based triangulation
  points_inv <- evec / rep(sqrt(ev), each = n)
  dimred <- -t(t(points_inv) %*% ((dist_2lm - rep(mu_n, each = N)) / 2))
  dimred <- (-t(dist_2lm - rep(mu_n, each = N)) / 2) %*% points_inv

  if (rescale) {
    dimred <- dynutils::scale_uniform(dimred)
  }

  colnames(dimred) <- paste0("comp_", seq_len(ndim))

  dimred
}

Try the lmds package in your browser

Any scripts or data that you put into this service are public.

lmds documentation built on Sept. 27, 2019, 5:03 p.m.