R-CMD-check

# Document-wide knitr chunk options.
knitr::opts_chunk$set(
  message = FALSE,            # do not show messages in the rendered output
  collapse = TRUE,            # merge source and output into one block
  comment = "#>",             # prefix for printed output lines
  fig.path = "man/figures/",  # directory prefix for generated figures
  out.width = "100%",         # display figures at full width
  dpi = 300                   # figure resolution
)

# Fixed seed so results that involve random numbers are reproducible.
set.seed(1)

# Attaches the tidyverse packages (ggplot2, tibble, dplyr, ...) used below.
library(tidyverse)

lmds: Landmark Multi-Dimensional Scaling

A fast dimensionality reduction method scalable to large numbers of samples. Landmark Multi-Dimensional Scaling (LMDS) is an extension of classical Torgerson MDS. Rather than calculating a complete distance matrix between all pairs of samples, it only calculates the distances between a set of landmarks and the samples.

# Project the four numeric iris measurements onto two dimensions with lmds().
library(lmds)
x <- as.matrix(iris[, 1:4])
dimred <- lmds(x, ndim = 2)

# qplot() is deprecated as of ggplot2 3.4.0; build the scatter plot with
# ggplot() directly so rendering does not emit a deprecation warning.
ggplot(
  tibble(comp_1 = dimred[, 1], comp_2 = dimred[, 2]),
  aes(x = comp_1, y = comp_2)
) +
  geom_point() +
  labs(title = "lmds()") +
  theme_classic()

# Same data reduced with classical MDS on the full pairwise distance matrix,
# for visual comparison with the lmds() result.
dimred <- cmdscale(dist(x))

# qplot() is deprecated as of ggplot2 3.4.0; build the scatter plot with
# ggplot() directly so rendering does not emit a deprecation warning.
ggplot(
  tibble(comp_1 = dimred[, 1], comp_2 = dimred[, 2]),
  aes(x = comp_1, y = comp_2)
) +
  geom_point() +
  labs(title = "cmdscale()") +
  theme_classic()

Execution time

The execution time of lmds() scales linearly with respect to the dataset size.

# Random sparse matrix from which increasingly large row subsets are drawn.
largex <- Matrix::rsparsematrix(nrow = 100000, ncol = 10000, density = 0.01)

# Time lmds() on growing subsets. Stop once a single run takes 10 s of user
# CPU time, or once the requested subset would exceed the rows available.
log <- list()
num_samples <- 100
time <- 0
while (time < 10 && num_samples <= nrow(largex)) {
  subx <- largex[seq_len(num_samples), ]
  time <- system.time({
    dimred <- lmds(subx)
  })[["user.self"]]
  # Columns are auto-named from the variables: method, num_samples, time.
  log[[length(log) + 1]] <- tibble(method = "lmds()", num_samples, time)
  # Keep the counter a whole number: a fractional value would be coerced by
  # seq_len(), so the logged size would not match the rows actually timed.
  num_samples <- ceiling(num_samples * 1.5)
}

# Time dist() + cmdscale() on growing subsets of largex, appending to the
# same `log` list. Stop once a single run takes 10 s of user CPU time, or
# once the requested subset would exceed the rows available.
num_samples <- 100
time <- 0
while (time < 10 && num_samples <= nrow(largex)) {
  subx <- largex[seq_len(num_samples), ]
  time <- system.time({
    # Named `distances` rather than `dist` to avoid masking stats::dist().
    distances <- dist(subx)
    dimred <- cmdscale(distances)
  })[["user.self"]]
  # Columns are auto-named from the variables: method, num_samples, time.
  log[[length(log) + 1]] <- tibble(method = "cmdscale()", num_samples, time)
  # Keep the counter a whole number: a fractional value would be coerced by
  # seq_len(), so the logged size would not match the rows actually timed.
  num_samples <- ceiling(num_samples * 1.5)
}

# Stack the per-run timing records into a single data frame.
logdf <- log %>% bind_rows()

# Timing versus subset size, one coloured line per method.
logdf %>%
  ggplot(aes(x = num_samples, y = time, colour = method)) +
  geom_point() +
  geom_line() +
  theme_classic()

Latest changes

Check out news(package = "lmds") or NEWS.md for a full list of changes.

# Print the text returned by dynutils::recent_news()
# (presumably the latest NEWS entries for this package -- confirm).
recent_changes <- dynutils::recent_news()
cat(recent_changes)


dynverse/lmds documentation built on Sept. 25, 2023, 4:39 p.m.