# inst/doc/Speeding_up_anticlustering.R

## ----include = FALSE----------------------------------------------------------
# Chunk defaults for the rendered vignette: code and its output are collapsed
# into one block, and every output line is prefixed with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")

# Seed the random number generator so that the simulated data and the random
# assignments further down are repeatable.
set.seed(1234)

## ----setup--------------------------------------------------------------------
library(anticlust)

## -----------------------------------------------------------------------------

# Time the "exchange" and the "local-maximum" method on the iris measurements
# (column 5, Species, is left out), splitting the rows into K groups.
K <- 3
system.time(
  anticlustering(iris[, -5], K = K, method = "exchange")
)
system.time(
  anticlustering(iris[, -5], K = K, method = "local-maximum")
)


## -----------------------------------------------------------------------------

# Same two timings as in the preceding chunk, followed by a third run of the
# "local-maximum" method that additionally sets repetitions = 10.
K <- 3
system.time(
  anticlustering(iris[, -5], K = K, method = "exchange")
)
system.time(
  anticlustering(iris[, -5], K = K, method = "local-maximum")
)
system.time(
  anticlustering(
    iris[, -5],
    K = K,
    method = "local-maximum",
    repetitions = 10
  )
)


## -----------------------------------------------------------------------------
# Simulate N observations on M standard-normal variables.
N <- 5000
M <- 2
data <- matrix(rnorm(N * M), nrow = N, ncol = M)

# Wall-clock timing of fast_anticlustering() with its default settings,
# i.e. all elements are used as exchange partners.
start <- Sys.time()
groups1 <- fast_anticlustering(data, K = 2)
difftime(Sys.time(), start)

## -----------------------------------------------------------------------------
# Same data and K as for `groups1`, but with k_neighbours = 20 (presumably
# limiting the exchange partners per element -- see ?fast_anticlustering).
# Timed the same way.
start <- Sys.time()
groups2 <- fast_anticlustering(data, K = 2, k_neighbours = 20)
difftime(Sys.time(), start)

## -----------------------------------------------------------------------------
# Objective value of each partition, printed one after the other so the run
# with default settings and the k_neighbours run can be compared.
variance_objective(data, groups1)
variance_objective(data, groups2)

## -----------------------------------------------------------------------------
# Fresh simulated data: N rows, M standard-normal columns, K groups.
N <- 1000
M <- 5
K <- 3
data <- matrix(rnorm(N * M), nrow = N, ncol = M)

# Run time with default arguments versus preclustering = TRUE.
system.time(
  anticlustering(data, K = K)
)
system.time(
  anticlustering(data, K = K, preclustering = TRUE)
)

## -----------------------------------------------------------------------------
# Random start assignment: the labels 1..K are recycled to length N (one label
# per iris row) and then shuffled. Printed in full and as a frequency table.
N <- nrow(iris)
K <- 3
initial_clusters <- sample(rep(seq_len(K), length.out = N))
initial_clusters
table(initial_clusters)

## -----------------------------------------------------------------------------
# Shuffled labels 1..(N / 10), recycled to length N. With N divisible by 10
# (as it is here) every label occurs exactly 10 times.
exchange_partners <- sample(rep(seq_len(N / 10), length.out = N))
exchange_partners
table(exchange_partners)

## -----------------------------------------------------------------------------
# Timing: first with `initial_clusters` as the only extra input, then with
# `exchange_partners` additionally passed through the `categories` argument.
system.time(
  anticlustering(iris[, -5], K = initial_clusters)
)
system.time(
  anticlustering(
    iris[, -5],
    K = initial_clusters,
    categories = exchange_partners
  )
)

## -----------------------------------------------------------------------------
# Repeat the comparison on a larger simulated data set:
# N rows, M standard-normal columns, K groups.
N <- 1000
M <- 2
K <- 5
data <- matrix(rnorm(M * N), nrow = N, ncol = M)

# Shuffled group labels (1..K) and shuffled partner labels (1..N / 10),
# both recycled to length N.
initial_clusters <- sample(rep(seq_len(K), length.out = N))
exchange_partners <- sample(rep(seq_len(N / 10), length.out = N))

# Run time without and with `exchange_partners` supplied as `categories`.
system.time(
  anticlustering(data, K = initial_clusters)
)
system.time(
  anticlustering(
    data,
    K = initial_clusters,
    categories = exchange_partners
  )
)

## -----------------------------------------------------------------------------
# Split iris into 5 groups with Species passed as `categories`, then
# cross-tabulate the resulting group membership against Species.
groups <- anticlustering(
  iris[, -5],
  K = 5,
  categories = iris$Species
)
table(groups, iris$Species)

## -----------------------------------------------------------------------------
# Draw a start assignment with categorical_sampling() and tabulate it against
# Species; the species counts are expected to come out even across the 5 groups.
initial_groups <- categorical_sampling(iris$Species, K = 5)
table(initial_groups, iris$Species)

## -----------------------------------------------------------------------------
# One shuffled partner label per iris row: labels 1..(N / 10) recycled to
# length N, so each label is used 10 times.
N <- nrow(iris)
exchange_partners <- sample(rep(seq_len(N / 10), length.out = N))

## -----------------------------------------------------------------------------
# Start from `initial_groups`. `categories` receives a two-column matrix:
# Species bound together with the exchange-partner labels.
groups <- anticlustering(
  iris[, -5],
  K = initial_groups,
  categories = cbind(iris$Species, exchange_partners)
)

## -----------------------------------------------------------------------------
# Cross-tabulate the final groups against Species.
table(groups, iris$Species)

## -----------------------------------------------------------------------------
# Large simulated data set: N rows, M standard-normal columns, K groups.
N <- 100000
M <- 3
K <- 5
data <- matrix(rnorm(M * N), nrow = N, ncol = M)

# Timed run of fast_anticlustering() on the output of
# kplus_moment_variables(data, T = 2), with exchange partners produced by
# generate_exchange_partners(10, N = N). Note that `T` is an argument name
# of kplus_moment_variables() here, not the logical shorthand for TRUE.
start <- Sys.time()
groups <- fast_anticlustering(
  kplus_moment_variables(data, T = 2),
  K = K,
  exchange_partners = generate_exchange_partners(10, N = N)
)
difftime(Sys.time(), start)

# Per-group means and standard deviations; they are expected to be similar.
mean_sd_tab(data, groups)

# Try the anticlust package in your browser
#
# Any scripts or data that you put into this service are public.
#
# anticlust documentation built on April 4, 2025, 1:03 a.m.