# inst/tinytest/test-equivalent-preclustering.R

library("anticlust")

# different options for preclustering have the same result - variance objective
#
# For each number of feature columns (M) and each value of K, random data
# with N = 3 * K rows is generated and partitioned in three different ways.
# The three assignment vectors are then compared element-wise.
for (M in 1:4) {
  for (K in 2:5) {
    N <- K * 3
    features <- matrix(rnorm(N * M), ncol = M)

    # determine a random seed to make the exchange method reproducible;
    # the same seed is set again directly before each anticlustering call
    seed <- sample(1000, size = 1)
    set.seed(seed)

    ## First option
    # Set `preclustering = TRUE`
    ac1 <- anticlustering(
      features,
      K = K,
      objective = "variance",
      preclustering = TRUE
    )

    ## Second option
    # Call `balanced_clustering` and use output as `categories` argument
    preclusters <- balanced_clustering(
      features,
      K = N / K
    )
    set.seed(seed)
    ac2 <- anticlustering(
      features,
      K = K,
      objective = "variance",
      categories = preclusters
    )

    ## Third option
    # Use `fast_anticlustering` function; the initial assignment and the
    # exchange partners are both derived from the preclusters
    set.seed(seed)
    ac3 <- fast_anticlustering(
      features,
      K = categorical_sampling(preclusters, K),
      exchange_partners = anticlust:::nearest_neighbours(
        features,
        N - 1,
        preclusters
      )
    )

    expect_true(all(ac1 == ac2))
    # Compare the third option against the second one; comparing `ac3` with
    # itself would always pass and leave `fast_anticlustering` unchecked
    expect_true(all(ac2 == ac3))

  }
}


# Distance objective: the three ways of combining anticlustering with
# preclustering must return the same assignment
for (n_features in seq_len(4)) {
  for (n_groups in 2:5) {
    n_elements <- 3 * n_groups
    dat <- matrix(rnorm(n_elements * n_features), ncol = n_features)

    # Draw a single seed and set it again before every anticlustering call,
    # so that the exchange method is reproducible across the three routes
    rng_seed <- sample(1000, size = 1)

    ## Route 1: let `anticlustering` do the preclustering itself
    set.seed(rng_seed)
    by_flag <- anticlustering(
      dat, K = n_groups, objective = "distance", preclustering = TRUE
    )

    ## Route 2: compute the preclusters via `balanced_clustering` and hand
    ## them over as `categories`
    precl <- balanced_clustering(dat, K = n_elements / n_groups)
    set.seed(rng_seed)
    by_categories <- anticlustering(
      dat, K = n_groups, objective = "distance", categories = precl
    )

    ## Route 3: same as route 2, but with distance input (the preclusters are
    ## passed as categories because the preclustering algorithm is slightly
    ## different for distance input)
    set.seed(rng_seed)
    by_dist_input <- anticlustering(
      dist(dat), K = n_groups, objective = "distance", categories = precl
    )

    expect_equal(all(by_flag == by_categories), TRUE)
    expect_equal(all(by_categories == by_dist_input), TRUE)
  }
}

# Try the anticlust package in your browser
#
# Any scripts or data that you put into this service are public.
#
# anticlust documentation built on April 4, 2025, 1:03 a.m.