context("Select Number of Mixture Components")
test_that("Too low or non-integer 'K' value.", {
# Define simple data matrix
n <- 10
d <- 5
X <- matrix(rep(0, n*d), nrow = n)
# Less than 1.
K <- -1
expect_error(gclust(X, K), "Input 'K' must be greater than or equal to 1.")
# Not a number.
K <- "string"
expect_error(gclust(X, K), "Input 'K' must be an integer.")
K <- matrix(c(1, 0 , 0 ,1), nrow = 2)
expect_error(gclust(X, K), "Input 'K' must be an integer.")
})
test_that("'X' is not a proper matrix.", {
# Make data frame and vector as incorrect matrices.
n <- 10
X <- data.frame(rep("string1", n), rep("string2", n))
K <- 1
expect_error(gclust(X, K), "Input 'X' is not a numeric matrix.")
X <- rep(10, n)
K <- 1
expect_error(gclust(X, K), "Input 'X' is not a numeric matrix.")
})
test_that("Small K, separable cases.", {
# Test that gclust regonizes the true number of clusters
# when they are highly separated.
set.seed(123)
num_sims <- 10
num_right <- 0
num_wrong <- 0
g <- 2
n <- 50*g
for (s in 1:num_sims) {
# Selects two clusters for mixture of two highly centered components.
X1 <- rnorm(n/g, 0, 0.5)
X2 <- rnorm(n/g, 5, 0.5)
X <- as.matrix(c(X1, X2), nrow = n)
K <- 5
ghat <- gclust(X, K)
if (ghat == g) {
num_right <- num_right + 1
} else {
num_wrong <- num_wrong + 1
}
}
expect_true(num_right > num_wrong)
# Same test for 3 components.
set.seed(789)
g <- 3
num_right <- 0
num_wrong <- 0
n <- 50*g
for (s in 1:num_sims) {
X1 <- rnorm(n/g, 0, 0.5)
X2 <- rnorm(n/g, 4, 0.5)
X3 <- rnorm(n/g, 8, 0.5)
X <- as.matrix(c(X1, X2, X3), nrow = n)
K <- 5
ghat <- gclust(X, K)
if (ghat == g) {
num_right <- num_right + 1
} else {
num_wrong <- num_wrong + 1
}
}
expect_true(num_right > num_wrong)
})
test_that("BIC higher for correct model than misspecified model.", {
set.seed(123)
num_sims <- 20
result <- lapply(1:num_sims, function(i) {
# Generate 3-Block SBM
n <- 120
num_class1 <- n/3
num_class2 <- n/3
# Params
num_class3 <- n - num_class1 - num_class2
assignments <- c(rep(1, num_class1), rep(2, num_class2), rep(3, num_class3))
B <- matrix(c(0.8, 0.3, 0.2,
0.3, 0.8, 0.3,
0.2, 0.3, 0.8), nrow = 3)
# Simulation.
g <- igraph::sample_sbm(n,
pref.matrix=B,
block.sizes=c(num_class1, num_class2, num_class3))
# Embed.
X <- igraph::embed_adjacency_matrix(g, 3)$X
# Model selection.
Kmax <- 5
Khat <- gclust(X, Kmax)
# Cluster. In one we choose BIC-optimal K (likely to be 3).
# In the other, we force a clustering with K = 2. gclust should select a K
# with the higher BIC.
bic_gclust <- mclust::Mclust(X, Khat)$bic
bic_k2gmm <- mclust::Mclust(X, 2)$bic
return(list(bic_gclust = bic_gclust, bic_k2gmm = bic_k2gmm))
})
# Split results into separate vectors.
result_bic_gclust <- sapply(result, function(res){res$bic_gclust})
result_bic_k2gmm <- sapply(result, function(res) {res$bic_k2gmm})
# Test difference in BIC via Wilcoxon Test.
alpha <- 0.05
pval <- wilcox.test(result_bic_gclust, result_bic_k2gmm, alt='greater', exact=FALSE)$p.value
expect_lt(pval, alpha)
})
Add the following code to your website.
For more information on customizing the embed code, read Embedding Snippets.