tests/testthat/test-kcmeans.R

gen_data <- function(nobs = 1000, K0 = 3, K = 30, J = 2) {
  # Sample categorical variables
  X <- matrix(rnorm(nobs * J), nobs, J)
  Z <- sample(1:K, nobs, replace = T)
  # Create the low-dimensional latent categorical variable
  Z0 <- Z %% K0
  # Draw outcome variables
  y <- Z0 + rnorm(nobs)
  # Setup dataframe
  SimDat <- as.data.frame(X)
  SimDat$y <- y
  SimDat$Z <- as.matrix(Z)
  return(SimDat)
}#GEN_DATA

test_that("kcmeans computes", {
  # Generate data
  SimDat <- gen_data()
  # Get data from the included SimDat data
  y <- SimDat$y
  X <- SimDat$Z
  # Compute kcmeans
  kcmeans_fit <- kcmeans(y, X, K = 3)
  # Check output with expectations
  expect_equal(length(kcmeans_fit), 5)
})#TEST_THAT

test_that("kcmeans computes with additional controls", {
  # Generate data
  SimDat <- gen_data()
  # Get data from the included SimDat data
  y <- SimDat$y
  X <- cbind(SimDat$Z, SimDat$V1, SimDat$V2)
  # Compute kcmeans
  kcmeans_fit <- kcmeans(y, X, K = 3)
  # Check output with expectations
  expect_equal(length(kcmeans_fit), 5)
})#TEST_THAT

test_that("predict.kcmeans computes w/ unseen categories", {
  # Generate data
  SimDat <- gen_data()
  # Get data from the included SimDat data
  y <- SimDat$y
  X <- cbind(SimDat$Z, SimDat$V1, SimDat$V2)
  # Compute kcmeans
  kcmeans_fit <- kcmeans(y, X, K = 3)
  # Compute predictions w/ unseen categories
  newdata <- X
  newdata[1:20, 1] <- -22
  fitted_values <- predict(kcmeans_fit, newdata)
  fitted_clusters <- predict(kcmeans_fit, newdata, clusters = T)
  # Check output with expectations
  expect_equal(length(fitted_values), 1000)
  expect_equal(length(fitted_clusters), 1000)
})#TEST_THAT

test_that("predict.kcmeans does not scramble order", {
  # Generate data
  SimDat <- gen_data()
  # Get data from the included SimDat data
  y <- SimDat$y
  X <- cbind(SimDat$Z, SimDat$V1, SimDat$V2)
  # Compute kcmeans
  kcmeans_fit <- kcmeans(y, X, K = 3)
  # Compute initial predicted clusters
  cl_hat <- predict(kcmeans_fit, X, clusters = TRUE)
  # Generate random bootstrap sample
  indx <- sample(1:1000, 1000, replace = T)
  cl_hat_booth <- predict(kcmeans_fit, X[indx, ], clusters = TRUE)
  # Compare to previously predicted clusters
  cluster_equal = (1 - abs(cl_hat[indx] - cl_hat_booth)) == 1
  # Check output with expectations
  expect_equal(all(cluster_equal), TRUE)
})#TEST_THAT

Try the kcmeans package in your browser

Any scripts or data that you put into this service are public.

kcmeans documentation built on May 29, 2024, 8:17 a.m.