tests/testthat/test-plnmixture.R

context("test-plnmixture")

library(purrr)
data(trichoptera)
## use a subset to save some time
trichoptera <- prepare_data(trichoptera$Abundance[1:20, 1:5], trichoptera$Covariate[1:20, ])

mix_wt <- PLNmixture(Abundance ~ 1 + offset(log(Offset)), clusters = 1:3, data = trichoptera,
                      control = PLNmixture_param(smoothing = "none"))
mix_wo <- PLNmixture(Abundance ~ 0 + offset(log(Offset)), clusters = 1:3, data = trichoptera,
                     control = PLNmixture_param(smoothing = "none"))

models_sphr <- PLNmixture(Abundance ~ 1 + offset(log(Offset)), clusters = 1:3, data = trichoptera,
                          control = PLNmixture_param(covariance = "spherical", smoothing = "none"))

models_diag <- PLNmixture(Abundance ~ 1 + offset(log(Offset)), clusters = 1:3, data = trichoptera,
                          control = PLNmixture_param(covariance = "diagonal", smoothing = "none"))

models_full <- PLNmixture(Abundance ~ 1 + offset(log(Offset)), clusters = 1:3, data = trichoptera,
                          control = PLNmixture_param(covariance = "full", smoothing = "none"))

n <- nrow(trichoptera$Abundance)
p <- ncol(trichoptera$Abundance)
k <- 3
d <- 0

### ======================== GENERAL
test_that("PLN works for abitrary cluster sequences when smoothing is requested", {
  expect_is(PLNmixture(
    Abundance ~ 1 + offset(log(Offset)), clusters = c(2, 4),
    data = trichoptera,
    control = PLNmixture_param(smoothing = "both")
  ), "PLNmixturefamily")
})


### ============================================================================
###
### SPHERICAL, NO COVARIATE
model  <- getModel(models_sphr, k)

test_that("Check that PLNmixture is running and robust (spherical variant)",  {

  expect_is(models_sphr, "PLNmixturefamily")
  expect_is(model , "PLNmixturefit")
  expect_is(model$components[[1]], "PLNfit")
  expect_is(model$components[[2]], "PLNfit")
  expect_is(model$components[[3]], "PLNfit")
  expect_equal(model$components[[1]]$vcov_model, "spherical")
  expect_equal(model$components[[2]]$vcov_model, "spherical")
  expect_equal(model$components[[3]]$vcov_model, "spherical")

  expect_is(plot(models_sphr, reverse = TRUE), "ggplot")
  expect_is(plot(models_diag, reverse = TRUE), "ggplot")
  expect_is(plot(models_full, reverse = TRUE), "ggplot")
  expect_is(plot(models_sphr), "ggplot")
  expect_is(plot(models_diag), "ggplot")
  expect_is(plot(models_full), "ggplot")
  expect_is(plot(models_sphr, type = 'diagnostic'), "ggplot")
  expect_is(plot(models_diag, type = 'diagnostic'), "ggplot")
  expect_is(plot(models_full, type = 'diagnostic'), "ggplot")

  expect_error(PLNmixture(Abundance ~ 0 + offset(log(Offset)), clusters =-2, data = trichoptera))

  expect_error(PLNmixture(Abundance ~ 0, weights = rep(1.0, nrow(trichoptera)), data = trichoptera))

  expect_equal(sum(map_dbl(mix_wt$models, "loglik")), sum(map_dbl(mix_wo$models, "loglik")), tolerance = 1e-1)

})

test_that("PLNmixture fit: check classes, getters and field access",  {

  expect_is(model, "PLNmixturefit")
  expect_equal(model$n, n)
  expect_equal(model$p, p)
  expect_equal(model$k, k)
  expect_equal(model$d, d)

  expect_equal(model$model_par$Mu, model$group_means)
  expect_equal(model$model_par$Sigma, sigma(model))
  expect_equal(model$model_par$Pi, model$mixtureParam)
  expect_true(inherits(model$mixtureParam   , "numeric"))
  expect_true(inherits(model$group_means    , "data.frame"))
  expect_true(inherits(model$model_par$Sigma, "list"))
  expect_true(all(map_lgl(model$model_par$Sigma, inherits, "dgCMatrix")))

  ## fields and active bindings
   expect_equal(dim(model$model_par$Theta), c(d, p))
   expect_equal(dim(model$model_par$Mu), c(p, k))
   expect_true(all(map_lgl(model$model_par$Sigma, ~all.equal(dim(.x), c(p,p)))))
   expect_equal(length(model$mixtureParam), k)
   expect_equal(sum(model$loglik_vec), model$loglik)
   expect_lt(model$BIC, model$loglik)
   expect_gt(model$R_squared, 0)
   expect_equal(model$nb_param, p * d + (k - 1) + k * (1 + p))

   ## S3 methods
   expect_equal(dim(fitted(model)), c(n, p))
   expect_equal(sigma(model), model$model_par$Sigma)
   expect_equal(coef(model), matrix(0, 0, p))
   expect_equal(coef(model, "main")      , model$model_par$Theta)
   expect_equal(coef(model, "means")     , model$model_par$Mu)
   expect_equal(coef(model, "covariance"), model$model_par$Sigma)
   expect_equal(coef(model, "mixture")   , model$model_par$Pi)

   expect_true(inherits(plot(model, type = "pca"   , plot = FALSE), "ggplot"))
   expect_true(inherits(plot(model, type = "matrix", plot = FALSE), "ggplot"))

   ## R6 methods
   expect_true(inherits(model$plot_clustering_pca(plot = FALSE), "ggplot"))
   expect_true(inherits(model$plot_clustering_data(plot = FALSE), "ggplot"))
})

test_that("PLNmixture fit: check print message",  {
  output <- paste(
"Poisson Lognormal mixture model with 3 components and spherical covariances.",
"* Useful fields",
"    $posteriorProb, $memberships, $mixtureParam, $group_means",
"    $model_par, $latent, $latent_pos, $optim_par",
"    $loglik, $BIC, $ICL, $loglik_vec, $nb_param, $criteria",
"    $component[[i]] (a PLNfit with associated methods and fields)",
"* Useful S3 methods",
"    print(), coef(), sigma(), fitted(), predict()",
sep="\n"
)
  expect_output(model$show(),
                output,
                fixed = TRUE)
})

test_that("Predictions have the right dimensions.", {
  predictions_response <- predict(model, newdata = trichoptera, type = "response")
  predictions_post     <- predict(model, newdata = trichoptera, "posterior")
  predictions_score    <- predict(model, newdata = trichoptera, type = "position")
  ## Train = Test
  expect_length(predictions_response, n)
  expect_is(predictions_response, "factor")
  expect_equal(dim(predictions_post),  c(n, k))
  expect_equal(dim(predictions_score), c(n, p))
  ## Posterior probabilities are between 0 and 1
  expect_lte(max(predictions_post), 1)
  expect_gte(min(predictions_post), 0)


  ## Train != Test
  test <- 1:nrow(trichoptera) < (nrow(trichoptera)/2)
  expect_equal(dim(predict(model, newdata = trichoptera[test, ], type = "posterior")), c(sum(test), k))

})

test_that("Predictions are not affected by inclusion of an intercept.", {
  expect_equal(dim(predict(getModel(mix_wo, 3), newdata = trichoptera)),
               dim(predict(getModel(mix_wt, 3), newdata = trichoptera)))
})

### ============================================================================
###
### DIAGONAL, NO COVARIATE
model <- getModel(models_diag, k)
test_that("Diagonal model of the covariance is working", {

  expect_is(models_diag, "PLNmixturefamily")
  expect_is(model , "PLNmixturefit")
  expect_is(model$components[[1]], "PLNfit")
  expect_is(model$components[[2]], "PLNfit")
  expect_is(model$components[[3]], "PLNfit")
  expect_equal(model$components[[1]]$vcov_model, "diagonal")
  expect_equal(model$components[[2]]$vcov_model, "diagonal")
  expect_equal(model$components[[3]]$vcov_model, "diagonal")

  expect_equal(model$n, n)
  expect_equal(model$p, p)
  expect_equal(model$k, k)
  expect_equal(model$d, d)

  expect_equal(model$model_par$Mu, model$group_means)
  expect_equal(model$model_par$Sigma, sigma(model))
  expect_equal(model$model_par$Pi, model$mixtureParam)
  expect_true(inherits(model$mixtureParam   , "numeric"))
  expect_true(inherits(model$group_means    , "data.frame"))
  expect_true(inherits(model$model_par$Sigma, "list"))
  expect_true(all(map_lgl(model$model_par$Sigma, inherits, "dgCMatrix")))

  ## fields and active bindings
   expect_equal(dim(model$model_par$Theta), c(d, p))
   expect_equal(dim(model$model_par$Mu), c(p, k))
   expect_true(all(map_lgl(model$model_par$Sigma, ~all.equal(dim(.x), c(p,p)))))
   expect_equal(length(model$mixtureParam), k)
   expect_equal(sum(model$loglik_vec), model$loglik)
   expect_lt(model$BIC, model$loglik)
   expect_gt(model$R_squared, 0)
   expect_equal(model$nb_param, p * d + (k - 1) + 2 * k * p)

   ## S3 methods
   expect_equal(dim(fitted(model)), c(n, p))
   expect_equal(sigma(model), model$model_par$Sigma)
   expect_equal(coef(model), matrix(0, 0, p))
   expect_equal(coef(model, "main")      , model$model_par$Theta)
   expect_equal(coef(model, "means")     , model$model_par$Mu)
   expect_equal(coef(model, "covariance"), model$model_par$Sigma)
   expect_equal(coef(model, "mixture")   , model$model_par$Pi)

   expect_true(inherits(plot(model, type = "pca"   , plot = FALSE), "ggplot"))
   expect_true(inherits(plot(model, type = "matrix", plot = FALSE), "ggplot"))

   ## R6 methods
   expect_true(inherits(model$plot_clustering_pca(plot = FALSE), "ggplot"))
   expect_true(inherits(model$plot_clustering_data(plot = FALSE), "ggplot"))

   ## Prediction
   predictions_response <- predict(model, newdata = trichoptera, type = "response")
   predictions_post     <- predict(model, newdata = trichoptera, "posterior")
   predictions_score    <- predict(model, newdata = trichoptera, type = "position")
   ## Train = Test
   expect_length(predictions_response, n)
   expect_is(predictions_response, "factor")
   expect_equal(dim(predictions_post),  c(n, k))
   expect_equal(dim(predictions_score), c(n, p))
   ## Posterior probabilities are between 0 and 1
   expect_lte(max(predictions_post), 1)
   expect_gte(min(predictions_post), 0)

   ## Train != Test
   test <- 1:nrow(trichoptera) < (nrow(trichoptera)/2)
   expect_equal(dim(predict(model, newdata = trichoptera[test, ], type = "posterior")), c(sum(test), k))

})

### ============================================================================
###
### FULL, NO COVARIATE
model <- getModel(models_full, k)
test_that("Full model of the covariance is working", {

  expect_is(models_diag, "PLNmixturefamily")
  expect_is(model , "PLNmixturefit")
  expect_is(model$components[[1]], "PLNfit")
  expect_is(model$components[[2]], "PLNfit")
  expect_is(model$components[[3]], "PLNfit")
  expect_equal(model$components[[1]]$vcov_model, "full")
  expect_equal(model$components[[2]]$vcov_model, "full")
  expect_equal(model$components[[3]]$vcov_model, "full")

  expect_equal(model$n, n)
  expect_equal(model$p, p)
  expect_equal(model$k, k)
  expect_equal(model$d, d)

  expect_equal(model$model_par$Mu, model$group_means)
  expect_equal(model$model_par$Sigma, sigma(model))
  expect_equal(model$model_par$Pi, model$mixtureParam)
  expect_true(inherits(model$mixtureParam   , "numeric"))
  expect_true(inherits(model$group_means    , "data.frame"))
  expect_true(inherits(model$model_par$Sigma, "list"))
  expect_true(all(map_lgl(model$model_par$Sigma, inherits, "matrix")))

  ## fields and active bindings
   expect_equal(dim(model$model_par$Theta), c(d, p))
   expect_equal(dim(model$model_par$Mu), c(p, k))
   expect_true(all(map_lgl(model$model_par$Sigma, ~all.equal(dim(.x), c(p,p)))))
   expect_equal(length(model$mixtureParam), k)
   expect_equal(sum(model$loglik_vec), model$loglik)
   expect_lt(model$BIC, model$loglik)
   expect_gt(model$R_squared, 0)
   expect_equal(model$nb_param, p * d + (k - 1) + k * (p + p * (p + 1) / 2) )

   ## S3 methods
   expect_equal(dim(fitted(model)), c(n, p))
   expect_equal(sigma(model), model$model_par$Sigma)
   expect_equal(coef(model), matrix(0, 0, p))
   expect_equal(coef(model, "main")      , model$model_par$Theta)
   expect_equal(coef(model, "means")     , model$model_par$Mu)
   expect_equal(coef(model, "covariance"), model$model_par$Sigma)
   expect_equal(coef(model, "mixture")   , model$model_par$Pi)

   expect_true(inherits(plot(model, type = "pca"   , plot = FALSE), "ggplot"))
   expect_true(inherits(plot(model, type = "matrix", plot = FALSE), "ggplot"))

   ## R6 methods
   expect_true(inherits(model$plot_clustering_pca(plot = FALSE), "ggplot"))
   expect_true(inherits(model$plot_clustering_data(plot = FALSE), "ggplot"))

   ## Prediction
   predictions_response <- predict(model, newdata = trichoptera, type = "response")
   predictions_post     <- predict(model, newdata = trichoptera, "posterior")
   predictions_score    <- predict(model, newdata = trichoptera, type = "position")
   ## Train = Test
   expect_length(predictions_response, n)
   expect_is(predictions_response, "factor")
   expect_equal(dim(predictions_post),  c(n, k))
   expect_equal(dim(predictions_score), c(n, p))
   ## Posterior probabilities are between 0 and 1
   expect_lte(max(predictions_post), 1)
   expect_gte(min(predictions_post), 0)

   ## Train != Test
   test <- 1:nrow(trichoptera) < (nrow(trichoptera)/2)
   expect_equal(dim(predict(model, newdata = trichoptera[test, ], type = "posterior")), c(sum(test), k))

})

#### With COVARIATE

models_sphr_cov <- PLNmixture(Abundance ~ 1 + Precipitation + offset(log(Offset)),
                              clusters = 1:3, data = trichoptera,
                              control = PLNmixture_param(covariance = "spherical", smoothing = "none"))

models_diag_cov <- PLNmixture(Abundance ~ 1 + Precipitation + offset(log(Offset)),
                              clusters = 1:3, data = trichoptera,
                              control = PLNmixture_param(covariance = "diagonal", smoothing = "none"))

models_full_cov <- PLNmixture(Abundance ~ 1 + Precipitation + offset(log(Offset)),
                              clusters = 1:3, data = trichoptera,
                              control = PLNmixture_param(covariance = "full", smoothing = "none"))
d <- 1

model  <- getModel(models_sphr_cov, k)
test_that("Spherical model of the covariance is working with covariate", {

  expect_is(models_sphr, "PLNmixturefamily")
  expect_is(model , "PLNmixturefit")
  expect_is(model$components[[1]], "PLNfit")
  expect_is(model$components[[2]], "PLNfit")
  expect_is(model$components[[3]], "PLNfit")
  expect_equal(model$components[[1]]$vcov_model, "spherical")
  expect_equal(model$components[[2]]$vcov_model, "spherical")
  expect_equal(model$components[[3]]$vcov_model, "spherical")

  expect_equal(model$model_par$Mu, model$group_means)
  expect_equal(model$model_par$Sigma, sigma(model))
  expect_equal(model$model_par$Pi, model$mixtureParam)
  expect_true(inherits(model$mixtureParam   , "numeric"))
  expect_true(inherits(model$group_means    , "data.frame"))
  expect_true(inherits(model$model_par$Sigma, "list"))
  expect_true(all(map_lgl(model$model_par$Sigma, inherits, "dgCMatrix")))

   ## fields and active bindings
   expect_equal(dim(model$model_par$Theta), c(d, p))
   expect_equal(dim(model$model_par$Mu), c(p, k))
   expect_true(all(map_lgl(model$model_par$Sigma, ~all.equal(dim(.x), c(p,p)))))
   expect_equal(length(model$mixtureParam), k)
   expect_equal(sum(model$loglik_vec), model$loglik)
   expect_lt(model$BIC, model$loglik)
   expect_gt(model$R_squared, 0)
   expect_equal(model$nb_param, p * d + (k - 1) + k * (p + 1) )

   ## S3 methods
   expect_equal(dim(fitted(model)), c(n, p))
   expect_equal(sigma(model), model$model_par$Sigma)
   expect_equal(coef(model, "main")      , model$model_par$Theta)
   expect_equal(coef(model, "means")     , model$model_par$Mu)
   expect_equal(coef(model, "covariance"), model$model_par$Sigma)
   expect_equal(coef(model, "mixture")   , model$model_par$Pi)

   expect_true(inherits(plot(model, type = "pca"   , plot = FALSE), "ggplot"))
   expect_true(inherits(plot(model, type = "matrix", plot = FALSE), "ggplot"))

   ## R6 methods
   expect_true(inherits(model$plot_clustering_pca(plot = FALSE), "ggplot"))
   expect_true(inherits(model$plot_clustering_data(plot = FALSE), "ggplot"))

   ## Prediction
   predictions_response <- predict(model, newdata = trichoptera, type = "response")
   predictions_post     <- predict(model, newdata = trichoptera, "posterior")
   predictions_score    <- predict(model, newdata = trichoptera, type = "position")
   ## Train = Test
   expect_length(predictions_response, n)
   expect_is(predictions_response, "factor")
   expect_equal(dim(predictions_post),  c(n, k))
   expect_equal(dim(predictions_score), c(n, p))
   ## Posterior probabilities are between 0 and 1
   expect_lte(max(predictions_post), 1)
   expect_gte(min(predictions_post), 0)

   ## Train != Test
   test <- 1:nrow(trichoptera) < (nrow(trichoptera)/2)
   expect_equal(dim(predict(model, newdata = trichoptera[test, ], type = "posterior")), c(sum(test), k))

})

model  <- getModel(models_diag_cov, k)
test_that("Diagonal model of the covariance is working with covariate", {

  expect_is(models_sphr, "PLNmixturefamily")
  expect_is(model , "PLNmixturefit")
  expect_is(model$components[[1]], "PLNfit")
  expect_is(model$components[[2]], "PLNfit")
  expect_is(model$components[[3]], "PLNfit")
  expect_equal(model$components[[1]]$vcov_model, "diagonal")
  expect_equal(model$components[[2]]$vcov_model, "diagonal")
  expect_equal(model$components[[3]]$vcov_model, "diagonal")

  expect_equal(model$model_par$Mu, model$group_means)
  expect_equal(model$model_par$Sigma, sigma(model))
  expect_equal(model$model_par$Pi, model$mixtureParam)
  expect_true(inherits(model$mixtureParam   , "numeric"))
  expect_true(inherits(model$group_means    , "data.frame"))
  expect_true(inherits(model$model_par$Sigma, "list"))
  expect_true(all(map_lgl(model$model_par$Sigma, inherits, "dgCMatrix")))

   ## fields and active bindings
   expect_equal(dim(model$model_par$Theta), c(d, p))
   expect_equal(dim(model$model_par$Mu), c(p, k))
   expect_true(all(map_lgl(model$model_par$Sigma, ~all.equal(dim(.x), c(p,p)))))
   expect_equal(length(model$mixtureParam), k)
   expect_equal(sum(model$loglik_vec), model$loglik)
   expect_lt(model$BIC, model$loglik)
   expect_gt(model$R_squared, 0)
   expect_equal(model$nb_param, p * d + (k - 1) + k * (p + p) )

   ## S3 methods
   expect_equal(dim(fitted(model)), c(n, p))
   expect_equal(sigma(model), model$model_par$Sigma)
   expect_equal(coef(model, "main")      , model$model_par$Theta)
   expect_equal(coef(model, "means")     , model$model_par$Mu)
   expect_equal(coef(model, "covariance"), model$model_par$Sigma)
   expect_equal(coef(model, "mixture")   , model$model_par$Pi)

   expect_true(inherits(plot(model, type = "pca"   , plot = FALSE), "ggplot"))
   expect_true(inherits(plot(model, type = "matrix", plot = FALSE), "ggplot"))

   ## R6 methods
   expect_true(inherits(model$plot_clustering_pca(plot = FALSE), "ggplot"))
   expect_true(inherits(model$plot_clustering_data(plot = FALSE), "ggplot"))

   ## Prediction
   predictions_response <- predict(model, newdata = trichoptera, type = "response")
   predictions_post     <- predict(model, newdata = trichoptera, "posterior")
   predictions_score    <- predict(model, newdata = trichoptera, type = "position")
   ## Train = Test
   expect_length(predictions_response, n)
   expect_is(predictions_response, "factor")
   expect_equal(dim(predictions_post),  c(n, k))
   expect_equal(dim(predictions_score), c(n, p))
   ## Posterior probabilities are between 0 and 1
   expect_lte(max(predictions_post), 1)
   expect_gte(min(predictions_post), 0)

   ## Train != Test
   test <- 1:nrow(trichoptera) < (nrow(trichoptera)/2)
   expect_equal(dim(predict(model, newdata = trichoptera[test, ], type = "posterior")), c(sum(test), k))

})

model  <- getModel(models_full_cov, k)
test_that("Full model of the covariance is working with covariate", {

  expect_is(models_sphr, "PLNmixturefamily")
  expect_is(model , "PLNmixturefit")
  expect_is(model$components[[1]], "PLNfit")
  expect_is(model$components[[2]], "PLNfit")
  expect_is(model$components[[3]], "PLNfit")
  expect_equal(model$components[[1]]$vcov_model, "full")
  expect_equal(model$components[[2]]$vcov_model, "full")
  expect_equal(model$components[[3]]$vcov_model, "full")

  expect_equal(model$model_par$Mu, model$group_means)
  expect_equal(model$model_par$Sigma, sigma(model))
  expect_equal(model$model_par$Pi, model$mixtureParam)
  expect_true(inherits(model$mixtureParam   , "numeric"))
  expect_true(inherits(model$group_means    , "data.frame"))
  expect_true(inherits(model$model_par$Sigma, "list"))
  expect_true(all(map_lgl(model$model_par$Sigma, inherits, "matrix")))

   ## fields and active bindings
   expect_equal(dim(model$model_par$Theta), c(d, p))
   expect_equal(dim(model$model_par$Mu), c(p, k))
   expect_true(all(map_lgl(model$model_par$Sigma, ~all.equal(dim(.x), c(p,p)))))
   expect_equal(length(model$mixtureParam), k)
   expect_equal(sum(model$loglik_vec), model$loglik)
   expect_lt(model$BIC, model$loglik)
   expect_lt(model$ICL, model$loglik)
   expect_gt(model$R_squared, 0)
   expect_equal(model$nb_param, p * d + (k - 1) + k * (p + p * (p+1)/2) )

   ## S3 methods
   expect_equal(dim(fitted(model)), c(n, p))
   expect_equal(sigma(model), model$model_par$Sigma)
   expect_equal(coef(model, "main")      , model$model_par$Theta)
   expect_equal(coef(model, "means")     , model$model_par$Mu)
   expect_equal(coef(model, "covariance"), model$model_par$Sigma)
   expect_equal(coef(model, "mixture")   , model$model_par$Pi)

   expect_true(inherits(plot(model, type = "pca"   , plot = FALSE), "ggplot"))
   expect_true(inherits(plot(model, type = "matrix", plot = FALSE), "ggplot"))

   ## R6 methods
   expect_true(inherits(model$plot_clustering_pca(plot = FALSE), "ggplot"))
   expect_true(inherits(model$plot_clustering_data(plot = FALSE), "ggplot"))

   ## Prediction
   predictions_response <- predict(model, newdata = trichoptera, type = "response")
   predictions_post     <- predict(model, newdata = trichoptera, "posterior")
   predictions_score    <- predict(model, newdata = trichoptera, type = "position")
   ## Train = Test
   expect_length(predictions_response, n)
   expect_is(predictions_response, "factor")
   expect_equal(dim(predictions_post),  c(n, k))
   expect_equal(dim(predictions_score), c(n, p))
   ## Posterior probabilities are between 0 and 1
   expect_lte(max(predictions_post), 1)
   expect_gte(min(predictions_post), 0)

   ## Train != Test
   test <- 1:nrow(trichoptera) < (nrow(trichoptera)/2)
   expect_equal(dim(predict(model, newdata = trichoptera[test, ], type = "posterior")), c(sum(test), k))

})
PLN-team/PLNmodels documentation built on April 15, 2024, 9:01 a.m.